1 /* Copyright (C) 2013-2014 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 GCC is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24 #ifndef _IMMINTRIN_H_INCLUDED 25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." 26 #endif 27 28 #ifndef _AVX512FINTRIN_H_INCLUDED 29 #define _AVX512FINTRIN_H_INCLUDED 30 31 #ifndef __AVX512F__ 32 #pragma GCC push_options 33 #pragma GCC target("avx512f") 34 #define __DISABLE_AVX512F__ 35 #endif /* __AVX512F__ */ 36 37 /* Internal data types for implementing the intrinsics. */ 38 typedef double __v8df __attribute__ ((__vector_size__ (64))); 39 typedef float __v16sf __attribute__ ((__vector_size__ (64))); 40 typedef long long __v8di __attribute__ ((__vector_size__ (64))); 41 typedef int __v16si __attribute__ ((__vector_size__ (64))); 42 typedef short __v32hi __attribute__ ((__vector_size__ (64))); 43 typedef char __v64qi __attribute__ ((__vector_size__ (64))); 44 45 /* The Intel API is flexible enough that we must allow aliasing with other 46 vector types, and their scalar components. 
*/ 47 typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__)); 48 typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__)); 49 typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); 50 51 typedef unsigned char __mmask8; 52 typedef unsigned short __mmask16; 53 54 extern __inline __m512i 55 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 56 _mm512_set_epi64 (long long __A, long long __B, long long __C, 57 long long __D, long long __E, long long __F, 58 long long __G, long long __H) 59 { 60 return __extension__ (__m512i) (__v8di) 61 { __H, __G, __F, __E, __D, __C, __B, __A }; 62 } 63 64 /* Create the vector [A B C D E F G H I J K L M N O P]. */ 65 extern __inline __m512i 66 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 67 _mm512_set_epi32 (int __A, int __B, int __C, int __D, 68 int __E, int __F, int __G, int __H, 69 int __I, int __J, int __K, int __L, 70 int __M, int __N, int __O, int __P) 71 { 72 return __extension__ (__m512i)(__v16si) 73 { __P, __O, __N, __M, __L, __K, __J, __I, 74 __H, __G, __F, __E, __D, __C, __B, __A }; 75 } 76 77 extern __inline __m512d 78 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 79 _mm512_set_pd (double __A, double __B, double __C, double __D, 80 double __E, double __F, double __G, double __H) 81 { 82 return __extension__ (__m512d) 83 { __H, __G, __F, __E, __D, __C, __B, __A }; 84 } 85 86 extern __inline __m512 87 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 88 _mm512_set_ps (float __A, float __B, float __C, float __D, 89 float __E, float __F, float __G, float __H, 90 float __I, float __J, float __K, float __L, 91 float __M, float __N, float __O, float __P) 92 { 93 return __extension__ (__m512) 94 { __P, __O, __N, __M, __L, __K, __J, __I, 95 __H, __G, __F, __E, __D, __C, __B, __A }; 96 } 97 98 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \ 99 _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0) 
100 101 #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \ 102 e8,e9,e10,e11,e12,e13,e14,e15) \ 103 _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0) 104 105 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \ 106 _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0) 107 108 #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \ 109 _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0) 110 111 extern __inline __m512 112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 113 _mm512_undefined_ps (void) 114 { 115 __m512 __Y = __Y; 116 return __Y; 117 } 118 119 extern __inline __m512d 120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 121 _mm512_undefined_pd (void) 122 { 123 __m512d __Y = __Y; 124 return __Y; 125 } 126 127 extern __inline __m512i 128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 129 _mm512_undefined_si512 (void) 130 { 131 __m512i __Y = __Y; 132 return __Y; 133 } 134 135 extern __inline __m512i 136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 137 _mm512_set1_epi8 (char __A) 138 { 139 return __extension__ (__m512i)(__v64qi) 140 { __A, __A, __A, __A, __A, __A, __A, __A, 141 __A, __A, __A, __A, __A, __A, __A, __A, 142 __A, __A, __A, __A, __A, __A, __A, __A, 143 __A, __A, __A, __A, __A, __A, __A, __A, 144 __A, __A, __A, __A, __A, __A, __A, __A, 145 __A, __A, __A, __A, __A, __A, __A, __A, 146 __A, __A, __A, __A, __A, __A, __A, __A, 147 __A, __A, __A, __A, __A, __A, __A, __A }; 148 } 149 150 extern __inline __m512i 151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 152 _mm512_set1_epi16 (short __A) 153 { 154 return __extension__ (__m512i)(__v32hi) 155 { __A, __A, __A, __A, __A, __A, __A, __A, 156 __A, __A, __A, __A, __A, __A, __A, __A, 157 __A, __A, __A, __A, __A, __A, __A, __A, 158 __A, __A, __A, __A, __A, __A, __A, __A }; 159 } 160 161 extern __inline __m512d 162 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 163 _mm512_set1_pd (double __A) 164 { 165 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__ 166 (__v2df) { __A, }, 167 (__v8df) 168 _mm512_undefined_pd (), 169 (__mmask8) -1); 170 } 171 172 extern __inline __m512 173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 174 _mm512_set1_ps (float __A) 175 { 176 return (__m512) __builtin_ia32_broadcastss512 (__extension__ 177 (__v4sf) { __A, }, 178 (__v16sf) 179 _mm512_undefined_ps (), 180 (__mmask16) -1); 181 } 182 183 /* Create the vector [A B C D A B C D A B C D A B C D]. */ 184 extern __inline __m512i 185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 186 _mm512_set4_epi32 (int __A, int __B, int __C, int __D) 187 { 188 return __extension__ (__m512i)(__v16si) 189 { __D, __C, __B, __A, __D, __C, __B, __A, 190 __D, __C, __B, __A, __D, __C, __B, __A }; 191 } 192 193 extern __inline __m512i 194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 195 _mm512_set4_epi64 (long long __A, long long __B, long long __C, 196 long long __D) 197 { 198 return __extension__ (__m512i) (__v8di) 199 { __D, __C, __B, __A, __D, __C, __B, __A }; 200 } 201 202 extern __inline __m512d 203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 204 _mm512_set4_pd (double __A, double __B, double __C, double __D) 205 { 206 return __extension__ (__m512d) 207 { __D, __C, __B, __A, __D, __C, __B, __A }; 208 } 209 210 extern __inline __m512 211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 212 _mm512_set4_ps (float __A, float __B, float __C, float __D) 213 { 214 return __extension__ (__m512) 215 { __D, __C, __B, __A, __D, __C, __B, __A, 216 __D, __C, __B, __A, __D, __C, __B, __A }; 217 } 218 219 #define _mm512_setr4_epi64(e0,e1,e2,e3) \ 220 _mm512_set4_epi64(e3,e2,e1,e0) 221 222 #define _mm512_setr4_epi32(e0,e1,e2,e3) \ 223 _mm512_set4_epi32(e3,e2,e1,e0) 224 225 #define _mm512_setr4_pd(e0,e1,e2,e3) \ 226 
_mm512_set4_pd(e3,e2,e1,e0) 227 228 #define _mm512_setr4_ps(e0,e1,e2,e3) \ 229 _mm512_set4_ps(e3,e2,e1,e0) 230 231 extern __inline __m512 232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 233 _mm512_setzero_ps (void) 234 { 235 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 236 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 237 } 238 239 extern __inline __m512d 240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 241 _mm512_setzero_pd (void) 242 { 243 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 244 } 245 246 extern __inline __m512i 247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 248 _mm512_setzero_epi32 (void) 249 { 250 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; 251 } 252 253 extern __inline __m512i 254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 255 _mm512_setzero_si512 (void) 256 { 257 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; 258 } 259 260 extern __inline __m512d 261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 262 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) 263 { 264 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A, 265 (__v8df) __W, 266 (__mmask8) __U); 267 } 268 269 extern __inline __m512d 270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 271 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) 272 { 273 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A, 274 (__v8df) 275 _mm512_setzero_pd (), 276 (__mmask8) __U); 277 } 278 279 extern __inline __m512 280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 281 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) 282 { 283 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A, 284 (__v16sf) __W, 285 (__mmask16) __U); 286 } 287 288 extern __inline __m512 289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
290 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) 291 { 292 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A, 293 (__v16sf) 294 _mm512_setzero_ps (), 295 (__mmask16) __U); 296 } 297 298 extern __inline __m512d 299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 300 _mm512_load_pd (void const *__P) 301 { 302 return *(__m512d *) __P; 303 } 304 305 extern __inline __m512d 306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 307 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P) 308 { 309 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, 310 (__v8df) __W, 311 (__mmask8) __U); 312 } 313 314 extern __inline __m512d 315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 316 _mm512_maskz_load_pd (__mmask8 __U, void const *__P) 317 { 318 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, 319 (__v8df) 320 _mm512_setzero_pd (), 321 (__mmask8) __U); 322 } 323 324 extern __inline void 325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 326 _mm512_store_pd (void *__P, __m512d __A) 327 { 328 *(__m512d *) __P = __A; 329 } 330 331 extern __inline void 332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 333 _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A) 334 { 335 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A, 336 (__mmask8) __U); 337 } 338 339 extern __inline __m512 340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 341 _mm512_load_ps (void const *__P) 342 { 343 return *(__m512 *) __P; 344 } 345 346 extern __inline __m512 347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 348 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P) 349 { 350 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, 351 (__v16sf) __W, 352 (__mmask16) __U); 353 } 354 355 extern __inline __m512 356 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 357 _mm512_maskz_load_ps (__mmask16 __U, void const *__P) 358 { 359 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, 360 (__v16sf) 361 _mm512_setzero_ps (), 362 (__mmask16) __U); 363 } 364 365 extern __inline void 366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 367 _mm512_store_ps (void *__P, __m512 __A) 368 { 369 *(__m512 *) __P = __A; 370 } 371 372 extern __inline void 373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 374 _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A) 375 { 376 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A, 377 (__mmask16) __U); 378 } 379 380 extern __inline __m512i 381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 382 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 383 { 384 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, 385 (__v8di) __W, 386 (__mmask8) __U); 387 } 388 389 extern __inline __m512i 390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 391 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A) 392 { 393 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, 394 (__v8di) 395 _mm512_setzero_si512 (), 396 (__mmask8) __U); 397 } 398 399 extern __inline __m512i 400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 401 _mm512_load_epi64 (void const *__P) 402 { 403 return *(__m512i *) __P; 404 } 405 406 extern __inline __m512i 407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 408 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P) 409 { 410 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, 411 (__v8di) __W, 412 (__mmask8) __U); 413 } 414 415 extern __inline __m512i 416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 417 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P) 418 { 419 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const 
__v8di *) __P, 420 (__v8di) 421 _mm512_setzero_si512 (), 422 (__mmask8) __U); 423 } 424 425 extern __inline void 426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 427 _mm512_store_epi64 (void *__P, __m512i __A) 428 { 429 *(__m512i *) __P = __A; 430 } 431 432 extern __inline void 433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 434 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) 435 { 436 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A, 437 (__mmask8) __U); 438 } 439 440 extern __inline __m512i 441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 442 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 443 { 444 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, 445 (__v16si) __W, 446 (__mmask16) __U); 447 } 448 449 extern __inline __m512i 450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 451 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) 452 { 453 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, 454 (__v16si) 455 _mm512_setzero_si512 (), 456 (__mmask16) __U); 457 } 458 459 extern __inline __m512i 460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 461 _mm512_load_si512 (void const *__P) 462 { 463 return *(__m512i *) __P; 464 } 465 466 extern __inline __m512i 467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 468 _mm512_load_epi32 (void const *__P) 469 { 470 return *(__m512i *) __P; 471 } 472 473 extern __inline __m512i 474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 475 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P) 476 { 477 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, 478 (__v16si) __W, 479 (__mmask16) __U); 480 } 481 482 extern __inline __m512i 483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 484 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P) 485 { 
486 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, 487 (__v16si) 488 _mm512_setzero_si512 (), 489 (__mmask16) __U); 490 } 491 492 extern __inline void 493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 494 _mm512_store_si512 (void *__P, __m512i __A) 495 { 496 *(__m512i *) __P = __A; 497 } 498 499 extern __inline void 500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 501 _mm512_store_epi32 (void *__P, __m512i __A) 502 { 503 *(__m512i *) __P = __A; 504 } 505 506 extern __inline void 507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 508 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A) 509 { 510 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A, 511 (__mmask16) __U); 512 } 513 514 extern __inline __m512i 515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 516 _mm512_mullo_epi32 (__m512i __A, __m512i __B) 517 { 518 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, 519 (__v16si) __B, 520 (__v16si) 521 _mm512_undefined_si512 (), 522 (__mmask16) -1); 523 } 524 525 extern __inline __m512i 526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 527 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 528 { 529 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, 530 (__v16si) __B, 531 (__v16si) 532 _mm512_setzero_si512 (), 533 __M); 534 } 535 536 extern __inline __m512i 537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 538 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 539 { 540 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, 541 (__v16si) __B, 542 (__v16si) __W, __M); 543 } 544 545 extern __inline __m512i 546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 547 _mm512_sllv_epi32 (__m512i __X, __m512i __Y) 548 { 549 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, 550 
(__v16si) __Y, 551 (__v16si) 552 _mm512_undefined_si512 (), 553 (__mmask16) -1); 554 } 555 556 extern __inline __m512i 557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 558 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 559 { 560 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, 561 (__v16si) __Y, 562 (__v16si) __W, 563 (__mmask16) __U); 564 } 565 566 extern __inline __m512i 567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 568 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) 569 { 570 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, 571 (__v16si) __Y, 572 (__v16si) 573 _mm512_setzero_si512 (), 574 (__mmask16) __U); 575 } 576 577 extern __inline __m512i 578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 579 _mm512_srav_epi32 (__m512i __X, __m512i __Y) 580 { 581 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, 582 (__v16si) __Y, 583 (__v16si) 584 _mm512_undefined_si512 (), 585 (__mmask16) -1); 586 } 587 588 extern __inline __m512i 589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 590 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 591 { 592 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, 593 (__v16si) __Y, 594 (__v16si) __W, 595 (__mmask16) __U); 596 } 597 598 extern __inline __m512i 599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 600 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) 601 { 602 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, 603 (__v16si) __Y, 604 (__v16si) 605 _mm512_setzero_si512 (), 606 (__mmask16) __U); 607 } 608 609 extern __inline __m512i 610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 611 _mm512_srlv_epi32 (__m512i __X, __m512i __Y) 612 { 613 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, 614 (__v16si) __Y, 615 (__v16si) 616 
_mm512_undefined_si512 (), 617 (__mmask16) -1); 618 } 619 620 extern __inline __m512i 621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 622 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 623 { 624 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, 625 (__v16si) __Y, 626 (__v16si) __W, 627 (__mmask16) __U); 628 } 629 630 extern __inline __m512i 631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 632 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) 633 { 634 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, 635 (__v16si) __Y, 636 (__v16si) 637 _mm512_setzero_si512 (), 638 (__mmask16) __U); 639 } 640 641 extern __inline __m512i 642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 643 _mm512_add_epi64 (__m512i __A, __m512i __B) 644 { 645 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, 646 (__v8di) __B, 647 (__v8di) 648 _mm512_undefined_si512 (), 649 (__mmask8) -1); 650 } 651 652 extern __inline __m512i 653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 654 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 655 { 656 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, 657 (__v8di) __B, 658 (__v8di) __W, 659 (__mmask8) __U); 660 } 661 662 extern __inline __m512i 663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 664 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 665 { 666 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, 667 (__v8di) __B, 668 (__v8di) 669 _mm512_setzero_si512 (), 670 (__mmask8) __U); 671 } 672 673 extern __inline __m512i 674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 675 _mm512_sub_epi64 (__m512i __A, __m512i __B) 676 { 677 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, 678 (__v8di) __B, 679 (__v8di) 680 _mm512_undefined_pd (), 681 (__mmask8) -1); 682 } 683 684 extern 
__inline __m512i 685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 686 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 687 { 688 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, 689 (__v8di) __B, 690 (__v8di) __W, 691 (__mmask8) __U); 692 } 693 694 extern __inline __m512i 695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 696 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 697 { 698 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, 699 (__v8di) __B, 700 (__v8di) 701 _mm512_setzero_si512 (), 702 (__mmask8) __U); 703 } 704 705 extern __inline __m512i 706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 707 _mm512_sllv_epi64 (__m512i __X, __m512i __Y) 708 { 709 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, 710 (__v8di) __Y, 711 (__v8di) 712 _mm512_undefined_pd (), 713 (__mmask8) -1); 714 } 715 716 extern __inline __m512i 717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 718 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 719 { 720 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, 721 (__v8di) __Y, 722 (__v8di) __W, 723 (__mmask8) __U); 724 } 725 726 extern __inline __m512i 727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 728 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) 729 { 730 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, 731 (__v8di) __Y, 732 (__v8di) 733 _mm512_setzero_si512 (), 734 (__mmask8) __U); 735 } 736 737 extern __inline __m512i 738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 739 _mm512_srav_epi64 (__m512i __X, __m512i __Y) 740 { 741 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, 742 (__v8di) __Y, 743 (__v8di) 744 _mm512_undefined_si512 (), 745 (__mmask8) -1); 746 } 747 748 extern __inline __m512i 749 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 750 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 751 { 752 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, 753 (__v8di) __Y, 754 (__v8di) __W, 755 (__mmask8) __U); 756 } 757 758 extern __inline __m512i 759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 760 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) 761 { 762 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, 763 (__v8di) __Y, 764 (__v8di) 765 _mm512_setzero_si512 (), 766 (__mmask8) __U); 767 } 768 769 extern __inline __m512i 770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 771 _mm512_srlv_epi64 (__m512i __X, __m512i __Y) 772 { 773 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, 774 (__v8di) __Y, 775 (__v8di) 776 _mm512_undefined_si512 (), 777 (__mmask8) -1); 778 } 779 780 extern __inline __m512i 781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 782 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 783 { 784 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, 785 (__v8di) __Y, 786 (__v8di) __W, 787 (__mmask8) __U); 788 } 789 790 extern __inline __m512i 791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 792 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) 793 { 794 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, 795 (__v8di) __Y, 796 (__v8di) 797 _mm512_setzero_si512 (), 798 (__mmask8) __U); 799 } 800 801 extern __inline __m512i 802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 803 _mm512_add_epi32 (__m512i __A, __m512i __B) 804 { 805 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, 806 (__v16si) __B, 807 (__v16si) 808 _mm512_undefined_si512 (), 809 (__mmask16) -1); 810 } 811 812 extern __inline __m512i 813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 814 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, 
__m512i __A, __m512i __B) 815 { 816 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, 817 (__v16si) __B, 818 (__v16si) __W, 819 (__mmask16) __U); 820 } 821 822 extern __inline __m512i 823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 824 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 825 { 826 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, 827 (__v16si) __B, 828 (__v16si) 829 _mm512_setzero_si512 (), 830 (__mmask16) __U); 831 } 832 833 extern __inline __m512i 834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 835 _mm512_mul_epi32 (__m512i __X, __m512i __Y) 836 { 837 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, 838 (__v16si) __Y, 839 (__v8di) 840 _mm512_undefined_si512 (), 841 (__mmask8) -1); 842 } 843 844 extern __inline __m512i 845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 846 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 847 { 848 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, 849 (__v16si) __Y, 850 (__v8di) __W, __M); 851 } 852 853 extern __inline __m512i 854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 855 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y) 856 { 857 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, 858 (__v16si) __Y, 859 (__v8di) 860 _mm512_setzero_si512 (), 861 __M); 862 } 863 864 extern __inline __m512i 865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 866 _mm512_sub_epi32 (__m512i __A, __m512i __B) 867 { 868 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, 869 (__v16si) __B, 870 (__v16si) 871 _mm512_undefined_si512 (), 872 (__mmask16) -1); 873 } 874 875 extern __inline __m512i 876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 877 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 878 { 879 return (__m512i) __builtin_ia32_psubd512_mask 
((__v16si) __A, 880 (__v16si) __B, 881 (__v16si) __W, 882 (__mmask16) __U); 883 } 884 885 extern __inline __m512i 886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 887 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 888 { 889 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, 890 (__v16si) __B, 891 (__v16si) 892 _mm512_setzero_si512 (), 893 (__mmask16) __U); 894 } 895 896 extern __inline __m512i 897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 898 _mm512_mul_epu32 (__m512i __X, __m512i __Y) 899 { 900 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, 901 (__v16si) __Y, 902 (__v8di) 903 _mm512_undefined_si512 (), 904 (__mmask8) -1); 905 } 906 907 extern __inline __m512i 908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 909 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 910 { 911 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, 912 (__v16si) __Y, 913 (__v8di) __W, __M); 914 } 915 916 extern __inline __m512i 917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 918 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y) 919 { 920 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, 921 (__v16si) __Y, 922 (__v8di) 923 _mm512_setzero_si512 (), 924 __M); 925 } 926 927 #ifdef __OPTIMIZE__ 928 extern __inline __m512i 929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 930 _mm512_slli_epi64 (__m512i __A, unsigned int __B) 931 { 932 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, 933 (__v8di) 934 _mm512_undefined_si512 (), 935 (__mmask8) -1); 936 } 937 938 extern __inline __m512i 939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 940 _mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A, 941 unsigned int __B) 942 { 943 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, 944 (__v8di) __W, 945 (__mmask8) 
__U); 946 } 947 948 extern __inline __m512i 949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 950 _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B) 951 { 952 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, 953 (__v8di) 954 _mm512_setzero_si512 (), 955 (__mmask8) __U); 956 } 957 #else 958 #define _mm512_slli_epi64(X, C) \ 959 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 960 (__v8di)(__m512i)_mm512_undefined_si512 (),\ 961 (__mmask8)-1)) 962 963 #define _mm512_mask_slli_epi64(W, U, X, C) \ 964 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 965 (__v8di)(__m512i)(W),\ 966 (__mmask8)(U))) 967 968 #define _mm512_maskz_slli_epi64(U, X, C) \ 969 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 970 (__v8di)(__m512i)_mm512_setzero_si512 (),\ 971 (__mmask8)(U))) 972 #endif 973 974 extern __inline __m512i 975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 976 _mm512_sll_epi64 (__m512i __A, __m128i __B) 977 { 978 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, 979 (__v2di) __B, 980 (__v8di) 981 _mm512_undefined_si512 (), 982 (__mmask8) -1); 983 } 984 985 extern __inline __m512i 986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 987 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 988 { 989 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, 990 (__v2di) __B, 991 (__v8di) __W, 992 (__mmask8) __U); 993 } 994 995 extern __inline __m512i 996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 997 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B) 998 { 999 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, 1000 (__v2di) __B, 1001 (__v8di) 1002 _mm512_setzero_si512 (), 1003 (__mmask8) __U); 1004 } 1005 1006 #ifdef __OPTIMIZE__ 1007 extern __inline __m512i 1008 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
              (__v8di) _mm512_undefined_si512 (),
              (__mmask8) -1);
}

/* Convention used throughout this file (visible in every builtin call):
   - _mm512_OP (...)                operates on all lanes; the "pass-through"
     operand given to the builtin is _mm512_undefined_*() with an all-ones
     mask, i.e. every lane comes from the computation.
   - _mm512_mask_OP (__W, __U, ...) lanes not selected by mask __U are taken
     from __W (the builtin receives __W as the pass-through operand).
   - _mm512_maskz_OP (__U, ...)     lanes not selected by __U are zeroed
     (the builtin receives _mm512_setzero_*() as the pass-through operand).
   Immediate-count / rounding-mode intrinsics are real inline functions only
   under __OPTIMIZE__; otherwise they are macros so the constant argument is
   handed to the builtin as a literal expression.  */

/* Shift 64-bit elements right logically by the immediate count __B
   (masked merge form).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
      __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
              (__v8di) __W,
              (__mmask8) __U);
}

/* Shift 64-bit elements right logically by the immediate count __B
   (zero-masking form).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
              (__v8di) _mm512_setzero_si512 (),
              (__mmask8) __U);
}
#else
/* Non-optimizing macro forms: (int)(C) keeps the count a constant
   expression for the builtin.  */
#define _mm512_srli_epi64(X, C)						\
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_si512 (),\
    (__mmask8)-1))

#define _mm512_mask_srli_epi64(W, U, X, C)				\
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_srli_epi64(U, X, C)				\
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif

/* Shift 64-bit elements right logically by the count held in the low
   64 bits of __B.  Always an inline function: the count is not an
   immediate, so no macro fallback is needed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srl_epi64 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
             (__v2di) __B,
             (__v8di) _mm512_undefined_si512 (),
             (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
             (__v2di) __B,
             (__v8di) __W,
             (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
             (__v2di) __B,
             (__v8di) _mm512_setzero_si512 (),
             (__mmask8) __U);
}

/* Shift 64-bit elements right arithmetically (sign-filling) by the
   immediate count __B.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
              (__v8di) _mm512_undefined_si512 (),
              (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
      unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
              (__v8di) __W,
              (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
              (__v8di) _mm512_setzero_si512 (),
              (__mmask8) __U);
}
#else
#define _mm512_srai_epi64(X, C)						\
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_si512 (),\
    (__mmask8)-1))

#define _mm512_mask_srai_epi64(W, U, X, C)				\
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_srai_epi64(U, X, C)				\
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif

/* Shift 64-bit elements right arithmetically by the count held in the
   low 64 bits of __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sra_epi64 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
             (__v2di) __B,
             (__v8di) _mm512_undefined_si512 (),
             (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
             (__v2di) __B,
             (__v8di) __W,
             (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
             (__v2di) __B,
             (__v8di) _mm512_setzero_si512 (),
             (__mmask8) __U);
}

/* Shift 32-bit elements left by the immediate count __B; masks are
   16 bits wide (one bit per lane).  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
              (__v16si) _mm512_undefined_si512 (),
              (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
      unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
              (__v16si) __W,
              (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
              (__v16si) _mm512_setzero_si512 (),
              (__mmask16) __U);
}
#else
#define _mm512_slli_epi32(X, C)						\
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_si512 (),\
    (__mmask16)-1))

#define _mm512_mask_slli_epi32(W, U, X, C)				\
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_slli_epi32(U, X, C)				\
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif

/* Shift 32-bit elements left by the count held in the low 64 bits
   of __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sll_epi32 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
             (__v4si) __B,
             (__v16si) _mm512_undefined_si512 (),
             (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
             (__v4si) __B,
             (__v16si) __W,
             (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
             (__v4si) __B,
             (__v16si) _mm512_setzero_si512 (),
             (__mmask16) __U);
}

/* Shift 32-bit elements right logically by the immediate count __B.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
              (__v16si) _mm512_undefined_si512 (),
              (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
      __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
              (__v16si) __W,
              (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
              (__v16si) _mm512_setzero_si512 (),
              (__mmask16) __U);
}
#else
#define _mm512_srli_epi32(X, C)						\
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_si512 (),\
    (__mmask16)-1))

#define _mm512_mask_srli_epi32(W, U, X, C)				\
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_srli_epi32(U, X, C)				\
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif

/* Shift 32-bit elements right logically by the count held in the low
   64 bits of __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srl_epi32 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
             (__v4si) __B,
             (__v16si) _mm512_undefined_si512 (),
             (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
             (__v4si) __B,
             (__v16si) __W,
             (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
             (__v4si) __B,
             (__v16si) _mm512_setzero_si512 (),
             (__mmask16) __U);
}

/* Shift 32-bit elements right arithmetically by the immediate count
   __B.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
              (__v16si) _mm512_undefined_si512 (),
              (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
      unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
              (__v16si) __W,
              (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
              (__v16si) _mm512_setzero_si512 (),
              (__mmask16) __U);
}
#else
#define _mm512_srai_epi32(X, C)						\
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_si512 (),\
    (__mmask16)-1))

#define _mm512_mask_srai_epi32(W, U, X, C)				\
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_srai_epi32(U, X, C)				\
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif

/* Shift 32-bit elements right arithmetically by the count held in the
   low 64 bits of __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sra_epi32 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
             (__v4si) __B,
             (__v16si) _mm512_undefined_si512 (),
             (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
             (__v4si) __B,
             (__v16si) __W,
             (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
             (__v4si) __B,
             (__v16si) _mm512_setzero_si512 (),
             (__mmask16) __U);
}

/* Scalar add/subtract of the low element with an explicit rounding
   mode __R; the upper element of the result comes from __A.  */
#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
             (__v2df) __B,
             __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
            (__v4sf) __B,
            __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
             (__v2df) __B,
             __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
            (__v4sf) __B,
            __R);
}

#else
#define _mm_add_round_sd(A, B, C)		\
  (__m128d)__builtin_ia32_addsd_round(A, B, C)

#define _mm_add_round_ss(A, B, C)		\
  (__m128)__builtin_ia32_addss_round(A, B, C)

#define _mm_sub_round_sd(A, B, C)		\
  (__m128d)__builtin_ia32_subsd_round(A, B, C)

#define _mm_sub_round_ss(A, B, C)		\
  (__m128)__builtin_ia32_subss_round(A, B, C)
#endif

/* Bitwise ternary logic: each result bit is the truth-table lookup
   (selected by the 8-bit immediate `imm') of the corresponding bits of
   __A, __B and __C.  Note the maskz forms use a distinct _maskz
   builtin.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
                 (__v8di) __B,
                 (__v8di) __C, imm,
                 (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
        __m512i __C, const int imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
                 (__v8di) __B,
                 (__v8di) __C, imm,
                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
         __m512i __C, const int imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
                  (__v8di) __B,
                  (__v8di) __C,
                  imm, (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
                 (__v16si) __B,
                 (__v16si) __C,
                 imm, (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
        __m512i __C, const int imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
                 (__v16si) __B,
                 (__v16si) __C,
                 imm, (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
         __m512i __C, const int imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
                  (__v16si) __B,
                  (__v16si) __C,
                  imm, (__mmask16) __U);
}
#else
#define _mm512_ternarylogic_epi64(A, B, C, I)				\
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A),	\
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I)			\
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A),	\
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I)			\
  ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A),	\
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
#define _mm512_ternarylogic_epi32(A, B, C, I)				\
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A),	\
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I),		\
    (__mmask16)-1))
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I)			\
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A),	\
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I),		\
    (__mmask16)(U)))
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I)			\
  ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A),	\
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I),		\
    (__mmask16)(U)))
#endif

/* Approximate reciprocal (rcp14) of packed double elements.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp14_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
               (__v8df) _mm512_undefined_pd (),
               (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
               (__v8df) __W,
               (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
               (__v8df) _mm512_setzero_pd (),
               (__mmask8) __U);
}

/* Approximate reciprocal (rcp14) of packed float elements.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp14_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
              (__v16sf) _mm512_undefined_ps (),
              (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
              (__v16sf) __W,
              (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
              (__v16sf) _mm512_setzero_ps (),
              (__mmask16) __U);
}

/* Scalar rcp14: the low element is the approximate reciprocal of the
   low element of __B; the upper element comes from __A (hence the
   swapped builtin argument order).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp14_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
             (__v2df) __A);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp14_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
            (__v4sf) __A);
}

/* Approximate reciprocal square root (rsqrt14) of packed double
   elements.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt14_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                 (__v8df) _mm512_undefined_pd (),
                 (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                 (__v8df) __W,
                 (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                 (__v8df) _mm512_setzero_pd (),
                 (__mmask8) __U);
}

/* Approximate reciprocal square root (rsqrt14) of packed float
   elements.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt14_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                (__v16sf) _mm512_undefined_ps (),
                (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                (__v16sf) __W,
                (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                (__v16sf) _mm512_setzero_ps (),
                (__mmask16) __U);
}

/* Scalar rsqrt14: low element from __B, upper element from __A
   (swapped builtin argument order, as with rcp14 above).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt14_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
               (__v2df) __A);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt14_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
              (__v4sf) __A);
}

/* Square root with an explicit rounding mode __R.  */
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_pd (__m512d __A, const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
              (__v8df) _mm512_undefined_pd (),
              (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
         const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
              (__v8df) __W,
              (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
              (__v8df) _mm512_setzero_pd (),
              (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_ps (__m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
             (__v16sf) _mm512_undefined_ps (),
             (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
             (__v16sf) __W,
             (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
             (__v16sf) _mm512_setzero_ps (),
             (__mmask16) __U, __R);
}

/* Scalar sqrt with rounding: low element from __B, upper element from
   __A (swapped builtin argument order).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
            (__v2df) __A,
            __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
             (__v4sf) __A,
             __R);
}
#else
#define _mm512_sqrt_round_pd(A, C)            \
    (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
    (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)

#define _mm512_maskz_sqrt_round_pd(U, A, C)   \
    (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_sqrt_round_ps(A, C)            \
    (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
    (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)

#define _mm512_maskz_sqrt_round_ps(U, A, C)   \
    (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm_sqrt_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_sqrtsd_round(A, B, C)

#define _mm_sqrt_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_sqrtss_round(A, B, C)
#endif

/* Sign-extend 8-bit elements of __A to 32-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi8_epi32 (__m128i __A)
{
  return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
                (__v16si) _mm512_undefined_si512 (),
                (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
                (__v16si) __W,
                (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
                (__v16si) _mm512_setzero_si512 (),
                (__mmask16) __U);
}

/* Sign-extend the low eight 8-bit elements of __A to 64-bit
   elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi8_epi64 (__m128i __A)
{
  return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
                (__v8di) _mm512_undefined_si512 (),
                (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
                (__v8di) __W,
                (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
                (__v8di) _mm512_setzero_si512 (),
                (__mmask8) __U);
}

/* Sign-extend 16-bit elements of __A to 32-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi16_epi32 (__m256i __A)
{
  return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
                (__v16si) _mm512_undefined_si512 (),
                (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
{
  return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
                (__v16si) __W,
                (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
{
  return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
                (__v16si) _mm512_setzero_si512 (),
                (__mmask16) __U);
}

/* Sign-extend 16-bit elements of __A to 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi16_epi64 (__m128i __A)
{
  return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
                (__v8di) _mm512_undefined_si512 (),
                (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
                (__v8di) __W,
                (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
                (__v8di) _mm512_setzero_si512 (),
                (__mmask8) __U);
}

/* Sign-extend 32-bit elements of __X to 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi32_epi64 (__m256i __X)
{
  return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
                (__v8di) _mm512_undefined_si512 (),
                (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
{
  return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
                (__v8di) __W,
                (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
{
  return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
                (__v8di) _mm512_setzero_si512 (),
                (__mmask8) __U);
}

/* Zero-extend 8-bit elements of __A to 32-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu8_epi32 (__m128i __A)
{
  return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
                (__v16si) _mm512_undefined_si512 (),
                (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
                (__v16si) __W,
                (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
                (__v16si) _mm512_setzero_si512 (),
                (__mmask16) __U);
}

/* Zero-extend the low eight 8-bit elements of __A to 64-bit
   elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu8_epi64 (__m128i __A)
{
  return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
                (__v8di) _mm512_undefined_si512 (),
                (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
                (__v8di) __W,
                (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
                (__v8di) _mm512_setzero_si512 (),
                (__mmask8) __U);
}

/* Zero-extend 16-bit elements of __A to 32-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu16_epi32 (__m256i __A)
{
  return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
                (__v16si) _mm512_undefined_si512 (),
                (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
{
  return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
                (__v16si) __W,
                (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
{
  return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
                (__v16si) _mm512_setzero_si512 (),
                (__mmask16) __U);
}

/* Zero-extend 16-bit elements of __A to 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu16_epi64 (__m128i __A)
{
  return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
                (__v8di) _mm512_undefined_si512 (),
                (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
                (__v8di) __W,
                (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
                (__v8di) _mm512_setzero_si512 (),
                (__mmask8) __U);
}

/* Zero-extend 32-bit elements of __X to 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu32_epi64 (__m256i __X)
{
  return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
                (__v8di) _mm512_undefined_si512 (),
                (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
{
  return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
                (__v8di) __W,
                (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
{
  return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
                (__v8di) _mm512_setzero_si512 (),
                (__mmask8) __U);
}

/* Packed add/subtract with an explicit rounding mode __R.  */
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
             (__v8df) __B,
             (__v8df) _mm512_undefined_pd (),
             (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
        __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
             (__v8df) __B,
             (__v8df) __W,
             (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
         const int __R)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
             (__v8df) __B,
             (__v8df) _mm512_setzero_pd (),
             (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
            (__v16sf) __B,
            (__v16sf) _mm512_undefined_ps (),
            (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
        __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
            (__v16sf) __B,
            (__v16sf) __W,
            (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
            (__v16sf) __B,
            (__v16sf) _mm512_setzero_ps (),
            (__mmask16) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
             (__v8df) __B,
             (__v8df) _mm512_undefined_pd (),
             (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
        __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
             (__v8df) __B,
             (__v8df) __W,
             (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
         const int __R)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
             (__v8df) __B,
             (__v8df) _mm512_setzero_pd (),
             (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
            (__v16sf) __B,
            (__v16sf) _mm512_undefined_ps (),
            (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
        __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
            (__v16sf) __B,
            (__v16sf) __W,
            (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
            (__v16sf) __B,
            (__v16sf) _mm512_setzero_ps (),
            (__mmask16) __U, __R);
}
#else
#define _mm512_add_round_pd(A, B, C)            \
    (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_add_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)

#define _mm512_maskz_add_round_pd(U, A, B, C)   \
(__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 2224 2225 #define _mm512_add_round_ps(A, B, C) \ 2226 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 2227 2228 #define _mm512_mask_add_round_ps(W, U, A, B, C) \ 2229 (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C) 2230 2231 #define _mm512_maskz_add_round_ps(U, A, B, C) \ 2232 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) 2233 2234 #define _mm512_sub_round_pd(A, B, C) \ 2235 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) 2236 2237 #define _mm512_mask_sub_round_pd(W, U, A, B, C) \ 2238 (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C) 2239 2240 #define _mm512_maskz_sub_round_pd(U, A, B, C) \ 2241 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 2242 2243 #define _mm512_sub_round_ps(A, B, C) \ 2244 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 2245 2246 #define _mm512_mask_sub_round_ps(W, U, A, B, C) \ 2247 (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C) 2248 2249 #define _mm512_maskz_sub_round_ps(U, A, B, C) \ 2250 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) 2251 #endif 2252 2253 #ifdef __OPTIMIZE__ 2254 extern __inline __m512d 2255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2256 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R) 2257 { 2258 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 2259 (__v8df) __B, 2260 (__v8df) 2261 _mm512_undefined_pd (), 2262 (__mmask8) -1, __R); 2263 } 2264 2265 extern __inline __m512d 2266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2267 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2268 __m512d __B, const int __R) 2269 { 2270 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 2271 (__v8df) __B, 2272 (__v8df) __W, 2273 (__mmask8) __U, __R); 2274 } 
2275 2276 extern __inline __m512d 2277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2278 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2279 const int __R) 2280 { 2281 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 2282 (__v8df) __B, 2283 (__v8df) 2284 _mm512_setzero_pd (), 2285 (__mmask8) __U, __R); 2286 } 2287 2288 extern __inline __m512 2289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2290 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R) 2291 { 2292 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 2293 (__v16sf) __B, 2294 (__v16sf) 2295 _mm512_undefined_ps (), 2296 (__mmask16) -1, __R); 2297 } 2298 2299 extern __inline __m512 2300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2301 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2302 __m512 __B, const int __R) 2303 { 2304 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 2305 (__v16sf) __B, 2306 (__v16sf) __W, 2307 (__mmask16) __U, __R); 2308 } 2309 2310 extern __inline __m512 2311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2312 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2313 { 2314 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 2315 (__v16sf) __B, 2316 (__v16sf) 2317 _mm512_setzero_ps (), 2318 (__mmask16) __U, __R); 2319 } 2320 2321 extern __inline __m512d 2322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2323 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R) 2324 { 2325 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 2326 (__v8df) __V, 2327 (__v8df) 2328 _mm512_undefined_pd (), 2329 (__mmask8) -1, __R); 2330 } 2331 2332 extern __inline __m512d 2333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2334 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M, 2335 __m512d __V, const int __R) 2336 { 2337 return 
(__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 2338 (__v8df) __V, 2339 (__v8df) __W, 2340 (__mmask8) __U, __R); 2341 } 2342 2343 extern __inline __m512d 2344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2345 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V, 2346 const int __R) 2347 { 2348 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 2349 (__v8df) __V, 2350 (__v8df) 2351 _mm512_setzero_pd (), 2352 (__mmask8) __U, __R); 2353 } 2354 2355 extern __inline __m512 2356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2357 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R) 2358 { 2359 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 2360 (__v16sf) __B, 2361 (__v16sf) 2362 _mm512_undefined_ps (), 2363 (__mmask16) -1, __R); 2364 } 2365 2366 extern __inline __m512 2367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2368 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2369 __m512 __B, const int __R) 2370 { 2371 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 2372 (__v16sf) __B, 2373 (__v16sf) __W, 2374 (__mmask16) __U, __R); 2375 } 2376 2377 extern __inline __m512 2378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2379 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2380 { 2381 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 2382 (__v16sf) __B, 2383 (__v16sf) 2384 _mm512_setzero_ps (), 2385 (__mmask16) __U, __R); 2386 } 2387 2388 extern __inline __m128d 2389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2390 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R) 2391 { 2392 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, 2393 (__v2df) __B, 2394 __R); 2395 } 2396 2397 extern __inline __m128 2398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2399 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R) 
2400 { 2401 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, 2402 (__v4sf) __B, 2403 __R); 2404 } 2405 2406 extern __inline __m128d 2407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2408 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R) 2409 { 2410 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, 2411 (__v2df) __B, 2412 __R); 2413 } 2414 2415 extern __inline __m128 2416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2417 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R) 2418 { 2419 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A, 2420 (__v4sf) __B, 2421 __R); 2422 } 2423 2424 #else 2425 #define _mm512_mul_round_pd(A, B, C) \ 2426 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) 2427 2428 #define _mm512_mask_mul_round_pd(W, U, A, B, C) \ 2429 (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C) 2430 2431 #define _mm512_maskz_mul_round_pd(U, A, B, C) \ 2432 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 2433 2434 #define _mm512_mul_round_ps(A, B, C) \ 2435 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 2436 2437 #define _mm512_mask_mul_round_ps(W, U, A, B, C) \ 2438 (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C) 2439 2440 #define _mm512_maskz_mul_round_ps(U, A, B, C) \ 2441 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) 2442 2443 #define _mm512_div_round_pd(A, B, C) \ 2444 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) 2445 2446 #define _mm512_mask_div_round_pd(W, U, A, B, C) \ 2447 (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C) 2448 2449 #define _mm512_maskz_div_round_pd(U, A, B, C) \ 2450 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 2451 2452 #define _mm512_div_round_ps(A, B, C) \ 2453 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 2454 
2455 #define _mm512_mask_div_round_ps(W, U, A, B, C) \ 2456 (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C) 2457 2458 #define _mm512_maskz_div_round_ps(U, A, B, C) \ 2459 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) 2460 2461 #define _mm_mul_round_sd(A, B, C) \ 2462 (__m128d)__builtin_ia32_mulsd_round(A, B, C) 2463 2464 #define _mm_mul_round_ss(A, B, C) \ 2465 (__m128)__builtin_ia32_mulss_round(A, B, C) 2466 2467 #define _mm_div_round_sd(A, B, C) \ 2468 (__m128d)__builtin_ia32_divsd_round(A, B, C) 2469 2470 #define _mm_div_round_ss(A, B, C) \ 2471 (__m128)__builtin_ia32_divss_round(A, B, C) 2472 #endif 2473 2474 #ifdef __OPTIMIZE__ 2475 extern __inline __m512d 2476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2477 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R) 2478 { 2479 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 2480 (__v8df) __B, 2481 (__v8df) 2482 _mm512_undefined_pd (), 2483 (__mmask8) -1, __R); 2484 } 2485 2486 extern __inline __m512d 2487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2488 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2489 __m512d __B, const int __R) 2490 { 2491 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 2492 (__v8df) __B, 2493 (__v8df) __W, 2494 (__mmask8) __U, __R); 2495 } 2496 2497 extern __inline __m512d 2498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2499 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2500 const int __R) 2501 { 2502 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 2503 (__v8df) __B, 2504 (__v8df) 2505 _mm512_setzero_pd (), 2506 (__mmask8) __U, __R); 2507 } 2508 2509 extern __inline __m512 2510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2511 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R) 2512 { 2513 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 2514 (__v16sf) 
__B, 2515 (__v16sf) 2516 _mm512_undefined_ps (), 2517 (__mmask16) -1, __R); 2518 } 2519 2520 extern __inline __m512 2521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2522 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2523 __m512 __B, const int __R) 2524 { 2525 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 2526 (__v16sf) __B, 2527 (__v16sf) __W, 2528 (__mmask16) __U, __R); 2529 } 2530 2531 extern __inline __m512 2532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2533 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2534 { 2535 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 2536 (__v16sf) __B, 2537 (__v16sf) 2538 _mm512_setzero_ps (), 2539 (__mmask16) __U, __R); 2540 } 2541 2542 extern __inline __m512d 2543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2544 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R) 2545 { 2546 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 2547 (__v8df) __B, 2548 (__v8df) 2549 _mm512_undefined_pd (), 2550 (__mmask8) -1, __R); 2551 } 2552 2553 extern __inline __m512d 2554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2555 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2556 __m512d __B, const int __R) 2557 { 2558 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 2559 (__v8df) __B, 2560 (__v8df) __W, 2561 (__mmask8) __U, __R); 2562 } 2563 2564 extern __inline __m512d 2565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2566 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2567 const int __R) 2568 { 2569 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 2570 (__v8df) __B, 2571 (__v8df) 2572 _mm512_setzero_pd (), 2573 (__mmask8) __U, __R); 2574 } 2575 2576 extern __inline __m512 2577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2578 _mm512_min_round_ps 
(__m512 __A, __m512 __B, const int __R) 2579 { 2580 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 2581 (__v16sf) __B, 2582 (__v16sf) 2583 _mm512_undefined_ps (), 2584 (__mmask16) -1, __R); 2585 } 2586 2587 extern __inline __m512 2588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2589 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2590 __m512 __B, const int __R) 2591 { 2592 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 2593 (__v16sf) __B, 2594 (__v16sf) __W, 2595 (__mmask16) __U, __R); 2596 } 2597 2598 extern __inline __m512 2599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2600 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2601 { 2602 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 2603 (__v16sf) __B, 2604 (__v16sf) 2605 _mm512_setzero_ps (), 2606 (__mmask16) __U, __R); 2607 } 2608 #else 2609 #define _mm512_max_round_pd(A, B, R) \ 2610 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R) 2611 2612 #define _mm512_mask_max_round_pd(W, U, A, B, R) \ 2613 (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R) 2614 2615 #define _mm512_maskz_max_round_pd(U, A, B, R) \ 2616 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R) 2617 2618 #define _mm512_max_round_ps(A, B, R) \ 2619 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_pd(), -1, R) 2620 2621 #define _mm512_mask_max_round_ps(W, U, A, B, R) \ 2622 (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R) 2623 2624 #define _mm512_maskz_max_round_ps(U, A, B, R) \ 2625 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R) 2626 2627 #define _mm512_min_round_pd(A, B, R) \ 2628 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R) 2629 2630 #define _mm512_mask_min_round_pd(W, U, A, B, R) \ 2631 (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R) 2632 2633 #define 
_mm512_maskz_min_round_pd(U, A, B, R) \ 2634 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R) 2635 2636 #define _mm512_min_round_ps(A, B, R) \ 2637 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R) 2638 2639 #define _mm512_mask_min_round_ps(W, U, A, B, R) \ 2640 (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R) 2641 2642 #define _mm512_maskz_min_round_ps(U, A, B, R) \ 2643 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R) 2644 #endif 2645 2646 #ifdef __OPTIMIZE__ 2647 extern __inline __m512d 2648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2649 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R) 2650 { 2651 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 2652 (__v8df) __B, 2653 (__v8df) 2654 _mm512_undefined_pd (), 2655 (__mmask8) -1, __R); 2656 } 2657 2658 extern __inline __m512d 2659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2660 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2661 __m512d __B, const int __R) 2662 { 2663 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 2664 (__v8df) __B, 2665 (__v8df) __W, 2666 (__mmask8) __U, __R); 2667 } 2668 2669 extern __inline __m512d 2670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2671 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2672 const int __R) 2673 { 2674 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 2675 (__v8df) __B, 2676 (__v8df) 2677 _mm512_setzero_pd (), 2678 (__mmask8) __U, __R); 2679 } 2680 2681 extern __inline __m512 2682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2683 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R) 2684 { 2685 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 2686 (__v16sf) __B, 2687 (__v16sf) 2688 _mm512_undefined_ps (), 2689 (__mmask16) -1, __R); 2690 } 2691 2692 
extern __inline __m512 2693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2694 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2695 __m512 __B, const int __R) 2696 { 2697 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 2698 (__v16sf) __B, 2699 (__v16sf) __W, 2700 (__mmask16) __U, __R); 2701 } 2702 2703 extern __inline __m512 2704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2705 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 2706 const int __R) 2707 { 2708 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 2709 (__v16sf) __B, 2710 (__v16sf) 2711 _mm512_setzero_ps (), 2712 (__mmask16) __U, __R); 2713 } 2714 2715 extern __inline __m128d 2716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2717 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R) 2718 { 2719 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A, 2720 (__v2df) __B, 2721 __R); 2722 } 2723 2724 extern __inline __m128 2725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2726 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R) 2727 { 2728 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A, 2729 (__v4sf) __B, 2730 __R); 2731 } 2732 #else 2733 #define _mm512_scalef_round_pd(A, B, C) \ 2734 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) 2735 2736 #define _mm512_mask_scalef_round_pd(W, U, A, B, C) \ 2737 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C) 2738 2739 #define _mm512_maskz_scalef_round_pd(U, A, B, C) \ 2740 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 2741 2742 #define _mm512_scalef_round_ps(A, B, C) \ 2743 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 2744 2745 #define _mm512_mask_scalef_round_ps(W, U, A, B, C) \ 2746 (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C) 2747 2748 #define 
_mm512_maskz_scalef_round_ps(U, A, B, C) \ 2749 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) 2750 2751 #define _mm_scalef_round_sd(A, B, C) \ 2752 (__m128d)__builtin_ia32_scalefsd_round(A, B, C) 2753 2754 #define _mm_scalef_round_ss(A, B, C) \ 2755 (__m128)__builtin_ia32_scalefss_round(A, B, C) 2756 #endif 2757 2758 #ifdef __OPTIMIZE__ 2759 extern __inline __m512d 2760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2761 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 2762 { 2763 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2764 (__v8df) __B, 2765 (__v8df) __C, 2766 (__mmask8) -1, __R); 2767 } 2768 2769 extern __inline __m512d 2770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2771 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 2772 __m512d __C, const int __R) 2773 { 2774 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2775 (__v8df) __B, 2776 (__v8df) __C, 2777 (__mmask8) __U, __R); 2778 } 2779 2780 extern __inline __m512d 2781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2782 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, 2783 __mmask8 __U, const int __R) 2784 { 2785 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, 2786 (__v8df) __B, 2787 (__v8df) __C, 2788 (__mmask8) __U, __R); 2789 } 2790 2791 extern __inline __m512d 2792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2793 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2794 __m512d __C, const int __R) 2795 { 2796 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 2797 (__v8df) __B, 2798 (__v8df) __C, 2799 (__mmask8) __U, __R); 2800 } 2801 2802 extern __inline __m512 2803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2804 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 2805 { 2806 return 
(__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2807 (__v16sf) __B, 2808 (__v16sf) __C, 2809 (__mmask16) -1, __R); 2810 } 2811 2812 extern __inline __m512 2813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2814 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 2815 __m512 __C, const int __R) 2816 { 2817 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2818 (__v16sf) __B, 2819 (__v16sf) __C, 2820 (__mmask16) __U, __R); 2821 } 2822 2823 extern __inline __m512 2824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2825 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, 2826 __mmask16 __U, const int __R) 2827 { 2828 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, 2829 (__v16sf) __B, 2830 (__v16sf) __C, 2831 (__mmask16) __U, __R); 2832 } 2833 2834 extern __inline __m512 2835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2836 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 2837 __m512 __C, const int __R) 2838 { 2839 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 2840 (__v16sf) __B, 2841 (__v16sf) __C, 2842 (__mmask16) __U, __R); 2843 } 2844 2845 extern __inline __m512d 2846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2847 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 2848 { 2849 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2850 (__v8df) __B, 2851 -(__v8df) __C, 2852 (__mmask8) -1, __R); 2853 } 2854 2855 extern __inline __m512d 2856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2857 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 2858 __m512d __C, const int __R) 2859 { 2860 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2861 (__v8df) __B, 2862 -(__v8df) __C, 2863 (__mmask8) __U, __R); 2864 } 2865 2866 extern __inline __m512d 2867 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 2868 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, 2869 __mmask8 __U, const int __R) 2870 { 2871 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, 2872 (__v8df) __B, 2873 (__v8df) __C, 2874 (__mmask8) __U, __R); 2875 } 2876 2877 extern __inline __m512d 2878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2879 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2880 __m512d __C, const int __R) 2881 { 2882 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 2883 (__v8df) __B, 2884 -(__v8df) __C, 2885 (__mmask8) __U, __R); 2886 } 2887 2888 extern __inline __m512 2889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2890 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 2891 { 2892 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2893 (__v16sf) __B, 2894 -(__v16sf) __C, 2895 (__mmask16) -1, __R); 2896 } 2897 2898 extern __inline __m512 2899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2900 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 2901 __m512 __C, const int __R) 2902 { 2903 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2904 (__v16sf) __B, 2905 -(__v16sf) __C, 2906 (__mmask16) __U, __R); 2907 } 2908 2909 extern __inline __m512 2910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2911 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, 2912 __mmask16 __U, const int __R) 2913 { 2914 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, 2915 (__v16sf) __B, 2916 (__v16sf) __C, 2917 (__mmask16) __U, __R); 2918 } 2919 2920 extern __inline __m512 2921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2922 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 2923 __m512 __C, const int __R) 2924 { 2925 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) 
__A, 2926 (__v16sf) __B, 2927 -(__v16sf) __C, 2928 (__mmask16) __U, __R); 2929 } 2930 2931 extern __inline __m512d 2932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2933 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 2934 { 2935 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 2936 (__v8df) __B, 2937 (__v8df) __C, 2938 (__mmask8) -1, __R); 2939 } 2940 2941 extern __inline __m512d 2942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2943 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 2944 __m512d __C, const int __R) 2945 { 2946 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 2947 (__v8df) __B, 2948 (__v8df) __C, 2949 (__mmask8) __U, __R); 2950 } 2951 2952 extern __inline __m512d 2953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2954 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, 2955 __mmask8 __U, const int __R) 2956 { 2957 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, 2958 (__v8df) __B, 2959 (__v8df) __C, 2960 (__mmask8) __U, __R); 2961 } 2962 2963 extern __inline __m512d 2964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2965 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2966 __m512d __C, const int __R) 2967 { 2968 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 2969 (__v8df) __B, 2970 (__v8df) __C, 2971 (__mmask8) __U, __R); 2972 } 2973 2974 extern __inline __m512 2975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2976 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 2977 { 2978 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 2979 (__v16sf) __B, 2980 (__v16sf) __C, 2981 (__mmask16) -1, __R); 2982 } 2983 2984 extern __inline __m512 2985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2986 
_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 2987 __m512 __C, const int __R) 2988 { 2989 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 2990 (__v16sf) __B, 2991 (__v16sf) __C, 2992 (__mmask16) __U, __R); 2993 } 2994 2995 extern __inline __m512 2996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2997 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, 2998 __mmask16 __U, const int __R) 2999 { 3000 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, 3001 (__v16sf) __B, 3002 (__v16sf) __C, 3003 (__mmask16) __U, __R); 3004 } 3005 3006 extern __inline __m512 3007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3008 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3009 __m512 __C, const int __R) 3010 { 3011 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 3012 (__v16sf) __B, 3013 (__v16sf) __C, 3014 (__mmask16) __U, __R); 3015 } 3016 3017 extern __inline __m512d 3018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3019 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 3020 { 3021 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3022 (__v8df) __B, 3023 -(__v8df) __C, 3024 (__mmask8) -1, __R); 3025 } 3026 3027 extern __inline __m512d 3028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3029 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 3030 __m512d __C, const int __R) 3031 { 3032 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3033 (__v8df) __B, 3034 -(__v8df) __C, 3035 (__mmask8) __U, __R); 3036 } 3037 3038 extern __inline __m512d 3039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3040 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, 3041 __mmask8 __U, const int __R) 3042 { 3043 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) 
__A, 3044 (__v8df) __B, 3045 (__v8df) __C, 3046 (__mmask8) __U, __R); 3047 } 3048 3049 extern __inline __m512d 3050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3051 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3052 __m512d __C, const int __R) 3053 { 3054 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 3055 (__v8df) __B, 3056 -(__v8df) __C, 3057 (__mmask8) __U, __R); 3058 } 3059 3060 extern __inline __m512 3061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3062 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 3063 { 3064 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3065 (__v16sf) __B, 3066 -(__v16sf) __C, 3067 (__mmask16) -1, __R); 3068 } 3069 3070 extern __inline __m512 3071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3072 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 3073 __m512 __C, const int __R) 3074 { 3075 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3076 (__v16sf) __B, 3077 -(__v16sf) __C, 3078 (__mmask16) __U, __R); 3079 } 3080 3081 extern __inline __m512 3082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3083 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, 3084 __mmask16 __U, const int __R) 3085 { 3086 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, 3087 (__v16sf) __B, 3088 (__v16sf) __C, 3089 (__mmask16) __U, __R); 3090 } 3091 3092 extern __inline __m512 3093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3094 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3095 __m512 __C, const int __R) 3096 { 3097 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 3098 (__v16sf) __B, 3099 -(__v16sf) __C, 3100 (__mmask16) __U, __R); 3101 } 3102 3103 extern __inline __m512d 3104 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 3105 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 3106 { 3107 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, 3108 (__v8df) __B, 3109 (__v8df) __C, 3110 (__mmask8) -1, __R); 3111 } 3112 3113 extern __inline __m512d 3114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3115 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 3116 __m512d __C, const int __R) 3117 { 3118 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, 3119 (__v8df) __B, 3120 (__v8df) __C, 3121 (__mmask8) __U, __R); 3122 } 3123 3124 extern __inline __m512d 3125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3126 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, 3127 __mmask8 __U, const int __R) 3128 { 3129 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, 3130 (__v8df) __B, 3131 (__v8df) __C, 3132 (__mmask8) __U, __R); 3133 } 3134 3135 extern __inline __m512d 3136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3137 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3138 __m512d __C, const int __R) 3139 { 3140 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 3141 (__v8df) __B, 3142 (__v8df) __C, 3143 (__mmask8) __U, __R); 3144 } 3145 3146 extern __inline __m512 3147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3148 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 3149 { 3150 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, 3151 (__v16sf) __B, 3152 (__v16sf) __C, 3153 (__mmask16) -1, __R); 3154 } 3155 3156 extern __inline __m512 3157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3158 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 3159 __m512 __C, const int __R) 3160 { 3161 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, 3162 (__v16sf) __B, 3163 
(__v16sf) __C, 3164 (__mmask16) __U, __R); 3165 } 3166 3167 extern __inline __m512 3168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3169 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, 3170 __mmask16 __U, const int __R) 3171 { 3172 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, 3173 (__v16sf) __B, 3174 (__v16sf) __C, 3175 (__mmask16) __U, __R); 3176 } 3177 3178 extern __inline __m512 3179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3180 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3181 __m512 __C, const int __R) 3182 { 3183 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 3184 (__v16sf) __B, 3185 (__v16sf) __C, 3186 (__mmask16) __U, __R); 3187 } 3188 3189 extern __inline __m512d 3190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3191 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 3192 { 3193 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, 3194 (__v8df) __B, 3195 -(__v8df) __C, 3196 (__mmask8) -1, __R); 3197 } 3198 3199 extern __inline __m512d 3200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3201 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 3202 __m512d __C, const int __R) 3203 { 3204 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, 3205 (__v8df) __B, 3206 (__v8df) __C, 3207 (__mmask8) __U, __R); 3208 } 3209 3210 extern __inline __m512d 3211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3212 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, 3213 __mmask8 __U, const int __R) 3214 { 3215 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, 3216 (__v8df) __B, 3217 (__v8df) __C, 3218 (__mmask8) __U, __R); 3219 } 3220 3221 extern __inline __m512d 3222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3223 _mm512_maskz_fnmsub_round_pd (__mmask8 
__U, __m512d __A, __m512d __B, 3224 __m512d __C, const int __R) 3225 { 3226 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 3227 (__v8df) __B, 3228 -(__v8df) __C, 3229 (__mmask8) __U, __R); 3230 } 3231 3232 extern __inline __m512 3233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3234 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 3235 { 3236 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, 3237 (__v16sf) __B, 3238 -(__v16sf) __C, 3239 (__mmask16) -1, __R); 3240 } 3241 3242 extern __inline __m512 3243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3244 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 3245 __m512 __C, const int __R) 3246 { 3247 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, 3248 (__v16sf) __B, 3249 (__v16sf) __C, 3250 (__mmask16) __U, __R); 3251 } 3252 3253 extern __inline __m512 3254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3255 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, 3256 __mmask16 __U, const int __R) 3257 { 3258 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, 3259 (__v16sf) __B, 3260 (__v16sf) __C, 3261 (__mmask16) __U, __R); 3262 } 3263 3264 extern __inline __m512 3265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3266 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3267 __m512 __C, const int __R) 3268 { 3269 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 3270 (__v16sf) __B, 3271 -(__v16sf) __C, 3272 (__mmask16) __U, __R); 3273 } 3274 #else 3275 #define _mm512_fmadd_round_pd(A, B, C, R) \ 3276 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R) 3277 3278 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \ 3279 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R) 3280 3281 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \ 3282 
(__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R) 3283 3284 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \ 3285 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R) 3286 3287 #define _mm512_fmadd_round_ps(A, B, C, R) \ 3288 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R) 3289 3290 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \ 3291 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R) 3292 3293 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \ 3294 (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R) 3295 3296 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \ 3297 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R) 3298 3299 #define _mm512_fmsub_round_pd(A, B, C, R) \ 3300 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R) 3301 3302 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \ 3303 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R) 3304 3305 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \ 3306 (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R) 3307 3308 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \ 3309 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R) 3310 3311 #define _mm512_fmsub_round_ps(A, B, C, R) \ 3312 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R) 3313 3314 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \ 3315 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R) 3316 3317 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \ 3318 (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R) 3319 3320 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \ 3321 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R) 3322 3323 #define _mm512_fmaddsub_round_pd(A, B, C, R) \ 3324 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R) 3325 3326 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \ 3327 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R) 3328 3329 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \ 3330 
(__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R) 3331 3332 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \ 3333 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R) 3334 3335 #define _mm512_fmaddsub_round_ps(A, B, C, R) \ 3336 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R) 3337 3338 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \ 3339 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R) 3340 3341 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \ 3342 (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R) 3343 3344 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \ 3345 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R) 3346 3347 #define _mm512_fmsubadd_round_pd(A, B, C, R) \ 3348 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R) 3349 3350 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \ 3351 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R) 3352 3353 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \ 3354 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R) 3355 3356 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \ 3357 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R) 3358 3359 #define _mm512_fmsubadd_round_ps(A, B, C, R) \ 3360 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R) 3361 3362 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \ 3363 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R) 3364 3365 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \ 3366 (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R) 3367 3368 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \ 3369 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R) 3370 3371 #define _mm512_fnmadd_round_pd(A, B, C, R) \ 3372 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R) 3373 3374 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \ 3375 (__m512d)__builtin_ia32_vfnmaddpd512_mask(-(A), B, C, U, R) 3376 3377 
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \ 3378 (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R) 3379 3380 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \ 3381 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R) 3382 3383 #define _mm512_fnmadd_round_ps(A, B, C, R) \ 3384 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R) 3385 3386 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \ 3387 (__m512)__builtin_ia32_vfnmaddps512_mask(-(A), B, C, U, R) 3388 3389 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \ 3390 (__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R) 3391 3392 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \ 3393 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R) 3394 3395 #define _mm512_fnmsub_round_pd(A, B, C, R) \ 3396 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R) 3397 3398 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \ 3399 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R) 3400 3401 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \ 3402 (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R) 3403 3404 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \ 3405 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R) 3406 3407 #define _mm512_fnmsub_round_ps(A, B, C, R) \ 3408 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R) 3409 3410 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \ 3411 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R) 3412 3413 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \ 3414 (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R) 3415 3416 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \ 3417 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R) 3418 #endif 3419 3420 extern __inline __m512i 3421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3422 _mm512_abs_epi64 (__m512i __A) 3423 { 3424 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 3425 
(__v8di) 3426 _mm512_undefined_si512 (), 3427 (__mmask8) -1); 3428 } 3429 3430 extern __inline __m512i 3431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3432 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 3433 { 3434 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 3435 (__v8di) __W, 3436 (__mmask8) __U); 3437 } 3438 3439 extern __inline __m512i 3440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3441 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) 3442 { 3443 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 3444 (__v8di) 3445 _mm512_setzero_si512 (), 3446 (__mmask8) __U); 3447 } 3448 3449 extern __inline __m512i 3450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3451 _mm512_abs_epi32 (__m512i __A) 3452 { 3453 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 3454 (__v16si) 3455 _mm512_undefined_si512 (), 3456 (__mmask16) -1); 3457 } 3458 3459 extern __inline __m512i 3460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3461 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 3462 { 3463 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 3464 (__v16si) __W, 3465 (__mmask16) __U); 3466 } 3467 3468 extern __inline __m512i 3469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3470 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) 3471 { 3472 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 3473 (__v16si) 3474 _mm512_setzero_si512 (), 3475 (__mmask16) __U); 3476 } 3477 3478 extern __inline __m512 3479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3480 _mm512_broadcastss_ps (__m128 __A) 3481 { 3482 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, 3483 (__v16sf) 3484 _mm512_undefined_ps (), 3485 (__mmask16) -1); 3486 } 3487 3488 extern __inline __m512 3489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3490 
_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) 3491 { 3492 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, 3493 (__v16sf) __O, __M); 3494 } 3495 3496 extern __inline __m512 3497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3498 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) 3499 { 3500 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, 3501 (__v16sf) 3502 _mm512_setzero_ps (), 3503 __M); 3504 } 3505 3506 extern __inline __m512d 3507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3508 _mm512_broadcastsd_pd (__m128d __A) 3509 { 3510 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, 3511 (__v8df) 3512 _mm512_undefined_pd (), 3513 (__mmask8) -1); 3514 } 3515 3516 extern __inline __m512d 3517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3518 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) 3519 { 3520 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, 3521 (__v8df) __O, __M); 3522 } 3523 3524 extern __inline __m512d 3525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3526 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 3527 { 3528 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, 3529 (__v8df) 3530 _mm512_setzero_pd (), 3531 __M); 3532 } 3533 3534 extern __inline __m512i 3535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3536 _mm512_broadcastd_epi32 (__m128i __A) 3537 { 3538 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, 3539 (__v16si) 3540 _mm512_undefined_si512 (), 3541 (__mmask16) -1); 3542 } 3543 3544 extern __inline __m512i 3545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3546 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) 3547 { 3548 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, 3549 (__v16si) __O, __M); 3550 } 3551 3552 extern __inline __m512i 3553 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 3554 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) 3555 { 3556 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, 3557 (__v16si) 3558 _mm512_setzero_si512 (), 3559 __M); 3560 } 3561 3562 extern __inline __m512i 3563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3564 _mm512_set1_epi32 (int __A) 3565 { 3566 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, 3567 (__v16si) 3568 _mm512_undefined_si512 (), 3569 (__mmask16)(-1)); 3570 } 3571 3572 extern __inline __m512i 3573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3574 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) 3575 { 3576 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O, 3577 __M); 3578 } 3579 3580 extern __inline __m512i 3581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3582 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A) 3583 { 3584 return (__m512i) 3585 __builtin_ia32_pbroadcastd512_gpr_mask (__A, 3586 (__v16si) _mm512_setzero_si512 (), 3587 __M); 3588 } 3589 3590 extern __inline __m512i 3591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3592 _mm512_broadcastq_epi64 (__m128i __A) 3593 { 3594 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, 3595 (__v8di) 3596 _mm512_undefined_si512 (), 3597 (__mmask8) -1); 3598 } 3599 3600 extern __inline __m512i 3601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3602 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) 3603 { 3604 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, 3605 (__v8di) __O, __M); 3606 } 3607 3608 extern __inline __m512i 3609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3610 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 3611 { 3612 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, 3613 (__v8di) 3614 _mm512_setzero_si512 (), 
3615 __M); 3616 } 3617 3618 extern __inline __m512i 3619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3620 _mm512_set1_epi64 (long long __A) 3621 { 3622 #ifdef TARGET_64BIT 3623 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, 3624 (__v8di) 3625 _mm512_undefined_si512 (), 3626 (__mmask8)(-1)); 3627 #else 3628 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, 3629 (__v8di) 3630 _mm512_undefined_si512 (), 3631 (__mmask8)(-1)); 3632 #endif 3633 } 3634 3635 extern __inline __m512i 3636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3637 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) 3638 { 3639 #ifdef TARGET_64BIT 3640 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O, 3641 __M); 3642 #else 3643 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, (__v8di) __O, 3644 __M); 3645 #endif 3646 } 3647 3648 extern __inline __m512i 3649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3650 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A) 3651 { 3652 #ifdef TARGET_64BIT 3653 return (__m512i) 3654 __builtin_ia32_pbroadcastq512_gpr_mask (__A, 3655 (__v8di) _mm512_setzero_si512 (), 3656 __M); 3657 #else 3658 return (__m512i) 3659 __builtin_ia32_pbroadcastq512_mem_mask (__A, 3660 (__v8di) _mm512_setzero_si512 (), 3661 __M); 3662 #endif 3663 } 3664 3665 extern __inline __m512 3666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3667 _mm512_broadcast_f32x4 (__m128 __A) 3668 { 3669 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, 3670 (__v16sf) 3671 _mm512_undefined_ps (), 3672 (__mmask16) -1); 3673 } 3674 3675 extern __inline __m512 3676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3677 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A) 3678 { 3679 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, 3680 (__v16sf) __O, 3681 __M); 3682 } 3683 3684 extern 
__inline __m512 3685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3686 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A) 3687 { 3688 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, 3689 (__v16sf) 3690 _mm512_setzero_ps (), 3691 __M); 3692 } 3693 3694 extern __inline __m512i 3695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3696 _mm512_broadcast_i32x4 (__m128i __A) 3697 { 3698 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, 3699 (__v16si) 3700 _mm512_undefined_si512 (), 3701 (__mmask16) -1); 3702 } 3703 3704 extern __inline __m512i 3705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3706 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A) 3707 { 3708 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, 3709 (__v16si) __O, 3710 __M); 3711 } 3712 3713 extern __inline __m512i 3714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3715 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A) 3716 { 3717 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, 3718 (__v16si) 3719 _mm512_setzero_si512 (), 3720 __M); 3721 } 3722 3723 extern __inline __m512d 3724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3725 _mm512_broadcast_f64x4 (__m256d __A) 3726 { 3727 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, 3728 (__v8df) 3729 _mm512_undefined_pd (), 3730 (__mmask8) -1); 3731 } 3732 3733 extern __inline __m512d 3734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3735 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A) 3736 { 3737 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, 3738 (__v8df) __O, 3739 __M); 3740 } 3741 3742 extern __inline __m512d 3743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3744 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A) 3745 { 3746 return (__m512d) 
__builtin_ia32_broadcastf64x4_512 ((__v4df) __A, 3747 (__v8df) 3748 _mm512_setzero_pd (), 3749 __M); 3750 } 3751 3752 extern __inline __m512i 3753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3754 _mm512_broadcast_i64x4 (__m256i __A) 3755 { 3756 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, 3757 (__v8di) 3758 _mm512_undefined_si512 (), 3759 (__mmask8) -1); 3760 } 3761 3762 extern __inline __m512i 3763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3764 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A) 3765 { 3766 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, 3767 (__v8di) __O, 3768 __M); 3769 } 3770 3771 extern __inline __m512i 3772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3773 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A) 3774 { 3775 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, 3776 (__v8di) 3777 _mm512_setzero_si512 (), 3778 __M); 3779 } 3780 3781 typedef enum 3782 { 3783 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, 3784 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, 3785 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08, 3786 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B, 3787 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, 3788 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, 3789 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14, 3790 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17, 3791 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, 3792 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, 3793 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20, 3794 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23, 3795 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, 3796 _MM_PERM_ACBD = 0x27, 
_MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, 3797 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C, 3798 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F, 3799 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, 3800 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, 3801 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38, 3802 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B, 3803 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, 3804 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, 3805 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44, 3806 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47, 3807 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, 3808 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, 3809 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50, 3810 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53, 3811 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, 3812 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, 3813 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C, 3814 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F, 3815 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, 3816 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, 3817 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68, 3818 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B, 3819 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, 3820 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, 3821 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74, 3822 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77, 3823 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, 3824 _MM_PERM_BDCD = 0x7B, 
_MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, 3825 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80, 3826 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83, 3827 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, 3828 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, 3829 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C, 3830 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F, 3831 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, 3832 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, 3833 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98, 3834 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B, 3835 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, 3836 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, 3837 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4, 3838 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7, 3839 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, 3840 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, 3841 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0, 3842 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3, 3843 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, 3844 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, 3845 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC, 3846 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF, 3847 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, 3848 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, 3849 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8, 3850 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB, 3851 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, 3852 _MM_PERM_DADD = 0xCF, 
_MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, 3853 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4, 3854 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7, 3855 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, 3856 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, 3857 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0, 3858 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3, 3859 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, 3860 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, 3861 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC, 3862 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF, 3863 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, 3864 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, 3865 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8, 3866 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB, 3867 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, 3868 _MM_PERM_DDDD = 0xFF 3869 } _MM_PERM_ENUM; 3870 3871 #ifdef __OPTIMIZE__ 3872 extern __inline __m512i 3873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3874 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask) 3875 { 3876 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A, 3877 __mask, 3878 (__v16si) 3879 _mm512_undefined_si512 (), 3880 (__mmask16) -1); 3881 } 3882 3883 extern __inline __m512i 3884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3885 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 3886 _MM_PERM_ENUM __mask) 3887 { 3888 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A, 3889 __mask, 3890 (__v16si) __W, 3891 (__mmask16) __U); 3892 } 3893 3894 extern __inline __m512i 3895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3896 _mm512_maskz_shuffle_epi32 
/* Continuation of _mm512_maskz_shuffle_epi32: zero-masking 32-bit element
   shuffle driven by an _MM_PERM_ENUM immediate (elements with a clear
   mask bit come from _mm512_setzero_si512).  */
(__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
{
  return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A, __mask,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}

/* Shuffle 128-bit lanes of 64-bit integer elements from __A/__B as
   selected by the immediate __imm.  Plain form passes an all-ones mask
   and an undefined pass-through vector to the masked builtin.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
						   (__v8di) __B, __imm,
						   (__v8di)
						   _mm512_undefined_si512 (),
						   (__mmask8) -1);
}

/* Merge-masking variant: result elements with a clear bit in __U are
   taken from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
			   __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
						   (__v8di) __B, __imm,
						   (__v8di) __W,
						   (__mmask8) __U);
}

/* Zero-masking variant: result elements with a clear bit in __U are zero.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
			    const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
						   (__v8di) __B, __imm,
						   (__v8di)
						   _mm512_setzero_si512 (),
						   (__mmask8) __U);
}

/* Same 128-bit lane shuffle, viewed as 32-bit integer elements
   (16-bit write mask).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
						   (__v16si) __B, __imm,
						   (__v16si)
						   _mm512_undefined_si512 (),
						   (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
			   __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
						   (__v16si) __B, __imm,
						   (__v16si) __W,
						   (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
			    const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
						   (__v16si) __B, __imm,
						   (__v16si)
						   _mm512_setzero_si512 (),
						   (__mmask16) __U);
}

/* 128-bit lane shuffle of double-precision elements.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
{
  return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
						   (__v8df) __B, __imm,
						   (__v8df)
						   _mm512_undefined_pd (),
						   (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
			   __m512d __B, const int __imm)
{
  return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
						   (__v8df) __B, __imm,
						   (__v8df) __W,
						   (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
			    const int __imm)
{
  return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
						   (__v8df) __B, __imm,
						   (__v8df)
						   _mm512_setzero_pd (),
						   (__mmask8) __U);
}

/* 128-bit lane shuffle of single-precision elements.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
{
  return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
						  (__v16sf) __B, __imm,
						  (__v16sf)
						  _mm512_undefined_ps (),
						  (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
			   __m512 __B, const int __imm)
{
  return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
						  (__v16sf) __B, __imm,
						  (__v16sf) __W,
						  (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
			    const int __imm)
{
  return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
						  (__v16sf) __B, __imm,
						  (__v16sf)
						  _mm512_setzero_ps (),
						  (__mmask16) __U);
}

#else
/* Non-__OPTIMIZE__ branch: expand as macros so the shuffle control
   reaches the builtin as a literal immediate even without inlining.
   Each macro mirrors the function form above exactly.  */
#define _mm512_shuffle_epi32(X, C)                                      \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_si512 (),\
    (__mmask16)-1))

#define _mm512_mask_shuffle_epi32(W, U, X, C)                           \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_shuffle_epi32(U, X, C)                             \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))

#define _mm512_shuffle_i64x2(X, Y, C)                                   \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),      \
    (__v8di)(__m512i)(Y), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_si512 (),\
    (__mmask8)-1))

#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C)                        \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),      \
    (__v8di)(__m512i)(Y), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_shuffle_i64x2(U, X, Y, C)                          \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),      \
    (__v8di)(__m512i)(Y), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))

#define _mm512_shuffle_i32x4(X, Y, C)                                   \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),     \
    (__v16si)(__m512i)(Y), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_si512 (),\
    (__mmask16)-1))

#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C)                        \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),     \
    (__v16si)(__m512i)(Y), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_shuffle_i32x4(U, X, Y, C)                          \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),     \
    (__v16si)(__m512i)(Y), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))

#define _mm512_shuffle_f64x2(X, Y, C)                                   \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),      \
    (__v8df)(__m512d)(Y), (int)(C),\
    (__v8df)(__m512d)_mm512_undefined_pd(),\
    (__mmask8)-1))

#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C)                        \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),      \
    (__v8df)(__m512d)(Y), (int)(C),\
    (__v8df)(__m512d)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_shuffle_f64x2(U, X, Y, C)                          \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),      \
    (__v8df)(__m512d)(Y), (int)(C),\
    (__v8df)(__m512d)_mm512_setzero_pd(),\
    (__mmask8)(U)))

#define _mm512_shuffle_f32x4(X, Y, C)                                   \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),       \
    (__v16sf)(__m512)(Y), (int)(C),\
    (__v16sf)(__m512)_mm512_undefined_ps(),\
    (__mmask16)-1))

#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C)                        \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),       \
    (__v16sf)(__m512)(Y), (int)(C),\
    (__v16sf)(__m512)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_shuffle_f32x4(U, X, Y, C)                          \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),       \
    (__v16sf)(__m512)(Y), (int)(C),\
    (__v16sf)(__m512)_mm512_setzero_ps(),\
    (__mmask16)(U)))
#endif

/* Variable per-element rotates (builtin names: prolv/prorv = rotate
   left/right variable).  Rotate counts come element-wise from __B.
   Plain / merge-masked / zero-masked trios follow throughout.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_si512 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rorv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_si512 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}

/* 64-bit element versions (8-bit write mask).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rolv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_undefined_si512 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rorv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_undefined_si512 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}

/* Conversions with an explicit rounding-mode/SAE operand __R.  As with
   the shuffles, the function forms need __OPTIMIZE__ so __R folds to a
   constant; otherwise the macro forms below are used.  "cvtt" variants
   truncate; "cvt" variants round according to __R.  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
						     (__v8si)
						     _mm256_undefined_si256 (),
						     (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
						     (__v8si) __W,
						     (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
						     (__v8si)
						     _mm256_setzero_si256 (),
						     (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
						      (__v8si)
						      _mm256_undefined_si256 (),
						      (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
						      (__v8si) __W,
						      (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
						      (__v8si)
						      _mm256_setzero_si256 (),
						      (__mmask8) __U, __R);
}
#else
#define _mm512_cvtt_roundpd_epi32(A, B) \
    ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))

#define _mm512_cvtt_roundpd_epu32(A, B) \
    ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
#endif

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
						    (__v8si)
						    _mm256_undefined_si256 (),
						    (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
						    (__v8si) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
						    (__v8si)
						    _mm256_setzero_si256 (),
						    (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
						     (__v8si)
						     _mm256_undefined_si256 (),
						     (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
						     (__v8si) __W,
						     (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
						     (__v8si)
						     _mm256_setzero_si256 (),
						     (__mmask8) __U, __R);
}
#else
#define _mm512_cvt_roundpd_epi32(A, B) \
    ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))

#define _mm512_cvt_roundpd_epu32(A, B) \
    ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
#endif

/* float -> int32/uint32 conversions (full 512-bit result).  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
						     (__v16si)
						     _mm512_undefined_si512 (),
						     (__mmask16) -1, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
				const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
						     (__v16si) __W,
						     (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
						     (__v16si)
						     _mm512_setzero_si512 (),
						     (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
						      (__v16si)
						      _mm512_undefined_si512 (),
						      (__mmask16) -1, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
				const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
						      (__v16si) __W,
						      (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
						      (__v16si)
						      _mm512_setzero_si512 (),
						      (__mmask16) __U, __R);
}
#else
#define _mm512_cvtt_roundps_epi32(A, B) \
    ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))

#define _mm512_cvtt_roundps_epu32(A, B) \
    ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
#endif

#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
						    (__v16si)
						    _mm512_undefined_si512 (),
						    (__mmask16) -1, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
			       const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
						    (__v16si) __W,
						    (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
						    (__v16si)
						    _mm512_setzero_si512 (),
						    (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
						     (__v16si)
						     _mm512_undefined_si512 (),
						     (__mmask16) -1, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
			       const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
						     (__v16si) __W,
						     (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
						     (__v16si)
						     _mm512_setzero_si512 (),
						     (__mmask16) __U, __R);
}
#else
#define _mm512_cvt_roundps_epi32(A, B) \
    ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))

#define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))

#define _mm512_cvt_roundps_epu32(A, B) \
    ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B))

#define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
#endif

/* Scalar unsigned 32-bit -> double conversion into the low element of
   __A (no rounding operand: the result is exact).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtu32_sd (__m128d __A, unsigned __B)
{
  return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
}

/* 64-bit scalar converters are only available on x86-64.  */
#ifdef __x86_64__
#ifdef __OPTIMIZE__
/* Scalar 64-bit integer -> double conversions with rounding control.
   The "i64" and "si64" spellings are aliases for the same builtin.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
}
#else
#define _mm_cvt_roundu64_sd(A, B, C)   \
    (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C)

#define _mm_cvt_roundi64_sd(A, B, C)   \
    (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)

#define _mm_cvt_roundsi64_sd(A, B, C)   \
    (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
#endif

#endif

/* Scalar 32-bit integer -> float conversions with rounding control.  */
#ifdef __OPTIMIZE__
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
}
#else
#define _mm_cvt_roundu32_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtusi2ss32(A, B, C)

#define _mm_cvt_roundi32_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)

#define _mm_cvt_roundsi32_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
#endif

/* Scalar 64-bit integer -> float conversions (x86-64 only).  */
#ifdef __x86_64__
#ifdef __OPTIMIZE__
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
}
#else
#define _mm_cvt_roundu64_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtusi2ss64(A, B, C)

#define _mm_cvt_roundi64_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)

#define _mm_cvt_roundsi64_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
#endif

#endif

/* Down-convert 16 x int32 to 16 x int8 (truncating; pmovdb).  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
						  (__v16qi)
						  _mm_undefined_si128 (),
						  (__mmask16) -1);
}

/* Store form: down-convert and write the selected bytes directly to
   memory at __P under mask __M.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}
extern __inline __m128i 4708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4709 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 4710 { 4711 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 4712 (__v16qi) __O, __M); 4713 } 4714 4715 extern __inline __m128i 4716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4717 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A) 4718 { 4719 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 4720 (__v16qi) 4721 _mm_setzero_si128 (), 4722 __M); 4723 } 4724 4725 extern __inline __m128i 4726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4727 _mm512_cvtsepi32_epi8 (__m512i __A) 4728 { 4729 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 4730 (__v16qi) 4731 _mm_undefined_si128 (), 4732 (__mmask16) -1); 4733 } 4734 4735 extern __inline void 4736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4737 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 4738 { 4739 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 4740 } 4741 4742 extern __inline __m128i 4743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4744 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 4745 { 4746 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 4747 (__v16qi) __O, __M); 4748 } 4749 4750 extern __inline __m128i 4751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4752 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A) 4753 { 4754 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 4755 (__v16qi) 4756 _mm_setzero_si128 (), 4757 __M); 4758 } 4759 4760 extern __inline __m128i 4761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4762 _mm512_cvtusepi32_epi8 (__m512i __A) 4763 { 4764 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 4765 (__v16qi) 4766 
_mm_undefined_si128 (), 4767 (__mmask16) -1); 4768 } 4769 4770 extern __inline void 4771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4772 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 4773 { 4774 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 4775 } 4776 4777 extern __inline __m128i 4778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4779 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 4780 { 4781 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 4782 (__v16qi) __O, 4783 __M); 4784 } 4785 4786 extern __inline __m128i 4787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4788 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A) 4789 { 4790 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 4791 (__v16qi) 4792 _mm_setzero_si128 (), 4793 __M); 4794 } 4795 4796 extern __inline __m256i 4797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4798 _mm512_cvtepi32_epi16 (__m512i __A) 4799 { 4800 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 4801 (__v16hi) 4802 _mm256_undefined_si256 (), 4803 (__mmask16) -1); 4804 } 4805 4806 extern __inline void 4807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4808 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A) 4809 { 4810 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M); 4811 } 4812 4813 extern __inline __m256i 4814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4815 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 4816 { 4817 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 4818 (__v16hi) __O, __M); 4819 } 4820 4821 extern __inline __m256i 4822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4823 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A) 4824 { 4825 return (__m256i) 
__builtin_ia32_pmovdw512_mask ((__v16si) __A, 4826 (__v16hi) 4827 _mm256_setzero_si256 (), 4828 __M); 4829 } 4830 4831 extern __inline __m256i 4832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4833 _mm512_cvtsepi32_epi16 (__m512i __A) 4834 { 4835 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 4836 (__v16hi) 4837 _mm256_undefined_si256 (), 4838 (__mmask16) -1); 4839 } 4840 4841 extern __inline void 4842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4843 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) 4844 { 4845 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 4846 } 4847 4848 extern __inline __m256i 4849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4850 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 4851 { 4852 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 4853 (__v16hi) __O, __M); 4854 } 4855 4856 extern __inline __m256i 4857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4858 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A) 4859 { 4860 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 4861 (__v16hi) 4862 _mm256_setzero_si256 (), 4863 __M); 4864 } 4865 4866 extern __inline __m256i 4867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4868 _mm512_cvtusepi32_epi16 (__m512i __A) 4869 { 4870 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 4871 (__v16hi) 4872 _mm256_undefined_si256 (), 4873 (__mmask16) -1); 4874 } 4875 4876 extern __inline void 4877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4878 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) 4879 { 4880 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 4881 } 4882 4883 extern __inline __m256i 4884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4885 
_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 4886 { 4887 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 4888 (__v16hi) __O, 4889 __M); 4890 } 4891 4892 extern __inline __m256i 4893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4894 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A) 4895 { 4896 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 4897 (__v16hi) 4898 _mm256_setzero_si256 (), 4899 __M); 4900 } 4901 4902 extern __inline __m256i 4903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4904 _mm512_cvtepi64_epi32 (__m512i __A) 4905 { 4906 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 4907 (__v8si) 4908 _mm256_undefined_si256 (), 4909 (__mmask8) -1); 4910 } 4911 4912 extern __inline void 4913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4914 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) 4915 { 4916 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 4917 } 4918 4919 extern __inline __m256i 4920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4921 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 4922 { 4923 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 4924 (__v8si) __O, __M); 4925 } 4926 4927 extern __inline __m256i 4928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4929 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A) 4930 { 4931 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 4932 (__v8si) 4933 _mm256_setzero_si256 (), 4934 __M); 4935 } 4936 4937 extern __inline __m256i 4938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4939 _mm512_cvtsepi64_epi32 (__m512i __A) 4940 { 4941 __v8si __O; 4942 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 4943 (__v8si) 4944 _mm256_undefined_si256 (), 4945 (__mmask8) -1); 4946 } 4947 4948 extern __inline void 
4949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4950 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A) 4951 { 4952 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 4953 } 4954 4955 extern __inline __m256i 4956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4957 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 4958 { 4959 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 4960 (__v8si) __O, __M); 4961 } 4962 4963 extern __inline __m256i 4964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4965 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A) 4966 { 4967 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 4968 (__v8si) 4969 _mm256_setzero_si256 (), 4970 __M); 4971 } 4972 4973 extern __inline __m256i 4974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4975 _mm512_cvtusepi64_epi32 (__m512i __A) 4976 { 4977 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 4978 (__v8si) 4979 _mm256_undefined_si256 (), 4980 (__mmask8) -1); 4981 } 4982 4983 extern __inline void 4984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4985 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) 4986 { 4987 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M); 4988 } 4989 4990 extern __inline __m256i 4991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4992 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 4993 { 4994 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 4995 (__v8si) __O, __M); 4996 } 4997 4998 extern __inline __m256i 4999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5000 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A) 5001 { 5002 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 5003 (__v8si) 5004 _mm256_setzero_si256 (), 5005 
						    __M);
}

/* VPMOVQW: truncate packed 64-bit integers in __A to 16-bit integers.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
						  (__v8hi)
						  _mm_undefined_si128 (),
						  (__mmask8) -1);
}

/* Store form: masked converted elements written to unaligned memory.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
						  (__v8hi) __O, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
						  (__v8hi)
						  _mm_setzero_si128 (),
						  __M);
}

/* VPMOVSQW: 64->16-bit down-conversion with signed saturation.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
						   (__v8hi)
						   _mm_undefined_si128 (),
						   (__mmask8) -1);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
						   (__v8hi) __O, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
						   (__v8hi)
						   _mm_setzero_si128 (),
						   __M);
}

/* VPMOVUSQW: 64->16-bit down-conversion with unsigned saturation.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtusepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
						    (__v8hi)
						    _mm_undefined_si128 (),
						    (__mmask8) -1);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
						    (__v8hi) __O, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
						    (__v8hi)
						    _mm_setzero_si128 (),
						    __M);
}

/* VPMOVQB: truncate packed 64-bit integers in __A to 8-bit integers.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
						  (__v16qi)
						  _mm_undefined_si128 (),
						  (__mmask8) -1);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
						  (__v16qi) __O, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
						  (__v16qi)
						  _mm_setzero_si128 (),
						  __M);
}

/* VPMOVSQB: 64->8-bit down-conversion with signed saturation.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
						   (__v16qi)
						   _mm_undefined_si128 (),
						   (__mmask8) -1);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
						   (__v16qi) __O, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
						   (__v16qi)
						   _mm_setzero_si128 (),
						   __M);
}

/* VPMOVUSQB: 64->8-bit down-conversion with unsigned saturation.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtusepi64_epi8 (__m512i __A)
{
  return
    (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
					       (__v16qi)
					       _mm_undefined_si128 (),
					       (__mmask8) -1);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
						    (__v16qi) __O,
						    __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
						    (__v16qi)
						    _mm_setzero_si128 (),
						    __M);
}

/* VCVTDQ2PD: convert packed signed 32-bit integers to double precision
   (no rounding mode needed -- every int32 is exactly representable).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi32_pd (__m256i __A)
{
  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
						    (__v8df)
						    _mm512_undefined_pd (),
						    (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
{
  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
						    (__v8df) __W,
						    (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
{
  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) __U);
}

/* VCVTUDQ2PD: convert packed unsigned 32-bit integers to double.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__,
		__artificial__))
_mm512_cvtepu32_pd (__m256i __A)
{
  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
						     (__v8df)
						     _mm512_undefined_pd (),
						     (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
{
  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
						     (__v8df) __W,
						     (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
{
  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
						     (__v8df)
						     _mm512_setzero_pd (),
						     (__mmask8) __U);
}

/* int32 -> float32 conversions taking an explicit rounding mode __R.
   The inline forms are only provided under __OPTIMIZE__ (see the
   matching #else below, which supplies macro fallbacks), presumably so
   __R reaches the builtin as a compile-time constant.  */
#ifdef __OPTIMIZE__
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
						   (__v16sf)
						   _mm512_undefined_ps (),
						   (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
			       const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
						   (__v16sf) __W,
						   (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
						   (__v16sf)
						   _mm512_setzero_ps (),
						   (__mmask16) __U, __R);
}

/* Unsigned variant (VCVTUDQ2PS).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
						    (__v16sf)
						    _mm512_undefined_ps (),
						    (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
			       const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
						    (__v16sf) __W,
						    (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    (__mmask16) __U, __R);
}

#else
/* Macro fallbacks for the rounding conversions when not optimizing.  */
#define _mm512_cvt_roundepi32_ps(A, B) \
    (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
    (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B)

#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
    (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)

#define _mm512_cvt_roundepu32_ps(A, B) \
    (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
    (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), W, U, B)

#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
    (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
#endif

/* 256-bit lane extraction from 512-bit vectors; __imm selects the lane
   and must be a compile-time constant (hence the __OPTIMIZE__ guard
   with macro fallbacks below).  */
#ifdef __OPTIMIZE__
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_extractf64x4_pd (__m512d __A, const int __imm)
{
  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
						     __imm,
						     (__v4df)
						     _mm256_undefined_pd (),
						     (__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
			     const int __imm)
{
  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
						     __imm,
						     (__v4df) __W,
						     (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
{
  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
						     __imm,
						     (__v4df)
						     _mm256_setzero_pd (),
						     (__mmask8) __U);
}

/* 128-bit lane extraction of floats (VEXTRACTF32X4).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_extractf32x4_ps (__m512 __A, const int __imm)
{
  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
						    __imm,
						    (__v4sf)
						    _mm_undefined_ps (),
						    (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
			     const int __imm)
{
  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
						    __imm,
						    (__v4sf) __W,
						    (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
{
  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
						    __imm,
						    (__v4sf)
						    _mm_setzero_ps (),
						    (__mmask8) __U);
}

/* Integer 256-bit lane extraction (VEXTRACTI64X4).  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
{
  return (__m256i) __builtin_ia32_extracti64x4_mask
    ((__v8di) __A,
     __imm,
     (__v4di)
     _mm256_undefined_si256 (),
     (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
				const int __imm)
{
  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
						     __imm,
						     (__v4di) __W,
						     (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
{
  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
						     __imm,
						     (__v4di)
						     _mm256_setzero_si256 (),
						     (__mmask8) __U);
}

/* Integer 128-bit lane extraction (VEXTRACTI32X4).  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
{
  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
						     __imm,
						     (__v4si)
						     _mm_undefined_si128 (),
						     (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
				const int __imm)
{
  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
						     __imm,
						     (__v4si) __W,
						     (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
{
  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
						     __imm,
						     (__v4si)
						     _mm_setzero_si128 (),
						     (__mmask8) __U);
}
#else
/* Macro fallbacks for the extract intrinsics when not optimizing.  */
#define _mm512_extractf64x4_pd(X, C) \
  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
    (int) (C),\
    (__v4df)(__m256d)_mm256_undefined_pd(),\
    (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, X, C) \
  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
    (int) (C),\
    (__v4df)(__m256d)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, X, C) \
  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
    (int) (C),\
    (__v4df)(__m256d)_mm256_setzero_pd(),\
    (__mmask8)(U)))

#define _mm512_extractf32x4_ps(X, C) \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
    (int) (C),\
    (__v4sf)(__m128)_mm_undefined_ps(),\
    (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
    (int) (C),\
    (__v4sf)(__m128)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
    (int) (C),\
    (__v4sf)(__m128)_mm_setzero_ps(),\
    (__mmask8)(U)))

#define _mm512_extracti64x4_epi64(X, C) \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
    (int) (C),\
    (__v4di)(__m256i)_mm256_undefined_si256 (),\
    (__mmask8)-1))

#define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
    (int) (C),\
    (__v4di)(__m256i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_extracti64x4_epi64(U, X, C) \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
    (int) (C),\
    (__v4di)(__m256i)_mm256_setzero_si256 (),\
    (__mmask8)(U)))

#define _mm512_extracti32x4_epi32(X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
    (int) (C),\
    (__v4si)(__m128i)_mm_undefined_si128 (),\
    (__mmask8)-1))

#define _mm512_mask_extracti32x4_epi32(W, U, X, \
C) \
  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
    (int) (C),\
    (__v4si)(__m128i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_extracti32x4_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
    (int) (C),\
    (__v4si)(__m128i)_mm_setzero_si128 (),\
    (__mmask8)(U)))
#endif

/* 128/256-bit lane insertion into 512-bit vectors; __imm selects the
   destination lane and must be a compile-time constant.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
{
  /* __A doubles as the pass-through operand under an all-ones mask.  */
  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
						    (__v4si) __B,
						    __imm,
						    (__v16si) __A, -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
{
  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
						   (__v4sf) __B,
						   __imm,
						   (__v16sf) __A, -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
						    (__v4di) __B,
						    __imm,
						    (__v8di)
						    _mm512_undefined_si512 (),
						    (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
			 __m256i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
						    (__v4di) __B,
						    __imm,
						    (__v8di) __W,
						    (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
			  const int __imm)
{
  return (__m512i)
    __builtin_ia32_inserti64x4_mask ((__v8di) __A,
				     (__v4di) __B,
				     __imm,
				     (__v8di)
				     _mm512_setzero_si512 (),
				     (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
{
  return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
						    (__v4df) __B,
						    __imm,
						    (__v8df)
						    _mm512_undefined_pd (),
						    (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
			 __m256d __B, const int __imm)
{
  return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
						    (__v4df) __B,
						    __imm,
						    (__v8df) __W,
						    (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
			  const int __imm)
{
  return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
						    (__v4df) __B,
						    __imm,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) __U);
}
#else
/* Macro fallbacks for the insert intrinsics when not optimizing.  */
#define _mm512_insertf32x4(X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))

#define _mm512_inserti32x4(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))

#define _mm512_insertf64x4(X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
    (__v4df)(__m256d) (Y), (int) (C), \
    (__v8df)(__m512d)_mm512_undefined_pd(), \
    (__mmask8)-1))

#define _mm512_mask_insertf64x4(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x4_mask \
    ((__v8df)(__m512d) (X), \
    (__v4df)(__m256d) (Y), (int) (C), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U)))

#define _mm512_maskz_insertf64x4(U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
    (__v4df)(__m256d) (Y), (int) (C), \
    (__v8df)(__m512d)_mm512_setzero_pd(), \
    (__mmask8)(U)))

#define _mm512_inserti64x4(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
    (__v4di)(__m256i) (Y), (int) (C), \
    (__v8di)(__m512i)_mm512_undefined_si512 (), \
    (__mmask8)-1))

#define _mm512_mask_inserti64x4(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
    (__v4di)(__m256i) (Y), (int) (C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_inserti64x4(U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
    (__v4di)(__m256i) (Y), (int) (C), \
    (__v8di)(__m512i)_mm512_setzero_si512 (), \
    (__mmask8)(U)))
#endif

/* Unaligned load of 8 doubles; __P has no alignment requirement.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_loadu_pd (void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
						   (__v8df)
						   _mm512_undefined_pd (),
						   (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
						   (__v8df) __W,
						   (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
						   (__v8df)
						   _mm512_setzero_pd (),
						   (__mmask8) __U);
}
/* Unaligned stores and loads of 512-bit floating-point vectors.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_storeu_pd (void *__P, __m512d __A)
{
  __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
				   (__mmask8) -1);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
				   (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_loadu_ps (void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
						  (__v16sf)
						  _mm512_undefined_ps (),
						  (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
						  (__v16sf) __W,
						  (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
						  (__v16sf)
						  _mm512_setzero_ps (),
						  (__mmask16) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_storeu_ps (void *__P, __m512 __A)
{
  __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
				   (__mmask16) -1);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
				   (__mmask16) __U);
}
/* Unaligned masked loads/stores of 512-bit integer vectors.  Note the
   unmasked 64-bit load has no wrapper here; the generic entry points
   are _mm512_loadu_si512/_mm512_storeu_si512 below.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
						     (__v8di) __W,
						     (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
						     (__v8di)
						     _mm512_setzero_si512 (),
						     (__mmask8) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A,
				     (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_loadu_si512 (void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
						     (__v16si)
						     _mm512_setzero_si512 (),
						     (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
						     (__v16si) __W,
						     (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
						     (__v16si)
						     _mm512_setzero_si512 (),
						     (__mmask16) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_storeu_si512
(void *__P, __m512i __A)
{
  __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
				     (__mmask16) -1);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
				     (__mmask16) __U);
}

/* VPERMILPD with variable control __C: per-element permute of the
   doubles in __A.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutevar_pd (__m512d __A, __m512i __C)
{
  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
							(__v8di) __C,
							(__v8df)
							_mm512_undefined_pd (),
							(__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
							(__v8di) __C,
							(__v8df) __W,
							(__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
							(__v8di) __C,
							(__v8df)
							_mm512_setzero_pd (),
							(__mmask8) __U);
}

/* VPERMILPS with variable control __C: per-element permute of the
   floats in __A.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutevar_ps (__m512 __A, __m512i __C)
{
  return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
						       (__v16si) __C,
						       (__v16sf)
						       _mm512_undefined_ps (),
						       (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)
    __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
					 (__v16si) __C,
					 (__v16sf) __W,
					 (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
						       (__v16si) __C,
						       (__v16sf)
						       _mm512_setzero_ps (),
						       (__mmask16) __U);
}

/* Two-source permute of 64-bit elements: indices in __I select from
   the concatenation of __A and __B.  The mask/mask2 variants differ in
   which operand survives in masked-off lanes (vpermt2var keeps __A,
   vpermi2var keeps the index operand __I).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
						       /* idx */ ,
						       (__v8di) __A,
						       (__v8di) __B,
						       (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
				__m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
						       /* idx */ ,
						       (__v8di) __A,
						       (__v8di) __B,
						       (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
				 __mmask8 __U, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
						       (__v8di) __I
						       /* idx */ ,
						       (__v8di) __B,
						       (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
				 __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
							/* idx */ ,
							(__v8di) __A,
							(__v8di) __B,
							(__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__,
		__artificial__))
_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
						       /* idx */ ,
						       (__v16si) __A,
						       (__v16si) __B,
						       (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
				__m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
						       /* idx */ ,
						       (__v16si) __A,
						       (__v16si) __B,
						       (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
				 __mmask16 __U, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
						       (__v16si) __I
						       /* idx */ ,
						       (__v16si) __B,
						       (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
				 __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
							/* idx */ ,
							(__v16si) __A,
							(__v16si) __B,
							(__mmask16) __U);
}

/* Two-source permute of doubles; index vector __I is integer-typed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
							/* idx */ ,
							(__v8df) __A,
							(__v8df) __B,
							(__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
			     __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
							/* idx */ ,
(__v8df) __A, 6044 (__v8df) __B, 6045 (__mmask8) __U); 6046 } 6047 6048 extern __inline __m512d 6049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6050 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U, 6051 __m512d __B) 6052 { 6053 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A, 6054 (__v8di) __I 6055 /* idx */ , 6056 (__v8df) __B, 6057 (__mmask8) __U); 6058 } 6059 6060 extern __inline __m512d 6061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6062 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I, 6063 __m512d __B) 6064 { 6065 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I 6066 /* idx */ , 6067 (__v8df) __A, 6068 (__v8df) __B, 6069 (__mmask8) __U); 6070 } 6071 6072 extern __inline __m512 6073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6074 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B) 6075 { 6076 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I 6077 /* idx */ , 6078 (__v16sf) __A, 6079 (__v16sf) __B, 6080 (__mmask16) -1); 6081 } 6082 6083 extern __inline __m512 6084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6085 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) 6086 { 6087 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I 6088 /* idx */ , 6089 (__v16sf) __A, 6090 (__v16sf) __B, 6091 (__mmask16) __U); 6092 } 6093 6094 extern __inline __m512 6095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6096 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U, 6097 __m512 __B) 6098 { 6099 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A, 6100 (__v16si) __I 6101 /* idx */ , 6102 (__v16sf) __B, 6103 (__mmask16) __U); 6104 } 6105 6106 extern __inline __m512 6107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6108 _mm512_maskz_permutex2var_ps (__mmask16 
__U, __m512 __A, __m512i __I, 6109 __m512 __B) 6110 { 6111 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I 6112 /* idx */ , 6113 (__v16sf) __A, 6114 (__v16sf) __B, 6115 (__mmask16) __U); 6116 } 6117 6118 #ifdef __OPTIMIZE__ 6119 extern __inline __m512d 6120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6121 _mm512_permute_pd (__m512d __X, const int __C) 6122 { 6123 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C, 6124 (__v8df) 6125 _mm512_undefined_pd (), 6126 (__mmask8) -1); 6127 } 6128 6129 extern __inline __m512d 6130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6131 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C) 6132 { 6133 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C, 6134 (__v8df) __W, 6135 (__mmask8) __U); 6136 } 6137 6138 extern __inline __m512d 6139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6140 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C) 6141 { 6142 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C, 6143 (__v8df) 6144 _mm512_setzero_pd (), 6145 (__mmask8) __U); 6146 } 6147 6148 extern __inline __m512 6149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6150 _mm512_permute_ps (__m512 __X, const int __C) 6151 { 6152 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C, 6153 (__v16sf) 6154 _mm512_undefined_ps (), 6155 (__mmask16) -1); 6156 } 6157 6158 extern __inline __m512 6159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6160 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C) 6161 { 6162 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C, 6163 (__v16sf) __W, 6164 (__mmask16) __U); 6165 } 6166 6167 extern __inline __m512 6168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6169 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, 
const int __C)
{
  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    (__mmask16) __U);
}
#else
/* Non-optimizing builds: macro forms so the immediate reaches the
   builtin as a literal constant.  */
#define _mm512_permute_pd(X, C)							\
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C),	\
					       (__v8df)(__m512d)_mm512_undefined_pd(),\
					       (__mmask8)(-1)))

#define _mm512_mask_permute_pd(W, U, X, C)					\
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C),	\
					       (__v8df)(__m512d)(W),		\
					       (__mmask8)(U)))

#define _mm512_maskz_permute_pd(U, X, C)					\
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C),	\
					       (__v8df)(__m512d)_mm512_setzero_pd(),\
					       (__mmask8)(U)))

#define _mm512_permute_ps(X, C)							\
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C),	\
					      (__v16sf)(__m512)_mm512_undefined_ps(),\
					      (__mmask16)(-1)))

#define _mm512_mask_permute_ps(W, U, X, C)					\
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C),	\
					      (__v16sf)(__m512)(W),		\
					      (__mmask16)(U)))

#define _mm512_maskz_permute_ps(U, X, C)					\
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C),	\
					      (__v16sf)(__m512)_mm512_setzero_ps(),\
					      (__mmask16)(U)))
#endif

#ifdef __OPTIMIZE__
/* Cross-lane permute of qword elements selected by immediate __I
   (VPERMQ).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutex_epi64 (__m512i __X, const int __I)
{
  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
						  (__v8di)
						  _mm512_undefined_si512 (),
						  (__mmask8) (-1));
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
			    __m512i __X, const int __I)
{
  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
						  (__v8di)
						  __W,
						  (__mmask8) __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
{
  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __M);
}

/* Cross-lane permute of double elements selected by immediate __M
   (VPERMPD).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutex_pd (__m512d __X, const int __M)
{
  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
						  (__v8df)
						  _mm512_undefined_pd (),
						  (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
{
  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
						  (__v8df) __W,
						  (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
{
  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
						  (__v8df)
						  _mm512_setzero_pd (),
						  (__mmask8) __U);
}
#else
#define _mm512_permutex_pd(X, M)						\
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M),	\
					    (__v8df)(__m512d)_mm512_undefined_pd(),\
					    (__mmask8)-1))

#define _mm512_mask_permutex_pd(W, U, X, M)					\
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M),	\
					    (__v8df)(__m512d)(W), (__mmask8)(U)))

#define _mm512_maskz_permutex_pd(U, X, M)					\
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M),	\
					    (__v8df)(__m512d)_mm512_setzero_pd(),\
					    (__mmask8)(U)))

#define _mm512_permutex_epi64(X, I)			\
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X),	\
					    (int)(I),			\
					    (__v8di)(__m512i)		\
					    (_mm512_undefined_si512 ()),\
					    (__mmask8)(-1)))

#define _mm512_maskz_permutex_epi64(M, X, I)				\
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X),	\
					    (int)(I),			\
					    (__v8di)(__m512i)		\
					    (_mm512_setzero_si512 ()),	\
					    (__mmask8)(M)))

#define _mm512_mask_permutex_epi64(W, M, X, I)				\
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X),	\
					    (int)(I),			\
					    (__v8di)(__m512i)(W),	\
					    (__mmask8)(M)))
#endif

/* Variable cross-lane permute of qwords: __X holds the indices, __Y the
   data (VPERMQ with vector control).  Note the builtin takes (data,
   indices) in that order.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
						     (__v8di) __X,
						     (__v8di)
						     _mm512_setzero_si512 (),
						     __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
						     (__v8di) __X,
						     (__v8di)
						     _mm512_undefined_si512 (),
						     (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
			       __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
						     (__v8di) __X,
						     (__v8di) __W,
						     __M);
}

/* Variable cross-lane permute of dwords (VPERMD).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
						     (__v16si) __X,
						     (__v16si)
						     _mm512_setzero_si512 (),
						     __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__,
__always_inline__, __artificial__))
_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
{
  /* Variable cross-lane permute: indices in __X select dwords of __Y.  */
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
						     (__v16si) __X,
						     (__v16si)
						     _mm512_undefined_si512 (),
						     (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
			       __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
						     (__v16si) __X,
						     (__v16si) __W,
						     __M);
}

/* Variable cross-lane permute of doubles: integer indices in __X select
   elements of __Y (VPERMPD with vector control).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
						     (__v8di) __X,
						     (__v8df)
						     _mm512_undefined_pd (),
						     (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
						     (__v8di) __X,
						     (__v8df) __W,
						     (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
						     (__v8di) __X,
						     (__v8df)
						     _mm512_setzero_pd (),
						     (__mmask8) __U);
}

/* Variable cross-lane permute of floats (VPERMPS).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
						    (__v16si) __X,
						    (__v16sf)
						    _mm512_undefined_ps (),
						    (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
						    (__v16si) __X,
						    (__v16sf) __W,
						    (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
						    (__v16si) __X,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    (__mmask16) __U);
}

/* Immediate-controlled intrinsics; macro forms in the #else branch.  */
#ifdef __OPTIMIZE__
/* SHUFPS-style shuffle of __M/__V controlled by immediate __imm.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
{
  return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
						 (__v16sf) __V, __imm,
						 (__v16sf)
						 _mm512_undefined_ps (),
						 (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
			__m512 __V, const int __imm)
{
  return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
						 (__v16sf) __V, __imm,
						 (__v16sf) __W,
						 (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
{
  return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
						 (__v16sf) __V, __imm,
						 (__v16sf)
						 _mm512_setzero_ps (),
						 (__mmask16) __U);
}

/* SHUFPD-style shuffle of double elements.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
{
  return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
						  (__v8df) __V, __imm,
						  (__v8df)
						  _mm512_undefined_pd (),
						  (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
			__m512d __V, const int __imm)
{
  return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
						  (__v8df) __V, __imm,
						  (__v8df) __W,
						  (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
			 const int __imm)
{
  return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
						  (__v8df) __V, __imm,
						  (__v8df)
						  _mm512_setzero_pd (),
						  (__mmask8) __U);
}

/* VFIXUPIMMPD with explicit rounding mode __R: fix up special values of
   __A/__B under control of table __C and immediate __imm.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
			  const int __imm, const int __R)
{
  return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
						      (__v8df) __B,
						      (__v8di) __C,
						      __imm,
						      (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
			       __m512i __C, const int __imm, const int __R)
{
  return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
						      (__v8df) __B,
						      (__v8di) __C,
						      __imm,
						      (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
				__m512i __C, const int __imm, const int __R)
{
  return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
						       (__v8df) __B,
						       (__v8di) __C,
						       __imm,
						       (__mmask8) __U, __R);
}

extern __inline
__m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
			  const int __imm, const int __R)
{
  /* VFIXUPIMMPS with explicit rounding mode __R.  */
  return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
						     (__v16sf) __B,
						     (__v16si) __C,
						     __imm,
						     (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
			       __m512i __C, const int __imm, const int __R)
{
  return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
						     (__v16sf) __B,
						     (__v16si) __C,
						     __imm,
						     (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
				__m512i __C, const int __imm, const int __R)
{
  return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
						      (__v16sf) __B,
						      (__v16si) __C,
						      __imm,
						      (__mmask16) __U, __R);
}

/* Scalar fixup (VFIXUPIMMSD): operates on the low double only.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
		       const int __imm, const int __R)
{
  return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
						   (__v2df) __B,
						   (__v2di) __C, __imm,
						   (__mmask8) -1, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
			    __m128i __C, const int __imm, const int __R)
{
  return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
						   (__v2df) __B,
						   (__v2di) __C, __imm,
						   (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			     __m128i __C, const int __imm, const int __R)
{
  return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
						    (__v2df) __B,
						    (__v2di) __C,
						    __imm,
						    (__mmask8) __U, __R);
}

/* Scalar fixup (VFIXUPIMMSS): operates on the low float only.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
		       const int __imm, const int __R)
{
  return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
						  (__v4sf) __B,
						  (__v4si) __C, __imm,
						  (__mmask8) -1, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
			    __m128i __C, const int __imm, const int __R)
{
  return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
						  (__v4sf) __B,
						  (__v4si) __C, __imm,
						  (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			     __m128i __C, const int __imm, const int __R)
{
  return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4si) __C, __imm,
						   (__mmask8) __U, __R);
}

#else
/* Macro forms used when not optimizing, so the immediates stay literal.  */
#define _mm512_shuffle_pd(X, Y, C)					\
  ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X),	\
					   (__v8df)(__m512d)(Y), (int)(C),\
					   (__v8df)(__m512d)_mm512_undefined_pd(),\
					   (__mmask8)-1))

#define _mm512_mask_shuffle_pd(W, U, X, Y, C)				\
  ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X),	\
					   (__v8df)(__m512d)(Y), (int)(C),\
					   (__v8df)(__m512d)(W),	\
					   (__mmask8)(U)))

#define _mm512_maskz_shuffle_pd(U, X, Y, C)				\
  ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X),	\
					   (__v8df)(__m512d)(Y), (int)(C),\
					   (__v8df)(__m512d)_mm512_setzero_pd(),\
					   (__mmask8)(U)))

#define _mm512_shuffle_ps(X, Y, C)					\
  ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X),		\
					  (__v16sf)(__m512)(Y), (int)(C),\
					  (__v16sf)(__m512)_mm512_undefined_ps(),\
					  (__mmask16)-1))

#define _mm512_mask_shuffle_ps(W, U, X, Y, C)				\
  ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X),		\
					  (__v16sf)(__m512)(Y), (int)(C),\
					  (__v16sf)(__m512)(W),		\
					  (__mmask16)(U)))

#define _mm512_maskz_shuffle_ps(U, X, Y, C)				\
  ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X),		\
					  (__v16sf)(__m512)(Y), (int)(C),\
					  (__v16sf)(__m512)_mm512_setzero_ps(),\
					  (__mmask16)(U)))

#define _mm512_fixupimm_round_pd(X, Y, Z, C, R)				\
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X),	\
    (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C),		\
    (__mmask8)(-1), (R)))

#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R)			\
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X),	\
    (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C),		\
    (__mmask8)(U), (R)))

#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R)		\
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X),	\
    (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C),		\
    (__mmask8)(U), (R)))

#define _mm512_fixupimm_round_ps(X, Y, Z, C, R)				\
  ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X),	\
    (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C),		\
    (__mmask16)(-1), (R)))

#define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R)			\
  ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X),	\
    (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C),		\
    (__mmask16)(U), (R)))

#define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R)		\
((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \ 6704 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 6705 (__mmask16)(U), (R))) 6706 6707 #define _mm_fixupimm_round_sd(X, Y, Z, C, R) \ 6708 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ 6709 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 6710 (__mmask8)(-1), (R))) 6711 6712 #define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \ 6713 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ 6714 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 6715 (__mmask8)(U), (R))) 6716 6717 #define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \ 6718 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \ 6719 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 6720 (__mmask8)(U), (R))) 6721 6722 #define _mm_fixupimm_round_ss(X, Y, Z, C, R) \ 6723 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ 6724 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 6725 (__mmask8)(-1), (R))) 6726 6727 #define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \ 6728 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ 6729 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 6730 (__mmask8)(U), (R))) 6731 6732 #define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \ 6733 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \ 6734 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 6735 (__mmask8)(U), (R))) 6736 #endif 6737 6738 extern __inline __m512 6739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6740 _mm512_movehdup_ps (__m512 __A) 6741 { 6742 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, 6743 (__v16sf) 6744 _mm512_undefined_ps (), 6745 (__mmask16) -1); 6746 } 6747 6748 extern __inline __m512 6749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6750 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) 6751 { 6752 return (__m512) 
__builtin_ia32_movshdup512_mask ((__v16sf) __A, 6753 (__v16sf) __W, 6754 (__mmask16) __U); 6755 } 6756 6757 extern __inline __m512 6758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6759 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) 6760 { 6761 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, 6762 (__v16sf) 6763 _mm512_setzero_ps (), 6764 (__mmask16) __U); 6765 } 6766 6767 extern __inline __m512 6768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6769 _mm512_moveldup_ps (__m512 __A) 6770 { 6771 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, 6772 (__v16sf) 6773 _mm512_undefined_ps (), 6774 (__mmask16) -1); 6775 } 6776 6777 extern __inline __m512 6778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6779 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) 6780 { 6781 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, 6782 (__v16sf) __W, 6783 (__mmask16) __U); 6784 } 6785 6786 extern __inline __m512 6787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6788 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) 6789 { 6790 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, 6791 (__v16sf) 6792 _mm512_setzero_ps (), 6793 (__mmask16) __U); 6794 } 6795 6796 extern __inline __m512i 6797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6798 _mm512_or_si512 (__m512i __A, __m512i __B) 6799 { 6800 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, 6801 (__v16si) __B, 6802 (__v16si) 6803 _mm512_undefined_si512 (), 6804 (__mmask16) -1); 6805 } 6806 6807 extern __inline __m512i 6808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6809 _mm512_or_epi32 (__m512i __A, __m512i __B) 6810 { 6811 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, 6812 (__v16si) __B, 6813 (__v16si) 6814 _mm512_undefined_si512 (), 6815 (__mmask16) -1); 6816 } 6817 6818 extern __inline __m512i 
6819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6820 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 6821 { 6822 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, 6823 (__v16si) __B, 6824 (__v16si) __W, 6825 (__mmask16) __U); 6826 } 6827 6828 extern __inline __m512i 6829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6830 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 6831 { 6832 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, 6833 (__v16si) __B, 6834 (__v16si) 6835 _mm512_setzero_si512 (), 6836 (__mmask16) __U); 6837 } 6838 6839 extern __inline __m512i 6840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6841 _mm512_or_epi64 (__m512i __A, __m512i __B) 6842 { 6843 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A, 6844 (__v8di) __B, 6845 (__v8di) 6846 _mm512_undefined_si512 (), 6847 (__mmask8) -1); 6848 } 6849 6850 extern __inline __m512i 6851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6852 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 6853 { 6854 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A, 6855 (__v8di) __B, 6856 (__v8di) __W, 6857 (__mmask8) __U); 6858 } 6859 6860 extern __inline __m512i 6861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6862 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 6863 { 6864 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A, 6865 (__v8di) __B, 6866 (__v8di) 6867 _mm512_setzero_si512 (), 6868 (__mmask8) __U); 6869 } 6870 6871 extern __inline __m512i 6872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6873 _mm512_xor_si512 (__m512i __A, __m512i __B) 6874 { 6875 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, 6876 (__v16si) __B, 6877 (__v16si) 6878 _mm512_undefined_si512 (), 6879 (__mmask16) -1); 6880 } 6881 6882 extern __inline __m512i 6883 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6884 _mm512_xor_epi32 (__m512i __A, __m512i __B) 6885 { 6886 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, 6887 (__v16si) __B, 6888 (__v16si) 6889 _mm512_undefined_si512 (), 6890 (__mmask16) -1); 6891 } 6892 6893 extern __inline __m512i 6894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6895 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 6896 { 6897 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, 6898 (__v16si) __B, 6899 (__v16si) __W, 6900 (__mmask16) __U); 6901 } 6902 6903 extern __inline __m512i 6904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6905 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 6906 { 6907 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, 6908 (__v16si) __B, 6909 (__v16si) 6910 _mm512_setzero_si512 (), 6911 (__mmask16) __U); 6912 } 6913 6914 extern __inline __m512i 6915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6916 _mm512_xor_epi64 (__m512i __A, __m512i __B) 6917 { 6918 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A, 6919 (__v8di) __B, 6920 (__v8di) 6921 _mm512_undefined_si512 (), 6922 (__mmask8) -1); 6923 } 6924 6925 extern __inline __m512i 6926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6927 _mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 6928 { 6929 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A, 6930 (__v8di) __B, 6931 (__v8di) __W, 6932 (__mmask8) __U); 6933 } 6934 6935 extern __inline __m512i 6936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6937 _mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B) 6938 { 6939 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A, 6940 (__v8di) __B, 6941 (__v8di) 6942 _mm512_setzero_si512 (), 6943 (__mmask8) __U); 6944 } 6945 6946 #ifdef __OPTIMIZE__ 6947 
/* Rotate each dword of __A left by immediate __B (VPROLD).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rol_epi32 (__m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
						 (__v16si)
						 _mm512_undefined_si512 (),
						 (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
						 (__v16si) __W,
						 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
						 (__v16si)
						 _mm512_setzero_si512 (),
						 (__mmask16) __U);
}

/* Rotate each dword of __A right by immediate __B (VPRORD).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ror_epi32 (__m512i __A, int __B)
{
  return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
						 (__v16si)
						 _mm512_undefined_si512 (),
						 (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
{
  return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
						 (__v16si) __W,
						 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
{
  return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
						 (__v16si)
						 _mm512_setzero_si512 (),
						 (__mmask16) __U);
}

/* Rotate each qword of __A left by immediate __B (VPROLQ).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rol_epi64 (__m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
						 (__v8di)
						 _mm512_undefined_si512 (),
						 (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
						 (__v8di) __W,
						 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}

/* Rotate each qword of __A right by immediate __B (VPRORQ).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ror_epi64 (__m512i __A, int __B)
{
  return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
						 (__v8di)
						 _mm512_undefined_si512 (),
						 (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
{
  return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
						 (__v8di) __W,
						 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
{
  return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}

#else
/* Macro forms of the rotates for non-optimizing builds.  */
#define _mm512_rol_epi32(A, B)						  \
  ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A),	  \
					  (int)(B),			  \
					  (__v16si)_mm512_undefined_si512 (), \
					  (__mmask16)(-1)))
#define _mm512_mask_rol_epi32(W, U, A, B)				  \
  ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A),	  \
					  (int)(B),			  \
					  (__v16si)(__m512i)(W),	  \
					  (__mmask16)(U)))
#define _mm512_maskz_rol_epi32(U, A, B)					  \
  ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A),	  \
					  (int)(B),			  \
					  (__v16si)_mm512_setzero_si512 (), \
					  (__mmask16)(U)))
#define _mm512_ror_epi32(A, B)						  \
  ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A),	  \
					  (int)(B),			  \
					  (__v16si)_mm512_undefined_si512 (), \
					  (__mmask16)(-1)))
#define _mm512_mask_ror_epi32(W, U, A, B)				  \
  ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A),	  \
					  (int)(B),			  \
					  (__v16si)(__m512i)(W),	  \
					  (__mmask16)(U)))
#define _mm512_maskz_ror_epi32(U, A, B)					  \
  ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A),	  \
					  (int)(B),			  \
					  (__v16si)_mm512_setzero_si512 (), \
					  (__mmask16)(U)))
#define _mm512_rol_epi64(A, B)						  \
  ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A),		  \
					  (int)(B),			  \
					  (__v8di)_mm512_undefined_si512 (), \
					  (__mmask8)(-1)))
#define _mm512_mask_rol_epi64(W, U, A, B)				  \
  ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A),		  \
					  (int)(B),			  \
					  (__v8di)(__m512i)(W),		  \
					  (__mmask8)(U)))
#define _mm512_maskz_rol_epi64(U, A, B)					  \
  ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A),		  \
					  (int)(B),			  \
					  (__v8di)_mm512_setzero_si512 (), \
					  (__mmask8)(U)))

#define _mm512_ror_epi64(A, B)						  \
  ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A),		  \
					  (int)(B),			  \
					  (__v8di)_mm512_undefined_si512 (), \
					  (__mmask8)(-1)))
#define _mm512_mask_ror_epi64(W, U, A, B)				  \
  ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A),		  \
					  (int)(B),			  \
					  (__v8di)(__m512i)(W),		  \
					  (__mmask8)(U)))
#define _mm512_maskz_ror_epi64(U, A, B)					  \
  ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A),		  \
					  (int)(B),			  \
					  (__v8di)_mm512_setzero_si512 (), \
					  (__mmask8)(U)))
#endif

/* Bitwise AND.  _mm512_and_si512 is the whole-register alias of
   _mm512_and_epi32.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_and_si512 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
						 (__v16si) __B,
						 (__v16si)
						 _mm512_undefined_si512 (),
						 (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_and_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
						 (__v16si) __B,
						 (__v16si)
						 _mm512_undefined_si512 (),
						 (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
						 (__v16si) __B,
						 (__v16si) __W,
						 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
						 (__v16si) __B,
						 (__v16si)
						 _mm512_setzero_si512 (),
						 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_and_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
						 (__v8di) __B,
						 (__v8di)
						 _mm512_undefined_si512 (),
						 (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{ 7185 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A, 7186 (__v8di) __B, 7187 (__v8di) __W, __U); 7188 } 7189 7190 extern __inline __m512i 7191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7192 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 7193 { 7194 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A, 7195 (__v8di) __B, 7196 (__v8di) 7197 _mm512_setzero_pd (), 7198 __U); 7199 } 7200 7201 extern __inline __m512i 7202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7203 _mm512_andnot_si512 (__m512i __A, __m512i __B) 7204 { 7205 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 7206 (__v16si) __B, 7207 (__v16si) 7208 _mm512_undefined_si512 (), 7209 (__mmask16) -1); 7210 } 7211 7212 extern __inline __m512i 7213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7214 _mm512_andnot_epi32 (__m512i __A, __m512i __B) 7215 { 7216 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 7217 (__v16si) __B, 7218 (__v16si) 7219 _mm512_undefined_si512 (), 7220 (__mmask16) -1); 7221 } 7222 7223 extern __inline __m512i 7224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7225 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 7226 { 7227 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 7228 (__v16si) __B, 7229 (__v16si) __W, 7230 (__mmask16) __U); 7231 } 7232 7233 extern __inline __m512i 7234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7235 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 7236 { 7237 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 7238 (__v16si) __B, 7239 (__v16si) 7240 _mm512_setzero_si512 (), 7241 (__mmask16) __U); 7242 } 7243 7244 extern __inline __m512i 7245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7246 _mm512_andnot_epi64 (__m512i __A, __m512i __B) 7247 { 7248 return (__m512i) 
__builtin_ia32_pandnq512_mask ((__v8di) __A, 7249 (__v8di) __B, 7250 (__v8di) 7251 _mm512_undefined_si512 (), 7252 (__mmask8) -1); 7253 } 7254 7255 extern __inline __m512i 7256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7257 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 7258 { 7259 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 7260 (__v8di) __B, 7261 (__v8di) __W, __U); 7262 } 7263 7264 extern __inline __m512i 7265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7266 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 7267 { 7268 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 7269 (__v8di) __B, 7270 (__v8di) 7271 _mm512_setzero_pd (), 7272 __U); 7273 } 7274 7275 extern __inline __mmask16 7276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7277 _mm512_test_epi32_mask (__m512i __A, __m512i __B) 7278 { 7279 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, 7280 (__v16si) __B, 7281 (__mmask16) -1); 7282 } 7283 7284 extern __inline __mmask16 7285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7286 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 7287 { 7288 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, 7289 (__v16si) __B, __U); 7290 } 7291 7292 extern __inline __mmask8 7293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7294 _mm512_test_epi64_mask (__m512i __A, __m512i __B) 7295 { 7296 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, 7297 (__v8di) __B, 7298 (__mmask8) -1); 7299 } 7300 7301 extern __inline __mmask8 7302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7303 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 7304 { 7305 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U); 7306 } 7307 7308 extern __inline __mmask16 7309 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__))
/* Per-element "test none": mask bit i is set when (__A & __B) element i
   is zero (vptestnmd / vptestnmq below).  */
_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
                                                 (__v16si) __B,
                                                 (__mmask16) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
                                                 (__v16si) __B, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
                                                (__v8di) __B,
                                                (__mmask8) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
                                                (__v8di) __B, __U);
}

/* Interleave the high 32-bit elements of each 128-bit lane of __A and
   __B (vpunpckhdq); usual merge/zero write-mask variants.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
                                                     (__v16si) __B,
                                                     (__v16si)
                                                     _mm512_undefined_si512 (),
                                                     (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                            __m512i __B)
{
  return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
                                                     (__v16si) __B,
                                                     (__v16si) __W,
                                                     (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
                                                     (__v16si) __B,
                                                     (__v16si)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask16) __U);
}

/* Interleave the high 64-bit elements per lane (vpunpckhqdq).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
                                                      (__v8di) __B,
                                                      (__v8di)
                                                      _mm512_undefined_si512 (),
                                                      (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
                                                      (__v8di) __B,
                                                      (__v8di) __W,
                                                      (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
                                                      (__v8di) __B,
                                                      (__v8di)
                                                      _mm512_setzero_si512 (),
                                                      (__mmask8) __U);
}

/* Interleave the low 32-bit elements per lane (vpunpckldq).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
                                                     (__v16si) __B,
                                                     (__v16si)
                                                     _mm512_undefined_si512 (),
                                                     (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                            __m512i __B)
{
  return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
                                                     (__v16si) __B,
                                                     (__v16si) __W,
                                                     (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
                                                     (__v16si) __B,
                                                     (__v16si)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask16) __U);
}

/* Interleave the low 64-bit elements per lane (vpunpcklqdq).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
                                                      (__v8di) __B,
                                                      (__v8di)
                                                      _mm512_undefined_si512 (),
                                                      (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
                                                      (__v8di) __B,
                                                      (__v8di) __W,
                                                      (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
                                                      (__v8di) __B,
                                                      (__v8di)
                                                      _mm512_setzero_si512 (),
                                                      (__mmask8) __U);
}

#ifdef __x86_64__
#ifdef __OPTIMIZE__
/* Convert the low float of __A to a 64-bit integer under rounding mode
   __R (must be a compile-time constant).  _cvt_ rounds per __R; _cvtt_
   truncates.  u64 = unsigned, si64/i64 = signed (aliases).  */
extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_u64 (__m128 __A, const int __R)
{
  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_si64 (__m128 __A, const int __R)
{
  return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_i64 (__m128 __A, const int __R)
{
  return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
{
  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
{
  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
{
  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
}
#else
/* -O0 macro forms: the rounding-mode operand must stay an immediate.  */
#define _mm_cvt_roundss_u64(A, B)   \
  ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))

#define _mm_cvt_roundss_si64(A, B)   \
  ((long long)__builtin_ia32_vcvtss2si64(A, B))

#define _mm_cvt_roundss_i64(A, B)   \
  ((long long)__builtin_ia32_vcvtss2si64(A, B))

#define _mm_cvtt_roundss_u64(A, B)  \
  ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))

#define _mm_cvtt_roundss_i64(A, B)  \
  ((long long)__builtin_ia32_vcvttss2si64(A, B))

#define _mm_cvtt_roundss_si64(A, B) \
  ((long long)__builtin_ia32_vcvttss2si64(A, B))
#endif
#endif

#ifdef __OPTIMIZE__
/* 32-bit variants of the scalar float conversions above.  */
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_u32 (__m128 __A, const int __R)
{
  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_si32 (__m128 __A, const int __R)
{
  return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
}

extern __inline
int 7552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7553 _mm_cvt_roundss_i32 (__m128 __A, const int __R) 7554 { 7555 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R); 7556 } 7557 7558 extern __inline unsigned 7559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7560 _mm_cvtt_roundss_u32 (__m128 __A, const int __R) 7561 { 7562 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R); 7563 } 7564 7565 extern __inline int 7566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7567 _mm_cvtt_roundss_i32 (__m128 __A, const int __R) 7568 { 7569 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R); 7570 } 7571 7572 extern __inline int 7573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7574 _mm_cvtt_roundss_si32 (__m128 __A, const int __R) 7575 { 7576 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R); 7577 } 7578 #else 7579 #define _mm_cvt_roundss_u32(A, B) \ 7580 ((unsigned)__builtin_ia32_vcvtss2usi32(A, B)) 7581 7582 #define _mm_cvt_roundss_si32(A, B) \ 7583 ((int)__builtin_ia32_vcvtss2si32(A, B)) 7584 7585 #define _mm_cvt_roundss_i32(A, B) \ 7586 ((int)__builtin_ia32_vcvtss2si32(A, B)) 7587 7588 #define _mm_cvtt_roundss_u32(A, B) \ 7589 ((unsigned)__builtin_ia32_vcvttss2usi32(A, B)) 7590 7591 #define _mm_cvtt_roundss_si32(A, B) \ 7592 ((int)__builtin_ia32_vcvttss2si32(A, B)) 7593 7594 #define _mm_cvtt_roundss_i32(A, B) \ 7595 ((int)__builtin_ia32_vcvttss2si32(A, B)) 7596 #endif 7597 7598 #ifdef __x86_64__ 7599 #ifdef __OPTIMIZE__ 7600 extern __inline unsigned long long 7601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7602 _mm_cvt_roundsd_u64 (__m128d __A, const int __R) 7603 { 7604 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R); 7605 } 7606 7607 extern __inline long long 7608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7609 _mm_cvt_roundsd_si64 (__m128d __A, const int __R) 
7610 { 7611 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R); 7612 } 7613 7614 extern __inline long long 7615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7616 _mm_cvt_roundsd_i64 (__m128d __A, const int __R) 7617 { 7618 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R); 7619 } 7620 7621 extern __inline unsigned long long 7622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7623 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R) 7624 { 7625 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R); 7626 } 7627 7628 extern __inline long long 7629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7630 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R) 7631 { 7632 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R); 7633 } 7634 7635 extern __inline long long 7636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7637 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R) 7638 { 7639 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R); 7640 } 7641 #else 7642 #define _mm_cvt_roundsd_u64(A, B) \ 7643 ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B)) 7644 7645 #define _mm_cvt_roundsd_si64(A, B) \ 7646 ((long long)__builtin_ia32_vcvtsd2si64(A, B)) 7647 7648 #define _mm_cvt_roundsd_i64(A, B) \ 7649 ((long long)__builtin_ia32_vcvtsd2si64(A, B)) 7650 7651 #define _mm_cvtt_roundsd_u64(A, B) \ 7652 ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B)) 7653 7654 #define _mm_cvtt_roundsd_si64(A, B) \ 7655 ((long long)__builtin_ia32_vcvttsd2si64(A, B)) 7656 7657 #define _mm_cvtt_roundsd_i64(A, B) \ 7658 ((long long)__builtin_ia32_vcvttsd2si64(A, B)) 7659 #endif 7660 #endif 7661 7662 #ifdef __OPTIMIZE__ 7663 extern __inline unsigned 7664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7665 _mm_cvt_roundsd_u32 (__m128d __A, const int __R) 7666 { 7667 return (unsigned) __builtin_ia32_vcvtsd2usi32 
((__v2df) __A, __R); 7668 } 7669 7670 extern __inline int 7671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7672 _mm_cvt_roundsd_si32 (__m128d __A, const int __R) 7673 { 7674 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R); 7675 } 7676 7677 extern __inline int 7678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7679 _mm_cvt_roundsd_i32 (__m128d __A, const int __R) 7680 { 7681 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R); 7682 } 7683 7684 extern __inline unsigned 7685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7686 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R) 7687 { 7688 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R); 7689 } 7690 7691 extern __inline int 7692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7693 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R) 7694 { 7695 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R); 7696 } 7697 7698 extern __inline int 7699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7700 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R) 7701 { 7702 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R); 7703 } 7704 #else 7705 #define _mm_cvt_roundsd_u32(A, B) \ 7706 ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B)) 7707 7708 #define _mm_cvt_roundsd_si32(A, B) \ 7709 ((int)__builtin_ia32_vcvtsd2si32(A, B)) 7710 7711 #define _mm_cvt_roundsd_i32(A, B) \ 7712 ((int)__builtin_ia32_vcvtsd2si32(A, B)) 7713 7714 #define _mm_cvtt_roundsd_u32(A, B) \ 7715 ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B)) 7716 7717 #define _mm_cvtt_roundsd_si32(A, B) \ 7718 ((int)__builtin_ia32_vcvttsd2si32(A, B)) 7719 7720 #define _mm_cvtt_roundsd_i32(A, B) \ 7721 ((int)__builtin_ia32_vcvttsd2si32(A, B)) 7722 #endif 7723 7724 extern __inline __m512d 7725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7726 _mm512_movedup_pd (__m512d __A) 7727 { 7728 return (__m512d) 
__builtin_ia32_movddup512_mask ((__v8df) __A,
                                    (__v8df)
                                    _mm512_undefined_pd (),
                                    (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
                                                   (__v8df) __W,
                                                   (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) __U);
}

/* Interleave the low doubles of each 128-bit lane (vunpcklpd).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_unpacklo_pd (__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df)
                                                    _mm512_undefined_pd (),
                                                    (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df) __W,
                                                    (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df)
                                                    _mm512_setzero_pd (),
                                                    (__mmask8) __U);
}

/* Interleave the high doubles of each 128-bit lane (vunpckhpd).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_unpackhi_pd (__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df)
                                                    _mm512_undefined_pd (),
                                                    (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df) __W,
                                                    (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df)
                                                    _mm512_setzero_pd (),
                                                    (__mmask8) __U);
}

/* Interleave the high floats of each 128-bit lane (vunpckhps).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_unpackhi_ps (__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   (__v16sf)
                                                   _mm512_undefined_ps (),
                                                   (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   (__v16sf) __W,
                                                   (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   (__v16sf)
                                                   _mm512_setzero_ps (),
                                                   (__mmask16) __U);
}

#ifdef __OPTIMIZE__
/* Widen 8 floats to 8 doubles under rounding mode __R (a compile-time
   constant — hence the __OPTIMIZE__ guard).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_pd (__m256 __A, const int __R)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
                                                    (__v8df)
                                                    _mm512_undefined_pd (),
                                                    (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
                            const int __R)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
                                                    (__v8df) __W,
                                                    (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
                                                    (__v8df)
                                                    _mm512_setzero_pd (),
                                                    (__mmask8) __U, __R);
}

/* Convert 16 half-precision values to floats (vcvtph2ps).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundph_ps (__m256i __A, const int __R)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                                                    (__v16sf)
                                                    _mm512_undefined_ps (),
                                                    (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
                            const int __R)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                                                    (__v16sf) __W,
                                                    (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                                                    (__v16sf)
                                                    _mm512_setzero_ps (),
                                                    (__mmask16) __U, __R);
}

/* Convert 16 floats to half precision (vcvtps2ph); __I packs the
   rounding/imm8 control.  _mm512_cvtps_ph is an alias spelling.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_ph (__m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
                                                     __I,
                                                     (__v16hi)
                                                     _mm256_undefined_si256 (),
                                                     -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtps_ph (__m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
                                                     __I,
                                                     (__v16hi)
                                                     _mm256_undefined_si256 (),
                                                     -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
                            const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
                                                     __I,
                                                     (__v16hi) __U,
                                                     (__mmask16) __W);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
                                                     __I,
                                                     (__v16hi) __U,
                                                     (__mmask16) __W);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
                                                     __I,
                                                     (__v16hi)
                                                     _mm256_setzero_si256 (),
                                                     (__mmask16) __W);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
                                                     __I,
                                                     (__v16hi)
                                                     _mm256_setzero_si256 (),
                                                     (__mmask16) __W);
}
#else
/* -O0 macro forms: rounding/imm8 operands must stay immediates.  */
#define _mm512_cvt_roundps_pd(A, B) \
    (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B)

#define _mm512_mask_cvt_roundps_pd(W, U, A, B)   \
    (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B)

#define _mm512_maskz_cvt_roundps_pd(U, A, B)     \
    (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B)

#define _mm512_cvt_roundph_ps(A, B) \
    (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B)

#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
    (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B)

#define _mm512_maskz_cvt_roundph_ps(U, A, B) \
    (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B)

#define _mm512_cvt_roundps_ph(A, I)						 \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
					       (__v16hi)_mm256_undefined_si256 (), -1))
#define _mm512_cvtps_ph(A, I)						 \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
					       (__v16hi)_mm256_undefined_si256 (), -1))
#define _mm512_mask_cvt_roundps_ph(U, W, A, I)				 \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
					       (__v16hi)(__m256i)(U), (__mmask16) (W)))
#define _mm512_mask_cvtps_ph(U, W, A, I)				 \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
					       (__v16hi)(__m256i)(U), (__mmask16) (W)))
#define _mm512_maskz_cvt_roundps_ph(W, A, I)				 \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
					       (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
#define _mm512_maskz_cvtps_ph(W, A, I)					 \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
					       (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
#endif

#ifdef __OPTIMIZE__
/* Narrow 8 doubles to 8 floats under rounding mode __R.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                                                   (__v8sf)
                                                   _mm256_undefined_ps (),
                                                   (__mmask8) -1, __R);
}
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
                            const int __R)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                                                   (__v8sf) __W,
                                                   (__mmask8) __U, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                                                   (__v8sf)
                                                   _mm256_setzero_ps (),
                                                   (__mmask8) __U, __R);
}

/* Convert the low double of __B to a float merged into __A (vcvtsd2ss)
   under rounding mode __R.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
                                                 (__v2df) __B,
                                                 __R);
}

/* Convert the low float of __B to a double merged into __A
   (vcvtss2sd).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
                                                  (__v4sf) __B,
                                                  __R);
}
#else
/* -O0 macro forms: the rounding-mode operand must stay an immediate.  */
#define _mm512_cvt_roundpd_ps(A, B) \
    (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B)

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
    (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B)

#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
    (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B)

#define _mm_cvt_roundsd_ss(A, B, C)	 \
    (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)

#define _mm_cvt_roundss_sd(A, B, C)	 \
    (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
#endif

/* Non-temporal (streaming) 64-byte store of __A to *__P (vmovntdq);
   bypasses the cache hierarchy.  __P should be 64-byte aligned —
   alignment requirement per the underlying instruction.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_stream_si512 (__m512i * __P, __m512i __A)
{
  __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
}

/* Non-temporal store of 16 floats (vmovntps).  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_stream_ps (float *__P, __m512 __A)
{
  __builtin_ia32_movntps512 (__P, (__v16sf) __A);
}

/* Non-temporal store of 8 doubles (vmovntpd).  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_stream_pd (double *__P, __m512d __A)
{
  __builtin_ia32_movntpd512 (__P, (__v8df) __A);
}

/* Non-temporal 64-byte load from *__P (vmovntdqa).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_stream_load_si512 (void *__P)
{
  return __builtin_ia32_movntdqa512 ((__v8di *)__P);
}

/* Constants for mantissa extraction */
typedef enum
{
  _MM_MANT_NORM_1_2,		/* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,		/* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,		/* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5		/* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

typedef enum
{
  _MM_MANT_SIGN_src,		/* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero,		/* sign = 0             */
  _MM_MANT_SIGN_nan		/* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;

#ifdef __OPTIMIZE__
/* Extract the exponent of the low element of __B into the low element
   of the result, upper elements from __A (vgetexpss/vgetexpsd).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
                                                    (__v4sf) __B,
                                                    __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
                                                     (__v2df) __B,
                                                     __R);
}

/* Per-element exponent extraction for packed floats (vgetexpps).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getexp_round_ps (__m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
                                                   (__v16sf)
                                                   _mm512_undefined_ps (),
                                                   (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
                             const int __R)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
                                                   (__v16sf) __W,
                                                   (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
                                                   (__v16sf)
                                                   _mm512_setzero_ps (),
                                                   (__mmask16) __U, __R);
}

/* Per-element exponent extraction for packed doubles (vgetexppd).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getexp_round_pd (__m512d __A, const int __R)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                                                    (__v8df)
                                                    _mm512_undefined_pd (),
                                                    (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
                             const int __R)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                                                    (__v8df) __W,
                                                    (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                                                    (__v8df)
                                                    _mm512_setzero_pd (),
                                                    (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
_MM_MANTISSA_SIGN_ENUM __C, const int __R) 8205 { 8206 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 8207 (__C << 2) | __B, 8208 _mm512_undefined_pd (), 8209 (__mmask8) -1, __R); 8210 } 8211 8212 extern __inline __m512d 8213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8214 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 8215 _MM_MANTISSA_NORM_ENUM __B, 8216 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8217 { 8218 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 8219 (__C << 2) | __B, 8220 (__v8df) __W, __U, 8221 __R); 8222 } 8223 8224 extern __inline __m512d 8225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8226 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A, 8227 _MM_MANTISSA_NORM_ENUM __B, 8228 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8229 { 8230 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 8231 (__C << 2) | __B, 8232 (__v8df) 8233 _mm512_setzero_pd (), 8234 __U, __R); 8235 } 8236 8237 extern __inline __m512 8238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8239 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B, 8240 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8241 { 8242 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 8243 (__C << 2) | __B, 8244 _mm512_undefined_ps (), 8245 (__mmask16) -1, __R); 8246 } 8247 8248 extern __inline __m512 8249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8250 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 8251 _MM_MANTISSA_NORM_ENUM __B, 8252 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8253 { 8254 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 8255 (__C << 2) | __B, 8256 (__v16sf) __W, __U, 8257 __R); 8258 } 8259 8260 extern __inline __m512 8261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8262 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A, 8263 
_MM_MANTISSA_NORM_ENUM __B, 8264 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8265 { 8266 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 8267 (__C << 2) | __B, 8268 (__v16sf) 8269 _mm512_setzero_ps (), 8270 __U, __R); 8271 } 8272 8273 extern __inline __m128d 8274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8275 _mm_getmant_round_sd (__m128d __A, __m128d __B, 8276 _MM_MANTISSA_NORM_ENUM __C, 8277 _MM_MANTISSA_SIGN_ENUM __D, const int __R) 8278 { 8279 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A, 8280 (__v2df) __B, 8281 (__D << 2) | __C, 8282 __R); 8283 } 8284 8285 extern __inline __m128 8286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8287 _mm_getmant_round_ss (__m128 __A, __m128 __B, 8288 _MM_MANTISSA_NORM_ENUM __C, 8289 _MM_MANTISSA_SIGN_ENUM __D, const int __R) 8290 { 8291 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A, 8292 (__v4sf) __B, 8293 (__D << 2) | __C, 8294 __R); 8295 } 8296 8297 #else 8298 #define _mm512_getmant_round_pd(X, B, C, R) \ 8299 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 8300 (int)(((C)<<2) | (B)), \ 8301 (__v8df)(__m512d)_mm512_undefined_pd(), \ 8302 (__mmask8)-1,\ 8303 (R))) 8304 8305 #define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \ 8306 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 8307 (int)(((C)<<2) | (B)), \ 8308 (__v8df)(__m512d)(W), \ 8309 (__mmask8)(U),\ 8310 (R))) 8311 8312 #define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \ 8313 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 8314 (int)(((C)<<2) | (B)), \ 8315 (__v8df)(__m512d)_mm512_setzero_pd(), \ 8316 (__mmask8)(U),\ 8317 (R))) 8318 #define _mm512_getmant_round_ps(X, B, C, R) \ 8319 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 8320 (int)(((C)<<2) | (B)), \ 8321 (__v16sf)(__m512)_mm512_undefined_ps(), \ 8322 (__mmask16)-1,\ 8323 (R))) 8324 8325 #define _mm512_mask_getmant_round_ps(W, U, 
X, B, C, R) \ 8326 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 8327 (int)(((C)<<2) | (B)), \ 8328 (__v16sf)(__m512)(W), \ 8329 (__mmask16)(U),\ 8330 (R))) 8331 8332 #define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \ 8333 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 8334 (int)(((C)<<2) | (B)), \ 8335 (__v16sf)(__m512)_mm512_setzero_ps(), \ 8336 (__mmask16)(U),\ 8337 (R))) 8338 #define _mm_getmant_round_sd(X, Y, C, D, R) \ 8339 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \ 8340 (__v2df)(__m128d)(Y), \ 8341 (int)(((D)<<2) | (C)), \ 8342 (R))) 8343 8344 #define _mm_getmant_round_ss(X, Y, C, D, R) \ 8345 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \ 8346 (__v4sf)(__m128)(Y), \ 8347 (int)(((D)<<2) | (C)), \ 8348 (R))) 8349 8350 #define _mm_getexp_round_ss(A, B, R) \ 8351 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R)) 8352 8353 #define _mm_getexp_round_sd(A, B, R) \ 8354 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R)) 8355 8356 #define _mm512_getexp_round_ps(A, R) \ 8357 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8358 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R)) 8359 8360 #define _mm512_mask_getexp_round_ps(W, U, A, R) \ 8361 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8362 (__v16sf)(__m512)(W), (__mmask16)(U), R)) 8363 8364 #define _mm512_maskz_getexp_round_ps(U, A, R) \ 8365 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8366 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R)) 8367 8368 #define _mm512_getexp_round_pd(A, R) \ 8369 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 8370 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R)) 8371 8372 #define _mm512_mask_getexp_round_pd(W, U, A, R) \ 8373 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 8374 (__v8df)(__m512d)(W), (__mmask8)(U), R)) 8375 8376 #define 
_mm512_maskz_getexp_round_pd(U, A, R) \ 8377 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 8378 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R)) 8379 #endif 8380 8381 #ifdef __OPTIMIZE__ 8382 extern __inline __m512 8383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8384 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R) 8385 { 8386 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm, 8387 (__v16sf) 8388 _mm512_undefined_ps (), 8389 -1, __R); 8390 } 8391 8392 extern __inline __m512 8393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8394 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C, 8395 const int __imm, const int __R) 8396 { 8397 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm, 8398 (__v16sf) __A, 8399 (__mmask16) __B, __R); 8400 } 8401 8402 extern __inline __m512 8403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8404 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B, 8405 const int __imm, const int __R) 8406 { 8407 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B, 8408 __imm, 8409 (__v16sf) 8410 _mm512_setzero_ps (), 8411 (__mmask16) __A, __R); 8412 } 8413 8414 extern __inline __m512d 8415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8416 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R) 8417 { 8418 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm, 8419 (__v8df) 8420 _mm512_undefined_pd (), 8421 -1, __R); 8422 } 8423 8424 extern __inline __m512d 8425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8426 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B, 8427 __m512d __C, const int __imm, const int __R) 8428 { 8429 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm, 8430 (__v8df) __A, 8431 (__mmask8) __B, __R); 8432 } 8433 8434 extern __inline __m512d 8435 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8436 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B, 8437 const int __imm, const int __R) 8438 { 8439 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B, 8440 __imm, 8441 (__v8df) 8442 _mm512_setzero_pd (), 8443 (__mmask8) __A, __R); 8444 } 8445 8446 extern __inline __m128 8447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8448 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R) 8449 { 8450 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A, 8451 (__v4sf) __B, __imm, __R); 8452 } 8453 8454 extern __inline __m128d 8455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8456 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm, 8457 const int __R) 8458 { 8459 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A, 8460 (__v2df) __B, __imm, __R); 8461 } 8462 8463 #else 8464 #define _mm512_roundscale_round_ps(A, B, R) \ 8465 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\ 8466 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R)) 8467 #define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \ 8468 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \ 8469 (int)(D), \ 8470 (__v16sf)(__m512)(A), \ 8471 (__mmask16)(B), R)) 8472 #define _mm512_maskz_roundscale_round_ps(A, B, C, R) \ 8473 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \ 8474 (int)(C), \ 8475 (__v16sf)_mm512_setzero_ps(),\ 8476 (__mmask16)(A), R)) 8477 #define _mm512_roundscale_round_pd(A, B, R) \ 8478 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\ 8479 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R)) 8480 #define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \ 8481 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \ 8482 (int)(D), \ 8483 (__v8df)(__m512d)(A), \ 8484 (__mmask8)(B), R)) 8485 #define 
_mm512_maskz_roundscale_round_pd(A, B, C, R) \ 8486 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \ 8487 (int)(C), \ 8488 (__v8df)_mm512_setzero_pd(),\ 8489 (__mmask8)(A), R)) 8490 #define _mm_roundscale_round_ss(A, B, C, R) \ 8491 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \ 8492 (__v4sf)(__m128)(B), (int)(C), R)) 8493 #define _mm_roundscale_round_sd(A, B, C, R) \ 8494 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \ 8495 (__v2df)(__m128d)(B), (int)(C), R)) 8496 #endif 8497 8498 extern __inline __m512 8499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8500 _mm512_floor_ps (__m512 __A) 8501 { 8502 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 8503 _MM_FROUND_FLOOR, 8504 (__v16sf) __A, -1, 8505 _MM_FROUND_CUR_DIRECTION); 8506 } 8507 8508 extern __inline __m512d 8509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8510 _mm512_floor_pd (__m512d __A) 8511 { 8512 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 8513 _MM_FROUND_FLOOR, 8514 (__v8df) __A, -1, 8515 _MM_FROUND_CUR_DIRECTION); 8516 } 8517 8518 extern __inline __m512 8519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8520 _mm512_ceil_ps (__m512 __A) 8521 { 8522 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 8523 _MM_FROUND_CEIL, 8524 (__v16sf) __A, -1, 8525 _MM_FROUND_CUR_DIRECTION); 8526 } 8527 8528 extern __inline __m512d 8529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8530 _mm512_ceil_pd (__m512d __A) 8531 { 8532 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 8533 _MM_FROUND_CEIL, 8534 (__v8df) __A, -1, 8535 _MM_FROUND_CUR_DIRECTION); 8536 } 8537 8538 extern __inline __m512 8539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8540 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A) 8541 { 8542 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 8543 
_MM_FROUND_FLOOR, 8544 (__v16sf) __W, __U, 8545 _MM_FROUND_CUR_DIRECTION); 8546 } 8547 8548 extern __inline __m512d 8549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8550 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A) 8551 { 8552 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 8553 _MM_FROUND_FLOOR, 8554 (__v8df) __W, __U, 8555 _MM_FROUND_CUR_DIRECTION); 8556 } 8557 8558 extern __inline __m512 8559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8560 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A) 8561 { 8562 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 8563 _MM_FROUND_CEIL, 8564 (__v16sf) __W, __U, 8565 _MM_FROUND_CUR_DIRECTION); 8566 } 8567 8568 extern __inline __m512d 8569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8570 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) 8571 { 8572 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 8573 _MM_FROUND_CEIL, 8574 (__v8df) __W, __U, 8575 _MM_FROUND_CUR_DIRECTION); 8576 } 8577 8578 #ifdef __OPTIMIZE__ 8579 extern __inline __m512i 8580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8581 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm) 8582 { 8583 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A, 8584 (__v16si) __B, __imm, 8585 (__v16si) 8586 _mm512_undefined_si512 (), 8587 (__mmask16) -1); 8588 } 8589 8590 extern __inline __m512i 8591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8592 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 8593 __m512i __B, const int __imm) 8594 { 8595 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A, 8596 (__v16si) __B, __imm, 8597 (__v16si) __W, 8598 (__mmask16) __U); 8599 } 8600 8601 extern __inline __m512i 8602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8603 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, 
__m512i __B, 8604 const int __imm) 8605 { 8606 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A, 8607 (__v16si) __B, __imm, 8608 (__v16si) 8609 _mm512_setzero_si512 (), 8610 (__mmask16) __U); 8611 } 8612 8613 extern __inline __m512i 8614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8615 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm) 8616 { 8617 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A, 8618 (__v8di) __B, __imm, 8619 (__v8di) 8620 _mm512_undefined_si512 (), 8621 (__mmask8) -1); 8622 } 8623 8624 extern __inline __m512i 8625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8626 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A, 8627 __m512i __B, const int __imm) 8628 { 8629 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A, 8630 (__v8di) __B, __imm, 8631 (__v8di) __W, 8632 (__mmask8) __U); 8633 } 8634 8635 extern __inline __m512i 8636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8637 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B, 8638 const int __imm) 8639 { 8640 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A, 8641 (__v8di) __B, __imm, 8642 (__v8di) 8643 _mm512_setzero_si512 (), 8644 (__mmask8) __U); 8645 } 8646 #else 8647 #define _mm512_alignr_epi32(X, Y, C) \ 8648 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \ 8649 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_si512 (),\ 8650 (__mmask16)-1)) 8651 8652 #define _mm512_mask_alignr_epi32(W, U, X, Y, C) \ 8653 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \ 8654 (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W), \ 8655 (__mmask16)(U))) 8656 8657 #define _mm512_maskz_alignr_epi32(U, X, Y, C) \ 8658 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \ 8659 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\ 8660 (__mmask16)(U))) 8661 8662 #define _mm512_alignr_epi64(X, 
Y, C) \ 8663 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \ 8664 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_si512 (), \ 8665 (__mmask8)-1)) 8666 8667 #define _mm512_mask_alignr_epi64(W, U, X, Y, C) \ 8668 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \ 8669 (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U))) 8670 8671 #define _mm512_maskz_alignr_epi64(U, X, Y, C) \ 8672 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \ 8673 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\ 8674 (__mmask8)(U))) 8675 #endif 8676 8677 extern __inline __mmask16 8678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8679 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B) 8680 { 8681 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A, 8682 (__v16si) __B, 8683 (__mmask16) -1); 8684 } 8685 8686 extern __inline __mmask16 8687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8688 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 8689 { 8690 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A, 8691 (__v16si) __B, __U); 8692 } 8693 8694 extern __inline __mmask8 8695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8696 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 8697 { 8698 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A, 8699 (__v8di) __B, __U); 8700 } 8701 8702 extern __inline __mmask8 8703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8704 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B) 8705 { 8706 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A, 8707 (__v8di) __B, 8708 (__mmask8) -1); 8709 } 8710 8711 extern __inline __mmask16 8712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8713 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B) 8714 { 8715 return (__mmask16) 
__builtin_ia32_pcmpgtd512_mask ((__v16si) __A, 8716 (__v16si) __B, 8717 (__mmask16) -1); 8718 } 8719 8720 extern __inline __mmask16 8721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8722 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 8723 { 8724 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A, 8725 (__v16si) __B, __U); 8726 } 8727 8728 extern __inline __mmask8 8729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8730 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 8731 { 8732 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A, 8733 (__v8di) __B, __U); 8734 } 8735 8736 extern __inline __mmask8 8737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8738 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B) 8739 { 8740 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A, 8741 (__v8di) __B, 8742 (__mmask8) -1); 8743 } 8744 8745 extern __inline __mmask16 8746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8747 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y) 8748 { 8749 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 8750 (__v16si) __Y, 5, 8751 (__mmask16) -1); 8752 } 8753 8754 extern __inline __mmask16 8755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8756 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y) 8757 { 8758 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 8759 (__v16si) __Y, 5, 8760 (__mmask16) -1); 8761 } 8762 8763 extern __inline __mmask8 8764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8765 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y) 8766 { 8767 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 8768 (__v8di) __Y, 5, 8769 (__mmask8) -1); 8770 } 8771 8772 extern __inline __mmask8 8773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8774 _mm512_cmpge_epu64_mask (__m512i __X, 
__m512i __Y) 8775 { 8776 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 8777 (__v8di) __Y, 5, 8778 (__mmask8) -1); 8779 } 8780 8781 extern __inline __mmask16 8782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8783 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y) 8784 { 8785 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 8786 (__v16si) __Y, 2, 8787 (__mmask16) -1); 8788 } 8789 8790 extern __inline __mmask16 8791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8792 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y) 8793 { 8794 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 8795 (__v16si) __Y, 2, 8796 (__mmask16) -1); 8797 } 8798 8799 extern __inline __mmask8 8800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8801 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y) 8802 { 8803 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 8804 (__v8di) __Y, 2, 8805 (__mmask8) -1); 8806 } 8807 8808 extern __inline __mmask8 8809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8810 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y) 8811 { 8812 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 8813 (__v8di) __Y, 2, 8814 (__mmask8) -1); 8815 } 8816 8817 extern __inline __mmask16 8818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8819 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y) 8820 { 8821 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 8822 (__v16si) __Y, 1, 8823 (__mmask16) -1); 8824 } 8825 8826 extern __inline __mmask16 8827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8828 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y) 8829 { 8830 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 8831 (__v16si) __Y, 1, 8832 (__mmask16) -1); 8833 } 8834 8835 extern __inline __mmask8 8836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8837 
_mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y) 8838 { 8839 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 8840 (__v8di) __Y, 1, 8841 (__mmask8) -1); 8842 } 8843 8844 extern __inline __mmask8 8845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8846 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y) 8847 { 8848 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 8849 (__v8di) __Y, 1, 8850 (__mmask8) -1); 8851 } 8852 8853 extern __inline __mmask16 8854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8855 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y) 8856 { 8857 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 8858 (__v16si) __Y, 4, 8859 (__mmask16) -1); 8860 } 8861 8862 extern __inline __mmask16 8863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8864 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y) 8865 { 8866 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 8867 (__v16si) __Y, 4, 8868 (__mmask16) -1); 8869 } 8870 8871 extern __inline __mmask8 8872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8873 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y) 8874 { 8875 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 8876 (__v8di) __Y, 4, 8877 (__mmask8) -1); 8878 } 8879 8880 extern __inline __mmask8 8881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8882 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y) 8883 { 8884 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 8885 (__v8di) __Y, 4, 8886 (__mmask8) -1); 8887 } 8888 8889 #define _MM_CMPINT_EQ 0x0 8890 #define _MM_CMPINT_LT 0x1 8891 #define _MM_CMPINT_LE 0x2 8892 #define _MM_CMPINT_UNUSED 0x3 8893 #define _MM_CMPINT_NE 0x4 8894 #define _MM_CMPINT_NLT 0x5 8895 #define _MM_CMPINT_GE 0x5 8896 #define _MM_CMPINT_NLE 0x6 8897 #define _MM_CMPINT_GT 0x6 8898 8899 #ifdef __OPTIMIZE__ 8900 extern __inline __mmask8 8901 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 8902 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P) 8903 { 8904 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 8905 (__v8di) __Y, __P, 8906 (__mmask8) -1); 8907 } 8908 8909 extern __inline __mmask16 8910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8911 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P) 8912 { 8913 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 8914 (__v16si) __Y, __P, 8915 (__mmask16) -1); 8916 } 8917 8918 extern __inline __mmask8 8919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8920 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P) 8921 { 8922 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 8923 (__v8di) __Y, __P, 8924 (__mmask8) -1); 8925 } 8926 8927 extern __inline __mmask16 8928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8929 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P) 8930 { 8931 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 8932 (__v16si) __Y, __P, 8933 (__mmask16) -1); 8934 } 8935 8936 extern __inline __mmask8 8937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8938 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P, 8939 const int __R) 8940 { 8941 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 8942 (__v8df) __Y, __P, 8943 (__mmask8) -1, __R); 8944 } 8945 8946 extern __inline __mmask16 8947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8948 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R) 8949 { 8950 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 8951 (__v16sf) __Y, __P, 8952 (__mmask16) -1, __R); 8953 } 8954 8955 extern __inline __mmask8 8956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8957 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, 
__m512i __Y, 8958 const int __P) 8959 { 8960 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 8961 (__v8di) __Y, __P, 8962 (__mmask8) __U); 8963 } 8964 8965 extern __inline __mmask16 8966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8967 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y, 8968 const int __P) 8969 { 8970 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 8971 (__v16si) __Y, __P, 8972 (__mmask16) __U); 8973 } 8974 8975 extern __inline __mmask8 8976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8977 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y, 8978 const int __P) 8979 { 8980 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 8981 (__v8di) __Y, __P, 8982 (__mmask8) __U); 8983 } 8984 8985 extern __inline __mmask16 8986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8987 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y, 8988 const int __P) 8989 { 8990 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 8991 (__v16si) __Y, __P, 8992 (__mmask16) __U); 8993 } 8994 8995 extern __inline __mmask8 8996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8997 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, 8998 const int __P, const int __R) 8999 { 9000 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 9001 (__v8df) __Y, __P, 9002 (__mmask8) __U, __R); 9003 } 9004 9005 extern __inline __mmask16 9006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9007 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, 9008 const int __P, const int __R) 9009 { 9010 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 9011 (__v16sf) __Y, __P, 9012 (__mmask16) __U, __R); 9013 } 9014 9015 extern __inline __mmask8 9016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9017 _mm_cmp_round_sd_mask 
(__m128d __X, __m128d __Y, const int __P, const int __R) 9018 { 9019 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, 9020 (__v2df) __Y, __P, 9021 (__mmask8) -1, __R); 9022 } 9023 9024 extern __inline __mmask8 9025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9026 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, 9027 const int __P, const int __R) 9028 { 9029 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, 9030 (__v2df) __Y, __P, 9031 (__mmask8) __M, __R); 9032 } 9033 9034 extern __inline __mmask8 9035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9036 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R) 9037 { 9038 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, 9039 (__v4sf) __Y, __P, 9040 (__mmask8) -1, __R); 9041 } 9042 9043 extern __inline __mmask8 9044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9045 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, 9046 const int __P, const int __R) 9047 { 9048 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, 9049 (__v4sf) __Y, __P, 9050 (__mmask8) __M, __R); 9051 } 9052 9053 #else 9054 #define _mm512_cmp_epi64_mask(X, Y, P) \ 9055 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \ 9056 (__v8di)(__m512i)(Y), (int)(P),\ 9057 (__mmask8)-1)) 9058 9059 #define _mm512_cmp_epi32_mask(X, Y, P) \ 9060 ((__mmask8) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \ 9061 (__v16si)(__m512i)(Y), (int)(P),\ 9062 (__mmask16)-1)) 9063 9064 #define _mm512_cmp_epu64_mask(X, Y, P) \ 9065 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \ 9066 (__v8di)(__m512i)(Y), (int)(P),\ 9067 (__mmask8)-1)) 9068 9069 #define _mm512_cmp_epu32_mask(X, Y, P) \ 9070 ((__mmask8) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \ 9071 (__v16si)(__m512i)(Y), (int)(P),\ 9072 (__mmask16)-1)) 9073 9074 #define _mm512_cmp_round_pd_mask(X, Y, P, R) \ 9075 ((__mmask8) 
__builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ 9076 (__v8df)(__m512d)(Y), (int)(P),\ 9077 (__mmask8)-1, R)) 9078 9079 #define _mm512_cmp_round_ps_mask(X, Y, P, R) \ 9080 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ 9081 (__v16sf)(__m512)(Y), (int)(P),\ 9082 (__mmask16)-1, R)) 9083 9084 #define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \ 9085 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \ 9086 (__v8di)(__m512i)(Y), (int)(P),\ 9087 (__mmask8)M)) 9088 9089 #define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \ 9090 ((__mmask8) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \ 9091 (__v16si)(__m512i)(Y), (int)(P),\ 9092 (__mmask16)M)) 9093 9094 #define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \ 9095 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \ 9096 (__v8di)(__m512i)(Y), (int)(P),\ 9097 (__mmask8)M)) 9098 9099 #define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \ 9100 ((__mmask8) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \ 9101 (__v16si)(__m512i)(Y), (int)(P),\ 9102 (__mmask16)M)) 9103 9104 #define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \ 9105 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ 9106 (__v8df)(__m512d)(Y), (int)(P),\ 9107 (__mmask8)M, R)) 9108 9109 #define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \ 9110 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ 9111 (__v16sf)(__m512)(Y), (int)(P),\ 9112 (__mmask16)M, R)) 9113 9114 #define _mm_cmp_round_sd_mask(X, Y, P, R) \ 9115 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ 9116 (__v2df)(__m128d)(Y), (int)(P),\ 9117 (__mmask8)-1, R)) 9118 9119 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \ 9120 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ 9121 (__v2df)(__m128d)(Y), (int)(P),\ 9122 (M), R)) 9123 9124 #define _mm_cmp_round_ss_mask(X, Y, P, R) \ 9125 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ 9126 (__v4sf)(__m128)(Y), (int)(P), \ 9127 (__mmask8)-1, R)) 

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R)			\
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),		\
					 (__v4sf)(__m128)(Y), (int)(P), \
					 (M), R))
#endif

#ifdef __OPTIMIZE__
/* Gather intrinsics: load elements from __addr at positions given by
   __index scaled by __scale.  The non-mask forms pass an all-ones mask
   and an undefined pass-through vector; the _mask_ forms take both
   explicitly (__v1_old supplies the lanes whose mask bit is clear).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
{
  __m512 v1_old = _mm512_undefined_ps ();
  __mmask16 mask = 0xFFFF;

  return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
						__addr,
						(__v16si) __index,
						mask, __scale);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask,
			  __m512i __index, float const *__addr, int __scale)
{
  return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
						__addr,
						(__v16si) __index,
						__mask, __scale);
}

/* 32-bit indices gathering 64-bit elements: only eight indices are
   used, hence the __m256i index vector and 8-bit mask.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
{
  __m512d v1_old = _mm512_undefined_pd ();
  __mmask8 mask = 0xFF;

  return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
						__addr,
						(__v8si) __index, mask,
						__scale);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
			  __m256i __index, double const *__addr, int __scale)
{
  return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
						__addr,
						(__v8si) __index,
						__mask, __scale);
}

/* 64-bit indices gathering 32-bit elements: eight results only, so the
   return type narrows to __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
{
  __m256 v1_old = _mm256_undefined_ps ();
  __mmask8 mask = 0xFF;

  return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
						__addr,
						(__v8di) __index, mask,
						__scale);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
			  __m512i __index, float const *__addr, int __scale)
{
  return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
						__addr,
						(__v8di) __index,
						__mask, __scale);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
{
  __m512d v1_old = _mm512_undefined_pd ();
  __mmask8 mask = 0xFF;

  return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
						__addr,
						(__v8di) __index, mask,
						__scale);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
			  __m512i __index, double const *__addr, int __scale)
{
  return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
						__addr,
						(__v8di) __index,
						__mask, __scale);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
{
  __m512i v1_old = _mm512_undefined_si512 ();
  __mmask16 mask = 0xFFFF;

  return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
						 __addr,
						 (__v16si) __index,
						 mask, __scale);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
			     __m512i __index, int const *__addr, int __scale)
{
  return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
						 __addr,
						 (__v16si) __index,
						 __mask, __scale);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
{
  __m512i v1_old = _mm512_undefined_si512 ();
  __mmask8 mask = 0xFF;

  return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
						__addr,
						(__v8si) __index, mask,
						__scale);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
			     __m256i __index, long long const *__addr,
			     int __scale)
{
  return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
						__addr,
						(__v8si) __index,
						__mask, __scale);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
{
  __m256i v1_old = _mm256_undefined_si256 ();
  __mmask8 mask = 0xFF;

  return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
						 __addr,
						 (__v8di) __index,
						 mask, __scale);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
			     __m512i __index, int const *__addr, int __scale)
{
  return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
						 __addr,
						 (__v8di) __index,
						 __mask, __scale);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
{
  __m512i v1_old = _mm512_undefined_si512 ();
  __mmask8 mask = 0xFF;

  return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
						__addr,
						(__v8di) __index, mask,
						__scale);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
			     __m512i __index, long long const *__addr,
			     int __scale)
{
  return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
						__addr,
						(__v8di) __index,
						__mask, __scale);
}

/* Scatter intrinsics: store elements of __v1 to __addr at positions
   given by __index scaled by __scale; the non-mask forms enable all
   elements via a constant all-ones mask.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale)
{
  __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
				 (__v16si) __index, (__v16sf) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask,
			   __m512i __index, __m512 __v1, int __scale)
{
  __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
				 (__v16sf) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1,
		      int __scale)
{
  __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
				(__v8si) __index, (__v8df) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
			   __m256i __index, __m512d __v1, int __scale)
{
  __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
				(__v8df) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale)
{
  /* Body of _mm512_i64scatter_ps: 64-bit indices store eight floats.  */
  __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
				 (__v8di) __index, (__v8sf) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
			   __m512i __index, __m256 __v1, int __scale)
{
  __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
				 (__v8sf) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1,
		      int __scale)
{
  __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
				(__v8di) __index, (__v8df) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
			   __m512i __index, __m512d __v1, int __scale)
{
  __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
				(__v8df) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i32scatter_epi32 (int *__addr, __m512i __index,
			 __m512i __v1, int __scale)
{
  __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
				 (__v16si) __index, (__v16si) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask,
			      __m512i __index, __m512i __v1, int __scale)
{
  __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
				 (__v16si) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i32scatter_epi64 (long long *__addr, __m256i __index,
			 __m512i __v1, int __scale)
{
  __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
				(__v8si) __index, (__v8di) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
			      __m256i __index, __m512i __v1, int __scale)
{
  __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
				(__v8di) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64scatter_epi32 (int *__addr, __m512i __index,
			 __m256i __v1, int __scale)
{
  __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
				 (__v8di) __index, (__v8si) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
			      __m512i __index, __m256i __v1, int __scale)
{
  __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
				 (__v8si) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64scatter_epi64 (long long *__addr, __m512i __index,
			 __m512i __v1, int __scale)
{
  __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
				(__v8di) __index, (__v8di) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
			      __m512i __index, __m512i __v1, int __scale)
{
  __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
				(__v8di) __v1, __scale);
}
#else
/* Macro forms used when not optimizing, so SCALE reaches the builtin
   as a constant expression.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE)				\
  (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
					 (float const *)ADDR,		\
					 (__v16si)(__m512i)INDEX,	\
					 (__mmask16)0xFFFF, (int)SCALE)

#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD,	\
					 (float const *)ADDR,		\
					 (__v16si)(__m512i)INDEX,	\
					 (__mmask16)MASK, (int)SCALE)

#define _mm512_i32gather_pd(INDEX, ADDR, SCALE)				\
  (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(),	\
					 (double const *)ADDR,		\
					 (__v8si)(__m256i)INDEX,	\
					 (__mmask8)0xFF, (int)SCALE)

#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD,	\
					 (double const *)ADDR,		\
					 (__v8si)(__m256i)INDEX,	\
					 (__mmask8)MASK, (int)SCALE)

#define _mm512_i64gather_ps(INDEX, ADDR, SCALE)				\
  (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(),	\
					 (float const *)ADDR,		\
					 (__v8di)(__m512i)INDEX,	\
					 (__mmask8)0xFF, (int)SCALE)

#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD,		\
					 (float const *)ADDR,		\
					 (__v8di)(__m512i)INDEX,	\
					 (__mmask8)MASK, (int)SCALE)

#define _mm512_i64gather_pd(INDEX, ADDR, SCALE)				\
  (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(),	\
					 (double const *)ADDR,		\
					 (__v8di)(__m512i)INDEX,	\
					 (__mmask8)0xFF, (int)SCALE)

#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD,	\
					 (double const *)ADDR,		\
					 (__v8di)(__m512i)INDEX,	\
					 (__mmask8)MASK, (int)SCALE)

#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE)			\
  (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_si512 (), \
					  (int const *)ADDR,		\
					  (__v16si)(__m512i)INDEX,	\
					  (__mmask16)0xFFFF, (int)SCALE)

#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD,	\
					  (int const *)ADDR,		\
					  (__v16si)(__m512i)INDEX,	\
					  (__mmask16)MASK, (int)SCALE)

#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE)			\
  (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_si512 (), \
					 (long long const *)ADDR,	\
					 (__v8si)(__m256i)INDEX,	\
					 (__mmask8)0xFF, (int)SCALE)

#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD,	\
					 (long long const *)ADDR,	\
					 (__v8si)(__m256i)INDEX,	\
					 (__mmask8)MASK, (int)SCALE)

#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)			\
  (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
					  (int const *)ADDR,		\
					  (__v8di)(__m512i)INDEX,	\
					  (__mmask8)0xFF, (int)SCALE)

#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD,	\
					  (int const *)ADDR,		\
					  (__v8di)(__m512i)INDEX,	\
					  (__mmask8)MASK, (int)SCALE)

#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE)			\
  (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_si512 (), \
					 (long long const *)ADDR,	\
					 (__v8di)(__m512i)INDEX,	\
					 (__mmask8)0xFF, (int)SCALE)

#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD,	\
					 (long long const *)ADDR,	\
					 (__v8di)(__m512i)INDEX,	\
					 (__mmask8)MASK, (int)SCALE)

#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)0xFFFF,	\
				 (__v16si)(__m512i)INDEX,		\
				 (__v16sf)(__m512)V1, (int)SCALE)

#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
  __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)MASK,	\
				 (__v16si)(__m512i)INDEX,		\
				 (__v16sf)(__m512)V1, (int)SCALE)

9578 #define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \ 9579 __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)0xFF, \ 9580 (__v8si)(__m256i)INDEX, \ 9581 (__v8df)(__m512d)V1, (int)SCALE) 9582 9583 #define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ 9584 __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)MASK, \ 9585 (__v8si)(__m256i)INDEX, \ 9586 (__v8df)(__m512d)V1, (int)SCALE) 9587 9588 #define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \ 9589 __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask8)0xFF, \ 9590 (__v8di)(__m512i)INDEX, \ 9591 (__v8sf)(__m256)V1, (int)SCALE) 9592 9593 #define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \ 9594 __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask16)MASK, \ 9595 (__v8di)(__m512i)INDEX, \ 9596 (__v8sf)(__m256)V1, (int)SCALE) 9597 9598 #define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \ 9599 __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)0xFF, \ 9600 (__v8di)(__m512i)INDEX, \ 9601 (__v8df)(__m512d)V1, (int)SCALE) 9602 9603 #define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ 9604 __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)MASK, \ 9605 (__v8di)(__m512i)INDEX, \ 9606 (__v8df)(__m512d)V1, (int)SCALE) 9607 9608 #define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \ 9609 __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)0xFFFF, \ 9610 (__v16si)(__m512i)INDEX, \ 9611 (__v16si)(__m512i)V1, (int)SCALE) 9612 9613 #define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ 9614 __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)MASK, \ 9615 (__v16si)(__m512i)INDEX, \ 9616 (__v16si)(__m512i)V1, (int)SCALE) 9617 9618 #define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \ 9619 __builtin_ia32_scattersiv8di ((long long *)ADDR, (__mmask8)0xFF, \ 9620 (__v8si)(__m256i)INDEX, \ 9621 (__v8di)(__m512i)V1, (int)SCALE) 9622 9623 #define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ 9624 __builtin_ia32_scattersiv8di 
((long long *)ADDR, (__mmask8)MASK, \ 9625 (__v8si)(__m256i)INDEX, \ 9626 (__v8di)(__m512i)V1, (int)SCALE) 9627 9628 #define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \ 9629 __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)0xFF, \ 9630 (__v8di)(__m512i)INDEX, \ 9631 (__v8si)(__m256i)V1, (int)SCALE) 9632 9633 #define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ 9634 __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)MASK, \ 9635 (__v8di)(__m512i)INDEX, \ 9636 (__v8si)(__m256i)V1, (int)SCALE) 9637 9638 #define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \ 9639 __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)0xFF, \ 9640 (__v8di)(__m512i)INDEX, \ 9641 (__v8di)(__m512i)V1, (int)SCALE) 9642 9643 #define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ 9644 __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)MASK, \ 9645 (__v8di)(__m512i)INDEX, \ 9646 (__v8di)(__m512i)V1, (int)SCALE) 9647 #endif 9648 9649 extern __inline __m512d 9650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9651 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A) 9652 { 9653 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, 9654 (__v8df) __W, 9655 (__mmask8) __U); 9656 } 9657 9658 extern __inline __m512d 9659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9660 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) 9661 { 9662 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, 9663 (__v8df) 9664 _mm512_setzero_pd (), 9665 (__mmask8) __U); 9666 } 9667 9668 extern __inline void 9669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9670 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A) 9671 { 9672 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A, 9673 (__mmask8) __U); 9674 } 9675 9676 extern __inline __m512 9677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9678 
_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) 9679 { 9680 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 9681 (__v16sf) __W, 9682 (__mmask16) __U); 9683 } 9684 9685 extern __inline __m512 9686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9687 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) 9688 { 9689 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 9690 (__v16sf) 9691 _mm512_setzero_ps (), 9692 (__mmask16) __U); 9693 } 9694 9695 extern __inline void 9696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9697 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A) 9698 { 9699 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A, 9700 (__mmask16) __U); 9701 } 9702 9703 extern __inline __m512i 9704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9705 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 9706 { 9707 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 9708 (__v8di) __W, 9709 (__mmask8) __U); 9710 } 9711 9712 extern __inline __m512i 9713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9714 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) 9715 { 9716 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 9717 (__v8di) 9718 _mm512_setzero_si512 (), 9719 (__mmask8) __U); 9720 } 9721 9722 extern __inline void 9723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9724 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A) 9725 { 9726 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A, 9727 (__mmask8) __U); 9728 } 9729 9730 extern __inline __m512i 9731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9732 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 9733 { 9734 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 9735 (__v16si) __W, 9736 
(__mmask16) __U); 9737 } 9738 9739 extern __inline __m512i 9740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9741 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) 9742 { 9743 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 9744 (__v16si) 9745 _mm512_setzero_si512 (), 9746 (__mmask16) __U); 9747 } 9748 9749 extern __inline void 9750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9751 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) 9752 { 9753 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A, 9754 (__mmask16) __U); 9755 } 9756 9757 extern __inline __m512d 9758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9759 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) 9760 { 9761 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, 9762 (__v8df) __W, 9763 (__mmask8) __U); 9764 } 9765 9766 extern __inline __m512d 9767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9768 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) 9769 { 9770 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A, 9771 (__v8df) 9772 _mm512_setzero_pd (), 9773 (__mmask8) __U); 9774 } 9775 9776 extern __inline __m512d 9777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9778 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P) 9779 { 9780 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P, 9781 (__v8df) __W, 9782 (__mmask8) __U); 9783 } 9784 9785 extern __inline __m512d 9786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9787 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P) 9788 { 9789 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P, 9790 (__v8df) 9791 _mm512_setzero_pd (), 9792 (__mmask8) __U); 9793 } 9794 9795 extern __inline __m512 9796 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 9797 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) 9798 { 9799 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, 9800 (__v16sf) __W, 9801 (__mmask16) __U); 9802 } 9803 9804 extern __inline __m512 9805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9806 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) 9807 { 9808 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A, 9809 (__v16sf) 9810 _mm512_setzero_ps (), 9811 (__mmask16) __U); 9812 } 9813 9814 extern __inline __m512 9815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9816 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P) 9817 { 9818 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P, 9819 (__v16sf) __W, 9820 (__mmask16) __U); 9821 } 9822 9823 extern __inline __m512 9824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9825 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P) 9826 { 9827 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P, 9828 (__v16sf) 9829 _mm512_setzero_ps (), 9830 (__mmask16) __U); 9831 } 9832 9833 extern __inline __m512i 9834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9835 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 9836 { 9837 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, 9838 (__v8di) __W, 9839 (__mmask8) __U); 9840 } 9841 9842 extern __inline __m512i 9843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9844 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A) 9845 { 9846 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A, 9847 (__v8di) 9848 _mm512_setzero_si512 (), 9849 (__mmask8) __U); 9850 } 9851 9852 extern __inline __m512i 9853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9854 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) 9855 { 9856 return 
(__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P, 9857 (__v8di) __W, 9858 (__mmask8) __U); 9859 } 9860 9861 extern __inline __m512i 9862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9863 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) 9864 { 9865 return (__m512i) 9866 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P, 9867 (__v8di) 9868 _mm512_setzero_si512 (), 9869 (__mmask8) __U); 9870 } 9871 9872 extern __inline __m512i 9873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9874 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 9875 { 9876 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, 9877 (__v16si) __W, 9878 (__mmask16) __U); 9879 } 9880 9881 extern __inline __m512i 9882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9883 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) 9884 { 9885 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A, 9886 (__v16si) 9887 _mm512_setzero_si512 (), 9888 (__mmask16) __U); 9889 } 9890 9891 extern __inline __m512i 9892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9893 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) 9894 { 9895 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P, 9896 (__v16si) __W, 9897 (__mmask16) __U); 9898 } 9899 9900 extern __inline __m512i 9901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9902 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P) 9903 { 9904 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P, 9905 (__v16si) 9906 _mm512_setzero_si512 9907 (), (__mmask16) __U); 9908 } 9909 9910 /* Mask arithmetic operations */ 9911 extern __inline __mmask16 9912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9913 _mm512_kand (__mmask16 __A, __mmask16 __B) 9914 { 9915 return (__mmask16) 
__builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); 9916 } 9917 9918 extern __inline __mmask16 9919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9920 _mm512_kandn (__mmask16 __A, __mmask16 __B) 9921 { 9922 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B); 9923 } 9924 9925 extern __inline __mmask16 9926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9927 _mm512_kor (__mmask16 __A, __mmask16 __B) 9928 { 9929 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B); 9930 } 9931 9932 extern __inline int 9933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9934 _mm512_kortestz (__mmask16 __A, __mmask16 __B) 9935 { 9936 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A, 9937 (__mmask16) __B); 9938 } 9939 9940 extern __inline int 9941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9942 _mm512_kortestc (__mmask16 __A, __mmask16 __B) 9943 { 9944 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A, 9945 (__mmask16) __B); 9946 } 9947 9948 extern __inline __mmask16 9949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9950 _mm512_kxnor (__mmask16 __A, __mmask16 __B) 9951 { 9952 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); 9953 } 9954 9955 extern __inline __mmask16 9956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9957 _mm512_kxor (__mmask16 __A, __mmask16 __B) 9958 { 9959 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B); 9960 } 9961 9962 extern __inline __mmask16 9963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9964 _mm512_knot (__mmask16 __A) 9965 { 9966 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A); 9967 } 9968 9969 extern __inline __mmask16 9970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9971 _mm512_kunpackb (__mmask16 __A, __mmask16 __B) 9972 { 9973 return 
(__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); 9974 } 9975 9976 #ifdef __OPTIMIZE__ 9977 extern __inline __m512i 9978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9979 _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D, 9980 const int __imm) 9981 { 9982 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C, 9983 (__v4si) __D, 9984 __imm, 9985 (__v16si) 9986 _mm512_setzero_si512 (), 9987 __B); 9988 } 9989 9990 extern __inline __m512 9991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9992 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D, 9993 const int __imm) 9994 { 9995 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C, 9996 (__v4sf) __D, 9997 __imm, 9998 (__v16sf) 9999 _mm512_setzero_ps (), __B); 10000 } 10001 10002 extern __inline __m512i 10003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10004 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C, 10005 __m128i __D, const int __imm) 10006 { 10007 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C, 10008 (__v4si) __D, 10009 __imm, 10010 (__v16si) __A, 10011 __B); 10012 } 10013 10014 extern __inline __m512 10015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10016 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C, 10017 __m128 __D, const int __imm) 10018 { 10019 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C, 10020 (__v4sf) __D, 10021 __imm, 10022 (__v16sf) __A, __B); 10023 } 10024 #else 10025 #define _mm512_maskz_insertf32x4(A, X, Y, C) \ 10026 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \ 10027 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \ 10028 (__mmask8)(A))) 10029 10030 #define _mm512_maskz_inserti32x4(A, X, Y, C) \ 10031 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \ 10032 (__v4si)(__m128i) (Y), (int) (C), 
(__v16si)_mm512_setzero_si512 (), \ 10033 (__mmask8)(A))) 10034 10035 #define _mm512_mask_insertf32x4(A, B, X, Y, C) \ 10036 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \ 10037 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \ 10038 (__mmask8)(B))) 10039 10040 #define _mm512_mask_inserti32x4(A, B, X, Y, C) \ 10041 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \ 10042 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \ 10043 (__mmask8)(B))) 10044 #endif 10045 10046 extern __inline __m512i 10047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10048 _mm512_max_epi64 (__m512i __A, __m512i __B) 10049 { 10050 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 10051 (__v8di) __B, 10052 (__v8di) 10053 _mm512_undefined_si512 (), 10054 (__mmask8) -1); 10055 } 10056 10057 extern __inline __m512i 10058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10059 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) 10060 { 10061 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 10062 (__v8di) __B, 10063 (__v8di) 10064 _mm512_setzero_si512 (), 10065 __M); 10066 } 10067 10068 extern __inline __m512i 10069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10070 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 10071 { 10072 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 10073 (__v8di) __B, 10074 (__v8di) __W, __M); 10075 } 10076 10077 extern __inline __m512i 10078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10079 _mm512_min_epi64 (__m512i __A, __m512i __B) 10080 { 10081 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 10082 (__v8di) __B, 10083 (__v8di) 10084 _mm512_undefined_si512 (), 10085 (__mmask8) -1); 10086 } 10087 10088 extern __inline __m512i 10089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10090 _mm512_mask_min_epi64 (__m512i 
__W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di) __W, __M);
}

/* Zero-masking variant of the signed 64-bit minimum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  __M);
}

/* Unsigned 64-bit maximum: plain, zero-masked and merge-masked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_epu64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_undefined_si512 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di) __W, __M);
}

/* Unsigned 64-bit minimum: plain, merge-masked and zero-masked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_epu64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_undefined_si512 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di) __W, __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  __M);
}

/* Signed 32-bit maximum: sixteen elements, __mmask16 masking.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_si512 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W, __M);
}

/* Signed 32-bit minimum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_si512 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W, __M);
}

/* Unsigned 32-bit maximum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_epu32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_si512 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W, __M);
}

/* Unsigned 32-bit minimum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_epu32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_si512 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W, __M);
}

/* VUNPCKLPS: interleave the low float pairs within each 128-bit lane
   of __A and __B.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_unpacklo_ps (__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf)
						   _mm512_undefined_ps (),
						   (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf) __W,
						   (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf)
						   _mm512_setzero_ps (),
						   (__mmask16) __U);
}

#ifdef __OPTIMIZE__
/* Scalar double maximum with explicit rounding/SAE control __R.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__,
__always_inline__, __artificial__)) 10338 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R) 10339 { 10340 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, 10341 (__v4sf) __B, 10342 __R); 10343 } 10344 10345 extern __inline __m128d 10346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10347 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R) 10348 { 10349 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, 10350 (__v2df) __B, 10351 __R); 10352 } 10353 10354 extern __inline __m128 10355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10356 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R) 10357 { 10358 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A, 10359 (__v4sf) __B, 10360 __R); 10361 } 10362 10363 #else 10364 #define _mm_max_round_sd(A, B, C) \ 10365 (__m128d)__builtin_ia32_addsd_round(A, B, C) 10366 10367 #define _mm_max_round_ss(A, B, C) \ 10368 (__m128)__builtin_ia32_addss_round(A, B, C) 10369 10370 #define _mm_min_round_sd(A, B, C) \ 10371 (__m128d)__builtin_ia32_subsd_round(A, B, C) 10372 10373 #define _mm_min_round_ss(A, B, C) \ 10374 (__m128)__builtin_ia32_subss_round(A, B, C) 10375 #endif 10376 10377 extern __inline __m512d 10378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10379 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W) 10380 { 10381 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A, 10382 (__v8df) __W, 10383 (__mmask8) __U); 10384 } 10385 10386 extern __inline __m512 10387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10388 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W) 10389 { 10390 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A, 10391 (__v16sf) __W, 10392 (__mmask16) __U); 10393 } 10394 10395 extern __inline __m512i 10396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10397 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W) 
10398 { 10399 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A, 10400 (__v8di) __W, 10401 (__mmask8) __U); 10402 } 10403 10404 extern __inline __m512i 10405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10406 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W) 10407 { 10408 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A, 10409 (__v16si) __W, 10410 (__mmask16) __U); 10411 } 10412 10413 #ifdef __OPTIMIZE__ 10414 extern __inline __m128d 10415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10416 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R) 10417 { 10418 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W, 10419 (__v2df) __A, 10420 (__v2df) __B, 10421 __R); 10422 } 10423 10424 extern __inline __m128 10425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10426 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R) 10427 { 10428 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W, 10429 (__v4sf) __A, 10430 (__v4sf) __B, 10431 __R); 10432 } 10433 10434 extern __inline __m128d 10435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10436 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R) 10437 { 10438 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W, 10439 (__v2df) __A, 10440 -(__v2df) __B, 10441 __R); 10442 } 10443 10444 extern __inline __m128 10445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10446 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R) 10447 { 10448 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W, 10449 (__v4sf) __A, 10450 -(__v4sf) __B, 10451 __R); 10452 } 10453 10454 extern __inline __m128d 10455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10456 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R) 10457 { 10458 return (__m128d) 
__builtin_ia32_vfmaddsd3_round ((__v2df) __W, 10459 -(__v2df) __A, 10460 (__v2df) __B, 10461 __R); 10462 } 10463 10464 extern __inline __m128 10465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10466 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R) 10467 { 10468 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W, 10469 -(__v4sf) __A, 10470 (__v4sf) __B, 10471 __R); 10472 } 10473 10474 extern __inline __m128d 10475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10476 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R) 10477 { 10478 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W, 10479 -(__v2df) __A, 10480 -(__v2df) __B, 10481 __R); 10482 } 10483 10484 extern __inline __m128 10485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10486 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R) 10487 { 10488 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W, 10489 -(__v4sf) __A, 10490 -(__v4sf) __B, 10491 __R); 10492 } 10493 #else 10494 #define _mm_fmadd_round_sd(A, B, C, R) \ 10495 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R) 10496 10497 #define _mm_fmadd_round_ss(A, B, C, R) \ 10498 (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R) 10499 10500 #define _mm_fmsub_round_sd(A, B, C, R) \ 10501 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R) 10502 10503 #define _mm_fmsub_round_ss(A, B, C, R) \ 10504 (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R) 10505 10506 #define _mm_fnmadd_round_sd(A, B, C, R) \ 10507 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R) 10508 10509 #define _mm_fnmadd_round_ss(A, B, C, R) \ 10510 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R) 10511 10512 #define _mm_fnmsub_round_sd(A, B, C, R) \ 10513 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R) 10514 10515 #define _mm_fnmsub_round_ss(A, B, C, R) \ 10516 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), 
-(C), R) 10517 #endif 10518 10519 #ifdef __OPTIMIZE__ 10520 extern __inline int 10521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10522 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R) 10523 { 10524 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R); 10525 } 10526 10527 extern __inline int 10528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10529 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R) 10530 { 10531 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R); 10532 } 10533 #else 10534 #define _mm_comi_round_ss(A, B, C, D)\ 10535 __builtin_ia32_vcomiss(A, B, C, D) 10536 #define _mm_comi_round_sd(A, B, C, D)\ 10537 __builtin_ia32_vcomisd(A, B, C, D) 10538 #endif 10539 10540 extern __inline __m512d 10541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10542 _mm512_sqrt_pd (__m512d __A) 10543 { 10544 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 10545 (__v8df) 10546 _mm512_undefined_pd (), 10547 (__mmask8) -1, 10548 _MM_FROUND_CUR_DIRECTION); 10549 } 10550 10551 extern __inline __m512d 10552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10553 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) 10554 { 10555 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 10556 (__v8df) __W, 10557 (__mmask8) __U, 10558 _MM_FROUND_CUR_DIRECTION); 10559 } 10560 10561 extern __inline __m512d 10562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10563 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) 10564 { 10565 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 10566 (__v8df) 10567 _mm512_setzero_pd (), 10568 (__mmask8) __U, 10569 _MM_FROUND_CUR_DIRECTION); 10570 } 10571 10572 extern __inline __m512 10573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10574 _mm512_sqrt_ps (__m512 __A) 10575 { 10576 return (__m512) 
__builtin_ia32_sqrtps512_mask ((__v16sf) __A, 10577 (__v16sf) 10578 _mm512_undefined_ps (), 10579 (__mmask16) -1, 10580 _MM_FROUND_CUR_DIRECTION); 10581 } 10582 10583 extern __inline __m512 10584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10585 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A) 10586 { 10587 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 10588 (__v16sf) __W, 10589 (__mmask16) __U, 10590 _MM_FROUND_CUR_DIRECTION); 10591 } 10592 10593 extern __inline __m512 10594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10595 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A) 10596 { 10597 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 10598 (__v16sf) 10599 _mm512_setzero_ps (), 10600 (__mmask16) __U, 10601 _MM_FROUND_CUR_DIRECTION); 10602 } 10603 10604 extern __inline __m512d 10605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10606 _mm512_add_pd (__m512d __A, __m512d __B) 10607 { 10608 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 10609 (__v8df) __B, 10610 (__v8df) 10611 _mm512_undefined_pd (), 10612 (__mmask8) -1, 10613 _MM_FROUND_CUR_DIRECTION); 10614 } 10615 10616 extern __inline __m512d 10617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10618 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 10619 { 10620 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 10621 (__v8df) __B, 10622 (__v8df) __W, 10623 (__mmask8) __U, 10624 _MM_FROUND_CUR_DIRECTION); 10625 } 10626 10627 extern __inline __m512d 10628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10629 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B) 10630 { 10631 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 10632 (__v8df) __B, 10633 (__v8df) 10634 _mm512_setzero_pd (), 10635 (__mmask8) __U, 10636 _MM_FROUND_CUR_DIRECTION); 10637 } 10638 10639 extern __inline __m512 10640 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 10641 _mm512_add_ps (__m512 __A, __m512 __B) 10642 { 10643 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 10644 (__v16sf) __B, 10645 (__v16sf) 10646 _mm512_undefined_ps (), 10647 (__mmask16) -1, 10648 _MM_FROUND_CUR_DIRECTION); 10649 } 10650 10651 extern __inline __m512 10652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10653 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 10654 { 10655 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 10656 (__v16sf) __B, 10657 (__v16sf) __W, 10658 (__mmask16) __U, 10659 _MM_FROUND_CUR_DIRECTION); 10660 } 10661 10662 extern __inline __m512 10663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10664 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B) 10665 { 10666 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 10667 (__v16sf) __B, 10668 (__v16sf) 10669 _mm512_setzero_ps (), 10670 (__mmask16) __U, 10671 _MM_FROUND_CUR_DIRECTION); 10672 } 10673 10674 extern __inline __m512d 10675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10676 _mm512_sub_pd (__m512d __A, __m512d __B) 10677 { 10678 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 10679 (__v8df) __B, 10680 (__v8df) 10681 _mm512_undefined_pd (), 10682 (__mmask8) -1, 10683 _MM_FROUND_CUR_DIRECTION); 10684 } 10685 10686 extern __inline __m512d 10687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10688 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 10689 { 10690 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 10691 (__v8df) __B, 10692 (__v8df) __W, 10693 (__mmask8) __U, 10694 _MM_FROUND_CUR_DIRECTION); 10695 } 10696 10697 extern __inline __m512d 10698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10699 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B) 10700 { 10701 return (__m512d) 
__builtin_ia32_subpd512_mask ((__v8df) __A, 10702 (__v8df) __B, 10703 (__v8df) 10704 _mm512_setzero_pd (), 10705 (__mmask8) __U, 10706 _MM_FROUND_CUR_DIRECTION); 10707 } 10708 10709 extern __inline __m512 10710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10711 _mm512_sub_ps (__m512 __A, __m512 __B) 10712 { 10713 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 10714 (__v16sf) __B, 10715 (__v16sf) 10716 _mm512_undefined_ps (), 10717 (__mmask16) -1, 10718 _MM_FROUND_CUR_DIRECTION); 10719 } 10720 10721 extern __inline __m512 10722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10723 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 10724 { 10725 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 10726 (__v16sf) __B, 10727 (__v16sf) __W, 10728 (__mmask16) __U, 10729 _MM_FROUND_CUR_DIRECTION); 10730 } 10731 10732 extern __inline __m512 10733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10734 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B) 10735 { 10736 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 10737 (__v16sf) __B, 10738 (__v16sf) 10739 _mm512_setzero_ps (), 10740 (__mmask16) __U, 10741 _MM_FROUND_CUR_DIRECTION); 10742 } 10743 10744 extern __inline __m512d 10745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10746 _mm512_mul_pd (__m512d __A, __m512d __B) 10747 { 10748 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 10749 (__v8df) __B, 10750 (__v8df) 10751 _mm512_undefined_pd (), 10752 (__mmask8) -1, 10753 _MM_FROUND_CUR_DIRECTION); 10754 } 10755 10756 extern __inline __m512d 10757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10758 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 10759 { 10760 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 10761 (__v8df) __B, 10762 (__v8df) __W, 10763 (__mmask8) __U, 10764 _MM_FROUND_CUR_DIRECTION); 10765 
} 10766 10767 extern __inline __m512d 10768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10769 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B) 10770 { 10771 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 10772 (__v8df) __B, 10773 (__v8df) 10774 _mm512_setzero_pd (), 10775 (__mmask8) __U, 10776 _MM_FROUND_CUR_DIRECTION); 10777 } 10778 10779 extern __inline __m512 10780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10781 _mm512_mul_ps (__m512 __A, __m512 __B) 10782 { 10783 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 10784 (__v16sf) __B, 10785 (__v16sf) 10786 _mm512_undefined_ps (), 10787 (__mmask16) -1, 10788 _MM_FROUND_CUR_DIRECTION); 10789 } 10790 10791 extern __inline __m512 10792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10793 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 10794 { 10795 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 10796 (__v16sf) __B, 10797 (__v16sf) __W, 10798 (__mmask16) __U, 10799 _MM_FROUND_CUR_DIRECTION); 10800 } 10801 10802 extern __inline __m512 10803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10804 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B) 10805 { 10806 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 10807 (__v16sf) __B, 10808 (__v16sf) 10809 _mm512_setzero_ps (), 10810 (__mmask16) __U, 10811 _MM_FROUND_CUR_DIRECTION); 10812 } 10813 10814 extern __inline __m512d 10815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10816 _mm512_div_pd (__m512d __M, __m512d __V) 10817 { 10818 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 10819 (__v8df) __V, 10820 (__v8df) 10821 _mm512_undefined_pd (), 10822 (__mmask8) -1, 10823 _MM_FROUND_CUR_DIRECTION); 10824 } 10825 10826 extern __inline __m512d 10827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10828 _mm512_mask_div_pd (__m512d __W, 
__mmask8 __U, __m512d __M, __m512d __V) 10829 { 10830 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 10831 (__v8df) __V, 10832 (__v8df) __W, 10833 (__mmask8) __U, 10834 _MM_FROUND_CUR_DIRECTION); 10835 } 10836 10837 extern __inline __m512d 10838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10839 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V) 10840 { 10841 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 10842 (__v8df) __V, 10843 (__v8df) 10844 _mm512_setzero_pd (), 10845 (__mmask8) __U, 10846 _MM_FROUND_CUR_DIRECTION); 10847 } 10848 10849 extern __inline __m512 10850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10851 _mm512_div_ps (__m512 __A, __m512 __B) 10852 { 10853 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 10854 (__v16sf) __B, 10855 (__v16sf) 10856 _mm512_undefined_ps (), 10857 (__mmask16) -1, 10858 _MM_FROUND_CUR_DIRECTION); 10859 } 10860 10861 extern __inline __m512 10862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10863 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 10864 { 10865 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 10866 (__v16sf) __B, 10867 (__v16sf) __W, 10868 (__mmask16) __U, 10869 _MM_FROUND_CUR_DIRECTION); 10870 } 10871 10872 extern __inline __m512 10873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10874 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B) 10875 { 10876 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 10877 (__v16sf) __B, 10878 (__v16sf) 10879 _mm512_setzero_ps (), 10880 (__mmask16) __U, 10881 _MM_FROUND_CUR_DIRECTION); 10882 } 10883 10884 extern __inline __m512d 10885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10886 _mm512_max_pd (__m512d __A, __m512d __B) 10887 { 10888 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 10889 (__v8df) __B, 10890 (__v8df) 10891 _mm512_undefined_pd (), 
10892 (__mmask8) -1, 10893 _MM_FROUND_CUR_DIRECTION); 10894 } 10895 10896 extern __inline __m512d 10897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10898 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 10899 { 10900 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 10901 (__v8df) __B, 10902 (__v8df) __W, 10903 (__mmask8) __U, 10904 _MM_FROUND_CUR_DIRECTION); 10905 } 10906 10907 extern __inline __m512d 10908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10909 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) 10910 { 10911 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 10912 (__v8df) __B, 10913 (__v8df) 10914 _mm512_setzero_pd (), 10915 (__mmask8) __U, 10916 _MM_FROUND_CUR_DIRECTION); 10917 } 10918 10919 extern __inline __m512 10920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10921 _mm512_max_ps (__m512 __A, __m512 __B) 10922 { 10923 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 10924 (__v16sf) __B, 10925 (__v16sf) 10926 _mm512_undefined_ps (), 10927 (__mmask16) -1, 10928 _MM_FROUND_CUR_DIRECTION); 10929 } 10930 10931 extern __inline __m512 10932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10933 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 10934 { 10935 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 10936 (__v16sf) __B, 10937 (__v16sf) __W, 10938 (__mmask16) __U, 10939 _MM_FROUND_CUR_DIRECTION); 10940 } 10941 10942 extern __inline __m512 10943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10944 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) 10945 { 10946 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 10947 (__v16sf) __B, 10948 (__v16sf) 10949 _mm512_setzero_ps (), 10950 (__mmask16) __U, 10951 _MM_FROUND_CUR_DIRECTION); 10952 } 10953 10954 extern __inline __m512d 10955 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 10956 _mm512_min_pd (__m512d __A, __m512d __B) 10957 { 10958 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 10959 (__v8df) __B, 10960 (__v8df) 10961 _mm512_undefined_pd (), 10962 (__mmask8) -1, 10963 _MM_FROUND_CUR_DIRECTION); 10964 } 10965 10966 extern __inline __m512d 10967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10968 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 10969 { 10970 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 10971 (__v8df) __B, 10972 (__v8df) __W, 10973 (__mmask8) __U, 10974 _MM_FROUND_CUR_DIRECTION); 10975 } 10976 10977 extern __inline __m512d 10978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10979 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) 10980 { 10981 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 10982 (__v8df) __B, 10983 (__v8df) 10984 _mm512_setzero_pd (), 10985 (__mmask8) __U, 10986 _MM_FROUND_CUR_DIRECTION); 10987 } 10988 10989 extern __inline __m512 10990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10991 _mm512_min_ps (__m512 __A, __m512 __B) 10992 { 10993 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 10994 (__v16sf) __B, 10995 (__v16sf) 10996 _mm512_undefined_ps (), 10997 (__mmask16) -1, 10998 _MM_FROUND_CUR_DIRECTION); 10999 } 11000 11001 extern __inline __m512 11002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11003 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 11004 { 11005 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 11006 (__v16sf) __B, 11007 (__v16sf) __W, 11008 (__mmask16) __U, 11009 _MM_FROUND_CUR_DIRECTION); 11010 } 11011 11012 extern __inline __m512 11013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11014 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B) 11015 { 11016 return (__m512) __builtin_ia32_minps512_mask 
((__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) _mm512_setzero_ps (),
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* vscalefpd: scale each element of __A by a power of two derived
   from the corresponding element of __B.  _mask_ variants merge
   from __W under __U; _maskz_ variants zero masked-off lanes.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_scalef_pd (__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
          (__v8df) __B,
          (__v8df) _mm512_undefined_pd (),
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
          (__v8df) __B,
          (__v8df) __W,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
          (__v8df) __B,
          (__v8df) _mm512_setzero_pd (),
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* vscalefps: single-precision scale-by-power-of-two.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_scalef_ps (__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) _mm512_undefined_ps (),
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) __W,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) _mm512_setzero_ps (),
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Scalar (low-element) forms of vscalef.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
          (__v2df) __B,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
          (__v4sf) __B,
          _MM_FROUND_CUR_DIRECTION);
}

/* Fused multiply-add, packed double: __A * __B + __C per lane.
   _mask_: masked-off lanes keep __A; _mask3_: keep __C; _maskz_:
   masked-off lanes are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
          (__v8df) __B,
          (__v8df) __C,
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
          (__v8df) __B,
          (__v8df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
          (__v8df) __B,
          (__v8df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
          (__v8df) __B,
          (__v8df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Fused multiply-add, packed single.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) __C,
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) __C,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) __C,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) __C,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Fused multiply-subtract: __A * __B - __C.  The unmasked, merge-
   into-__A, and zero-masked forms reuse the FMA builtin with __C
   negated; the _mask3_ form uses the dedicated vfmsub builtin so
   masked-off lanes keep the un-negated __C.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
          (__v8df) __B,
          -(__v8df) __C,
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
          (__v8df) __B,
          -(__v8df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
          (__v8df) __B,
          (__v8df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
          (__v8df) __B,
          -(__v8df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
          (__v16sf) __B,
          -(__v16sf) __C,
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
          (__v16sf) __B,
          -(__v16sf) __C,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) __C,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
          (__v16sf) __B,
          -(__v16sf) __C,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* vfmaddsub: fused multiply with __C alternately subtracted and
   added across lanes.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
          (__v8df) __B,
          (__v8df) __C,
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
          (__v8df) __B,
          (__v8df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
          (__v8df) __B,
          (__v8df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
          (__v8df) __B,
          (__v8df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Single-precision vfmaddsub.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) __C,
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) __C,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) __C,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) __C,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* vfmsubadd: the opposite add/subtract interleave, built from the
   vfmaddsub builtin with __C negated except in the _mask3_ form,
   which uses the dedicated builtin so masked-off lanes keep __C.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
          (__v8df) __B,
          -(__v8df) __C,
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
          (__v8df) __B,
          -(__v8df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
          (__v8df) __B,
          (__v8df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
          (__v8df) __B,
          -(__v8df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
          (__v16sf) __B,
          -(__v16sf) __C,
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
          (__v16sf) __B,
          -(__v16sf) __C,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) __C,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
          (__v16sf) __B,
          -(__v16sf) __C,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Fused negate-multiply-add: -(__A * __B) + __C.  Built by negating
   __A, except the _mask_ form, which needs the dedicated vfnmadd
   builtin so masked-off lanes keep the un-negated __A.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
          (__v8df) __B,
          (__v8df) __C,
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
          (__v8df) __B,
          (__v8df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
          (__v8df) __B,
          (__v8df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
          (__v8df) __B,
          (__v8df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) __C,
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) __C,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) __C,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) __C,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
{
  /* Fused negate-multiply-subtract: -(__A * __B) - __C, via negating
     __A and __C into the plain FMA builtin; the merge-masked forms
     use the dedicated vfnmsub builtins to preserve the original
     operands in masked-off lanes.  */
  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
          (__v8df) __B,
          -(__v8df) __C,
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
          (__v8df) __B,
          (__v8df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
          (__v8df) __B,
          (__v8df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
          (__v8df) __B,
          -(__v8df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
          (__v16sf) __B,
          -(__v16sf) __C,
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) __C,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
          (__v16sf) __B,
          (__v16sf) __C,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
          (__v16sf) __B,
          -(__v16sf) __C,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* vcvttpd2dq: truncating conversion of 8 doubles to 8 signed 32-bit
   integers (result is a 256-bit vector).  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvttpd_epi32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
          (__v8si) _mm256_undefined_si256 (),
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
          (__v8si) __W,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
          (__v8si) _mm256_setzero_si256 (),
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* vcvttpd2udq: truncating conversion to unsigned 32-bit integers.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvttpd_epu32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
          (__v8si) _mm256_undefined_si256 (),
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
          (__v8si) __W,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
          (__v8si) _mm256_setzero_si256 (),
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* vcvtpd2dq: conversion using the current rounding mode.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtpd_epi32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
          (__v8si) _mm256_undefined_si256 (),
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
          (__v8si) __W,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
          (__v8si) _mm256_setzero_si256 (),
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* vcvtpd2udq: rounded conversion to unsigned 32-bit integers.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtpd_epu32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
          (__v8si) _mm256_undefined_si256 (),
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
          (__v8si) __W,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
          (__v8si) _mm256_setzero_si256 (),
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* vcvttps2dq: truncating conversion of 16 floats to signed int32.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvttps_epi32 (__m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
          (__v16si) _mm512_undefined_si512 (),
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
          (__v16si) __W,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
          (__v16si) _mm512_setzero_si512 (),
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* vcvttps2udq: truncating conversion to unsigned int32.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvttps_epu32 (__m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
          (__v16si) _mm512_undefined_si512 (),
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
          (__v16si) __W,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
          (__v16si) _mm512_setzero_si512 (),
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* vcvtps2dq: rounded conversion of 16 floats to signed int32.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtps_epi32 (__m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
          (__v16si) _mm512_undefined_si512 (),
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
          (__v16si) __W,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
          (__v16si) _mm512_setzero_si512 (),
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* vcvtps2udq: rounded conversion to unsigned int32.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtps_epu32 (__m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
          (__v16si) _mm512_undefined_si512 (),
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
          (__v16si) __W,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
          (__v16si) _mm512_setzero_si512 (),
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Scalar unsigned-64-bit-to-float conversions (vcvtusi2ss/sd with a
   64-bit source) are only available in 64-bit mode.  */
#ifdef __x86_64__
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
{
  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
{
  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
          _MM_FROUND_CUR_DIRECTION);
}
#endif

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtu32_ss (__m128 __A, unsigned __B)
{
  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
          _MM_FROUND_CUR_DIRECTION);
}

/* vcvtdq2ps: convert 16 signed int32 elements to floats.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi32_ps (__m512i __A)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
          (__v16sf) _mm512_undefined_ps (),
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
          (__v16sf) __W,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
          (__v16sf) _mm512_setzero_ps (),
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* vcvtudq2ps: convert 16 unsigned int32 elements to floats.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu32_ps (__m512i __A)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
          (__v16sf) _mm512_undefined_ps (),
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
          (__v16sf) __W,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
          (__v16sf) _mm512_setzero_ps (),
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* vfixupimm: fix up special values of __A using per-element controls
   from __C and the immediate __imm.  When optimizing, inline
   functions are used (the immediate folds to a constant); otherwise
   macro forms below keep the immediate a literal.  */
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
{
  return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
          (__v8df) __B,
          (__v8di) __C,
          __imm,
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
			 __m512i __C, const int __imm)
{
  return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
          (__v8df) __B,
          (__v8di) __C,
          __imm,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
			  __m512i __C, const int __imm)
{
  return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
          (__v8df) __B,
          (__v8di) __C,
          __imm,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
{
  return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
          (__v16sf) __B,
          (__v16si) __C,
          __imm,
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
			 __m512i __C, const int __imm)
{
  return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
          (__v16sf) __B,
          (__v16si) __C,
          __imm,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
			  __m512i __C, const int __imm)
{
  return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
          (__v16sf) __B,
          (__v16si) __C,
          __imm,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Scalar (low-element) fixupimm forms.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
{
  return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
          (__v2df) __B,
          (__v2di) __C, __imm,
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
		      __m128i __C, const int __imm)
{
  return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
          (__v2df) __B,
          (__v2di) __C, __imm,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
		       __m128i __C, const int __imm)
{
  return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
          (__v2df) __B,
          (__v2di) __C,
          __imm,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
{
  return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4si) __C, __imm,
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
		      __m128i __C, const int __imm)
{
  return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4si) __C, __imm,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
		       __m128i __C, const int __imm)
{
  return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
          (__v4sf) __B,
          (__v4si) __C, __imm,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
#else
/* Non-optimizing macro forms: the immediate must remain a literal in
   the builtin call.  */
#define _mm512_fixupimm_pd(X, Y, Z, C) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
      (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_fixupimm_ps(X, Y, Z, C) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
      (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
      (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))

#define \
_mm512_mask_fixupimm_ps(X, U, Y, Z, C) \ 12155 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \ 12156 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 12157 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) 12158 12159 #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \ 12160 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \ 12161 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 12162 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) 12163 12164 #define _mm_fixupimm_sd(X, Y, Z, C) \ 12165 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ 12166 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 12167 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) 12168 12169 #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \ 12170 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ 12171 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 12172 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 12173 12174 #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \ 12175 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \ 12176 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 12177 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 12178 12179 #define _mm_fixupimm_ss(X, Y, Z, C) \ 12180 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ 12181 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 12182 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) 12183 12184 #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \ 12185 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ 12186 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 12187 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 12188 12189 #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \ 12190 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \ 12191 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 12192 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 12193 #endif 12194 12195 #ifdef __x86_64__ 12196 extern __inline unsigned long long 12197 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12198 _mm_cvtss_u64 (__m128 __A) 12199 { 12200 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) 12201 __A, 12202 _MM_FROUND_CUR_DIRECTION); 12203 } 12204 12205 extern __inline unsigned long long 12206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12207 _mm_cvttss_u64 (__m128 __A) 12208 { 12209 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) 12210 __A, 12211 _MM_FROUND_CUR_DIRECTION); 12212 } 12213 12214 extern __inline long long 12215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12216 _mm_cvttss_i64 (__m128 __A) 12217 { 12218 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, 12219 _MM_FROUND_CUR_DIRECTION); 12220 } 12221 #endif /* __x86_64__ */ 12222 12223 extern __inline unsigned 12224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12225 _mm_cvtss_u32 (__m128 __A) 12226 { 12227 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, 12228 _MM_FROUND_CUR_DIRECTION); 12229 } 12230 12231 extern __inline unsigned 12232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12233 _mm_cvttss_u32 (__m128 __A) 12234 { 12235 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, 12236 _MM_FROUND_CUR_DIRECTION); 12237 } 12238 12239 extern __inline int 12240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12241 _mm_cvttss_i32 (__m128 __A) 12242 { 12243 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, 12244 _MM_FROUND_CUR_DIRECTION); 12245 } 12246 12247 #ifdef __x86_64__ 12248 extern __inline unsigned long long 12249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12250 _mm_cvtsd_u64 (__m128d __A) 12251 { 12252 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) 12253 __A, 12254 _MM_FROUND_CUR_DIRECTION); 12255 } 12256 12257 extern __inline unsigned long long 12258 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 12259 _mm_cvttsd_u64 (__m128d __A) 12260 { 12261 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) 12262 __A, 12263 _MM_FROUND_CUR_DIRECTION); 12264 } 12265 12266 extern __inline long long 12267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12268 _mm_cvttsd_i64 (__m128d __A) 12269 { 12270 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, 12271 _MM_FROUND_CUR_DIRECTION); 12272 } 12273 #endif /* __x86_64__ */ 12274 12275 extern __inline unsigned 12276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12277 _mm_cvtsd_u32 (__m128d __A) 12278 { 12279 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, 12280 _MM_FROUND_CUR_DIRECTION); 12281 } 12282 12283 extern __inline unsigned 12284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12285 _mm_cvttsd_u32 (__m128d __A) 12286 { 12287 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, 12288 _MM_FROUND_CUR_DIRECTION); 12289 } 12290 12291 extern __inline int 12292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12293 _mm_cvttsd_i32 (__m128d __A) 12294 { 12295 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, 12296 _MM_FROUND_CUR_DIRECTION); 12297 } 12298 12299 extern __inline __m512d 12300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12301 _mm512_cvtps_pd (__m256 __A) 12302 { 12303 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 12304 (__v8df) 12305 _mm512_undefined_pd (), 12306 (__mmask8) -1, 12307 _MM_FROUND_CUR_DIRECTION); 12308 } 12309 12310 extern __inline __m512d 12311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12312 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) 12313 { 12314 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 12315 (__v8df) __W, 12316 (__mmask8) __U, 12317 _MM_FROUND_CUR_DIRECTION); 12318 } 12319 12320 extern __inline __m512d 12321 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 12322 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) 12323 { 12324 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 12325 (__v8df) 12326 _mm512_setzero_pd (), 12327 (__mmask8) __U, 12328 _MM_FROUND_CUR_DIRECTION); 12329 } 12330 12331 extern __inline __m512 12332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12333 _mm512_cvtph_ps (__m256i __A) 12334 { 12335 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 12336 (__v16sf) 12337 _mm512_undefined_ps (), 12338 (__mmask16) -1, 12339 _MM_FROUND_CUR_DIRECTION); 12340 } 12341 12342 extern __inline __m512 12343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12344 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) 12345 { 12346 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 12347 (__v16sf) __W, 12348 (__mmask16) __U, 12349 _MM_FROUND_CUR_DIRECTION); 12350 } 12351 12352 extern __inline __m512 12353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12354 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) 12355 { 12356 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 12357 (__v16sf) 12358 _mm512_setzero_ps (), 12359 (__mmask16) __U, 12360 _MM_FROUND_CUR_DIRECTION); 12361 } 12362 12363 extern __inline __m256 12364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12365 _mm512_cvtpd_ps (__m512d __A) 12366 { 12367 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 12368 (__v8sf) 12369 _mm256_undefined_ps (), 12370 (__mmask8) -1, 12371 _MM_FROUND_CUR_DIRECTION); 12372 } 12373 12374 extern __inline __m256 12375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12376 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) 12377 { 12378 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 12379 (__v8sf) __W, 12380 (__mmask8) __U, 12381 _MM_FROUND_CUR_DIRECTION); 12382 } 12383 12384 
/* Zero-masking variant of the double->float narrowing conversion.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
						   (__v8sf)
						   _mm256_setzero_ps (),
						   (__mmask8) __U,
						   _MM_FROUND_CUR_DIRECTION);
}

#ifdef __OPTIMIZE__
/* getexp: extract the (biased-adjusted) exponent of each element as a
   floating-point value; unmasked / merge-masked / zero-masked forms.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getexp_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
						   (__v16sf)
						   _mm512_undefined_ps (),
						   (__mmask16) -1,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
						   (__v16sf) __W,
						   (__mmask16) __U,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
						   (__v16sf)
						   _mm512_setzero_ps (),
						   (__mmask16) __U,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getexp_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
						    (__v8df)
						    _mm512_undefined_pd (),
						    (__mmask8) -1,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
						    (__v8df) __W,
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

/* Scalar getexp: low element computed from __B, upper elements copied
   from __A (3-operand "_round" builtin -- no mask operand).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getexp_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
						    (__v4sf) __B,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getexp_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
						     (__v2df) __B,
						     _MM_FROUND_CUR_DIRECTION);
}

/* getmant: extract the normalized mantissa.  The builtin immediate packs
   the sign control __C into bits 3:2 and the normalization interval __B
   into bits 1:0, hence (__C << 2) | __B.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
		   _MM_MANTISSA_SIGN_ENUM __C)
{
  /* NOTE(review): unlike the #else macro below, the undefined operand is
     not cast to (__v8df); GCC's implicit vector conversion makes the two
     forms equivalent.  */
  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
						     (__C << 2) | __B,
						     _mm512_undefined_pd (),
						     (__mmask8) -1,
						     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
			_MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
						     (__C << 2) | __B,
						     (__v8df) __W, __U,
						     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
			 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
						     (__C << 2) | __B,
						     (__v8df)
						     _mm512_setzero_pd (),
						     __U,
						     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
		   _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
						    (__C << 2) | __B,
						    _mm512_undefined_ps (),
						    (__mmask16) -1,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
			_MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
						    (__C << 2) | __B,
						    (__v16sf) __W, __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
			 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
						    (__C << 2) | __B,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    __U,
						    _MM_FROUND_CUR_DIRECTION);
}

/* Scalar getmant: low element from __B, upper elements from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
		_MM_MANTISSA_SIGN_ENUM __D)
{
  return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
						   (__v2df) __B,
						   (__D << 2) | __C,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
		_MM_MANTISSA_SIGN_ENUM __D)
{
  return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
						  (__v4sf) __B,
						  (__D << 2) | __C,
						  _MM_FROUND_CUR_DIRECTION);
}

#else
/* Macro forms for -O0, where the enum immediates cannot be proven
   constant through an inline function.  */
#define _mm512_getmant_pd(X, B, C)                                                  \
  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                 \
                                              (int)(((C)<<2) | (B)),                \
                                              (__v8df)_mm512_undefined_pd(),        \
                                              (__mmask8)-1,\
					      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, X, B, C)                                       \
  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                 \
                                              (int)(((C)<<2) | (B)),                \
                                              (__v8df)(__m512d)(W),                 \
                                              (__mmask8)(U),\
					      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, X, B, C)                                         \
  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                 \
                                              (int)(((C)<<2) | (B)),                \
                                              (__v8df)_mm512_setzero_pd(),          \
                                              (__mmask8)(U),\
					      _MM_FROUND_CUR_DIRECTION))
#define _mm512_getmant_ps(X, B, C)                                                  \
  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                  \
                                             (int)(((C)<<2) | (B)),                 \
                                             (__v16sf)_mm512_undefined_ps(),        \
                                             (__mmask16)-1,\
					     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, X, B, C)                                       \
  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                  \
                                             (int)(((C)<<2) | (B)),                 \
                                             (__v16sf)(__m512)(W),                  \
                                             (__mmask16)(U),\
					     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, X, B, C)                                         \
  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                  \
                                             (int)(((C)<<2) | (B)),                 \
                                             (__v16sf)_mm512_setzero_ps(),          \
                                             (__mmask16)(U),\
					     _MM_FROUND_CUR_DIRECTION))
#define _mm_getmant_sd(X, Y, C, D)                                                  \
  ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X),                   \
                                            (__v2df)(__m128d)(Y),                   \
                                            (int)(((D)<<2) | (C)),                  \
					    _MM_FROUND_CUR_DIRECTION))
#define _mm_getmant_ss(X, Y, C, D) \ 12620 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \ 12621 (__v4sf)(__m128)(Y), \ 12622 (int)(((D)<<2) | (C)), \ 12623 _MM_FROUND_CUR_DIRECTION)) 12624 12625 #define _mm_getexp_ss(A, B) \ 12626 ((__m128)__builtin_ia32_getexpss128_mask((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ 12627 _MM_FROUND_CUR_DIRECTION)) 12628 12629 #define _mm_getexp_sd(A, B) \ 12630 ((__m128d)__builtin_ia32_getexpsd128_mask((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\ 12631 _MM_FROUND_CUR_DIRECTION)) 12632 12633 #define _mm512_getexp_ps(A) \ 12634 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 12635 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION)) 12636 12637 #define _mm512_mask_getexp_ps(W, U, A) \ 12638 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 12639 (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) 12640 12641 #define _mm512_maskz_getexp_ps(U, A) \ 12642 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 12643 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) 12644 12645 #define _mm512_getexp_pd(A) \ 12646 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 12647 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)) 12648 12649 #define _mm512_mask_getexp_pd(W, U, A) \ 12650 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 12651 (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 12652 12653 #define _mm512_maskz_getexp_pd(U, A) \ 12654 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 12655 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 12656 #endif 12657 12658 #ifdef __OPTIMIZE__ 12659 extern __inline __m512 12660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12661 _mm512_roundscale_ps (__m512 __A, const int __imm) 12662 { 12663 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm, 
12664 (__v16sf) 12665 _mm512_undefined_ps (), 12666 -1, 12667 _MM_FROUND_CUR_DIRECTION); 12668 } 12669 12670 extern __inline __m512 12671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12672 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C, 12673 const int __imm) 12674 { 12675 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm, 12676 (__v16sf) __A, 12677 (__mmask16) __B, 12678 _MM_FROUND_CUR_DIRECTION); 12679 } 12680 12681 extern __inline __m512 12682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12683 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm) 12684 { 12685 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B, 12686 __imm, 12687 (__v16sf) 12688 _mm512_setzero_ps (), 12689 (__mmask16) __A, 12690 _MM_FROUND_CUR_DIRECTION); 12691 } 12692 12693 extern __inline __m512d 12694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12695 _mm512_roundscale_pd (__m512d __A, const int __imm) 12696 { 12697 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm, 12698 (__v8df) 12699 _mm512_undefined_pd (), 12700 -1, 12701 _MM_FROUND_CUR_DIRECTION); 12702 } 12703 12704 extern __inline __m512d 12705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12706 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C, 12707 const int __imm) 12708 { 12709 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm, 12710 (__v8df) __A, 12711 (__mmask8) __B, 12712 _MM_FROUND_CUR_DIRECTION); 12713 } 12714 12715 extern __inline __m512d 12716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12717 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm) 12718 { 12719 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B, 12720 __imm, 12721 (__v8df) 12722 _mm512_setzero_pd (), 12723 (__mmask8) __A, 12724 _MM_FROUND_CUR_DIRECTION); 12725 } 12726 12727 extern __inline __m128 
12728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12729 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm) 12730 { 12731 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A, 12732 (__v4sf) __B, __imm, 12733 _MM_FROUND_CUR_DIRECTION); 12734 } 12735 12736 extern __inline __m128d 12737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12738 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm) 12739 { 12740 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A, 12741 (__v2df) __B, __imm, 12742 _MM_FROUND_CUR_DIRECTION); 12743 } 12744 12745 #else 12746 #define _mm512_roundscale_ps(A, B) \ 12747 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\ 12748 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION)) 12749 #define _mm512_mask_roundscale_ps(A, B, C, D) \ 12750 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \ 12751 (int)(D), \ 12752 (__v16sf)(__m512)(A), \ 12753 (__mmask16)(B), _MM_FROUND_CUR_DIRECTION)) 12754 #define _mm512_maskz_roundscale_ps(A, B, C) \ 12755 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \ 12756 (int)(C), \ 12757 (__v16sf)_mm512_setzero_ps(),\ 12758 (__mmask16)(A), _MM_FROUND_CUR_DIRECTION)) 12759 #define _mm512_roundscale_pd(A, B) \ 12760 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\ 12761 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) 12762 #define _mm512_mask_roundscale_pd(A, B, C, D) \ 12763 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \ 12764 (int)(D), \ 12765 (__v8df)(__m512d)(A), \ 12766 (__mmask8)(B), _MM_FROUND_CUR_DIRECTION)) 12767 #define _mm512_maskz_roundscale_pd(A, B, C) \ 12768 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \ 12769 (int)(C), \ 12770 (__v8df)_mm512_setzero_pd(),\ 12771 (__mmask8)(A), _MM_FROUND_CUR_DIRECTION)) 12772 #define _mm_roundscale_ss(A, B, C) \ 12773 ((__m128) 
__builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \ 12774 (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION)) 12775 #define _mm_roundscale_sd(A, B, C) \ 12776 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \ 12777 (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION)) 12778 #endif 12779 12780 #ifdef __OPTIMIZE__ 12781 extern __inline __mmask8 12782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12783 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P) 12784 { 12785 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 12786 (__v8df) __Y, __P, 12787 (__mmask8) -1, 12788 _MM_FROUND_CUR_DIRECTION); 12789 } 12790 12791 extern __inline __mmask16 12792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12793 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P) 12794 { 12795 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 12796 (__v16sf) __Y, __P, 12797 (__mmask16) -1, 12798 _MM_FROUND_CUR_DIRECTION); 12799 } 12800 12801 extern __inline __mmask16 12802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12803 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P) 12804 { 12805 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 12806 (__v16sf) __Y, __P, 12807 (__mmask16) __U, 12808 _MM_FROUND_CUR_DIRECTION); 12809 } 12810 12811 extern __inline __mmask8 12812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12813 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P) 12814 { 12815 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 12816 (__v8df) __Y, __P, 12817 (__mmask8) __U, 12818 _MM_FROUND_CUR_DIRECTION); 12819 } 12820 12821 extern __inline __mmask8 12822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12823 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P) 12824 { 12825 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, 
12826 (__v2df) __Y, __P, 12827 (__mmask8) -1, 12828 _MM_FROUND_CUR_DIRECTION); 12829 } 12830 12831 extern __inline __mmask8 12832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12833 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P) 12834 { 12835 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, 12836 (__v2df) __Y, __P, 12837 (__mmask8) __M, 12838 _MM_FROUND_CUR_DIRECTION); 12839 } 12840 12841 extern __inline __mmask8 12842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12843 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P) 12844 { 12845 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, 12846 (__v4sf) __Y, __P, 12847 (__mmask8) -1, 12848 _MM_FROUND_CUR_DIRECTION); 12849 } 12850 12851 extern __inline __mmask8 12852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12853 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P) 12854 { 12855 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, 12856 (__v4sf) __Y, __P, 12857 (__mmask8) __M, 12858 _MM_FROUND_CUR_DIRECTION); 12859 } 12860 12861 #else 12862 #define _mm512_cmp_pd_mask(X, Y, P) \ 12863 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ 12864 (__v8df)(__m512d)(Y), (int)(P),\ 12865 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) 12866 12867 #define _mm512_cmp_ps_mask(X, Y, P) \ 12868 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ 12869 (__v16sf)(__m512)(Y), (int)(P),\ 12870 (__mmask16)-1,_MM_FROUND_CUR_DIRECTION)) 12871 12872 #define _mm512_mask_cmp_pd_mask(M, X, Y, P) \ 12873 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ 12874 (__v8df)(__m512d)(Y), (int)(P),\ 12875 (__mmask8)M, _MM_FROUND_CUR_DIRECTION)) 12876 12877 #define _mm512_mask_cmp_ps_mask(M, X, Y, P) \ 12878 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ 12879 (__v16sf)(__m512)(Y), (int)(P),\ 12880 (__mmask16)M,_MM_FROUND_CUR_DIRECTION)) 12881 12882 
#define _mm_cmp_sd_mask(X, Y, P) \ 12883 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ 12884 (__v2df)(__m128d)(Y), (int)(P),\ 12885 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) 12886 12887 #define _mm_mask_cmp_sd_mask(M, X, Y, P) \ 12888 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ 12889 (__v2df)(__m128d)(Y), (int)(P),\ 12890 M,_MM_FROUND_CUR_DIRECTION)) 12891 12892 #define _mm_cmp_ss_mask(X, Y, P) \ 12893 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ 12894 (__v4sf)(__m128)(Y), (int)(P), \ 12895 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) 12896 12897 #define _mm_mask_cmp_ss_mask(M, X, Y, P) \ 12898 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ 12899 (__v4sf)(__m128)(Y), (int)(P), \ 12900 M,_MM_FROUND_CUR_DIRECTION)) 12901 #endif 12902 12903 extern __inline __mmask16 12904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12905 _mm512_kmov (__mmask16 __A) 12906 { 12907 return __builtin_ia32_kmov16 (__A); 12908 } 12909 12910 #ifdef __DISABLE_AVX512F__ 12911 #undef __DISABLE_AVX512F__ 12912 #pragma GCC pop_options 12913 #endif /* __DISABLE_AVX512F__ */ 12914 12915 #endif /* _AVX512FINTRIN_H_INCLUDED */ 12916