Home | History | Annotate | Download | only in Headers
      1 /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 #ifndef __IMMINTRIN_H
     24 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
     25 #endif
     26 
     27 #ifndef __AVX512FINTRIN_H
     28 #define __AVX512FINTRIN_H
     29 
     30 typedef double __v8df __attribute__((__vector_size__(64)));
     31 typedef float __v16sf __attribute__((__vector_size__(64)));
     32 typedef long long __v8di __attribute__((__vector_size__(64)));
     33 typedef int __v16si __attribute__((__vector_size__(64)));
     34 
     35 typedef float __m512 __attribute__((__vector_size__(64)));
     36 typedef double __m512d __attribute__((__vector_size__(64)));
     37 typedef long long __m512i __attribute__((__vector_size__(64)));
     38 
     39 typedef unsigned char __mmask8;
     40 typedef unsigned short __mmask16;
     41 
/* Rounding mode macros.  These values are passed as the trailing rounding
   argument of the *_round builtins used in this header.
   NOTE(review): the same names may be defined by other intrinsic headers;
   keep the replacement lists token-identical to avoid redefinition
   diagnostics -- confirm against the other headers.  */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04
     48 
/* Define the default attributes for the functions in this file: always
   inline the wrapper, emit no debug info for it, and compile its body with
   the "avx512f" target feature enabled. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
     51 
     52 /* Create vectors with repeated elements */
     53 
     54 static  __inline __m512i __DEFAULT_FN_ATTRS
     55 _mm512_setzero_si512(void)
     56 {
     57   return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
     58 }
     59 
     60 static __inline__ __m512d __DEFAULT_FN_ATTRS
     61 _mm512_undefined_pd()
     62 {
     63   return (__m512d)__builtin_ia32_undef512();
     64 }
     65 
     66 static __inline__ __m512 __DEFAULT_FN_ATTRS
     67 _mm512_undefined()
     68 {
     69   return (__m512)__builtin_ia32_undef512();
     70 }
     71 
     72 static __inline__ __m512 __DEFAULT_FN_ATTRS
     73 _mm512_undefined_ps()
     74 {
     75   return (__m512)__builtin_ia32_undef512();
     76 }
     77 
     78 static __inline__ __m512i __DEFAULT_FN_ATTRS
     79 _mm512_undefined_epi32()
     80 {
     81   return (__m512i)__builtin_ia32_undef512();
     82 }
     83 
     84 static __inline __m512i __DEFAULT_FN_ATTRS
     85 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
     86 {
     87   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
     88                  (__v16si)
     89                  _mm512_setzero_si512 (),
     90                  __M);
     91 }
     92 
     93 static __inline __m512i __DEFAULT_FN_ATTRS
     94 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
     95 {
     96 #ifdef __x86_64__
     97   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
     98                  (__v8di)
     99                  _mm512_setzero_si512 (),
    100                  __M);
    101 #else
    102   return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
    103                  (__v8di)
    104                  _mm512_setzero_si512 (),
    105                  __M);
    106 #endif
    107 }
    108 
    109 static __inline __m512 __DEFAULT_FN_ATTRS
    110 _mm512_setzero_ps(void)
    111 {
    112   return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    113                    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
    114 }
    115 static  __inline __m512d __DEFAULT_FN_ATTRS
    116 _mm512_setzero_pd(void)
    117 {
    118   return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
    119 }
    120 
    121 static __inline __m512 __DEFAULT_FN_ATTRS
    122 _mm512_set1_ps(float __w)
    123 {
    124   return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
    125                    __w, __w, __w, __w, __w, __w, __w, __w  };
    126 }
    127 
    128 static __inline __m512d __DEFAULT_FN_ATTRS
    129 _mm512_set1_pd(double __w)
    130 {
    131   return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
    132 }
    133 
    134 static __inline __m512i __DEFAULT_FN_ATTRS
    135 _mm512_set1_epi32(int __s)
    136 {
    137   return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
    138                              __s, __s, __s, __s, __s, __s, __s, __s };
    139 }
    140 
    141 static __inline __m512i __DEFAULT_FN_ATTRS
    142 _mm512_set1_epi64(long long __d)
    143 {
    144   return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
    145 }
    146 
    147 static __inline__ __m512 __DEFAULT_FN_ATTRS
    148 _mm512_broadcastss_ps(__m128 __X)
    149 {
    150   float __f = __X[0];
    151   return (__v16sf){ __f, __f, __f, __f,
    152                     __f, __f, __f, __f,
    153                     __f, __f, __f, __f,
    154                     __f, __f, __f, __f };
    155 }
    156 
    157 static __inline__ __m512d __DEFAULT_FN_ATTRS
    158 _mm512_broadcastsd_pd(__m128d __X)
    159 {
    160   double __d = __X[0];
    161   return (__v8df){ __d, __d, __d, __d,
    162                    __d, __d, __d, __d };
    163 }
    164 
    165 /* Cast between vector types */
    166 
    167 static __inline __m512d __DEFAULT_FN_ATTRS
    168 _mm512_castpd256_pd512(__m256d __a)
    169 {
    170   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
    171 }
    172 
    173 static __inline __m512 __DEFAULT_FN_ATTRS
    174 _mm512_castps256_ps512(__m256 __a)
    175 {
    176   return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
    177                                           -1, -1, -1, -1, -1, -1, -1, -1);
    178 }
    179 
    180 static __inline __m128d __DEFAULT_FN_ATTRS
    181 _mm512_castpd512_pd128(__m512d __a)
    182 {
    183   return __builtin_shufflevector(__a, __a, 0, 1);
    184 }
    185 
    186 static __inline __m128 __DEFAULT_FN_ATTRS
    187 _mm512_castps512_ps128(__m512 __a)
    188 {
    189   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
    190 }
    191 
    192 /* Bitwise operators */
    193 static __inline__ __m512i __DEFAULT_FN_ATTRS
    194 _mm512_and_epi32(__m512i __a, __m512i __b)
    195 {
    196   return __a & __b;
    197 }
    198 
    199 static __inline__ __m512i __DEFAULT_FN_ATTRS
    200 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
    201 {
    202   return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
    203               (__v16si) __b,
    204               (__v16si) __src,
    205               (__mmask16) __k);
    206 }
    207 static __inline__ __m512i __DEFAULT_FN_ATTRS
    208 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
    209 {
    210   return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
    211               (__v16si) __b,
    212               (__v16si)
    213               _mm512_setzero_si512 (),
    214               (__mmask16) __k);
    215 }
    216 
    217 static __inline__ __m512i __DEFAULT_FN_ATTRS
    218 _mm512_and_epi64(__m512i __a, __m512i __b)
    219 {
    220   return __a & __b;
    221 }
    222 
    223 static __inline__ __m512i __DEFAULT_FN_ATTRS
    224 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
    225 {
    226   return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
    227               (__v8di) __b,
    228               (__v8di) __src,
    229               (__mmask8) __k);
    230 }
    231 static __inline__ __m512i __DEFAULT_FN_ATTRS
    232 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
    233 {
    234   return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
    235               (__v8di) __b,
    236               (__v8di)
    237               _mm512_setzero_si512 (),
    238               (__mmask8) __k);
    239 }
    240 
    241 static __inline__ __m512i __DEFAULT_FN_ATTRS
    242 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
    243 {
    244   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
    245               (__v16si) __B,
    246               (__v16si)
    247               _mm512_setzero_si512 (),
    248               (__mmask16) -1);
    249 }
    250 
    251 static __inline__ __m512i __DEFAULT_FN_ATTRS
    252 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
    253 {
    254   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
    255               (__v16si) __B,
    256               (__v16si) __W,
    257               (__mmask16) __U);
    258 }
    259 
    260 static __inline__ __m512i __DEFAULT_FN_ATTRS
    261 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
    262 {
    263   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
    264               (__v16si) __B,
    265               (__v16si)
    266               _mm512_setzero_si512 (),
    267               (__mmask16) __U);
    268 }
    269 
    270 static __inline__ __m512i __DEFAULT_FN_ATTRS
    271 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
    272 {
    273   return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
    274               (__v8di) __B,
    275               (__v8di)
    276               _mm512_setzero_si512 (),
    277               (__mmask8) -1);
    278 }
    279 
    280 static __inline__ __m512i __DEFAULT_FN_ATTRS
    281 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
    282 {
    283   return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
    284               (__v8di) __B,
    285               (__v8di) __W, __U);
    286 }
    287 
    288 static __inline__ __m512i __DEFAULT_FN_ATTRS
    289 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
    290 {
    291   return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
    292               (__v8di) __B,
    293               (__v8di)
    294               _mm512_setzero_pd (),
    295               __U);
    296 }
    297 static __inline__ __m512i __DEFAULT_FN_ATTRS
    298 _mm512_or_epi32(__m512i __a, __m512i __b)
    299 {
    300   return __a | __b;
    301 }
    302 
    303 static __inline__ __m512i __DEFAULT_FN_ATTRS
    304 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
    305 {
    306   return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
    307               (__v16si) __b,
    308               (__v16si) __src,
    309               (__mmask16) __k);
    310 }
    311 static __inline__ __m512i __DEFAULT_FN_ATTRS
    312 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
    313 {
    314   return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
    315               (__v16si) __b,
    316               (__v16si)
    317               _mm512_setzero_si512 (),
    318               (__mmask16) __k);
    319 }
    320 
    321 static __inline__ __m512i __DEFAULT_FN_ATTRS
    322 _mm512_or_epi64(__m512i __a, __m512i __b)
    323 {
    324   return __a | __b;
    325 }
    326 
    327 static __inline__ __m512i __DEFAULT_FN_ATTRS
    328 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
    329 {
    330   return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
    331               (__v8di) __b,
    332               (__v8di) __src,
    333               (__mmask8) __k);
    334 }
    335 static __inline__ __m512i __DEFAULT_FN_ATTRS
    336 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
    337 {
    338   return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
    339               (__v8di) __b,
    340               (__v8di)
    341               _mm512_setzero_si512 (),
    342               (__mmask8) __k);
    343 }
    344 
    345 static __inline__ __m512i __DEFAULT_FN_ATTRS
    346 _mm512_xor_epi32(__m512i __a, __m512i __b)
    347 {
    348   return __a ^ __b;
    349 }
    350 
    351 static __inline__ __m512i __DEFAULT_FN_ATTRS
    352 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
    353 {
    354   return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
    355               (__v16si) __b,
    356               (__v16si) __src,
    357               (__mmask16) __k);
    358 }
    359 static __inline__ __m512i __DEFAULT_FN_ATTRS
    360 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
    361 {
    362   return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
    363               (__v16si) __b,
    364               (__v16si)
    365               _mm512_setzero_si512 (),
    366               (__mmask16) __k);
    367 }
    368 
    369 static __inline__ __m512i __DEFAULT_FN_ATTRS
    370 _mm512_xor_epi64(__m512i __a, __m512i __b)
    371 {
    372   return __a ^ __b;
    373 }
    374 
    375 static __inline__ __m512i __DEFAULT_FN_ATTRS
    376 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
    377 {
    378   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
    379               (__v8di) __b,
    380               (__v8di) __src,
    381               (__mmask8) __k);
    382 }
    383 static __inline__ __m512i __DEFAULT_FN_ATTRS
    384 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
    385 {
    386   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
    387               (__v8di) __b,
    388               (__v8di)
    389               _mm512_setzero_si512 (),
    390               (__mmask8) __k);
    391 }
    392 
    393 static __inline__ __m512i __DEFAULT_FN_ATTRS
    394 _mm512_and_si512(__m512i __a, __m512i __b)
    395 {
    396   return __a & __b;
    397 }
    398 
    399 static __inline__ __m512i __DEFAULT_FN_ATTRS
    400 _mm512_or_si512(__m512i __a, __m512i __b)
    401 {
    402   return __a | __b;
    403 }
    404 
    405 static __inline__ __m512i __DEFAULT_FN_ATTRS
    406 _mm512_xor_si512(__m512i __a, __m512i __b)
    407 {
    408   return __a ^ __b;
    409 }
    410 /* Arithmetic */
    411 
    412 static __inline __m512d __DEFAULT_FN_ATTRS
    413 _mm512_add_pd(__m512d __a, __m512d __b)
    414 {
    415   return __a + __b;
    416 }
    417 
    418 static __inline __m512 __DEFAULT_FN_ATTRS
    419 _mm512_add_ps(__m512 __a, __m512 __b)
    420 {
    421   return __a + __b;
    422 }
    423 
    424 static __inline __m512d __DEFAULT_FN_ATTRS
    425 _mm512_mul_pd(__m512d __a, __m512d __b)
    426 {
    427   return __a * __b;
    428 }
    429 
    430 static __inline __m512 __DEFAULT_FN_ATTRS
    431 _mm512_mul_ps(__m512 __a, __m512 __b)
    432 {
    433   return __a * __b;
    434 }
    435 
    436 static __inline __m512d __DEFAULT_FN_ATTRS
    437 _mm512_sub_pd(__m512d __a, __m512d __b)
    438 {
    439   return __a - __b;
    440 }
    441 
    442 static __inline __m512 __DEFAULT_FN_ATTRS
    443 _mm512_sub_ps(__m512 __a, __m512 __b)
    444 {
    445   return __a - __b;
    446 }
    447 
    448 static __inline__ __m512i __DEFAULT_FN_ATTRS
    449 _mm512_add_epi64 (__m512i __A, __m512i __B)
    450 {
    451   return (__m512i) ((__v8di) __A + (__v8di) __B);
    452 }
    453 
    454 static __inline__ __m512i __DEFAULT_FN_ATTRS
    455 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
    456 {
    457   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
    458              (__v8di) __B,
    459              (__v8di) __W,
    460              (__mmask8) __U);
    461 }
    462 
    463 static __inline__ __m512i __DEFAULT_FN_ATTRS
    464 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
    465 {
    466   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
    467              (__v8di) __B,
    468              (__v8di)
    469              _mm512_setzero_si512 (),
    470              (__mmask8) __U);
    471 }
    472 
    473 static __inline__ __m512i __DEFAULT_FN_ATTRS
    474 _mm512_sub_epi64 (__m512i __A, __m512i __B)
    475 {
    476   return (__m512i) ((__v8di) __A - (__v8di) __B);
    477 }
    478 
    479 static __inline__ __m512i __DEFAULT_FN_ATTRS
    480 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
    481 {
    482   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
    483              (__v8di) __B,
    484              (__v8di) __W,
    485              (__mmask8) __U);
    486 }
    487 
    488 static __inline__ __m512i __DEFAULT_FN_ATTRS
    489 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
    490 {
    491   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
    492              (__v8di) __B,
    493              (__v8di)
    494              _mm512_setzero_si512 (),
    495              (__mmask8) __U);
    496 }
    497 
    498 static __inline__ __m512i __DEFAULT_FN_ATTRS
    499 _mm512_add_epi32 (__m512i __A, __m512i __B)
    500 {
    501   return (__m512i) ((__v16si) __A + (__v16si) __B);
    502 }
    503 
    504 static __inline__ __m512i __DEFAULT_FN_ATTRS
    505 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
    506 {
    507   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
    508              (__v16si) __B,
    509              (__v16si) __W,
    510              (__mmask16) __U);
    511 }
    512 
    513 static __inline__ __m512i __DEFAULT_FN_ATTRS
    514 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
    515 {
    516   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
    517              (__v16si) __B,
    518              (__v16si)
    519              _mm512_setzero_si512 (),
    520              (__mmask16) __U);
    521 }
    522 
    523 static __inline__ __m512i __DEFAULT_FN_ATTRS
    524 _mm512_sub_epi32 (__m512i __A, __m512i __B)
    525 {
    526   return (__m512i) ((__v16si) __A - (__v16si) __B);
    527 }
    528 
    529 static __inline__ __m512i __DEFAULT_FN_ATTRS
    530 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
    531 {
    532   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
    533              (__v16si) __B,
    534              (__v16si) __W,
    535              (__mmask16) __U);
    536 }
    537 
    538 static __inline__ __m512i __DEFAULT_FN_ATTRS
    539 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
    540 {
    541   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
    542              (__v16si) __B,
    543              (__v16si)
    544              _mm512_setzero_si512 (),
    545              (__mmask16) __U);
    546 }
    547 
    548 static  __inline__ __m512d __DEFAULT_FN_ATTRS
    549 _mm512_max_pd(__m512d __A, __m512d __B)
    550 {
    551   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
    552              (__v8df) __B,
    553              (__v8df)
    554              _mm512_setzero_pd (),
    555              (__mmask8) -1,
    556              _MM_FROUND_CUR_DIRECTION);
    557 }
    558 
    559 static  __inline__ __m512 __DEFAULT_FN_ATTRS
    560 _mm512_max_ps(__m512 __A, __m512 __B)
    561 {
    562   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
    563             (__v16sf) __B,
    564             (__v16sf)
    565             _mm512_setzero_ps (),
    566             (__mmask16) -1,
    567             _MM_FROUND_CUR_DIRECTION);
    568 }
    569 
    570 static __inline__ __m128 __DEFAULT_FN_ATTRS
    571 _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
    572   return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
    573                 (__v4sf) __B,
    574                 (__v4sf) __W,
    575                 (__mmask8) __U,
    576                 _MM_FROUND_CUR_DIRECTION);
    577 }
    578 
    579 static __inline__ __m128 __DEFAULT_FN_ATTRS
    580 _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
    581   return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
    582                 (__v4sf) __B,
    583                 (__v4sf)  _mm_setzero_ps (),
    584                 (__mmask8) __U,
    585                 _MM_FROUND_CUR_DIRECTION);
    586 }
    587 
    588 #define _mm_max_round_ss(__A, __B, __R) __extension__ ({ \
    589   (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \
    590                 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
    591 
    592 #define _mm_mask_max_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
    593   (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \
    594                 (__v4sf)  __W, (__mmask8) __U,__R); })
    595 
    596 #define _mm_maskz_max_round_ss(__U, __A, __B, __R) __extension__ ({ \
    597   (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \
    598                 (__v4sf)  _mm_setzero_ps(), (__mmask8) __U,__R); })
    599 
    600 static __inline__ __m128d __DEFAULT_FN_ATTRS
    601 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
    602   return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
    603                 (__v2df) __B,
    604                 (__v2df) __W,
    605                 (__mmask8) __U,
    606                 _MM_FROUND_CUR_DIRECTION);
    607 }
    608 
    609 static __inline__ __m128d __DEFAULT_FN_ATTRS
    610 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
    611   return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
    612                 (__v2df) __B,
    613                 (__v2df)  _mm_setzero_pd (),
    614                 (__mmask8) __U,
    615                 _MM_FROUND_CUR_DIRECTION);
    616 }
    617 
    618 #define _mm_max_round_sd(__A, __B, __R) __extension__ ({ \
    619   (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \
    620                 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
    621 
    622 #define _mm_mask_max_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
    623   (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \
    624                 (__v2df)  __W, (__mmask8) __U,__R); })
    625 
    626 #define _mm_maskz_max_round_sd(__U, __A, __B, __R) __extension__ ({ \
    627   (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \
    628                 (__v2df)  _mm_setzero_pd(), (__mmask8) __U,__R); })
    629 
    630 static __inline __m512i
    631 __DEFAULT_FN_ATTRS
    632 _mm512_max_epi32(__m512i __A, __m512i __B)
    633 {
    634   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
    635               (__v16si) __B,
    636               (__v16si)
    637               _mm512_setzero_si512 (),
    638               (__mmask16) -1);
    639 }
    640 
    641 static __inline __m512i __DEFAULT_FN_ATTRS
    642 _mm512_max_epu32(__m512i __A, __m512i __B)
    643 {
    644   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
    645               (__v16si) __B,
    646               (__v16si)
    647               _mm512_setzero_si512 (),
    648               (__mmask16) -1);
    649 }
    650 
    651 static __inline __m512i __DEFAULT_FN_ATTRS
    652 _mm512_max_epi64(__m512i __A, __m512i __B)
    653 {
    654   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
    655               (__v8di) __B,
    656               (__v8di)
    657               _mm512_setzero_si512 (),
    658               (__mmask8) -1);
    659 }
    660 
    661 static __inline __m512i __DEFAULT_FN_ATTRS
    662 _mm512_max_epu64(__m512i __A, __m512i __B)
    663 {
    664   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
    665               (__v8di) __B,
    666               (__v8di)
    667               _mm512_setzero_si512 (),
    668               (__mmask8) -1);
    669 }
    670 
    671 static  __inline__ __m512d __DEFAULT_FN_ATTRS
    672 _mm512_min_pd(__m512d __A, __m512d __B)
    673 {
    674   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
    675              (__v8df) __B,
    676              (__v8df)
    677              _mm512_setzero_pd (),
    678              (__mmask8) -1,
    679              _MM_FROUND_CUR_DIRECTION);
    680 }
    681 
    682 static  __inline__ __m512 __DEFAULT_FN_ATTRS
    683 _mm512_min_ps(__m512 __A, __m512 __B)
    684 {
    685   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
    686             (__v16sf) __B,
    687             (__v16sf)
    688             _mm512_setzero_ps (),
    689             (__mmask16) -1,
    690             _MM_FROUND_CUR_DIRECTION);
    691 }
    692 
    693 static __inline__ __m128 __DEFAULT_FN_ATTRS
    694 _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
    695   return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
    696                 (__v4sf) __B,
    697                 (__v4sf) __W,
    698                 (__mmask8) __U,
    699                 _MM_FROUND_CUR_DIRECTION);
    700 }
    701 
    702 static __inline__ __m128 __DEFAULT_FN_ATTRS
    703 _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
    704   return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
    705                 (__v4sf) __B,
    706                 (__v4sf)  _mm_setzero_ps (),
    707                 (__mmask8) __U,
    708                 _MM_FROUND_CUR_DIRECTION);
    709 }
    710 
    711 #define _mm_min_round_ss(__A, __B, __R) __extension__ ({ \
    712   (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \
    713                 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
    714 
    715 #define _mm_mask_min_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
    716   (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \
    717                 (__v4sf)  __W, (__mmask8) __U,__R); })
    718 
    719 #define _mm_maskz_min_round_ss(__U, __A, __B, __R) __extension__ ({ \
    720   (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \
    721                 (__v4sf)  _mm_setzero_ps(), (__mmask8) __U,__R); })
    722 
    723 static __inline__ __m128d __DEFAULT_FN_ATTRS
    724 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
    725   return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
    726                 (__v2df) __B,
    727                 (__v2df) __W,
    728                 (__mmask8) __U,
    729                 _MM_FROUND_CUR_DIRECTION);
    730 }
    731 
    732 static __inline__ __m128d __DEFAULT_FN_ATTRS
    733 _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
    734   return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
    735                 (__v2df) __B,
    736                 (__v2df)  _mm_setzero_pd (),
    737                 (__mmask8) __U,
    738                 _MM_FROUND_CUR_DIRECTION);
    739 }
    740 
    741 #define _mm_min_round_sd(__A, __B, __R) __extension__ ({ \
    742   (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \
    743                 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
    744 
    745 #define _mm_mask_min_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
    746   (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \
    747                 (__v2df)  __W, (__mmask8) __U,__R); })
    748 
    749 #define _mm_maskz_min_round_sd(__U, __A, __B, __R) __extension__ ({ \
    750   (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \
    751                 (__v2df)  _mm_setzero_pd(), (__mmask8) __U,__R); })
    752 
    753 static __inline __m512i
    754 __DEFAULT_FN_ATTRS
    755 _mm512_min_epi32(__m512i __A, __m512i __B)
    756 {
    757   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
    758               (__v16si) __B,
    759               (__v16si)
    760               _mm512_setzero_si512 (),
    761               (__mmask16) -1);
    762 }
    763 
    764 static __inline __m512i __DEFAULT_FN_ATTRS
    765 _mm512_min_epu32(__m512i __A, __m512i __B)
    766 {
    767   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
    768               (__v16si) __B,
    769               (__v16si)
    770               _mm512_setzero_si512 (),
    771               (__mmask16) -1);
    772 }
    773 
    774 static __inline __m512i __DEFAULT_FN_ATTRS
    775 _mm512_min_epi64(__m512i __A, __m512i __B)
    776 {
    777   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
    778               (__v8di) __B,
    779               (__v8di)
    780               _mm512_setzero_si512 (),
    781               (__mmask8) -1);
    782 }
    783 
    784 static __inline __m512i __DEFAULT_FN_ATTRS
    785 _mm512_min_epu64(__m512i __A, __m512i __B)
    786 {
    787   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
    788               (__v8di) __B,
    789               (__v8di)
    790               _mm512_setzero_si512 (),
    791               (__mmask8) -1);
    792 }
    793 
    794 static __inline __m512i __DEFAULT_FN_ATTRS
    795 _mm512_mul_epi32(__m512i __X, __m512i __Y)
    796 {
    797   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
    798               (__v16si) __Y,
    799               (__v8di)
    800               _mm512_setzero_si512 (),
    801               (__mmask8) -1);
    802 }
    803 
    804 static __inline __m512i __DEFAULT_FN_ATTRS
    805 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
    806 {
    807   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
    808               (__v16si) __Y,
    809               (__v8di) __W, __M);
    810 }
    811 
    812 static __inline __m512i __DEFAULT_FN_ATTRS
    813 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
    814 {
    815   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
    816               (__v16si) __Y,
    817               (__v8di)
    818               _mm512_setzero_si512 (),
    819               __M);
    820 }
    821 
    822 static __inline __m512i __DEFAULT_FN_ATTRS
    823 _mm512_mul_epu32(__m512i __X, __m512i __Y)
    824 {
    825   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
    826                (__v16si) __Y,
    827                (__v8di)
    828                _mm512_setzero_si512 (),
    829                (__mmask8) -1);
    830 }
    831 
    832 static __inline __m512i __DEFAULT_FN_ATTRS
    833 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
    834 {
    835   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
    836                (__v16si) __Y,
    837                (__v8di) __W, __M);
    838 }
    839 
    840 static __inline __m512i __DEFAULT_FN_ATTRS
    841 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
    842 {
    843   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
    844                (__v16si) __Y,
    845                (__v8di)
    846                _mm512_setzero_si512 (),
    847                __M);
    848 }
    849 
    850 static __inline __m512i __DEFAULT_FN_ATTRS
    851 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
    852 {
    853   return (__m512i) ((__v16si) __A * (__v16si) __B);
    854 }
    855 
    856 static __inline __m512i __DEFAULT_FN_ATTRS
    857 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
    858 {
    859   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
    860               (__v16si) __B,
    861               (__v16si)
    862               _mm512_setzero_si512 (),
    863               __M);
    864 }
    865 
    866 static __inline __m512i __DEFAULT_FN_ATTRS
    867 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
    868 {
    869   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
    870               (__v16si) __B,
    871               (__v16si) __W, __M);
    872 }
    873 
    874 static  __inline__ __m512d __DEFAULT_FN_ATTRS
    875 _mm512_sqrt_pd(__m512d __a)
    876 {
    877   return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
    878                                                 (__v8df) _mm512_setzero_pd (),
    879                                                 (__mmask8) -1,
    880                                                 _MM_FROUND_CUR_DIRECTION);
    881 }
    882 
    883 static  __inline__ __m512 __DEFAULT_FN_ATTRS
    884 _mm512_sqrt_ps(__m512 __a)
    885 {
    886   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
    887                                                (__v16sf) _mm512_setzero_ps (),
    888                                                (__mmask16) -1,
    889                                                _MM_FROUND_CUR_DIRECTION);
    890 }
    891 
    892 static  __inline__ __m512d __DEFAULT_FN_ATTRS
    893 _mm512_rsqrt14_pd(__m512d __A)
    894 {
    895   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
    896                  (__v8df)
    897                  _mm512_setzero_pd (),
    898                  (__mmask8) -1);}
    899 
    900 static  __inline__ __m512 __DEFAULT_FN_ATTRS
    901 _mm512_rsqrt14_ps(__m512 __A)
    902 {
    903   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
    904                 (__v16sf)
    905                 _mm512_setzero_ps (),
    906                 (__mmask16) -1);
    907 }
    908 
    909 static  __inline__ __m128 __DEFAULT_FN_ATTRS
    910 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
    911 {
    912   return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __A,
    913              (__v4sf) __B,
    914              (__v4sf)
    915              _mm_setzero_ps (),
    916              (__mmask8) -1);
    917 }
    918 
    919 static  __inline__ __m128d __DEFAULT_FN_ATTRS
    920 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
    921 {
    922   return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __A,
    923               (__v2df) __B,
    924               (__v2df)
    925               _mm_setzero_pd (),
    926               (__mmask8) -1);
    927 }
    928 
    929 static  __inline__ __m512d __DEFAULT_FN_ATTRS
    930 _mm512_rcp14_pd(__m512d __A)
    931 {
    932   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
    933                (__v8df)
    934                _mm512_setzero_pd (),
    935                (__mmask8) -1);
    936 }
    937 
    938 static  __inline__ __m512 __DEFAULT_FN_ATTRS
    939 _mm512_rcp14_ps(__m512 __A)
    940 {
    941   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
    942               (__v16sf)
    943               _mm512_setzero_ps (),
    944               (__mmask16) -1);
    945 }
    946 static  __inline__ __m128 __DEFAULT_FN_ATTRS
    947 _mm_rcp14_ss(__m128 __A, __m128 __B)
    948 {
    949   return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __A,
    950                  (__v4sf) __B,
    951                  (__v4sf)
    952                  _mm_setzero_ps (),
    953                  (__mmask8) -1);
    954 }
    955 
    956 static  __inline__ __m128d __DEFAULT_FN_ATTRS
    957 _mm_rcp14_sd(__m128d __A, __m128d __B)
    958 {
    959   return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __A,
    960             (__v2df) __B,
    961             (__v2df)
    962             _mm_setzero_pd (),
    963             (__mmask8) -1);
    964 }
    965 
    966 static __inline __m512 __DEFAULT_FN_ATTRS
    967 _mm512_floor_ps(__m512 __A)
    968 {
    969   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
    970                                                   _MM_FROUND_FLOOR,
    971                                                   (__v16sf) __A, -1,
    972                                                   _MM_FROUND_CUR_DIRECTION);
    973 }
    974 
    975 static __inline __m512d __DEFAULT_FN_ATTRS
    976 _mm512_floor_pd(__m512d __A)
    977 {
    978   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
    979                                                    _MM_FROUND_FLOOR,
    980                                                    (__v8df) __A, -1,
    981                                                    _MM_FROUND_CUR_DIRECTION);
    982 }
    983 
    984 static __inline __m512 __DEFAULT_FN_ATTRS
    985 _mm512_ceil_ps(__m512 __A)
    986 {
    987   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
    988                                                   _MM_FROUND_CEIL,
    989                                                   (__v16sf) __A, -1,
    990                                                   _MM_FROUND_CUR_DIRECTION);
    991 }
    992 
    993 static __inline __m512d __DEFAULT_FN_ATTRS
    994 _mm512_ceil_pd(__m512d __A)
    995 {
    996   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
    997                                                    _MM_FROUND_CEIL,
    998                                                    (__v8df) __A, -1,
    999                                                    _MM_FROUND_CUR_DIRECTION);
   1000 }
   1001 
   1002 static __inline __m512i __DEFAULT_FN_ATTRS
   1003 _mm512_abs_epi64(__m512i __A)
   1004 {
   1005   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
   1006              (__v8di)
   1007              _mm512_setzero_si512 (),
   1008              (__mmask8) -1);
   1009 }
   1010 
   1011 static __inline __m512i __DEFAULT_FN_ATTRS
   1012 _mm512_abs_epi32(__m512i __A)
   1013 {
   1014   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
   1015              (__v16si)
   1016              _mm512_setzero_si512 (),
   1017              (__mmask16) -1);
   1018 }
   1019 
   1020 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1021 _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   1022   return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
   1023                 (__v4sf) __B,
   1024                 (__v4sf) __W,
   1025                 (__mmask8) __U,
   1026                 _MM_FROUND_CUR_DIRECTION);
   1027 }
   1028 
   1029 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1030 _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   1031   return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
   1032                 (__v4sf) __B,
   1033                 (__v4sf)  _mm_setzero_ps (),
   1034                 (__mmask8) __U,
   1035                 _MM_FROUND_CUR_DIRECTION);
   1036 }
   1037 
   1038 #define _mm_add_round_ss(__A, __B, __R) __extension__ ({ \
   1039   (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \
   1040                 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
   1041 
   1042 #define _mm_mask_add_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
   1043   (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \
   1044                 (__v4sf)  __W, (__mmask8) __U,__R); })
   1045 
   1046 #define _mm_maskz_add_round_ss(__U, __A, __B, __R) __extension__ ({ \
   1047   (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \
   1048                 (__v4sf)  _mm_setzero_ps(), (__mmask8) __U,__R); })
   1049 
   1050 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1051 _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   1052   return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
   1053                 (__v2df) __B,
   1054                 (__v2df) __W,
   1055                 (__mmask8) __U,
   1056                 _MM_FROUND_CUR_DIRECTION);
   1057 }
   1058 
   1059 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1060 _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   1061   return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
   1062                 (__v2df) __B,
   1063                 (__v2df)  _mm_setzero_pd (),
   1064                 (__mmask8) __U,
   1065                 _MM_FROUND_CUR_DIRECTION);
   1066 }
   1067 #define _mm_add_round_sd(__A, __B, __R) __extension__ ({ \
   1068   (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \
   1069                 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
   1070 
   1071 #define _mm_mask_add_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
   1072   (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \
   1073                 (__v2df)  __W, (__mmask8) __U,__R); })
   1074 
   1075 #define _mm_maskz_add_round_sd(__U, __A, __B, __R) __extension__ ({ \
   1076   (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \
   1077                 (__v2df)  _mm_setzero_pd(), (__mmask8) __U,__R); })
   1078 
   1079 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1080 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   1081   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
   1082              (__v8df) __B,
   1083              (__v8df) __W,
   1084              (__mmask8) __U,
   1085              _MM_FROUND_CUR_DIRECTION);
   1086 }
   1087 
   1088 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1089 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   1090   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
   1091              (__v8df) __B,
   1092              (__v8df) _mm512_setzero_pd (),
   1093              (__mmask8) __U,
   1094              _MM_FROUND_CUR_DIRECTION);
   1095 }
   1096 
   1097 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1098 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   1099   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
   1100             (__v16sf) __B,
   1101             (__v16sf) __W,
   1102             (__mmask16) __U,
   1103             _MM_FROUND_CUR_DIRECTION);
   1104 }
   1105 
   1106 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1107 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   1108   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
   1109             (__v16sf) __B,
   1110             (__v16sf) _mm512_setzero_ps (),
   1111             (__mmask16) __U,
   1112             _MM_FROUND_CUR_DIRECTION);
   1113 }
   1114 
   1115 #define _mm512_add_round_pd(__A, __B, __R) __extension__ ({ \
   1116   (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, (__v8df) __B, \
   1117                (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })
   1118 
   1119 #define _mm512_mask_add_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
   1120   (__m512d) __builtin_ia32_addpd512_mask((__v8df) __A, (__v8df) __B, \
   1121                 (__v8df) __W, (__mmask8) __U, __R); })
   1122 
   1123 #define _mm512_maskz_add_round_pd(__U, __A, __B, __R) __extension__ ({ \
   1124   (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, (__v8df) __B, \
   1125                 (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R); })
   1126 
   1127 #define _mm512_add_round_ps(__A, __B, __R) __extension__ ({ \
   1128   (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
   1129                 (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, __R); })
   1130 
   1131 #define _mm512_mask_add_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
   1132   (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
   1133                 (__v16sf) __W, (__mmask16)__U, __R); })
   1134 
   1135 #define _mm512_maskz_add_round_ps(__U, __A, __B, __R) __extension__ ({ \
   1136   (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
   1137                 (__v16sf) _mm512_setzero_ps(), (__mmask16)__U, __R); })
   1138 
   1139 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1140 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   1141   return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
   1142                 (__v4sf) __B,
   1143                 (__v4sf) __W,
   1144                 (__mmask8) __U,
   1145                 _MM_FROUND_CUR_DIRECTION);
   1146 }
   1147 
   1148 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1149 _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   1150   return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
   1151                 (__v4sf) __B,
   1152                 (__v4sf)  _mm_setzero_ps (),
   1153                 (__mmask8) __U,
   1154                 _MM_FROUND_CUR_DIRECTION);
   1155 }
   1156 #define _mm_sub_round_ss(__A, __B, __R) __extension__ ({ \
   1157   (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \
   1158                 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
   1159 
   1160 #define _mm_mask_sub_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
   1161   (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \
   1162                 (__v4sf)  __W, (__mmask8) __U,__R); })
   1163 
   1164 #define _mm_maskz_sub_round_ss(__U, __A, __B, __R) __extension__ ({ \
   1165   (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \
   1166                 (__v4sf)  _mm_setzero_ps(), (__mmask8) __U,__R); })
   1167 
   1168 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1169 _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   1170   return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
   1171                 (__v2df) __B,
   1172                 (__v2df) __W,
   1173                 (__mmask8) __U,
   1174                 _MM_FROUND_CUR_DIRECTION);
   1175 }
   1176 
   1177 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1178 _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   1179   return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
   1180                 (__v2df) __B,
   1181                 (__v2df)  _mm_setzero_pd (),
   1182                 (__mmask8) __U,
   1183                 _MM_FROUND_CUR_DIRECTION);
   1184 }
   1185 
   1186 #define _mm_sub_round_sd(__A, __B, __R) __extension__ ({ \
   1187   (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \
   1188                 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
   1189 
   1190 #define _mm_mask_sub_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
   1191   (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \
   1192                 (__v2df)  __W, (__mmask8) __U,__R); })
   1193 
   1194 #define _mm_maskz_sub_round_sd(__U, __A, __B, __R) __extension__ ({ \
   1195   (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \
   1196                 (__v2df)  _mm_setzero_pd(), (__mmask8) __U,__R); })
   1197 
   1198 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1199 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   1200   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
   1201              (__v8df) __B,
   1202              (__v8df) __W,
   1203              (__mmask8) __U,
   1204              _MM_FROUND_CUR_DIRECTION);
   1205 }
   1206 
   1207 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1208 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   1209   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
   1210              (__v8df) __B,
   1211              (__v8df)
   1212              _mm512_setzero_pd (),
   1213              (__mmask8) __U,
   1214              _MM_FROUND_CUR_DIRECTION);
   1215 }
   1216 
   1217 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1218 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   1219   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
   1220             (__v16sf) __B,
   1221             (__v16sf) __W,
   1222             (__mmask16) __U,
   1223             _MM_FROUND_CUR_DIRECTION);
   1224 }
   1225 
   1226 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1227 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   1228   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
   1229             (__v16sf) __B,
   1230             (__v16sf)
   1231             _mm512_setzero_ps (),
   1232             (__mmask16) __U,
   1233             _MM_FROUND_CUR_DIRECTION);
   1234 }
   1235 
   1236 #define _mm512_sub_round_pd(__A, __B, __R) __extension__ ({ \
   1237   (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B,\
   1238              (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })
   1239 
   1240 #define _mm512_mask_sub_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
   1241   (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B, \
   1242              (__v8df) __W, (__mmask8) __U, __R); })
   1243 
   1244 #define _mm512_maskz_sub_round_pd(__U, __A, __B, __R) __extension__ ({ \
   1245    (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B, \
   1246              (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
   1247 
   1248 #define _mm512_sub_round_ps(__A, __B, __R) __extension__ ({ \
   1249   (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \
   1250             (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);})
   1251 
   1252 #define _mm512_mask_sub_round_ps(__W, __U, __A, __B, __R)  __extension__ ({ \
   1253   (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \
   1254             (__v16sf) __W, (__mmask16) __U, __R); });
   1255 
   1256 #define _mm512_maskz_sub_round_ps(__U, __A, __B, __R)  __extension__ ({ \
   1257   (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \
   1258             (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);});
   1259 
   1260 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1261 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   1262   return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
   1263                 (__v4sf) __B,
   1264                 (__v4sf) __W,
   1265                 (__mmask8) __U,
   1266                 _MM_FROUND_CUR_DIRECTION);
   1267 }
   1268 
   1269 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1270 _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   1271   return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
   1272                 (__v4sf) __B,
   1273                 (__v4sf)  _mm_setzero_ps (),
   1274                 (__mmask8) __U,
   1275                 _MM_FROUND_CUR_DIRECTION);
   1276 }
   1277 #define _mm_mul_round_ss(__A, __B, __R) __extension__ ({ \
   1278   (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \
   1279                 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
   1280 
   1281 #define _mm_mask_mul_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
   1282   (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \
   1283                 (__v4sf)  __W, (__mmask8) __U,__R); })
   1284 
   1285 #define _mm_maskz_mul_round_ss(__U, __A, __B, __R) __extension__ ({ \
   1286   (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \
   1287                 (__v4sf)  _mm_setzero_ps(), (__mmask8) __U,__R); })
   1288 
   1289 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1290 _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   1291   return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
   1292                 (__v2df) __B,
   1293                 (__v2df) __W,
   1294                 (__mmask8) __U,
   1295                 _MM_FROUND_CUR_DIRECTION);
   1296 }
   1297 
   1298 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1299 _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   1300   return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
   1301                 (__v2df) __B,
   1302                 (__v2df)  _mm_setzero_pd (),
   1303                 (__mmask8) __U,
   1304                 _MM_FROUND_CUR_DIRECTION);
   1305 }
   1306 
   1307 #define _mm_mul_round_sd(__A, __B, __R) __extension__ ({ \
   1308   (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \
   1309                 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
   1310 
   1311 #define _mm_mask_mul_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
   1312   (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \
   1313                 (__v2df)  __W, (__mmask8) __U,__R); })
   1314 
   1315 #define _mm_maskz_mul_round_sd(__U, __A, __B, __R) __extension__ ({ \
   1316   (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \
   1317                 (__v2df)  _mm_setzero_pd(), (__mmask8) __U,__R); })
   1318 
   1319 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1320 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   1321   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
   1322              (__v8df) __B,
   1323              (__v8df) __W,
   1324              (__mmask8) __U,
   1325              _MM_FROUND_CUR_DIRECTION);
   1326 }
   1327 
   1328 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1329 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   1330   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
   1331              (__v8df) __B,
   1332              (__v8df)
   1333              _mm512_setzero_pd (),
   1334              (__mmask8) __U,
   1335              _MM_FROUND_CUR_DIRECTION);
   1336 }
   1337 
   1338 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1339 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   1340   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
   1341             (__v16sf) __B,
   1342             (__v16sf) __W,
   1343             (__mmask16) __U,
   1344             _MM_FROUND_CUR_DIRECTION);
   1345 }
   1346 
   1347 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1348 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   1349   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
   1350             (__v16sf) __B,
   1351             (__v16sf)
   1352             _mm512_setzero_ps (),
   1353             (__mmask16) __U,
   1354             _MM_FROUND_CUR_DIRECTION);
   1355 }
   1356 
   1357 #define _mm512_mul_round_pd(__A, __B, __R) __extension__ ({ \
   1358   (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B,\
   1359              (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })
   1360 
   1361 #define _mm512_mask_mul_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
   1362   (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B, \
   1363              (__v8df) __W, (__mmask8) __U, __R); })
   1364 
   1365 #define _mm512_maskz_mul_round_pd(__U, __A, __B, __R) __extension__ ({ \
   1366    (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B, \
   1367              (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
   1368 
   1369 #define _mm512_mul_round_ps(__A, __B, __R) __extension__ ({ \
   1370   (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \
   1371             (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);})
   1372 
   1373 #define _mm512_mask_mul_round_ps(__W, __U, __A, __B, __R)  __extension__ ({ \
   1374   (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \
   1375             (__v16sf) __W, (__mmask16) __U, __R); });
   1376 
   1377 #define _mm512_maskz_mul_round_ps(__U, __A, __B, __R)  __extension__ ({ \
   1378   (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \
   1379             (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);});
   1380 
   1381 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1382 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   1383   return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
   1384                 (__v4sf) __B,
   1385                 (__v4sf) __W,
   1386                 (__mmask8) __U,
   1387                 _MM_FROUND_CUR_DIRECTION);
   1388 }
   1389 
   1390 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1391 _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   1392   return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
   1393                 (__v4sf) __B,
   1394                 (__v4sf)  _mm_setzero_ps (),
   1395                 (__mmask8) __U,
   1396                 _MM_FROUND_CUR_DIRECTION);
   1397 }
   1398 
   1399 #define _mm_div_round_ss(__A, __B, __R) __extension__ ({ \
   1400   (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \
   1401                 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
   1402 
   1403 #define _mm_mask_div_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
   1404   (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \
   1405                 (__v4sf)  __W, (__mmask8) __U,__R); })
   1406 
   1407 #define _mm_maskz_div_round_ss(__U, __A, __B, __R) __extension__ ({ \
   1408   (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \
   1409                 (__v4sf)  _mm_setzero_ps(), (__mmask8) __U,__R); })
   1410 
   1411 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1412 _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   1413   return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
   1414                 (__v2df) __B,
   1415                 (__v2df) __W,
   1416                 (__mmask8) __U,
   1417                 _MM_FROUND_CUR_DIRECTION);
   1418 }
   1419 
   1420 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1421 _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   1422   return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
   1423                 (__v2df) __B,
   1424                 (__v2df)  _mm_setzero_pd (),
   1425                 (__mmask8) __U,
   1426                 _MM_FROUND_CUR_DIRECTION);
   1427 }
   1428 
   1429 #define _mm_div_round_sd(__A, __B, __R) __extension__ ({ \
   1430   (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \
   1431                 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
   1432 
   1433 #define _mm_mask_div_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
   1434   (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \
   1435                 (__v2df)  __W, (__mmask8) __U,__R); })
   1436 
   1437 #define _mm_maskz_div_round_sd(__U, __A, __B, __R) __extension__ ({ \
   1438   (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \
   1439                 (__v2df)  _mm_setzero_pd(), (__mmask8) __U,__R); })
   1440 
   1441 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1442 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   1443   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
   1444              (__v8df) __B,
   1445              (__v8df) __W,
   1446              (__mmask8) __U,
   1447              _MM_FROUND_CUR_DIRECTION);
   1448 }
   1449 
   1450 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1451 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   1452   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
   1453              (__v8df) __B,
   1454              (__v8df)
   1455              _mm512_setzero_pd (),
   1456              (__mmask8) __U,
   1457              _MM_FROUND_CUR_DIRECTION);
   1458 }
   1459 
   1460 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1461 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   1462   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
   1463             (__v16sf) __B,
   1464             (__v16sf) __W,
   1465             (__mmask16) __U,
   1466             _MM_FROUND_CUR_DIRECTION);
   1467 }
   1468 
   1469 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1470 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   1471   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
   1472             (__v16sf) __B,
   1473             (__v16sf)
   1474             _mm512_setzero_ps (),
   1475             (__mmask16) __U,
   1476             _MM_FROUND_CUR_DIRECTION);
   1477 }
   1478 
   1479 #define _mm512_div_round_pd(__A, __B, __R) __extension__ ({ \
   1480   (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B,\
   1481              (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })
   1482 
   1483 #define _mm512_mask_div_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
   1484   (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B, \
   1485              (__v8df) __W, (__mmask8) __U, __R); })
   1486 
   1487 #define _mm512_maskz_div_round_pd(__U, __A, __B, __R) __extension__ ({ \
   1488    (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B, \
   1489              (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
   1490 
   1491 #define _mm512_div_round_ps(__A, __B, __R) __extension__ ({ \
   1492   (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \
   1493             (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);})
   1494 
   1495 #define _mm512_mask_div_round_ps(__W, __U, __A, __B, __R)  __extension__ ({ \
   1496   (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \
   1497             (__v16sf) __W, (__mmask16) __U, __R); });
   1498 
   1499 #define _mm512_maskz_div_round_ps(__U, __A, __B, __R)  __extension__ ({ \
   1500   (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \
   1501             (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);});
   1502 
   1503 #define _mm512_roundscale_ps(A, B) __extension__ ({ \
   1504   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(A), (B), (__v16sf)(A), \
   1505                                          -1, _MM_FROUND_CUR_DIRECTION); })
   1506 
   1507 #define _mm512_roundscale_pd(A, B) __extension__ ({ \
   1508   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(A), (B), (__v8df)(A), \
   1509                                           -1, _MM_FROUND_CUR_DIRECTION); })
   1510 
   1511 #define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
   1512   (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
   1513                                              (__v8df) (B), (__v8df) (C), \
   1514                                              (__mmask8) -1, (R)); })
   1515 
   1516 
   1517 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
   1518   (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
   1519                                              (__v8df) (B), (__v8df) (C), \
   1520                                              (__mmask8) (U), (R)); })
   1521 
   1522 
   /* "mask3" form: forwards A, B and C to the vfmaddpd512 "mask3" builtin with
    * U as the mask and R as the rounding operand.
    * NOTE(review): the mask3 builtin presumably keeps C in unselected lanes --
    * confirm against the builtin's definition. */
   #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(A), \
                                               (__v8df)(B), \
                                               (__v8df)(C), \
                                               (__mmask8)(U), \
                                               (R)); })
   1527 
   1528 
   /* "maskz" form: forwards A, B and C to the vfmaddpd512 "maskz" builtin with
    * U as the mask and R as the rounding operand.
    * NOTE(review): the maskz builtin presumably zeroes unselected lanes --
    * confirm against the builtin's definition. */
   #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(A), \
                                               (__v8df)(B), \
                                               (__v8df)(C), \
                                               (__mmask8)(U), \
                                               (R)); })
   1533 
   1534 
   /* Unmasked form built on the vfmaddpd512 "mask" builtin: C is negated before
    * it is forwarded, A and B pass through unchanged; all-ones mask, rounding
    * operand R. */
   #define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(A), \
                                              (__v8df)(B), \
                                              -(__v8df)(C), \
                                              (__mmask8)-1, \
                                              (R)); })
   1539 
   1540 
   /* Masked form built on the vfmaddpd512 "mask" builtin: C is negated before
    * it is forwarded; U is the mask and R the rounding operand. */
   #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(A), \
                                              (__v8df)(B), \
                                              -(__v8df)(C), \
                                              (__mmask8)(U), \
                                              (R)); })
   1545 
   1546 
   /* "maskz" form built on the vfmaddpd512 "maskz" builtin: C is negated before
    * it is forwarded; U is the mask and R the rounding operand. */
   #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(A), \
                                               (__v8df)(B), \
                                               -(__v8df)(C), \
                                               (__mmask8)(U), \
                                               (R)); })
   1551 
   1552 
   /* Unmasked form built on the vfmaddpd512 "mask" builtin: A is negated before
    * it is forwarded, B and C pass through unchanged; all-ones mask, rounding
    * operand R. */
   #define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(A), \
                                              (__v8df)(B), \
                                              (__v8df)(C), \
                                              (__mmask8)-1, \
                                              (R)); })
   1557 
   1558 
   /* "mask3" form built on the vfmaddpd512 "mask3" builtin: A is negated before
    * it is forwarded; U is the mask and R the rounding operand. */
   #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(A), \
                                               (__v8df)(B), \
                                               (__v8df)(C), \
                                               (__mmask8)(U), \
                                               (R)); })
   1563 
   1564 
   /* "maskz" form built on the vfmaddpd512 "maskz" builtin: A is negated before
    * it is forwarded; U is the mask and R the rounding operand. */
   #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(A), \
                                               (__v8df)(B), \
                                               (__v8df)(C), \
                                               (__mmask8)(U), \
                                               (R)); })
   1569 
   1570 
   /* Unmasked form built on the vfmaddpd512 "mask" builtin: both A and C are
    * negated before they are forwarded, B is unchanged; all-ones mask,
    * rounding operand R. */
   #define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(A), \
                                              (__v8df)(B), \
                                              -(__v8df)(C), \
                                              (__mmask8)-1, \
                                              (R)); })
   1575 
   1576 
   /* "maskz" form built on the vfmaddpd512 "maskz" builtin: both A and C are
    * negated before they are forwarded; U is the mask and R the rounding
    * operand. */
   #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(A), \
                                               (__v8df)(B), \
                                               -(__v8df)(C), \
                                               (__mmask8)(U), \
                                               (R)); })
   1581 
   1582 
   1583 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1584 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
   1585 {
   1586   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   1587                                                     (__v8df) __B,
   1588                                                     (__v8df) __C,
   1589                                                     (__mmask8) -1,
   1590                                                     _MM_FROUND_CUR_DIRECTION);
   1591 }
   1592 
   1593 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1594 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   1595 {
   1596   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   1597                                                     (__v8df) __B,
   1598                                                     (__v8df) __C,
   1599                                                     (__mmask8) __U,
   1600                                                     _MM_FROUND_CUR_DIRECTION);
   1601 }
   1602 
   1603 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1604 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   1605 {
   1606   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
   1607                                                      (__v8df) __B,
   1608                                                      (__v8df) __C,
   1609                                                      (__mmask8) __U,
   1610                                                      _MM_FROUND_CUR_DIRECTION);
   1611 }
   1612 
   1613 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1614 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   1615 {
   1616   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
   1617                                                      (__v8df) __B,
   1618                                                      (__v8df) __C,
   1619                                                      (__mmask8) __U,
   1620                                                      _MM_FROUND_CUR_DIRECTION);
   1621 }
   1622 
   1623 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1624 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
   1625 {
   1626   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   1627                                                     (__v8df) __B,
   1628                                                     -(__v8df) __C,
   1629                                                     (__mmask8) -1,
   1630                                                     _MM_FROUND_CUR_DIRECTION);
   1631 }
   1632 
   1633 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1634 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   1635 {
   1636   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   1637                                                     (__v8df) __B,
   1638                                                     -(__v8df) __C,
   1639                                                     (__mmask8) __U,
   1640                                                     _MM_FROUND_CUR_DIRECTION);
   1641 }
   1642 
   1643 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1644 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   1645 {
   1646   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
   1647                                                      (__v8df) __B,
   1648                                                      -(__v8df) __C,
   1649                                                      (__mmask8) __U,
   1650                                                      _MM_FROUND_CUR_DIRECTION);
   1651 }
   1652 
   1653 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1654 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
   1655 {
   1656   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
   1657                                                     (__v8df) __B,
   1658                                                     (__v8df) __C,
   1659                                                     (__mmask8) -1,
   1660                                                     _MM_FROUND_CUR_DIRECTION);
   1661 }
   1662 
   1663 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1664 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   1665 {
   1666   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
   1667                                                      (__v8df) __B,
   1668                                                      (__v8df) __C,
   1669                                                      (__mmask8) __U,
   1670                                                      _MM_FROUND_CUR_DIRECTION);
   1671 }
   1672 
   1673 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1674 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   1675 {
   1676   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
   1677                                                      (__v8df) __B,
   1678                                                      (__v8df) __C,
   1679                                                      (__mmask8) __U,
   1680                                                      _MM_FROUND_CUR_DIRECTION);
   1681 }
   1682 
   1683 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1684 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
   1685 {
   1686   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
   1687                                                     (__v8df) __B,
   1688                                                     -(__v8df) __C,
   1689                                                     (__mmask8) -1,
   1690                                                     _MM_FROUND_CUR_DIRECTION);
   1691 }
   1692 
   1693 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1694 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   1695 {
   1696   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
   1697                                                      (__v8df) __B,
   1698                                                      -(__v8df) __C,
   1699                                                      (__mmask8) __U,
   1700                                                      _MM_FROUND_CUR_DIRECTION);
   1701 }
   1702 
   /* Unmasked form: hands A, B and C to the vfmaddps512 "mask" builtin with an
    * all-ones mask and R as the rounding operand. */
   #define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(A), \
                                             (__v16sf)(B), \
                                             (__v16sf)(C), \
                                             (__mmask16)-1, \
                                             (R)); })
   1707 
   1708 
   /* Masked form: same vfmaddps512 "mask" builtin call on A, B and C, with U as
    * the mask and R as the rounding operand.  How unselected lanes are filled
    * is decided by the builtin, which is not visible here. */
   #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(A), \
                                             (__v16sf)(B), \
                                             (__v16sf)(C), \
                                             (__mmask16)(U), \
                                             (R)); })
   1713 
   1714 
   /* "mask3" form: forwards A, B and C to the vfmaddps512 "mask3" builtin with
    * U as the mask and R as the rounding operand.
    * NOTE(review): the mask3 builtin presumably keeps C in unselected lanes --
    * confirm against the builtin's definition. */
   #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(A), \
                                              (__v16sf)(B), \
                                              (__v16sf)(C), \
                                              (__mmask16)(U), \
                                              (R)); })
   1719 
   1720 
   /* "maskz" form: forwards A, B and C to the vfmaddps512 "maskz" builtin with
    * U as the mask and R as the rounding operand.
    * NOTE(review): the maskz builtin presumably zeroes unselected lanes --
    * confirm against the builtin's definition. */
   #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(A), \
                                              (__v16sf)(B), \
                                              (__v16sf)(C), \
                                              (__mmask16)(U), \
                                              (R)); })
   1725 
   1726 
   /* Unmasked form built on the vfmaddps512 "mask" builtin: C is negated before
    * it is forwarded, A and B pass through unchanged; all-ones mask, rounding
    * operand R. */
   #define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(A), \
                                             (__v16sf)(B), \
                                             -(__v16sf)(C), \
                                             (__mmask16)-1, \
                                             (R)); })
   1731 
   1732 
   /* Masked form built on the vfmaddps512 "mask" builtin: C is negated before
    * it is forwarded; U is the mask and R the rounding operand. */
   #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(A), \
                                             (__v16sf)(B), \
                                             -(__v16sf)(C), \
                                             (__mmask16)(U), \
                                             (R)); })
   1737 
   1738 
   /* "maskz" form built on the vfmaddps512 "maskz" builtin: C is negated before
    * it is forwarded; U is the mask and R the rounding operand. */
   #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(A), \
                                              (__v16sf)(B), \
                                              -(__v16sf)(C), \
                                              (__mmask16)(U), \
                                              (R)); })
   1743 
   1744 
   /* Unmasked form built on the vfmaddps512 "mask" builtin: A is negated before
    * it is forwarded, B and C pass through unchanged; all-ones mask, rounding
    * operand R. */
   #define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(A), \
                                             (__v16sf)(B), \
                                             (__v16sf)(C), \
                                             (__mmask16)-1, \
                                             (R)); })
   1749 
   1750 
   /* "mask3" form built on the vfmaddps512 "mask3" builtin: A is negated before
    * it is forwarded; U is the mask and R the rounding operand. */
   #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(A), \
                                              (__v16sf)(B), \
                                              (__v16sf)(C), \
                                              (__mmask16)(U), \
                                              (R)); })
   1755 
   1756 
   /* "maskz" form built on the vfmaddps512 "maskz" builtin: A is negated before
    * it is forwarded; U is the mask and R the rounding operand. */
   #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(A), \
                                              (__v16sf)(B), \
                                              (__v16sf)(C), \
                                              (__mmask16)(U), \
                                              (R)); })
   1761 
   1762 
   /* Unmasked form built on the vfmaddps512 "mask" builtin: both A and C are
    * negated before they are forwarded, B is unchanged; all-ones mask,
    * rounding operand R. */
   #define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(A), \
                                             (__v16sf)(B), \
                                             -(__v16sf)(C), \
                                             (__mmask16)-1, \
                                             (R)); })
   1767 
   1768 
   /* "maskz" form built on the vfmaddps512 "maskz" builtin: both A and C are
    * negated before they are forwarded; U is the mask and R the rounding
    * operand. */
   #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(A), \
                                              (__v16sf)(B), \
                                              -(__v16sf)(C), \
                                              (__mmask16)(U), \
                                              (R)); })
   1773 
   1774 
   1775 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1776 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
   1777 {
   1778   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   1779                                                    (__v16sf) __B,
   1780                                                    (__v16sf) __C,
   1781                                                    (__mmask16) -1,
   1782                                                    _MM_FROUND_CUR_DIRECTION);
   1783 }
   1784 
   1785 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1786 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   1787 {
   1788   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   1789                                                    (__v16sf) __B,
   1790                                                    (__v16sf) __C,
   1791                                                    (__mmask16) __U,
   1792                                                    _MM_FROUND_CUR_DIRECTION);
   1793 }
   1794 
   1795 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1796 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   1797 {
   1798   return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
   1799                                                     (__v16sf) __B,
   1800                                                     (__v16sf) __C,
   1801                                                     (__mmask16) __U,
   1802                                                     _MM_FROUND_CUR_DIRECTION);
   1803 }
   1804 
   1805 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1806 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   1807 {
   1808   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
   1809                                                     (__v16sf) __B,
   1810                                                     (__v16sf) __C,
   1811                                                     (__mmask16) __U,
   1812                                                     _MM_FROUND_CUR_DIRECTION);
   1813 }
   1814 
   1815 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1816 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
   1817 {
   1818   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   1819                                                    (__v16sf) __B,
   1820                                                    -(__v16sf) __C,
   1821                                                    (__mmask16) -1,
   1822                                                    _MM_FROUND_CUR_DIRECTION);
   1823 }
   1824 
   1825 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1826 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   1827 {
   1828   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   1829                                                    (__v16sf) __B,
   1830                                                    -(__v16sf) __C,
   1831                                                    (__mmask16) __U,
   1832                                                    _MM_FROUND_CUR_DIRECTION);
   1833 }
   1834 
   1835 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1836 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   1837 {
   1838   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
   1839                                                     (__v16sf) __B,
   1840                                                     -(__v16sf) __C,
   1841                                                     (__mmask16) __U,
   1842                                                     _MM_FROUND_CUR_DIRECTION);
   1843 }
   1844 
   1845 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1846 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
   1847 {
   1848   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
   1849                                                    (__v16sf) __B,
   1850                                                    (__v16sf) __C,
   1851                                                    (__mmask16) -1,
   1852                                                    _MM_FROUND_CUR_DIRECTION);
   1853 }
   1854 
   1855 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1856 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   1857 {
   1858   return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
   1859                                                     (__v16sf) __B,
   1860                                                     (__v16sf) __C,
   1861                                                     (__mmask16) __U,
   1862                                                     _MM_FROUND_CUR_DIRECTION);
   1863 }
   1864 
   1865 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1866 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   1867 {
   1868   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
   1869                                                     (__v16sf) __B,
   1870                                                     (__v16sf) __C,
   1871                                                     (__mmask16) __U,
   1872                                                     _MM_FROUND_CUR_DIRECTION);
   1873 }
   1874 
   1875 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1876 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
   1877 {
   1878   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
   1879                                                    (__v16sf) __B,
   1880                                                    -(__v16sf) __C,
   1881                                                    (__mmask16) -1,
   1882                                                    _MM_FROUND_CUR_DIRECTION);
   1883 }
   1884 
   1885 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1886 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   1887 {
   1888   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
   1889                                                     (__v16sf) __B,
   1890                                                     -(__v16sf) __C,
   1891                                                     (__mmask16) __U,
   1892                                                     _MM_FROUND_CUR_DIRECTION);
   1893 }
   1894 
   /* Unmasked form: hands A, B and C to the vfmaddsubpd512 "mask" builtin with
    * an all-ones mask and R as the rounding operand.
    * NOTE(review): the name suggests add and subtract alternate per lane --
    * confirm against the builtin's definition. */
   #define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(A), \
                                                 (__v8df)(B), \
                                                 (__v8df)(C), \
                                                 (__mmask8)-1, \
                                                 (R)); })
   1899 
   1900 
   /* Masked form: same vfmaddsubpd512 "mask" builtin call on A, B and C, with
    * U as the mask and R as the rounding operand. */
   #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(A), \
                                                 (__v8df)(B), \
                                                 (__v8df)(C), \
                                                 (__mmask8)(U), \
                                                 (R)); })
   1905 
   1906 
   /* "mask3" form: forwards A, B and C to the vfmaddsubpd512 "mask3" builtin
    * with U as the mask and R as the rounding operand. */
   #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(A), \
                                                  (__v8df)(B), \
                                                  (__v8df)(C), \
                                                  (__mmask8)(U), \
                                                  (R)); })
   1911 
   1912 
   /* "maskz" form: forwards A, B and C to the vfmaddsubpd512 "maskz" builtin
    * with U as the mask and R as the rounding operand. */
   #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(A), \
                                                  (__v8df)(B), \
                                                  (__v8df)(C), \
                                                  (__mmask8)(U), \
                                                  (R)); })
   1917 
   1918 
   /* Unmasked form built on the vfmaddsubpd512 "mask" builtin: C is negated
    * before it is forwarded, A and B are unchanged; all-ones mask, rounding
    * operand R. */
   #define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(A), \
                                                 (__v8df)(B), \
                                                 -(__v8df)(C), \
                                                 (__mmask8)-1, \
                                                 (R)); })
   1923 
   1924 
   /* Masked form built on the vfmaddsubpd512 "mask" builtin: C is negated
    * before it is forwarded; U is the mask and R the rounding operand. */
   #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(A), \
                                                 (__v8df)(B), \
                                                 -(__v8df)(C), \
                                                 (__mmask8)(U), \
                                                 (R)); })
   1929 
   1930 
   /* "maskz" form built on the vfmaddsubpd512 "maskz" builtin: C is negated
    * before it is forwarded; U is the mask and R the rounding operand. */
   #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(A), \
                                                  (__v8df)(B), \
                                                  -(__v8df)(C), \
                                                  (__mmask8)(U), \
                                                  (R)); })
   1935 
   1936 
   1937 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1938 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
   1939 {
   1940   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   1941                                                        (__v8df) __B,
   1942                                                        (__v8df) __C,
   1943                                                        (__mmask8) -1,
   1944                                                        _MM_FROUND_CUR_DIRECTION);
   1945 }
   1946 
   1947 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1948 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   1949 {
   1950   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   1951                                                        (__v8df) __B,
   1952                                                        (__v8df) __C,
   1953                                                        (__mmask8) __U,
   1954                                                        _MM_FROUND_CUR_DIRECTION);
   1955 }
   1956 
   1957 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1958 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   1959 {
   1960   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
   1961                                                         (__v8df) __B,
   1962                                                         (__v8df) __C,
   1963                                                         (__mmask8) __U,
   1964                                                         _MM_FROUND_CUR_DIRECTION);
   1965 }
   1966 
   1967 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1968 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   1969 {
   1970   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
   1971                                                         (__v8df) __B,
   1972                                                         (__v8df) __C,
   1973                                                         (__mmask8) __U,
   1974                                                         _MM_FROUND_CUR_DIRECTION);
   1975 }
   1976 
   1977 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1978 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
   1979 {
   1980   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   1981                                                        (__v8df) __B,
   1982                                                        -(__v8df) __C,
   1983                                                        (__mmask8) -1,
   1984                                                        _MM_FROUND_CUR_DIRECTION);
   1985 }
   1986 
   1987 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1988 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   1989 {
   1990   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   1991                                                        (__v8df) __B,
   1992                                                        -(__v8df) __C,
   1993                                                        (__mmask8) __U,
   1994                                                        _MM_FROUND_CUR_DIRECTION);
   1995 }
   1996 
   1997 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1998 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   1999 {
   2000   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
   2001                                                         (__v8df) __B,
   2002                                                         -(__v8df) __C,
   2003                                                         (__mmask8) __U,
   2004                                                         _MM_FROUND_CUR_DIRECTION);
   2005 }
   2006 
   /* Unmasked form: hands A, B and C to the vfmaddsubps512 "mask" builtin with
    * an all-ones mask and R as the rounding operand.
    * NOTE(review): the name suggests add and subtract alternate per lane --
    * confirm against the builtin's definition. */
   #define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(A), \
                                                (__v16sf)(B), \
                                                (__v16sf)(C), \
                                                (__mmask16)-1, \
                                                (R)); })
   2011 
   2012 
   /* Masked form: same vfmaddsubps512 "mask" builtin call on A, B and C, with
    * U as the mask and R as the rounding operand. */
   #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(A), \
                                                (__v16sf)(B), \
                                                (__v16sf)(C), \
                                                (__mmask16)(U), \
                                                (R)); })
   2017 
   2018 
   /* "mask3" form: forwards A, B and C to the vfmaddsubps512 "mask3" builtin
    * with U as the mask and R as the rounding operand. */
   #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(A), \
                                                 (__v16sf)(B), \
                                                 (__v16sf)(C), \
                                                 (__mmask16)(U), \
                                                 (R)); })
   2023 
   2024 
   /* "maskz" form: forwards A, B and C to the vfmaddsubps512 "maskz" builtin
    * with U as the mask and R as the rounding operand. */
   #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(A), \
                                                 (__v16sf)(B), \
                                                 (__v16sf)(C), \
                                                 (__mmask16)(U), \
                                                 (R)); })
   2029 
   2030 
   /* Unmasked form built on the vfmaddsubps512 "mask" builtin: C is negated
    * before it is forwarded, A and B are unchanged; all-ones mask, rounding
    * operand R. */
   #define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(A), \
                                                (__v16sf)(B), \
                                                -(__v16sf)(C), \
                                                (__mmask16)-1, \
                                                (R)); })
   2035 
   2036 
   /* Masked form built on the vfmaddsubps512 "mask" builtin: C is negated
    * before it is forwarded; U is the mask and R the rounding operand. */
   #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(A), \
                                                (__v16sf)(B), \
                                                -(__v16sf)(C), \
                                                (__mmask16)(U), \
                                                (R)); })
   2041 
   2042 
   /* "maskz" form built on the vfmaddsubps512 "maskz" builtin: C is negated
    * before it is forwarded; U is the mask and R the rounding operand. */
   #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(A), \
                                                 (__v16sf)(B), \
                                                 -(__v16sf)(C), \
                                                 (__mmask16)(U), \
                                                 (R)); })
   2047 
   2048 
   2049 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2050 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
   2051 {
   2052   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   2053                                                       (__v16sf) __B,
   2054                                                       (__v16sf) __C,
   2055                                                       (__mmask16) -1,
   2056                                                       _MM_FROUND_CUR_DIRECTION);
   2057 }
   2058 
   2059 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2060 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   2061 {
   2062   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   2063                                                       (__v16sf) __B,
   2064                                                       (__v16sf) __C,
   2065                                                       (__mmask16) __U,
   2066                                                       _MM_FROUND_CUR_DIRECTION);
   2067 }
   2068 
   2069 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2070 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   2071 {
   2072   return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
   2073                                                        (__v16sf) __B,
   2074                                                        (__v16sf) __C,
   2075                                                        (__mmask16) __U,
   2076                                                        _MM_FROUND_CUR_DIRECTION);
   2077 }
   2078 
   2079 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2080 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   2081 {
   2082   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
   2083                                                        (__v16sf) __B,
   2084                                                        (__v16sf) __C,
   2085                                                        (__mmask16) __U,
   2086                                                        _MM_FROUND_CUR_DIRECTION);
   2087 }
   2088 
   2089 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2090 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
   2091 {
   2092   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   2093                                                       (__v16sf) __B,
   2094                                                       -(__v16sf) __C,
   2095                                                       (__mmask16) -1,
   2096                                                       _MM_FROUND_CUR_DIRECTION);
   2097 }
   2098 
   2099 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2100 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   2101 {
   2102   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   2103                                                       (__v16sf) __B,
   2104                                                       -(__v16sf) __C,
   2105                                                       (__mmask16) __U,
   2106                                                       _MM_FROUND_CUR_DIRECTION);
   2107 }
   2108 
   2109 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2110 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   2111 {
   2112   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
   2113                                                        (__v16sf) __B,
   2114                                                        -(__v16sf) __C,
   2115                                                        (__mmask16) __U,
   2116                                                        _MM_FROUND_CUR_DIRECTION);
   2117 }
   2118 
   2119 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
   2120   (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) (A), \
   2121                                               (__v8df) (B), (__v8df) (C), \
   2122                                               (__mmask8) (U), (R)); })
   2123 
   2124 
   2125 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2126 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   2127 {
   2128   return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
   2129                                                      (__v8df) __B,
   2130                                                      (__v8df) __C,
   2131                                                      (__mmask8) __U,
   2132                                                      _MM_FROUND_CUR_DIRECTION);
   2133 }
   2134 
   2135 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
   2136   (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) (A), \
   2137                                              (__v16sf) (B), (__v16sf) (C), \
   2138                                              (__mmask16) (U), (R)); })
   2139 
   2140 
   2141 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2142 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   2143 {
   2144   return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
   2145                                                     (__v16sf) __B,
   2146                                                     (__v16sf) __C,
   2147                                                     (__mmask16) __U,
   2148                                                     _MM_FROUND_CUR_DIRECTION);
   2149 }
   2150 
   2151 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
   2152   (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) (A), \
   2153                                                  (__v8df) (B), (__v8df) (C), \
   2154                                                  (__mmask8) (U), (R)); })
   2155 
   2156 
   2157 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2158 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   2159 {
   2160   return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
   2161                                                         (__v8df) __B,
   2162                                                         (__v8df) __C,
   2163                                                         (__mmask8) __U,
   2164                                                         _MM_FROUND_CUR_DIRECTION);
   2165 }
   2166 
   2167 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
   2168   (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) (A), \
   2169                                                 (__v16sf) (B), (__v16sf) (C), \
   2170                                                 (__mmask16) (U), (R)); })
   2171 
   2172 
   2173 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2174 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   2175 {
   2176   return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
   2177                                                        (__v16sf) __B,
   2178                                                        (__v16sf) __C,
   2179                                                        (__mmask16) __U,
   2180                                                        _MM_FROUND_CUR_DIRECTION);
   2181 }
   2182 
   2183 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
   2184   (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) (A), \
   2185                                               (__v8df) (B), (__v8df) (C), \
   2186                                               (__mmask8) (U), (R)); })
   2187 
   2188 
   2189 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2190 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   2191 {
   2192   return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
   2193                                                      (__v8df) __B,
   2194                                                      (__v8df) __C,
   2195                                                      (__mmask8) __U,
   2196                                                      _MM_FROUND_CUR_DIRECTION);
   2197 }
   2198 
   2199 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
   2200   (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) (A), \
   2201                                              (__v16sf) (B), (__v16sf) (C), \
   2202                                              (__mmask16) (U), (R)); })
   2203 
   2204 
   2205 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2206 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   2207 {
   2208   return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
   2209                                                     (__v16sf) __B,
   2210                                                     (__v16sf) __C,
   2211                                                     (__mmask16) __U,
   2212                                                     _MM_FROUND_CUR_DIRECTION);
   2213 }
   2214 
   2215 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
   2216   (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) (A), \
   2217                                               (__v8df) (B), (__v8df) (C), \
   2218                                               (__mmask8) (U), (R)); })
   2219 
   2220 
   2221 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
   2222   (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) (A), \
   2223                                                (__v8df) (B), (__v8df) (C), \
   2224                                                (__mmask8) (U), (R)); })
   2225 
   2226 
   2227 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2228 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   2229 {
   2230   return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
   2231                                                      (__v8df) __B,
   2232                                                      (__v8df) __C,
   2233                                                      (__mmask8) __U,
   2234                                                      _MM_FROUND_CUR_DIRECTION);
   2235 }
   2236 
   2237 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2238 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   2239 {
   2240   return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
   2241                                                       (__v8df) __B,
   2242                                                       (__v8df) __C,
   2243                                                       (__mmask8) __U,
   2244                                                       _MM_FROUND_CUR_DIRECTION);
   2245 }
   2246 
   2247 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
   2248   (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) (A), \
   2249                                              (__v16sf) (B), (__v16sf) (C), \
   2250                                              (__mmask16) (U), (R)); })
   2251 
   2252 
   2253 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
   2254   (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) (A), \
   2255                                               (__v16sf) (B), (__v16sf) (C), \
   2256                                               (__mmask16) (U), (R)); })
   2257 
   2258 
   2259 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2260 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   2261 {
   2262   return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
   2263                                                     (__v16sf) __B,
   2264                                                     (__v16sf) __C,
   2265                                                     (__mmask16) __U,
   2266                                                     _MM_FROUND_CUR_DIRECTION);
   2267 }
   2268 
   2269 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2270 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   2271 {
   2272   return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
   2273                                                      (__v16sf) __B,
   2274                                                      (__v16sf) __C,
   2275                                                      (__mmask16) __U,
   2276                                                      _MM_FROUND_CUR_DIRECTION);
   2277 }
   2278 
   2279 
   2280 
   2281 /* Vector permutations */
   2282 
   2283 static __inline __m512i __DEFAULT_FN_ATTRS
   2284 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
   2285 {
   2286   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
   2287                                                        /* idx */ ,
   2288                                                        (__v16si) __A,
   2289                                                        (__v16si) __B,
   2290                                                        (__mmask16) -1);
   2291 }
   2292 static __inline __m512i __DEFAULT_FN_ATTRS
   2293 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
   2294 {
   2295   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
   2296                                                        /* idx */ ,
   2297                                                        (__v8di) __A,
   2298                                                        (__v8di) __B,
   2299                                                        (__mmask8) -1);
   2300 }
   2301 
   2302 static __inline __m512d __DEFAULT_FN_ATTRS
   2303 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
   2304 {
   2305   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
   2306                                                         /* idx */ ,
   2307                                                         (__v8df) __A,
   2308                                                         (__v8df) __B,
   2309                                                         (__mmask8) -1);
   2310 }
   2311 static __inline __m512 __DEFAULT_FN_ATTRS
   2312 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
   2313 {
   2314   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
   2315                                                        /* idx */ ,
   2316                                                        (__v16sf) __A,
   2317                                                        (__v16sf) __B,
   2318                                                        (__mmask16) -1);
   2319 }
   2320 
   2321 #define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
   2322   (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
   2323                                          (__v8di)(__m512i)(B), \
   2324                                          (I), (__v8di)_mm512_setzero_si512(), \
   2325                                          (__mmask8)-1); })
   2326 
   2327 #define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
   2328   (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
   2329                                          (__v16si)(__m512i)(B), \
   2330                                          (I), (__v16si)_mm512_setzero_si512(), \
   2331                                          (__mmask16)-1); })
   2332 
   2333 /* Vector Extract */
   2334 
   2335 #define _mm512_extractf64x4_pd(A, I) __extension__ ({                    \
   2336       (__m256d)                                                          \
   2337         __builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A),           \
   2338                                          (I),                            \
   2339                                          (__v4df)_mm256_setzero_si256(), \
   2340                                          (__mmask8) -1); })
   2341 
   2342 #define _mm512_extractf32x4_ps(A, I) __extension__ ({                    \
   2343       (__m128)                                                           \
   2344         __builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A),           \
   2345                                          (I),                            \
   2346                                          (__v4sf)_mm_setzero_ps(),       \
   2347                                          (__mmask8) -1); })
   2348 
   2349 /* Vector Blend */
   2350 
   2351 static __inline __m512d __DEFAULT_FN_ATTRS
   2352 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
   2353 {
   2354   return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
   2355                  (__v8df) __W,
   2356                  (__mmask8) __U);
   2357 }
   2358 
   2359 static __inline __m512 __DEFAULT_FN_ATTRS
   2360 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
   2361 {
   2362   return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
   2363                 (__v16sf) __W,
   2364                 (__mmask16) __U);
   2365 }
   2366 
   2367 static __inline __m512i __DEFAULT_FN_ATTRS
   2368 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
   2369 {
   2370   return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
   2371                 (__v8di) __W,
   2372                 (__mmask8) __U);
   2373 }
   2374 
   2375 static __inline __m512i __DEFAULT_FN_ATTRS
   2376 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
   2377 {
   2378   return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
   2379                 (__v16si) __W,
   2380                 (__mmask16) __U);
   2381 }
   2382 
   2383 /* Compare */
   2384 
   2385 #define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
   2386   (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
   2387                                           (__v16sf)(__m512)(B), \
   2388                                           (P), (__mmask16)-1, (R)); })
   2389 
   2390 #define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
   2391   (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
   2392                                           (__v16sf)(__m512)(B), \
   2393                                           (P), (__mmask16)(U), (R)); })
   2394 
   2395 #define _mm512_cmp_ps_mask(A, B, P) \
   2396   _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
   2397 
   2398 #define _mm512_mask_cmp_ps_mask(U, A, B, P) \
   2399   _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
   2400 
   2401 #define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
   2402   (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
   2403                                          (__v8df)(__m512d)(B), \
   2404                                          (P), (__mmask8)-1, (R)); })
   2405 
   2406 #define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
   2407   (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
   2408                                          (__v8df)(__m512d)(B), \
   2409                                          (P), (__mmask8)(U), (R)); })
   2410 
   2411 #define _mm512_cmp_pd_mask(A, B, P) \
   2412   _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
   2413 
   2414 #define _mm512_mask_cmp_pd_mask(U, A, B, P) \
   2415   _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
   2416 
   2417 /* Conversion */
   2418 
   2419 static __inline __m512i __DEFAULT_FN_ATTRS
   2420 _mm512_cvttps_epu32(__m512 __A)
   2421 {
   2422   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   2423                   (__v16si)
   2424                   _mm512_setzero_si512 (),
   2425                   (__mmask16) -1,
   2426                   _MM_FROUND_CUR_DIRECTION);
   2427 }
   2428 
   2429 #define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
   2430   (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), \
   2431                                           (__v16sf)_mm512_setzero_ps(), \
   2432                                           (__mmask16)-1, (R)); })
   2433 
   2434 #define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
   2435   (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), \
   2436                                            (__v16sf)_mm512_setzero_ps(), \
   2437                                            (__mmask16)-1, (R)); })
   2438 
   2439 static __inline __m512d __DEFAULT_FN_ATTRS
   2440 _mm512_cvtepi32_pd(__m256i __A)
   2441 {
   2442   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
   2443                 (__v8df)
   2444                 _mm512_setzero_pd (),
   2445                 (__mmask8) -1);
   2446 }
   2447 
   2448 static __inline __m512d __DEFAULT_FN_ATTRS
   2449 _mm512_cvtepu32_pd(__m256i __A)
   2450 {
   2451   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
   2452                 (__v8df)
   2453                 _mm512_setzero_pd (),
   2454                 (__mmask8) -1);
   2455 }
   2456 
   2457 #define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
   2458   (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(A), \
   2459                                           (__v8sf)_mm256_setzero_ps(), \
   2460                                           (__mmask8)-1, (R)); })
   2461 
   2462 #define _mm512_cvtps_ph(A, I) __extension__ ({ \
   2463   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(A), (I), \
   2464                                             (__v16hi)_mm256_setzero_si256(), \
   2465                                             -1); })
   2466 
   2467 static  __inline __m512 __DEFAULT_FN_ATTRS
   2468 _mm512_cvtph_ps(__m256i __A)
   2469 {
   2470   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
   2471                 (__v16sf)
   2472                 _mm512_setzero_ps (),
   2473                 (__mmask16) -1,
   2474                 _MM_FROUND_CUR_DIRECTION);
   2475 }
   2476 
   2477 static __inline __m512i __DEFAULT_FN_ATTRS
   2478 _mm512_cvttps_epi32(__m512 __a)
   2479 {
   2480   return (__m512i)
   2481     __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
   2482                                      (__v16si) _mm512_setzero_si512 (),
   2483                                      (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
   2484 }
   2485 
   2486 static __inline __m256i __DEFAULT_FN_ATTRS
   2487 _mm512_cvttpd_epi32(__m512d __a)
   2488 {
   2489   return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
   2490                                                    (__v8si)_mm256_setzero_si256(),
   2491                                                    (__mmask8) -1,
   2492                                                     _MM_FROUND_CUR_DIRECTION);
   2493 }
   2494 
   2495 #define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
   2496   (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(A), \
   2497                                             (__v8si)_mm256_setzero_si256(), \
   2498                                             (__mmask8)-1, (R)); })
   2499 
   2500 #define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
   2501   (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(A), \
   2502                                             (__v16si)_mm512_setzero_si512(), \
   2503                                             (__mmask16)-1, (R)); })
   2504 
   2505 #define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
   2506   (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(A), \
   2507                                            (__v16si)_mm512_setzero_si512(), \
   2508                                            (__mmask16)-1, (R)); })
   2509 
   2510 #define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
   2511   (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(A), \
   2512                                            (__v8si)_mm256_setzero_si256(), \
   2513                                            (__mmask8)-1, (R)); })
   2514 
   2515 #define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
   2516   (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(A), \
   2517                                             (__v16si)_mm512_setzero_si512(), \
   2518                                             (__mmask16)-1, (R)); })
   2519 
   2520 #define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
   2521   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(A), \
   2522                                             (__v8si)_mm256_setzero_si256(), \
   2523                                             (__mmask8) -1, (R)); })
   2524 
   2525 /* Unpack and Interleave */
   2526 static __inline __m512d __DEFAULT_FN_ATTRS
   2527 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
   2528 {
   2529   return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
   2530 }
   2531 
   2532 static __inline __m512d __DEFAULT_FN_ATTRS
   2533 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
   2534 {
   2535   return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
   2536 }
   2537 
   2538 static __inline __m512 __DEFAULT_FN_ATTRS
   2539 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
   2540 {
   2541   return __builtin_shufflevector(__a, __b,
   2542                                  2,    18,    3,    19,
   2543                                  2+4,  18+4,  3+4,  19+4,
   2544                                  2+8,  18+8,  3+8,  19+8,
   2545                                  2+12, 18+12, 3+12, 19+12);
   2546 }
   2547 
   2548 static __inline __m512 __DEFAULT_FN_ATTRS
   2549 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
   2550 {
   2551   return __builtin_shufflevector(__a, __b,
   2552                                  0,    16,    1,    17,
   2553                                  0+4,  16+4,  1+4,  17+4,
   2554                                  0+8,  16+8,  1+8,  17+8,
   2555                                  0+12, 16+12, 1+12, 17+12);
   2556 }
   2557 
   2558 /* Bit Test */
   2559 
   2560 static __inline __mmask16 __DEFAULT_FN_ATTRS
   2561 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
   2562 {
   2563   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
   2564             (__v16si) __B,
   2565             (__mmask16) -1);
   2566 }
   2567 
   2568 static __inline __mmask8 __DEFAULT_FN_ATTRS
   2569 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
   2570 {
   2571   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
   2572                  (__v8di) __B,
   2573                  (__mmask8) -1);
   2574 }
   2575 
   2576 /* SIMD load ops */
   2577 
   2578 static __inline __m512i __DEFAULT_FN_ATTRS
   2579 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
   2580 {
   2581   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
   2582                                                      (__v16si)
   2583                                                      _mm512_setzero_si512 (),
   2584                                                      (__mmask16) __U);
   2585 }
   2586 
   2587 static __inline __m512i __DEFAULT_FN_ATTRS
   2588 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
   2589 {
   2590   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
   2591                                                      (__v8di)
   2592                                                      _mm512_setzero_si512 (),
   2593                                                      (__mmask8) __U);
   2594 }
   2595 
   2596 static __inline __m512 __DEFAULT_FN_ATTRS
   2597 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
   2598 {
   2599   return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
   2600                                                   (__v16sf)
   2601                                                   _mm512_setzero_ps (),
   2602                                                   (__mmask16) __U);
   2603 }
   2604 
   2605 static __inline __m512d __DEFAULT_FN_ATTRS
   2606 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
   2607 {
   2608   return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
   2609                                                    (__v8df)
   2610                                                    _mm512_setzero_pd (),
   2611                                                    (__mmask8) __U);
   2612 }
   2613 
   2614 static __inline __m512 __DEFAULT_FN_ATTRS
   2615 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
   2616 {
   2617   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
   2618                                                   (__v16sf)
   2619                                                   _mm512_setzero_ps (),
   2620                                                   (__mmask16) __U);
   2621 }
   2622 
   2623 static __inline __m512d __DEFAULT_FN_ATTRS
   2624 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
   2625 {
   2626   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
   2627                                                    (__v8df)
   2628                                                    _mm512_setzero_pd (),
   2629                                                    (__mmask8) __U);
   2630 }
   2631 
   2632 static __inline __m512d __DEFAULT_FN_ATTRS
   2633 _mm512_loadu_pd(double const *__p)
   2634 {
   2635   struct __loadu_pd {
   2636     __m512d __v;
   2637   } __attribute__((__packed__, __may_alias__));
   2638   return ((struct __loadu_pd*)__p)->__v;
   2639 }
   2640 
   2641 static __inline __m512 __DEFAULT_FN_ATTRS
   2642 _mm512_loadu_ps(float const *__p)
   2643 {
   2644   struct __loadu_ps {
   2645     __m512 __v;
   2646   } __attribute__((__packed__, __may_alias__));
   2647   return ((struct __loadu_ps*)__p)->__v;
   2648 }
   2649 
   2650 static __inline __m512 __DEFAULT_FN_ATTRS
   2651 _mm512_load_ps(double const *__p)
   2652 {
   2653   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
   2654                                                   (__v16sf)
   2655                                                   _mm512_setzero_ps (),
   2656                                                   (__mmask16) -1);
   2657 }
   2658 
   2659 static __inline __m512d __DEFAULT_FN_ATTRS
   2660 _mm512_load_pd(float const *__p)
   2661 {
   2662   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
   2663                                                    (__v8df)
   2664                                                    _mm512_setzero_pd (),
   2665                                                    (__mmask8) -1);
   2666 }
   2667 
   2668 /* SIMD store ops */
   2669 
   2670 static __inline void __DEFAULT_FN_ATTRS
   2671 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
   2672 {
   2673   __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
   2674                                      (__mmask8) __U);
   2675 }
   2676 
   2677 static __inline void __DEFAULT_FN_ATTRS
   2678 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
   2679 {
   2680   __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
   2681                                      (__mmask16) __U);
   2682 }
   2683 
   2684 static __inline void __DEFAULT_FN_ATTRS
   2685 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
   2686 {
   2687   __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
   2688 }
   2689 
   2690 static __inline void __DEFAULT_FN_ATTRS
   2691 _mm512_storeu_pd(void *__P, __m512d __A)
   2692 {
   2693   __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
   2694 }
   2695 
   2696 static __inline void __DEFAULT_FN_ATTRS
   2697 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
   2698 {
   2699   __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
   2700                                    (__mmask16) __U);
   2701 }
   2702 
   2703 static __inline void __DEFAULT_FN_ATTRS
   2704 _mm512_storeu_ps(void *__P, __m512 __A)
   2705 {
   2706   __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
   2707 }
   2708 
   2709 static __inline void __DEFAULT_FN_ATTRS
   2710 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
   2711 {
   2712   __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
   2713 }
   2714 
   2715 static __inline void __DEFAULT_FN_ATTRS
   2716 _mm512_store_pd(void *__P, __m512d __A)
   2717 {
   2718   *(__m512d*)__P = __A;
   2719 }
   2720 
   2721 static __inline void __DEFAULT_FN_ATTRS
   2722 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
   2723 {
   2724   __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
   2725                                    (__mmask16) __U);
   2726 }
   2727 
   2728 static __inline void __DEFAULT_FN_ATTRS
   2729 _mm512_store_ps(void *__P, __m512 __A)
   2730 {
   2731   *(__m512*)__P = __A;
   2732 }
   2733 
   2734 /* Mask ops */
   2735 
   2736 static __inline __mmask16 __DEFAULT_FN_ATTRS
   2737 _mm512_knot(__mmask16 __M)
   2738 {
   2739   return __builtin_ia32_knothi(__M);
   2740 }
   2741 
   2742 /* Integer compare */
   2743 
   2744 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2745 _mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
   2746   return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
   2747                                                    (__mmask16)-1);
   2748 }
   2749 
   2750 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2751 _mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   2752   return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
   2753                                                    __u);
   2754 }
   2755 
   2756 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2757 _mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
   2758   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
   2759                                                  (__mmask16)-1);
   2760 }
   2761 
   2762 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2763 _mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   2764   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
   2765                                                  __u);
   2766 }
   2767 
   2768 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2769 _mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   2770   return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
   2771                                                   __u);
   2772 }
   2773 
   2774 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2775 _mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
   2776   return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
   2777                                                   (__mmask8)-1);
   2778 }
   2779 
   2780 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2781 _mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
   2782   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
   2783                                                 (__mmask8)-1);
   2784 }
   2785 
   2786 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2787 _mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   2788   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
   2789                                                 __u);
   2790 }
   2791 
   2792 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2793 _mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
   2794   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
   2795                                                 (__mmask16)-1);
   2796 }
   2797 
   2798 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2799 _mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   2800   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
   2801                                                 __u);
   2802 }
   2803 
   2804 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2805 _mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
   2806   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
   2807                                                  (__mmask16)-1);
   2808 }
   2809 
   2810 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2811 _mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   2812   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
   2813                                                  __u);
   2814 }
   2815 
   2816 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2817 _mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
   2818   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
   2819                                                (__mmask8)-1);
   2820 }
   2821 
   2822 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2823 _mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   2824   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
   2825                                                __u);
   2826 }
   2827 
   2828 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2829 _mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
   2830   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
   2831                                                 (__mmask8)-1);
   2832 }
   2833 
   2834 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2835 _mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   2836   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
   2837                                                 __u);
   2838 }
   2839 
   2840 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2841 _mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
   2842   return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
   2843                                                    (__mmask16)-1);
   2844 }
   2845 
   2846 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2847 _mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   2848   return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
   2849                                                    __u);
   2850 }
   2851 
   2852 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2853 _mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
   2854   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
   2855                                                  (__mmask16)-1);
   2856 }
   2857 
   2858 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2859 _mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   2860   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
   2861                                                  __u);
   2862 }
   2863 
   2864 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2865 _mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   2866   return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
   2867                                                   __u);
   2868 }
   2869 
   2870 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2871 _mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
   2872   return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
   2873                                                   (__mmask8)-1);
   2874 }
   2875 
   2876 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2877 _mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
   2878   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
   2879                                                 (__mmask8)-1);
   2880 }
   2881 
   2882 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2883 _mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   2884   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
   2885                                                 __u);
   2886 }
   2887 
   2888 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2889 _mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
   2890   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
   2891                                                 (__mmask16)-1);
   2892 }
   2893 
   2894 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2895 _mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   2896   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
   2897                                                 __u);
   2898 }
   2899 
   2900 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2901 _mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
   2902   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
   2903                                                  (__mmask16)-1);
   2904 }
   2905 
   2906 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2907 _mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   2908   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
   2909                                                  __u);
   2910 }
   2911 
   2912 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2913 _mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
   2914   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
   2915                                                (__mmask8)-1);
   2916 }
   2917 
   2918 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2919 _mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   2920   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
   2921                                                __u);
   2922 }
   2923 
   2924 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2925 _mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
   2926   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
   2927                                                 (__mmask8)-1);
   2928 }
   2929 
   2930 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2931 _mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   2932   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
   2933                                                 __u);
   2934 }
   2935 
   2936 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2937 _mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
   2938   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
   2939                                                 (__mmask16)-1);
   2940 }
   2941 
   2942 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2943 _mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   2944   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
   2945                                                 __u);
   2946 }
   2947 
   2948 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2949 _mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
   2950   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
   2951                                                  (__mmask16)-1);
   2952 }
   2953 
   2954 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2955 _mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   2956   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
   2957                                                  __u);
   2958 }
   2959 
   2960 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2961 _mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
   2962   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
   2963                                                (__mmask8)-1);
   2964 }
   2965 
   2966 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2967 _mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   2968   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
   2969                                                __u);
   2970 }
   2971 
   2972 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2973 _mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
   2974   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
   2975                                                 (__mmask8)-1);
   2976 }
   2977 
   2978 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   2979 _mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   2980   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
   2981                                                 __u);
   2982 }
   2983 
   2984 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2985 _mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
   2986   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
   2987                                                 (__mmask16)-1);
   2988 }
   2989 
   2990 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2991 _mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   2992   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
   2993                                                 __u);
   2994 }
   2995 
   2996 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   2997 _mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
   2998   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
   2999                                                  (__mmask16)-1);
   3000 }
   3001 
   3002 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   3003 _mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   3004   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
   3005                                                  __u);
   3006 }
   3007 
   3008 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   3009 _mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
   3010   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
   3011                                                (__mmask8)-1);
   3012 }
   3013 
   3014 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   3015 _mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   3016   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
   3017                                                __u);
   3018 }
   3019 
   3020 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   3021 _mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
   3022   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
   3023                                                 (__mmask8)-1);
   3024 }
   3025 
   3026 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   3027 _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   3028   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
   3029                                                 __u);
   3030 }
   3031 
   3032 #define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
   3033   (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
   3034                                          (__v16si)(__m512i)(b), (p), \
   3035                                          (__mmask16)-1); })
   3036 
   3037 #define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
   3038   (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
   3039                                           (__v16si)(__m512i)(b), (p), \
   3040                                           (__mmask16)-1); })
   3041 
   3042 #define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
   3043   (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
   3044                                         (__v8di)(__m512i)(b), (p), \
   3045                                         (__mmask8)-1); })
   3046 
   3047 #define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
   3048   (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
   3049                                          (__v8di)(__m512i)(b), (p), \
   3050                                          (__mmask8)-1); })
   3051 
   3052 #define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
   3053   (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
   3054                                          (__v16si)(__m512i)(b), (p), \
   3055                                          (__mmask16)(m)); })
   3056 
   3057 #define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
   3058   (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
   3059                                           (__v16si)(__m512i)(b), (p), \
   3060                                           (__mmask16)(m)); })
   3061 
   3062 #define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
   3063   (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
   3064                                         (__v8di)(__m512i)(b), (p), \
   3065                                         (__mmask8)(m)); })
   3066 
   3067 #define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
   3068   (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
   3069                                          (__v8di)(__m512i)(b), (p), \
   3070                                          (__mmask8)(m)); })
   3071 
   3072 #undef __DEFAULT_FN_ATTRS
   3073 
   3074 #endif // __AVX512FINTRIN_H
   3075