Home | History | Annotate | Download | only in Headers
      1 /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 #ifndef __IMMINTRIN_H
     24 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
     25 #endif
     26 
     27 #ifndef __AVX512FINTRIN_H
     28 #define __AVX512FINTRIN_H
     29 
     30 typedef char __v64qi __attribute__((__vector_size__(64)));
     31 typedef short __v32hi __attribute__((__vector_size__(64)));
     32 typedef double __v8df __attribute__((__vector_size__(64)));
     33 typedef float __v16sf __attribute__((__vector_size__(64)));
     34 typedef long long __v8di __attribute__((__vector_size__(64)));
     35 typedef int __v16si __attribute__((__vector_size__(64)));
     36 
     37 /* Unsigned types */
     38 typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
     39 typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
     40 typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
     41 typedef unsigned int __v16su __attribute__((__vector_size__(64)));
     42 
     43 typedef float __m512 __attribute__((__vector_size__(64)));
     44 typedef double __m512d __attribute__((__vector_size__(64)));
     45 typedef long long __m512i __attribute__((__vector_size__(64)));
     46 
     47 typedef unsigned char __mmask8;
     48 typedef unsigned short __mmask16;
     49 
     50 /* Rounding mode macros.  */
     51 #define _MM_FROUND_TO_NEAREST_INT   0x00
     52 #define _MM_FROUND_TO_NEG_INF       0x01
     53 #define _MM_FROUND_TO_POS_INF       0x02
     54 #define _MM_FROUND_TO_ZERO          0x03
     55 #define _MM_FROUND_CUR_DIRECTION    0x04
     56 
     57 typedef enum
     58 {
     59   _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
     60   _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
     61   _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
     62   _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
     63   _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
     64   _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
     65   _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
     66   _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
     67   _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
     68   _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
     69   _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
     70   _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
     71   _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
     72   _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
     73   _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
     74   _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
     75   _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
     76   _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
     77   _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
     78   _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
     79   _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
     80   _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
     81   _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
     82   _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
     83   _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
     84   _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
     85   _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
     86   _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
     87   _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
     88   _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
     89   _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
     90   _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
     91   _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
     92   _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
     93   _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
     94   _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
     95   _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
     96   _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
     97   _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
     98   _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
     99   _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
    100   _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
    101   _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
    102   _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
    103   _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
    104   _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
    105   _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
    106   _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
    107   _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
    108   _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
    109   _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
    110   _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
    111   _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
    112   _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
    113   _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
    114   _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
    115   _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
    116   _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
    117   _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
    118   _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
    119   _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
    120   _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
    121   _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
    122   _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
    123   _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
    124   _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
    125   _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
    126   _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
    127   _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
    128   _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
    129   _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
    130   _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
    131   _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
    132   _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
    133   _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
    134   _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
    135   _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
    136   _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
    137   _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
    138   _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
    139   _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
    140   _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
    141   _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
    142   _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
    143   _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
    144   _MM_PERM_DDDD = 0xFF
    145 } _MM_PERM_ENUM;
    146 
    147 typedef enum
    148 {
    149   _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
    150   _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
    151   _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
    152   _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
    153 } _MM_MANTISSA_NORM_ENUM;
    154 
    155 typedef enum
    156 {
    157   _MM_MANT_SIGN_src,    /* sign = sign(SRC)     */
    158   _MM_MANT_SIGN_zero,   /* sign = 0             */
    159   _MM_MANT_SIGN_nan   /* DEST = NaN if sign(SRC) = 1 */
    160 } _MM_MANTISSA_SIGN_ENUM;
    161 
    162 /* Define the default attributes for the functions in this file. */
    163 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
    164 
    165 /* Create vectors with repeated elements */
    166 
    167 static  __inline __m512i __DEFAULT_FN_ATTRS
    168 _mm512_setzero_si512(void)
    169 {
    170   return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
    171 }
    172 
    173 #define _mm512_setzero_epi32 _mm512_setzero_si512
    174 
    175 static __inline__ __m512d __DEFAULT_FN_ATTRS
    176 _mm512_undefined_pd(void)
    177 {
    178   return (__m512d)__builtin_ia32_undef512();
    179 }
    180 
    181 static __inline__ __m512 __DEFAULT_FN_ATTRS
    182 _mm512_undefined(void)
    183 {
    184   return (__m512)__builtin_ia32_undef512();
    185 }
    186 
    187 static __inline__ __m512 __DEFAULT_FN_ATTRS
    188 _mm512_undefined_ps(void)
    189 {
    190   return (__m512)__builtin_ia32_undef512();
    191 }
    192 
    193 static __inline__ __m512i __DEFAULT_FN_ATTRS
    194 _mm512_undefined_epi32(void)
    195 {
    196   return (__m512i)__builtin_ia32_undef512();
    197 }
    198 
    199 static __inline__ __m512i __DEFAULT_FN_ATTRS
    200 _mm512_broadcastd_epi32 (__m128i __A)
    201 {
    202   return (__m512i)__builtin_shufflevector((__v4si) __A,
    203                                           (__v4si)_mm_undefined_si128(),
    204                                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    205 }
    206 
    207 static __inline__ __m512i __DEFAULT_FN_ATTRS
    208 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
    209 {
    210   return (__m512i)__builtin_ia32_selectd_512(__M,
    211                                              (__v16si) _mm512_broadcastd_epi32(__A),
    212                                              (__v16si) __O);
    213 }
    214 
    215 static __inline__ __m512i __DEFAULT_FN_ATTRS
    216 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
    217 {
    218   return (__m512i)__builtin_ia32_selectd_512(__M,
    219                                              (__v16si) _mm512_broadcastd_epi32(__A),
    220                                              (__v16si) _mm512_setzero_si512());
    221 }
    222 
    223 static __inline__ __m512i __DEFAULT_FN_ATTRS
    224 _mm512_broadcastq_epi64 (__m128i __A)
    225 {
    226   return (__m512i)__builtin_shufflevector((__v2di) __A,
    227                                           (__v2di) _mm_undefined_si128(),
    228                                           0, 0, 0, 0, 0, 0, 0, 0);
    229 }
    230 
    231 static __inline__ __m512i __DEFAULT_FN_ATTRS
    232 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
    233 {
    234   return (__m512i)__builtin_ia32_selectq_512(__M,
    235                                              (__v8di) _mm512_broadcastq_epi64(__A),
    236                                              (__v8di) __O);
    237 
    238 }
    239 
    240 static __inline__ __m512i __DEFAULT_FN_ATTRS
    241 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
    242 {
    243   return (__m512i)__builtin_ia32_selectq_512(__M,
    244                                              (__v8di) _mm512_broadcastq_epi64(__A),
    245                                              (__v8di) _mm512_setzero_si512());
    246 }
    247 
    248 static __inline __m512i __DEFAULT_FN_ATTRS
    249 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
    250 {
    251   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
    252                  (__v16si)
    253                  _mm512_setzero_si512 (),
    254                  __M);
    255 }
    256 
    257 static __inline __m512i __DEFAULT_FN_ATTRS
    258 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
    259 {
    260 #ifdef __x86_64__
    261   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
    262                  (__v8di)
    263                  _mm512_setzero_si512 (),
    264                  __M);
    265 #else
    266   return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
    267                  (__v8di)
    268                  _mm512_setzero_si512 (),
    269                  __M);
    270 #endif
    271 }
    272 
    273 static __inline __m512 __DEFAULT_FN_ATTRS
    274 _mm512_setzero_ps(void)
    275 {
    276   return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    277                    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
    278 }
    279 
    280 #define _mm512_setzero _mm512_setzero_ps
    281 
    282 static  __inline __m512d __DEFAULT_FN_ATTRS
    283 _mm512_setzero_pd(void)
    284 {
    285   return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
    286 }
    287 
    288 static __inline __m512 __DEFAULT_FN_ATTRS
    289 _mm512_set1_ps(float __w)
    290 {
    291   return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
    292                    __w, __w, __w, __w, __w, __w, __w, __w  };
    293 }
    294 
    295 static __inline __m512d __DEFAULT_FN_ATTRS
    296 _mm512_set1_pd(double __w)
    297 {
    298   return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
    299 }
    300 
    301 static __inline __m512i __DEFAULT_FN_ATTRS
    302 _mm512_set1_epi8(char __w)
    303 {
    304   return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
    305                              __w, __w, __w, __w, __w, __w, __w, __w,
    306                              __w, __w, __w, __w, __w, __w, __w, __w,
    307                              __w, __w, __w, __w, __w, __w, __w, __w,
    308                              __w, __w, __w, __w, __w, __w, __w, __w,
    309                              __w, __w, __w, __w, __w, __w, __w, __w,
    310                              __w, __w, __w, __w, __w, __w, __w, __w,
    311                              __w, __w, __w, __w, __w, __w, __w, __w  };
    312 }
    313 
    314 static __inline __m512i __DEFAULT_FN_ATTRS
    315 _mm512_set1_epi16(short __w)
    316 {
    317   return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
    318                              __w, __w, __w, __w, __w, __w, __w, __w,
    319                              __w, __w, __w, __w, __w, __w, __w, __w,
    320                              __w, __w, __w, __w, __w, __w, __w, __w };
    321 }
    322 
    323 static __inline __m512i __DEFAULT_FN_ATTRS
    324 _mm512_set1_epi32(int __s)
    325 {
    326   return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
    327                              __s, __s, __s, __s, __s, __s, __s, __s };
    328 }
    329 
    330 static __inline __m512i __DEFAULT_FN_ATTRS
    331 _mm512_set1_epi64(long long __d)
    332 {
    333   return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
    334 }
    335 
    336 static __inline__ __m512 __DEFAULT_FN_ATTRS
    337 _mm512_broadcastss_ps(__m128 __A)
    338 {
    339   return (__m512)__builtin_shufflevector((__v4sf) __A,
    340                                          (__v4sf)_mm_undefined_ps(),
    341                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    342 }
    343 
    344 static __inline __m512i __DEFAULT_FN_ATTRS
    345 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
    346 {
    347   return  (__m512i)(__v16si)
    348    { __D, __C, __B, __A, __D, __C, __B, __A,
    349      __D, __C, __B, __A, __D, __C, __B, __A };
    350 }
    351 
    352 static __inline __m512i __DEFAULT_FN_ATTRS
    353 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
    354        long long __D)
    355 {
    356   return  (__m512i) (__v8di)
    357    { __D, __C, __B, __A, __D, __C, __B, __A };
    358 }
    359 
    360 static __inline __m512d __DEFAULT_FN_ATTRS
    361 _mm512_set4_pd (double __A, double __B, double __C, double __D)
    362 {
    363   return  (__m512d)
    364    { __D, __C, __B, __A, __D, __C, __B, __A };
    365 }
    366 
    367 static __inline __m512 __DEFAULT_FN_ATTRS
    368 _mm512_set4_ps (float __A, float __B, float __C, float __D)
    369 {
    370   return  (__m512)
    371    { __D, __C, __B, __A, __D, __C, __B, __A,
    372      __D, __C, __B, __A, __D, __C, __B, __A };
    373 }
    374 
    375 #define _mm512_setr4_epi32(e0,e1,e2,e3)               \
    376   _mm512_set4_epi32((e3),(e2),(e1),(e0))
    377 
    378 #define _mm512_setr4_epi64(e0,e1,e2,e3)               \
    379   _mm512_set4_epi64((e3),(e2),(e1),(e0))
    380 
    381 #define _mm512_setr4_pd(e0,e1,e2,e3)                \
    382   _mm512_set4_pd((e3),(e2),(e1),(e0))
    383 
    384 #define _mm512_setr4_ps(e0,e1,e2,e3)                \
    385   _mm512_set4_ps((e3),(e2),(e1),(e0))
    386 
    387 static __inline__ __m512d __DEFAULT_FN_ATTRS
    388 _mm512_broadcastsd_pd(__m128d __A)
    389 {
    390   return (__m512d)__builtin_shufflevector((__v2df) __A,
    391                                           (__v2df) _mm_undefined_pd(),
    392                                           0, 0, 0, 0, 0, 0, 0, 0);
    393 }
    394 
    395 /* Cast between vector types */
    396 
    397 static __inline __m512d __DEFAULT_FN_ATTRS
    398 _mm512_castpd256_pd512(__m256d __a)
    399 {
    400   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
    401 }
    402 
    403 static __inline __m512 __DEFAULT_FN_ATTRS
    404 _mm512_castps256_ps512(__m256 __a)
    405 {
    406   return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
    407                                           -1, -1, -1, -1, -1, -1, -1, -1);
    408 }
    409 
    410 static __inline __m128d __DEFAULT_FN_ATTRS
    411 _mm512_castpd512_pd128(__m512d __a)
    412 {
    413   return __builtin_shufflevector(__a, __a, 0, 1);
    414 }
    415 
    416 static __inline __m256d __DEFAULT_FN_ATTRS
    417 _mm512_castpd512_pd256 (__m512d __A)
    418 {
    419   return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
    420 }
    421 
    422 static __inline __m128 __DEFAULT_FN_ATTRS
    423 _mm512_castps512_ps128(__m512 __a)
    424 {
    425   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
    426 }
    427 
    428 static __inline __m256 __DEFAULT_FN_ATTRS
    429 _mm512_castps512_ps256 (__m512 __A)
    430 {
    431   return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
    432 }
    433 
    434 static __inline __m512 __DEFAULT_FN_ATTRS
    435 _mm512_castpd_ps (__m512d __A)
    436 {
    437   return (__m512) (__A);
    438 }
    439 
    440 static __inline __m512i __DEFAULT_FN_ATTRS
    441 _mm512_castpd_si512 (__m512d __A)
    442 {
    443   return (__m512i) (__A);
    444 }
    445 
    446 static __inline__ __m512d __DEFAULT_FN_ATTRS
    447 _mm512_castpd128_pd512 (__m128d __A)
    448 {
    449   return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
    450 }
    451 
    452 static __inline __m512d __DEFAULT_FN_ATTRS
    453 _mm512_castps_pd (__m512 __A)
    454 {
    455   return (__m512d) (__A);
    456 }
    457 
    458 static __inline __m512i __DEFAULT_FN_ATTRS
    459 _mm512_castps_si512 (__m512 __A)
    460 {
    461   return (__m512i) (__A);
    462 }
    463 
    464 static __inline__ __m512 __DEFAULT_FN_ATTRS
    465 _mm512_castps128_ps512 (__m128 __A)
    466 {
    467     return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
    468 }
    469 
    470 static __inline__ __m512i __DEFAULT_FN_ATTRS
    471 _mm512_castsi128_si512 (__m128i __A)
    472 {
    473    return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
    474 }
    475 
    476 static __inline__ __m512i __DEFAULT_FN_ATTRS
    477 _mm512_castsi256_si512 (__m256i __A)
    478 {
    479    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
    480 }
    481 
    482 static __inline __m512 __DEFAULT_FN_ATTRS
    483 _mm512_castsi512_ps (__m512i __A)
    484 {
    485   return (__m512) (__A);
    486 }
    487 
    488 static __inline __m512d __DEFAULT_FN_ATTRS
    489 _mm512_castsi512_pd (__m512i __A)
    490 {
    491   return (__m512d) (__A);
    492 }
    493 
    494 static __inline __m128i __DEFAULT_FN_ATTRS
    495 _mm512_castsi512_si128 (__m512i __A)
    496 {
    497   return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
    498 }
    499 
    500 static __inline __m256i __DEFAULT_FN_ATTRS
    501 _mm512_castsi512_si256 (__m512i __A)
    502 {
    503   return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
    504 }
    505 
    506 /* Bitwise operators */
    507 static __inline__ __m512i __DEFAULT_FN_ATTRS
    508 _mm512_and_epi32(__m512i __a, __m512i __b)
    509 {
    510   return (__m512i)((__v16su)__a & (__v16su)__b);
    511 }
    512 
    513 static __inline__ __m512i __DEFAULT_FN_ATTRS
    514 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
    515 {
    516   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
    517                 (__v16si) _mm512_and_epi32(__a, __b),
    518                 (__v16si) __src);
    519 }
    520 
    521 static __inline__ __m512i __DEFAULT_FN_ATTRS
    522 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
    523 {
    524   return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
    525                                          __k, __a, __b);
    526 }
    527 
    528 static __inline__ __m512i __DEFAULT_FN_ATTRS
    529 _mm512_and_epi64(__m512i __a, __m512i __b)
    530 {
    531   return (__m512i)((__v8du)__a & (__v8du)__b);
    532 }
    533 
    534 static __inline__ __m512i __DEFAULT_FN_ATTRS
    535 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
    536 {
    537     return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
    538                 (__v8di) _mm512_and_epi64(__a, __b),
    539                 (__v8di) __src);
    540 }
    541 
    542 static __inline__ __m512i __DEFAULT_FN_ATTRS
    543 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
    544 {
    545   return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
    546                                          __k, __a, __b);
    547 }
    548 
    549 static __inline__ __m512i __DEFAULT_FN_ATTRS
    550 _mm512_andnot_si512 (__m512i __A, __m512i __B)
    551 {
    552   return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
    553 }
    554 
    555 static __inline__ __m512i __DEFAULT_FN_ATTRS
    556 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
    557 {
    558   return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
    559 }
    560 
    561 static __inline__ __m512i __DEFAULT_FN_ATTRS
    562 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
    563 {
    564   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
    565                                          (__v16si)_mm512_andnot_epi32(__A, __B),
    566                                          (__v16si)__W);
    567 }
    568 
    569 static __inline__ __m512i __DEFAULT_FN_ATTRS
    570 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
    571 {
    572   return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
    573                                            __U, __A, __B);
    574 }
    575 
    576 static __inline__ __m512i __DEFAULT_FN_ATTRS
    577 _mm512_andnot_epi64(__m512i __A, __m512i __B)
    578 {
    579   return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
    580 }
    581 
    582 static __inline__ __m512i __DEFAULT_FN_ATTRS
    583 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
    584 {
    585   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
    586                                           (__v8di)_mm512_andnot_epi64(__A, __B),
    587                                           (__v8di)__W);
    588 }
    589 
    590 static __inline__ __m512i __DEFAULT_FN_ATTRS
    591 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
    592 {
    593   return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
    594                                            __U, __A, __B);
    595 }
    596 
    597 static __inline__ __m512i __DEFAULT_FN_ATTRS
    598 _mm512_or_epi32(__m512i __a, __m512i __b)
    599 {
    600   return (__m512i)((__v16su)__a | (__v16su)__b);
    601 }
    602 
    603 static __inline__ __m512i __DEFAULT_FN_ATTRS
    604 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
    605 {
    606   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
    607                                              (__v16si)_mm512_or_epi32(__a, __b),
    608                                              (__v16si)__src);
    609 }
    610 
    611 static __inline__ __m512i __DEFAULT_FN_ATTRS
    612 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
    613 {
    614   return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
    615 }
    616 
    617 static __inline__ __m512i __DEFAULT_FN_ATTRS
    618 _mm512_or_epi64(__m512i __a, __m512i __b)
    619 {
    620   return (__m512i)((__v8du)__a | (__v8du)__b);
    621 }
    622 
    623 static __inline__ __m512i __DEFAULT_FN_ATTRS
    624 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
    625 {
    626   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
    627                                              (__v8di)_mm512_or_epi64(__a, __b),
    628                                              (__v8di)__src);
    629 }
    630 
    631 static __inline__ __m512i __DEFAULT_FN_ATTRS
    632 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
    633 {
    634   return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
    635 }
    636 
    637 static __inline__ __m512i __DEFAULT_FN_ATTRS
    638 _mm512_xor_epi32(__m512i __a, __m512i __b)
    639 {
    640   return (__m512i)((__v16su)__a ^ (__v16su)__b);
    641 }
    642 
    643 static __inline__ __m512i __DEFAULT_FN_ATTRS
    644 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
    645 {
    646   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
    647                                             (__v16si)_mm512_xor_epi32(__a, __b),
    648                                             (__v16si)__src);
    649 }
    650 
    651 static __inline__ __m512i __DEFAULT_FN_ATTRS
    652 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
    653 {
    654   return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
    655 }
    656 
    657 static __inline__ __m512i __DEFAULT_FN_ATTRS
    658 _mm512_xor_epi64(__m512i __a, __m512i __b)
    659 {
    660   return (__m512i)((__v8du)__a ^ (__v8du)__b);
    661 }
    662 
    663 static __inline__ __m512i __DEFAULT_FN_ATTRS
    664 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
    665 {
    666   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
    667                                              (__v8di)_mm512_xor_epi64(__a, __b),
    668                                              (__v8di)__src);
    669 }
    670 
    671 static __inline__ __m512i __DEFAULT_FN_ATTRS
    672 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
    673 {
    674   return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
    675 }
    676 
    677 static __inline__ __m512i __DEFAULT_FN_ATTRS
    678 _mm512_and_si512(__m512i __a, __m512i __b)
    679 {
    680   return (__m512i)((__v8du)__a & (__v8du)__b);
    681 }
    682 
    683 static __inline__ __m512i __DEFAULT_FN_ATTRS
    684 _mm512_or_si512(__m512i __a, __m512i __b)
    685 {
    686   return (__m512i)((__v8du)__a | (__v8du)__b);
    687 }
    688 
    689 static __inline__ __m512i __DEFAULT_FN_ATTRS
    690 _mm512_xor_si512(__m512i __a, __m512i __b)
    691 {
    692   return (__m512i)((__v8du)__a ^ (__v8du)__b);
    693 }
    694 
    695 /* Arithmetic */
    696 
    697 static __inline __m512d __DEFAULT_FN_ATTRS
    698 _mm512_add_pd(__m512d __a, __m512d __b)
    699 {
    700   return (__m512d)((__v8df)__a + (__v8df)__b);
    701 }
    702 
    703 static __inline __m512 __DEFAULT_FN_ATTRS
    704 _mm512_add_ps(__m512 __a, __m512 __b)
    705 {
    706   return (__m512)((__v16sf)__a + (__v16sf)__b);
    707 }
    708 
    709 static __inline __m512d __DEFAULT_FN_ATTRS
    710 _mm512_mul_pd(__m512d __a, __m512d __b)
    711 {
    712   return (__m512d)((__v8df)__a * (__v8df)__b);
    713 }
    714 
    715 static __inline __m512 __DEFAULT_FN_ATTRS
    716 _mm512_mul_ps(__m512 __a, __m512 __b)
    717 {
    718   return (__m512)((__v16sf)__a * (__v16sf)__b);
    719 }
    720 
    721 static __inline __m512d __DEFAULT_FN_ATTRS
    722 _mm512_sub_pd(__m512d __a, __m512d __b)
    723 {
    724   return (__m512d)((__v8df)__a - (__v8df)__b);
    725 }
    726 
    727 static __inline __m512 __DEFAULT_FN_ATTRS
    728 _mm512_sub_ps(__m512 __a, __m512 __b)
    729 {
    730   return (__m512)((__v16sf)__a - (__v16sf)__b);
    731 }
    732 
    733 static __inline__ __m512i __DEFAULT_FN_ATTRS
    734 _mm512_add_epi64 (__m512i __A, __m512i __B)
    735 {
    736   return (__m512i) ((__v8du) __A + (__v8du) __B);
    737 }
    738 
    739 static __inline__ __m512i __DEFAULT_FN_ATTRS
    740 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
    741 {
    742   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
    743              (__v8di) __B,
    744              (__v8di) __W,
    745              (__mmask8) __U);
    746 }
    747 
    748 static __inline__ __m512i __DEFAULT_FN_ATTRS
    749 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
    750 {
    751   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
    752              (__v8di) __B,
    753              (__v8di)
    754              _mm512_setzero_si512 (),
    755              (__mmask8) __U);
    756 }
    757 
    758 static __inline__ __m512i __DEFAULT_FN_ATTRS
    759 _mm512_sub_epi64 (__m512i __A, __m512i __B)
    760 {
    761   return (__m512i) ((__v8du) __A - (__v8du) __B);
    762 }
    763 
    764 static __inline__ __m512i __DEFAULT_FN_ATTRS
    765 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
    766 {
    767   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
    768              (__v8di) __B,
    769              (__v8di) __W,
    770              (__mmask8) __U);
    771 }
    772 
    773 static __inline__ __m512i __DEFAULT_FN_ATTRS
    774 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
    775 {
    776   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
    777              (__v8di) __B,
    778              (__v8di)
    779              _mm512_setzero_si512 (),
    780              (__mmask8) __U);
    781 }
    782 
    783 static __inline__ __m512i __DEFAULT_FN_ATTRS
    784 _mm512_add_epi32 (__m512i __A, __m512i __B)
    785 {
    786   return (__m512i) ((__v16su) __A + (__v16su) __B);
    787 }
    788 
    789 static __inline__ __m512i __DEFAULT_FN_ATTRS
    790 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
    791 {
    792   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
    793              (__v16si) __B,
    794              (__v16si) __W,
    795              (__mmask16) __U);
    796 }
    797 
    798 static __inline__ __m512i __DEFAULT_FN_ATTRS
    799 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
    800 {
    801   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
    802              (__v16si) __B,
    803              (__v16si)
    804              _mm512_setzero_si512 (),
    805              (__mmask16) __U);
    806 }
    807 
    808 static __inline__ __m512i __DEFAULT_FN_ATTRS
    809 _mm512_sub_epi32 (__m512i __A, __m512i __B)
    810 {
    811   return (__m512i) ((__v16su) __A - (__v16su) __B);
    812 }
    813 
    814 static __inline__ __m512i __DEFAULT_FN_ATTRS
    815 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
    816 {
    817   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
    818              (__v16si) __B,
    819              (__v16si) __W,
    820              (__mmask16) __U);
    821 }
    822 
    823 static __inline__ __m512i __DEFAULT_FN_ATTRS
    824 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
    825 {
    826   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
    827              (__v16si) __B,
    828              (__v16si)
    829              _mm512_setzero_si512 (),
    830              (__mmask16) __U);
    831 }
    832 
    833 #define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
    834   (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
    835                                         (__v8df)(__m512d)(B), \
    836                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
    837                                         (int)(R)); })
    838 
    839 #define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
    840   (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
    841                                         (__v8df)(__m512d)(B), \
    842                                         (__v8df)_mm512_setzero_pd(), \
    843                                         (__mmask8)(U), (int)(R)); })
    844 
    845 #define _mm512_max_round_pd(A, B, R) __extension__ ({ \
    846   (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
    847                                         (__v8df)(__m512d)(B), \
    848                                         (__v8df)_mm512_undefined_pd(), \
    849                                         (__mmask8)-1, (int)(R)); })
    850 
    851 static  __inline__ __m512d __DEFAULT_FN_ATTRS
    852 _mm512_max_pd(__m512d __A, __m512d __B)
    853 {
    854   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
    855              (__v8df) __B,
    856              (__v8df)
    857              _mm512_setzero_pd (),
    858              (__mmask8) -1,
    859              _MM_FROUND_CUR_DIRECTION);
    860 }
    861 
    862 static __inline__ __m512d __DEFAULT_FN_ATTRS
    863 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
    864 {
    865   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
    866                   (__v8df) __B,
    867                   (__v8df) __W,
    868                   (__mmask8) __U,
    869                   _MM_FROUND_CUR_DIRECTION);
    870 }
    871 
    872 static __inline__ __m512d __DEFAULT_FN_ATTRS
    873 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
    874 {
    875   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
    876                   (__v8df) __B,
    877                   (__v8df)
    878                   _mm512_setzero_pd (),
    879                   (__mmask8) __U,
    880                   _MM_FROUND_CUR_DIRECTION);
    881 }
    882 
    883 #define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
    884   (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
    885                                        (__v16sf)(__m512)(B), \
    886                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
    887                                        (int)(R)); })
    888 
    889 #define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
    890   (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
    891                                        (__v16sf)(__m512)(B), \
    892                                        (__v16sf)_mm512_setzero_ps(), \
    893                                        (__mmask16)(U), (int)(R)); })
    894 
    895 #define _mm512_max_round_ps(A, B, R) __extension__ ({ \
    896   (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
    897                                        (__v16sf)(__m512)(B), \
    898                                        (__v16sf)_mm512_undefined_ps(), \
    899                                        (__mmask16)-1, (int)(R)); })
    900 
    901 static  __inline__ __m512 __DEFAULT_FN_ATTRS
    902 _mm512_max_ps(__m512 __A, __m512 __B)
    903 {
    904   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
    905             (__v16sf) __B,
    906             (__v16sf)
    907             _mm512_setzero_ps (),
    908             (__mmask16) -1,
    909             _MM_FROUND_CUR_DIRECTION);
    910 }
    911 
    912 static __inline__ __m512 __DEFAULT_FN_ATTRS
    913 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
    914 {
    915   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
    916                  (__v16sf) __B,
    917                  (__v16sf) __W,
    918                  (__mmask16) __U,
    919                  _MM_FROUND_CUR_DIRECTION);
    920 }
    921 
    922 static __inline__ __m512 __DEFAULT_FN_ATTRS
    923 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
    924 {
    925   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
    926                  (__v16sf) __B,
    927                  (__v16sf)
    928                  _mm512_setzero_ps (),
    929                  (__mmask16) __U,
    930                  _MM_FROUND_CUR_DIRECTION);
    931 }
    932 
    933 static __inline__ __m128 __DEFAULT_FN_ATTRS
    934 _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
    935   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
    936                 (__v4sf) __B,
    937                 (__v4sf) __W,
    938                 (__mmask8) __U,
    939                 _MM_FROUND_CUR_DIRECTION);
    940 }
    941 
    942 static __inline__ __m128 __DEFAULT_FN_ATTRS
    943 _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
    944   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
    945                 (__v4sf) __B,
    946                 (__v4sf)  _mm_setzero_ps (),
    947                 (__mmask8) __U,
    948                 _MM_FROUND_CUR_DIRECTION);
    949 }
    950 
    951 #define _mm_max_round_ss(A, B, R) __extension__ ({ \
    952   (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
    953                                           (__v4sf)(__m128)(B), \
    954                                           (__v4sf)_mm_setzero_ps(), \
    955                                           (__mmask8)-1, (int)(R)); })
    956 
    957 #define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \
    958   (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
    959                                           (__v4sf)(__m128)(B), \
    960                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
    961                                           (int)(R)); })
    962 
    963 #define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \
    964   (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
    965                                           (__v4sf)(__m128)(B), \
    966                                           (__v4sf)_mm_setzero_ps(), \
    967                                           (__mmask8)(U), (int)(R)); })
    968 
    969 static __inline__ __m128d __DEFAULT_FN_ATTRS
    970 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
    971   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
    972                 (__v2df) __B,
    973                 (__v2df) __W,
    974                 (__mmask8) __U,
    975                 _MM_FROUND_CUR_DIRECTION);
    976 }
    977 
    978 static __inline__ __m128d __DEFAULT_FN_ATTRS
    979 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
    980   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
    981                 (__v2df) __B,
    982                 (__v2df)  _mm_setzero_pd (),
    983                 (__mmask8) __U,
    984                 _MM_FROUND_CUR_DIRECTION);
    985 }
    986 
    987 #define _mm_max_round_sd(A, B, R) __extension__ ({ \
    988   (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
    989                                            (__v2df)(__m128d)(B), \
    990                                            (__v2df)_mm_setzero_pd(), \
    991                                            (__mmask8)-1, (int)(R)); })
    992 
    993 #define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \
    994   (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
    995                                            (__v2df)(__m128d)(B), \
    996                                            (__v2df)(__m128d)(W), \
    997                                            (__mmask8)(U), (int)(R)); })
    998 
    999 #define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \
   1000   (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
   1001                                            (__v2df)(__m128d)(B), \
   1002                                            (__v2df)_mm_setzero_pd(), \
   1003                                            (__mmask8)(U), (int)(R)); })
   1004 
   1005 static __inline __m512i
   1006 __DEFAULT_FN_ATTRS
   1007 _mm512_max_epi32(__m512i __A, __m512i __B)
   1008 {
   1009   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
   1010               (__v16si) __B,
   1011               (__v16si)
   1012               _mm512_setzero_si512 (),
   1013               (__mmask16) -1);
   1014 }
   1015 
   1016 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1017 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   1018 {
   1019   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
   1020                    (__v16si) __B,
   1021                    (__v16si) __W, __M);
   1022 }
   1023 
   1024 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1025 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
   1026 {
   1027   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
   1028                    (__v16si) __B,
   1029                    (__v16si)
   1030                    _mm512_setzero_si512 (),
   1031                    __M);
   1032 }
   1033 
   1034 static __inline __m512i __DEFAULT_FN_ATTRS
   1035 _mm512_max_epu32(__m512i __A, __m512i __B)
   1036 {
   1037   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
   1038               (__v16si) __B,
   1039               (__v16si)
   1040               _mm512_setzero_si512 (),
   1041               (__mmask16) -1);
   1042 }
   1043 
   1044 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1045 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   1046 {
   1047   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
   1048                    (__v16si) __B,
   1049                    (__v16si) __W, __M);
   1050 }
   1051 
   1052 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1053 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
   1054 {
   1055   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
   1056                    (__v16si) __B,
   1057                    (__v16si)
   1058                    _mm512_setzero_si512 (),
   1059                    __M);
   1060 }
   1061 
   1062 static __inline __m512i __DEFAULT_FN_ATTRS
   1063 _mm512_max_epi64(__m512i __A, __m512i __B)
   1064 {
   1065   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
   1066               (__v8di) __B,
   1067               (__v8di)
   1068               _mm512_setzero_si512 (),
   1069               (__mmask8) -1);
   1070 }
   1071 
   1072 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1073 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
   1074 {
   1075   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
   1076                    (__v8di) __B,
   1077                    (__v8di) __W, __M);
   1078 }
   1079 
   1080 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1081 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
   1082 {
   1083   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
   1084                    (__v8di) __B,
   1085                    (__v8di)
   1086                    _mm512_setzero_si512 (),
   1087                    __M);
   1088 }
   1089 
   1090 static __inline __m512i __DEFAULT_FN_ATTRS
   1091 _mm512_max_epu64(__m512i __A, __m512i __B)
   1092 {
   1093   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
   1094               (__v8di) __B,
   1095               (__v8di)
   1096               _mm512_setzero_si512 (),
   1097               (__mmask8) -1);
   1098 }
   1099 
   1100 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1101 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
   1102 {
   1103   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
   1104                    (__v8di) __B,
   1105                    (__v8di) __W, __M);
   1106 }
   1107 
   1108 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1109 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
   1110 {
   1111   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
   1112                    (__v8di) __B,
   1113                    (__v8di)
   1114                    _mm512_setzero_si512 (),
   1115                    __M);
   1116 }
   1117 
   1118 #define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
   1119   (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
   1120                                         (__v8df)(__m512d)(B), \
   1121                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
   1122                                         (int)(R)); })
   1123 
   1124 #define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
   1125   (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
   1126                                         (__v8df)(__m512d)(B), \
   1127                                         (__v8df)_mm512_setzero_pd(), \
   1128                                         (__mmask8)(U), (int)(R)); })
   1129 
   1130 #define _mm512_min_round_pd(A, B, R) __extension__ ({ \
   1131   (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
   1132                                         (__v8df)(__m512d)(B), \
   1133                                         (__v8df)_mm512_undefined_pd(), \
   1134                                         (__mmask8)-1, (int)(R)); })
   1135 
   1136 static  __inline__ __m512d __DEFAULT_FN_ATTRS
   1137 _mm512_min_pd(__m512d __A, __m512d __B)
   1138 {
   1139   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
   1140              (__v8df) __B,
   1141              (__v8df)
   1142              _mm512_setzero_pd (),
   1143              (__mmask8) -1,
   1144              _MM_FROUND_CUR_DIRECTION);
   1145 }
   1146 
   1147 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1148 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   1149 {
   1150   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
   1151                   (__v8df) __B,
   1152                   (__v8df) __W,
   1153                   (__mmask8) __U,
   1154                   _MM_FROUND_CUR_DIRECTION);
   1155 }
   1156 
   1157 #define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
   1158   (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
   1159                                        (__v16sf)(__m512)(B), \
   1160                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   1161                                        (int)(R)); })
   1162 
   1163 #define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
   1164   (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
   1165                                        (__v16sf)(__m512)(B), \
   1166                                        (__v16sf)_mm512_setzero_ps(), \
   1167                                        (__mmask16)(U), (int)(R)); })
   1168 
   1169 #define _mm512_min_round_ps(A, B, R) __extension__ ({ \
   1170   (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
   1171                                        (__v16sf)(__m512)(B), \
   1172                                        (__v16sf)_mm512_undefined_ps(), \
   1173                                        (__mmask16)-1, (int)(R)); })
   1174 
   1175 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1176 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
   1177 {
   1178   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
   1179                   (__v8df) __B,
   1180                   (__v8df)
   1181                   _mm512_setzero_pd (),
   1182                   (__mmask8) __U,
   1183                   _MM_FROUND_CUR_DIRECTION);
   1184 }
   1185 
   1186 static  __inline__ __m512 __DEFAULT_FN_ATTRS
   1187 _mm512_min_ps(__m512 __A, __m512 __B)
   1188 {
   1189   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
   1190             (__v16sf) __B,
   1191             (__v16sf)
   1192             _mm512_setzero_ps (),
   1193             (__mmask16) -1,
   1194             _MM_FROUND_CUR_DIRECTION);
   1195 }
   1196 
   1197 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1198 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   1199 {
   1200   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
   1201                  (__v16sf) __B,
   1202                  (__v16sf) __W,
   1203                  (__mmask16) __U,
   1204                  _MM_FROUND_CUR_DIRECTION);
   1205 }
   1206 
   1207 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1208 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
   1209 {
   1210   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
   1211                  (__v16sf) __B,
   1212                  (__v16sf)
   1213                  _mm512_setzero_ps (),
   1214                  (__mmask16) __U,
   1215                  _MM_FROUND_CUR_DIRECTION);
   1216 }
   1217 
   1218 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1219 _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   1220   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
   1221                 (__v4sf) __B,
   1222                 (__v4sf) __W,
   1223                 (__mmask8) __U,
   1224                 _MM_FROUND_CUR_DIRECTION);
   1225 }
   1226 
   1227 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1228 _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   1229   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
   1230                 (__v4sf) __B,
   1231                 (__v4sf)  _mm_setzero_ps (),
   1232                 (__mmask8) __U,
   1233                 _MM_FROUND_CUR_DIRECTION);
   1234 }
   1235 
   1236 #define _mm_min_round_ss(A, B, R) __extension__ ({ \
   1237   (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
   1238                                           (__v4sf)(__m128)(B), \
   1239                                           (__v4sf)_mm_setzero_ps(), \
   1240                                           (__mmask8)-1, (int)(R)); })
   1241 
   1242 #define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \
   1243   (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
   1244                                           (__v4sf)(__m128)(B), \
   1245                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
   1246                                           (int)(R)); })
   1247 
   1248 #define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \
   1249   (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
   1250                                           (__v4sf)(__m128)(B), \
   1251                                           (__v4sf)_mm_setzero_ps(), \
   1252                                           (__mmask8)(U), (int)(R)); })
   1253 
   1254 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1255 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   1256   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
   1257                 (__v2df) __B,
   1258                 (__v2df) __W,
   1259                 (__mmask8) __U,
   1260                 _MM_FROUND_CUR_DIRECTION);
   1261 }
   1262 
   1263 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1264 _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   1265   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
   1266                 (__v2df) __B,
   1267                 (__v2df)  _mm_setzero_pd (),
   1268                 (__mmask8) __U,
   1269                 _MM_FROUND_CUR_DIRECTION);
   1270 }
   1271 
   1272 #define _mm_min_round_sd(A, B, R) __extension__ ({ \
   1273   (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
   1274                                            (__v2df)(__m128d)(B), \
   1275                                            (__v2df)_mm_setzero_pd(), \
   1276                                            (__mmask8)-1, (int)(R)); })
   1277 
   1278 #define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \
   1279   (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
   1280                                            (__v2df)(__m128d)(B), \
   1281                                            (__v2df)(__m128d)(W), \
   1282                                            (__mmask8)(U), (int)(R)); })
   1283 
   1284 #define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \
   1285   (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
   1286                                            (__v2df)(__m128d)(B), \
   1287                                            (__v2df)_mm_setzero_pd(), \
   1288                                            (__mmask8)(U), (int)(R)); })
   1289 
   1290 static __inline __m512i
   1291 __DEFAULT_FN_ATTRS
   1292 _mm512_min_epi32(__m512i __A, __m512i __B)
   1293 {
   1294   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
   1295               (__v16si) __B,
   1296               (__v16si)
   1297               _mm512_setzero_si512 (),
   1298               (__mmask16) -1);
   1299 }
   1300 
   1301 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1302 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   1303 {
   1304   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
   1305                    (__v16si) __B,
   1306                    (__v16si) __W, __M);
   1307 }
   1308 
   1309 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1310 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
   1311 {
   1312   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
   1313                    (__v16si) __B,
   1314                    (__v16si)
   1315                    _mm512_setzero_si512 (),
   1316                    __M);
   1317 }
   1318 
   1319 static __inline __m512i __DEFAULT_FN_ATTRS
   1320 _mm512_min_epu32(__m512i __A, __m512i __B)
   1321 {
   1322   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
   1323               (__v16si) __B,
   1324               (__v16si)
   1325               _mm512_setzero_si512 (),
   1326               (__mmask16) -1);
   1327 }
   1328 
   1329 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1330 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   1331 {
   1332   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
   1333                    (__v16si) __B,
   1334                    (__v16si) __W, __M);
   1335 }
   1336 
   1337 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1338 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
   1339 {
   1340   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
   1341                    (__v16si) __B,
   1342                    (__v16si)
   1343                    _mm512_setzero_si512 (),
   1344                    __M);
   1345 }
   1346 
   1347 static __inline __m512i __DEFAULT_FN_ATTRS
   1348 _mm512_min_epi64(__m512i __A, __m512i __B)
   1349 {
   1350   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
   1351               (__v8di) __B,
   1352               (__v8di)
   1353               _mm512_setzero_si512 (),
   1354               (__mmask8) -1);
   1355 }
   1356 
   1357 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1358 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
   1359 {
   1360   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
   1361                    (__v8di) __B,
   1362                    (__v8di) __W, __M);
   1363 }
   1364 
   1365 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1366 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
   1367 {
   1368   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
   1369                    (__v8di) __B,
   1370                    (__v8di)
   1371                    _mm512_setzero_si512 (),
   1372                    __M);
   1373 }
   1374 
   1375 static __inline __m512i __DEFAULT_FN_ATTRS
   1376 _mm512_min_epu64(__m512i __A, __m512i __B)
   1377 {
   1378   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
   1379               (__v8di) __B,
   1380               (__v8di)
   1381               _mm512_setzero_si512 (),
   1382               (__mmask8) -1);
   1383 }
   1384 
   1385 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1386 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
   1387 {
   1388   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
   1389                    (__v8di) __B,
   1390                    (__v8di) __W, __M);
   1391 }
   1392 
   1393 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1394 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
   1395 {
   1396   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
   1397                    (__v8di) __B,
   1398                    (__v8di)
   1399                    _mm512_setzero_si512 (),
   1400                    __M);
   1401 }
   1402 
   1403 static __inline __m512i __DEFAULT_FN_ATTRS
   1404 _mm512_mul_epi32(__m512i __X, __m512i __Y)
   1405 {
   1406   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
   1407               (__v16si) __Y,
   1408               (__v8di)
   1409               _mm512_setzero_si512 (),
   1410               (__mmask8) -1);
   1411 }
   1412 
   1413 static __inline __m512i __DEFAULT_FN_ATTRS
   1414 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
   1415 {
   1416   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
   1417               (__v16si) __Y,
   1418               (__v8di) __W, __M);
   1419 }
   1420 
   1421 static __inline __m512i __DEFAULT_FN_ATTRS
   1422 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
   1423 {
   1424   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
   1425               (__v16si) __Y,
   1426               (__v8di)
   1427               _mm512_setzero_si512 (),
   1428               __M);
   1429 }
   1430 
   1431 static __inline __m512i __DEFAULT_FN_ATTRS
   1432 _mm512_mul_epu32(__m512i __X, __m512i __Y)
   1433 {
   1434   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
   1435                (__v16si) __Y,
   1436                (__v8di)
   1437                _mm512_setzero_si512 (),
   1438                (__mmask8) -1);
   1439 }
   1440 
   1441 static __inline __m512i __DEFAULT_FN_ATTRS
   1442 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
   1443 {
   1444   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
   1445                (__v16si) __Y,
   1446                (__v8di) __W, __M);
   1447 }
   1448 
   1449 static __inline __m512i __DEFAULT_FN_ATTRS
   1450 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
   1451 {
   1452   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
   1453                (__v16si) __Y,
   1454                (__v8di)
   1455                _mm512_setzero_si512 (),
   1456                __M);
   1457 }
   1458 
   1459 static __inline __m512i __DEFAULT_FN_ATTRS
   1460 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
   1461 {
   1462   return (__m512i) ((__v16su) __A * (__v16su) __B);
   1463 }
   1464 
   1465 static __inline __m512i __DEFAULT_FN_ATTRS
   1466 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
   1467 {
   1468   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
   1469               (__v16si) __B,
   1470               (__v16si)
   1471               _mm512_setzero_si512 (),
   1472               __M);
   1473 }
   1474 
   1475 static __inline __m512i __DEFAULT_FN_ATTRS
   1476 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   1477 {
   1478   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
   1479               (__v16si) __B,
   1480               (__v16si) __W, __M);
   1481 }
   1482 
   1483 #define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
   1484   (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
   1485                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
   1486                                          (int)(R)); })
   1487 
   1488 #define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
   1489   (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
   1490                                          (__v8df)_mm512_setzero_pd(), \
   1491                                          (__mmask8)(U), (int)(R)); })
   1492 
   1493 #define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
   1494   (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
   1495                                          (__v8df)_mm512_undefined_pd(), \
   1496                                          (__mmask8)-1, (int)(R)); })
   1497 
   1498 static  __inline__ __m512d __DEFAULT_FN_ATTRS
   1499 _mm512_sqrt_pd(__m512d __a)
   1500 {
   1501   return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
   1502                                                 (__v8df) _mm512_setzero_pd (),
   1503                                                 (__mmask8) -1,
   1504                                                 _MM_FROUND_CUR_DIRECTION);
   1505 }
   1506 
   1507 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1508 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
   1509 {
   1510   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
   1511                    (__v8df) __W,
   1512                    (__mmask8) __U,
   1513                    _MM_FROUND_CUR_DIRECTION);
   1514 }
   1515 
   1516 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1517 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
   1518 {
   1519   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
   1520                    (__v8df)
   1521                    _mm512_setzero_pd (),
   1522                    (__mmask8) __U,
   1523                    _MM_FROUND_CUR_DIRECTION);
   1524 }
   1525 
   1526 #define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
   1527   (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
   1528                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
   1529                                         (int)(R)); })
   1530 
   1531 #define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
   1532   (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
   1533                                         (__v16sf)_mm512_setzero_ps(), \
   1534                                         (__mmask16)(U), (int)(R)); })
   1535 
   1536 #define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
   1537   (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
   1538                                         (__v16sf)_mm512_undefined_ps(), \
   1539                                         (__mmask16)-1, (int)(R)); })
   1540 
   1541 static  __inline__ __m512 __DEFAULT_FN_ATTRS
   1542 _mm512_sqrt_ps(__m512 __a)
   1543 {
   1544   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
   1545                                                (__v16sf) _mm512_setzero_ps (),
   1546                                                (__mmask16) -1,
   1547                                                _MM_FROUND_CUR_DIRECTION);
   1548 }
   1549 
   1550 static  __inline__ __m512 __DEFAULT_FN_ATTRS
   1551 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
   1552 {
   1553   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
   1554                                                (__v16sf) __W,
   1555                                                (__mmask16) __U,
   1556                                                _MM_FROUND_CUR_DIRECTION);
   1557 }
   1558 
   1559 static  __inline__ __m512 __DEFAULT_FN_ATTRS
   1560 _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
   1561 {
   1562   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
   1563                                                (__v16sf) _mm512_setzero_ps (),
   1564                                                (__mmask16) __U,
   1565                                                _MM_FROUND_CUR_DIRECTION);
   1566 }
   1567 
   1568 static  __inline__ __m512d __DEFAULT_FN_ATTRS
   1569 _mm512_rsqrt14_pd(__m512d __A)
   1570 {
   1571   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
   1572                  (__v8df)
   1573                  _mm512_setzero_pd (),
   1574                  (__mmask8) -1);}
   1575 
   1576 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1577 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
   1578 {
   1579   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
   1580                   (__v8df) __W,
   1581                   (__mmask8) __U);
   1582 }
   1583 
   1584 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1585 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
   1586 {
   1587   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
   1588                   (__v8df)
   1589                   _mm512_setzero_pd (),
   1590                   (__mmask8) __U);
   1591 }
   1592 
   1593 static  __inline__ __m512 __DEFAULT_FN_ATTRS
   1594 _mm512_rsqrt14_ps(__m512 __A)
   1595 {
   1596   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
   1597                 (__v16sf)
   1598                 _mm512_setzero_ps (),
   1599                 (__mmask16) -1);
   1600 }
   1601 
   1602 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1603 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
   1604 {
   1605   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
   1606                  (__v16sf) __W,
   1607                  (__mmask16) __U);
   1608 }
   1609 
   1610 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1611 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
   1612 {
   1613   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
   1614                  (__v16sf)
   1615                  _mm512_setzero_ps (),
   1616                  (__mmask16) __U);
   1617 }
   1618 
   1619 static  __inline__ __m128 __DEFAULT_FN_ATTRS
   1620 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
   1621 {
   1622   return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
   1623              (__v4sf) __B,
   1624              (__v4sf)
   1625              _mm_setzero_ps (),
   1626              (__mmask8) -1);
   1627 }
   1628 
   1629 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1630 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   1631 {
   1632  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
   1633           (__v4sf) __B,
   1634           (__v4sf) __W,
   1635           (__mmask8) __U);
   1636 }
   1637 
   1638 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1639 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
   1640 {
   1641  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
   1642           (__v4sf) __B,
   1643           (__v4sf) _mm_setzero_ps (),
   1644           (__mmask8) __U);
   1645 }
   1646 
   1647 static  __inline__ __m128d __DEFAULT_FN_ATTRS
   1648 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
   1649 {
   1650   return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
   1651               (__v2df) __B,
   1652               (__v2df)
   1653               _mm_setzero_pd (),
   1654               (__mmask8) -1);
   1655 }
   1656 
   1657 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1658 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   1659 {
   1660  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
   1661           (__v2df) __B,
   1662           (__v2df) __W,
   1663           (__mmask8) __U);
   1664 }
   1665 
   1666 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1667 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
   1668 {
   1669  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
   1670           (__v2df) __B,
   1671           (__v2df) _mm_setzero_pd (),
   1672           (__mmask8) __U);
   1673 }
   1674 
   1675 static  __inline__ __m512d __DEFAULT_FN_ATTRS
   1676 _mm512_rcp14_pd(__m512d __A)
   1677 {
   1678   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
   1679                (__v8df)
   1680                _mm512_setzero_pd (),
   1681                (__mmask8) -1);
   1682 }
   1683 
   1684 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1685 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
   1686 {
   1687   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
   1688                 (__v8df) __W,
   1689                 (__mmask8) __U);
   1690 }
   1691 
   1692 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1693 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
   1694 {
   1695   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
   1696                 (__v8df)
   1697                 _mm512_setzero_pd (),
   1698                 (__mmask8) __U);
   1699 }
   1700 
   1701 static  __inline__ __m512 __DEFAULT_FN_ATTRS
   1702 _mm512_rcp14_ps(__m512 __A)
   1703 {
   1704   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
   1705               (__v16sf)
   1706               _mm512_setzero_ps (),
   1707               (__mmask16) -1);
   1708 }
   1709 
   1710 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1711 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
   1712 {
   1713   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
   1714                    (__v16sf) __W,
   1715                    (__mmask16) __U);
   1716 }
   1717 
   1718 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1719 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
   1720 {
   1721   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
   1722                    (__v16sf)
   1723                    _mm512_setzero_ps (),
   1724                    (__mmask16) __U);
   1725 }
   1726 
   1727 static  __inline__ __m128 __DEFAULT_FN_ATTRS
   1728 _mm_rcp14_ss(__m128 __A, __m128 __B)
   1729 {
   1730   return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
   1731                  (__v4sf) __B,
   1732                  (__v4sf)
   1733                  _mm_setzero_ps (),
   1734                  (__mmask8) -1);
   1735 }
   1736 
   1737 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1738 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   1739 {
   1740  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
   1741           (__v4sf) __B,
   1742           (__v4sf) __W,
   1743           (__mmask8) __U);
   1744 }
   1745 
   1746 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1747 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
   1748 {
   1749  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
   1750           (__v4sf) __B,
   1751           (__v4sf) _mm_setzero_ps (),
   1752           (__mmask8) __U);
   1753 }
   1754 
   1755 static  __inline__ __m128d __DEFAULT_FN_ATTRS
   1756 _mm_rcp14_sd(__m128d __A, __m128d __B)
   1757 {
   1758   return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
   1759             (__v2df) __B,
   1760             (__v2df)
   1761             _mm_setzero_pd (),
   1762             (__mmask8) -1);
   1763 }
   1764 
   1765 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1766 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   1767 {
   1768  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
   1769           (__v2df) __B,
   1770           (__v2df) __W,
   1771           (__mmask8) __U);
   1772 }
   1773 
   1774 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1775 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
   1776 {
   1777  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
   1778           (__v2df) __B,
   1779           (__v2df) _mm_setzero_pd (),
   1780           (__mmask8) __U);
   1781 }
   1782 
   1783 static __inline __m512 __DEFAULT_FN_ATTRS
   1784 _mm512_floor_ps(__m512 __A)
   1785 {
   1786   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
   1787                                                   _MM_FROUND_FLOOR,
   1788                                                   (__v16sf) __A, -1,
   1789                                                   _MM_FROUND_CUR_DIRECTION);
   1790 }
   1791 
   1792 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1793 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
   1794 {
   1795   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
   1796                    _MM_FROUND_FLOOR,
   1797                    (__v16sf) __W, __U,
   1798                    _MM_FROUND_CUR_DIRECTION);
   1799 }
   1800 
   1801 static __inline __m512d __DEFAULT_FN_ATTRS
   1802 _mm512_floor_pd(__m512d __A)
   1803 {
   1804   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
   1805                                                    _MM_FROUND_FLOOR,
   1806                                                    (__v8df) __A, -1,
   1807                                                    _MM_FROUND_CUR_DIRECTION);
   1808 }
   1809 
   1810 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1811 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
   1812 {
   1813   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
   1814                 _MM_FROUND_FLOOR,
   1815                 (__v8df) __W, __U,
   1816                 _MM_FROUND_CUR_DIRECTION);
   1817 }
   1818 
   1819 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1820 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
   1821 {
   1822   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
   1823                    _MM_FROUND_CEIL,
   1824                    (__v16sf) __W, __U,
   1825                    _MM_FROUND_CUR_DIRECTION);
   1826 }
   1827 
   1828 static __inline __m512 __DEFAULT_FN_ATTRS
   1829 _mm512_ceil_ps(__m512 __A)
   1830 {
   1831   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
   1832                                                   _MM_FROUND_CEIL,
   1833                                                   (__v16sf) __A, -1,
   1834                                                   _MM_FROUND_CUR_DIRECTION);
   1835 }
   1836 
   1837 static __inline __m512d __DEFAULT_FN_ATTRS
   1838 _mm512_ceil_pd(__m512d __A)
   1839 {
   1840   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
   1841                                                    _MM_FROUND_CEIL,
   1842                                                    (__v8df) __A, -1,
   1843                                                    _MM_FROUND_CUR_DIRECTION);
   1844 }
   1845 
   1846 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1847 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
   1848 {
   1849   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
   1850                 _MM_FROUND_CEIL,
   1851                 (__v8df) __W, __U,
   1852                 _MM_FROUND_CUR_DIRECTION);
   1853 }
   1854 
   1855 static __inline __m512i __DEFAULT_FN_ATTRS
   1856 _mm512_abs_epi64(__m512i __A)
   1857 {
   1858   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
   1859              (__v8di)
   1860              _mm512_setzero_si512 (),
   1861              (__mmask8) -1);
   1862 }
   1863 
   1864 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1865 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
   1866 {
   1867   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
   1868                   (__v8di) __W,
   1869                   (__mmask8) __U);
   1870 }
   1871 
   1872 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1873 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
   1874 {
   1875   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
   1876                   (__v8di)
   1877                   _mm512_setzero_si512 (),
   1878                   (__mmask8) __U);
   1879 }
   1880 
   1881 static __inline __m512i __DEFAULT_FN_ATTRS
   1882 _mm512_abs_epi32(__m512i __A)
   1883 {
   1884   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
   1885              (__v16si)
   1886              _mm512_setzero_si512 (),
   1887              (__mmask16) -1);
   1888 }
   1889 
   1890 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1891 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
   1892 {
   1893   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
   1894                   (__v16si) __W,
   1895                   (__mmask16) __U);
   1896 }
   1897 
   1898 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1899 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
   1900 {
   1901   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
   1902                   (__v16si)
   1903                   _mm512_setzero_si512 (),
   1904                   (__mmask16) __U);
   1905 }
   1906 
   1907 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1908 _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   1909   return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
   1910                 (__v4sf) __B,
   1911                 (__v4sf) __W,
   1912                 (__mmask8) __U,
   1913                 _MM_FROUND_CUR_DIRECTION);
   1914 }
   1915 
   1916 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1917 _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   1918   return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
   1919                 (__v4sf) __B,
   1920                 (__v4sf)  _mm_setzero_ps (),
   1921                 (__mmask8) __U,
   1922                 _MM_FROUND_CUR_DIRECTION);
   1923 }
   1924 
   1925 #define _mm_add_round_ss(A, B, R) __extension__ ({ \
   1926   (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
   1927                                           (__v4sf)(__m128)(B), \
   1928                                           (__v4sf)_mm_setzero_ps(), \
   1929                                           (__mmask8)-1, (int)(R)); })
   1930 
   1931 #define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \
   1932   (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
   1933                                           (__v4sf)(__m128)(B), \
   1934                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
   1935                                           (int)(R)); })
   1936 
   1937 #define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \
   1938   (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
   1939                                           (__v4sf)(__m128)(B), \
   1940                                           (__v4sf)_mm_setzero_ps(), \
   1941                                           (__mmask8)(U), (int)(R)); })
   1942 
   1943 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1944 _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   1945   return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
   1946                 (__v2df) __B,
   1947                 (__v2df) __W,
   1948                 (__mmask8) __U,
   1949                 _MM_FROUND_CUR_DIRECTION);
   1950 }
   1951 
   1952 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1953 _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   1954   return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
   1955                 (__v2df) __B,
   1956                 (__v2df)  _mm_setzero_pd (),
   1957                 (__mmask8) __U,
   1958                 _MM_FROUND_CUR_DIRECTION);
   1959 }
   1960 #define _mm_add_round_sd(A, B, R) __extension__ ({ \
   1961   (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
   1962                                            (__v2df)(__m128d)(B), \
   1963                                            (__v2df)_mm_setzero_pd(), \
   1964                                            (__mmask8)-1, (int)(R)); })
   1965 
   1966 #define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \
   1967   (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
   1968                                            (__v2df)(__m128d)(B), \
   1969                                            (__v2df)(__m128d)(W), \
   1970                                            (__mmask8)(U), (int)(R)); })
   1971 
   1972 #define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \
   1973   (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
   1974                                            (__v2df)(__m128d)(B), \
   1975                                            (__v2df)_mm_setzero_pd(), \
   1976                                            (__mmask8)(U), (int)(R)); })
   1977 
   1978 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1979 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   1980   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
   1981              (__v8df) __B,
   1982              (__v8df) __W,
   1983              (__mmask8) __U,
   1984              _MM_FROUND_CUR_DIRECTION);
   1985 }
   1986 
   1987 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1988 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   1989   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
   1990              (__v8df) __B,
   1991              (__v8df) _mm512_setzero_pd (),
   1992              (__mmask8) __U,
   1993              _MM_FROUND_CUR_DIRECTION);
   1994 }
   1995 
   1996 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1997 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   1998   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
   1999             (__v16sf) __B,
   2000             (__v16sf) __W,
   2001             (__mmask16) __U,
   2002             _MM_FROUND_CUR_DIRECTION);
   2003 }
   2004 
   2005 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2006 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   2007   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
   2008             (__v16sf) __B,
   2009             (__v16sf) _mm512_setzero_ps (),
   2010             (__mmask16) __U,
   2011             _MM_FROUND_CUR_DIRECTION);
   2012 }
   2013 
   2014 #define _mm512_add_round_pd(A, B, R) __extension__ ({ \
   2015   (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
   2016                                         (__v8df)(__m512d)(B), \
   2017                                         (__v8df)_mm512_setzero_pd(), \
   2018                                         (__mmask8)-1, (int)(R)); })
   2019 
   2020 #define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \
   2021   (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
   2022                                         (__v8df)(__m512d)(B), \
   2023                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
   2024                                         (int)(R)); })
   2025 
   2026 #define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \
   2027   (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
   2028                                         (__v8df)(__m512d)(B), \
   2029                                         (__v8df)_mm512_setzero_pd(), \
   2030                                         (__mmask8)(U), (int)(R)); })
   2031 
   2032 #define _mm512_add_round_ps(A, B, R) __extension__ ({ \
   2033   (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
   2034                                        (__v16sf)(__m512)(B), \
   2035                                        (__v16sf)_mm512_setzero_ps(), \
   2036                                        (__mmask16)-1, (int)(R)); })
   2037 
   2038 #define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \
   2039   (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
   2040                                        (__v16sf)(__m512)(B), \
   2041                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   2042                                        (int)(R)); })
   2043 
   2044 #define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \
   2045   (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
   2046                                        (__v16sf)(__m512)(B), \
   2047                                        (__v16sf)_mm512_setzero_ps(), \
   2048                                        (__mmask16)(U), (int)(R)); })
   2049 
   2050 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2051 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   2052   return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
   2053                 (__v4sf) __B,
   2054                 (__v4sf) __W,
   2055                 (__mmask8) __U,
   2056                 _MM_FROUND_CUR_DIRECTION);
   2057 }
   2058 
   2059 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2060 _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   2061   return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
   2062                 (__v4sf) __B,
   2063                 (__v4sf)  _mm_setzero_ps (),
   2064                 (__mmask8) __U,
   2065                 _MM_FROUND_CUR_DIRECTION);
   2066 }
   2067 #define _mm_sub_round_ss(A, B, R) __extension__ ({ \
   2068   (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
   2069                                           (__v4sf)(__m128)(B), \
   2070                                           (__v4sf)_mm_setzero_ps(), \
   2071                                           (__mmask8)-1, (int)(R)); })
   2072 
   2073 #define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \
   2074   (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
   2075                                           (__v4sf)(__m128)(B), \
   2076                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
   2077                                           (int)(R)); })
   2078 
   2079 #define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \
   2080   (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
   2081                                           (__v4sf)(__m128)(B), \
   2082                                           (__v4sf)_mm_setzero_ps(), \
   2083                                           (__mmask8)(U), (int)(R)); })
   2084 
   2085 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2086 _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   2087   return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
   2088                 (__v2df) __B,
   2089                 (__v2df) __W,
   2090                 (__mmask8) __U,
   2091                 _MM_FROUND_CUR_DIRECTION);
   2092 }
   2093 
   2094 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2095 _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   2096   return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
   2097                 (__v2df) __B,
   2098                 (__v2df)  _mm_setzero_pd (),
   2099                 (__mmask8) __U,
   2100                 _MM_FROUND_CUR_DIRECTION);
   2101 }
   2102 
   2103 #define _mm_sub_round_sd(A, B, R) __extension__ ({ \
   2104   (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
   2105                                            (__v2df)(__m128d)(B), \
   2106                                            (__v2df)_mm_setzero_pd(), \
   2107                                            (__mmask8)-1, (int)(R)); })
   2108 
   2109 #define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \
   2110   (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
   2111                                            (__v2df)(__m128d)(B), \
   2112                                            (__v2df)(__m128d)(W), \
   2113                                            (__mmask8)(U), (int)(R)); })
   2114 
   2115 #define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \
   2116   (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
   2117                                            (__v2df)(__m128d)(B), \
   2118                                            (__v2df)_mm_setzero_pd(), \
   2119                                            (__mmask8)(U), (int)(R)); })
   2120 
   2121 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2122 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   2123   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
   2124              (__v8df) __B,
   2125              (__v8df) __W,
   2126              (__mmask8) __U,
   2127              _MM_FROUND_CUR_DIRECTION);
   2128 }
   2129 
   2130 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2131 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   2132   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
   2133              (__v8df) __B,
   2134              (__v8df)
   2135              _mm512_setzero_pd (),
   2136              (__mmask8) __U,
   2137              _MM_FROUND_CUR_DIRECTION);
   2138 }
   2139 
   2140 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2141 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   2142   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
   2143             (__v16sf) __B,
   2144             (__v16sf) __W,
   2145             (__mmask16) __U,
   2146             _MM_FROUND_CUR_DIRECTION);
   2147 }
   2148 
   2149 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2150 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   2151   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
   2152             (__v16sf) __B,
   2153             (__v16sf)
   2154             _mm512_setzero_ps (),
   2155             (__mmask16) __U,
   2156             _MM_FROUND_CUR_DIRECTION);
   2157 }
   2158 
   2159 #define _mm512_sub_round_pd(A, B, R) __extension__ ({ \
   2160   (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
   2161                                         (__v8df)(__m512d)(B), \
   2162                                         (__v8df)_mm512_setzero_pd(), \
   2163                                         (__mmask8)-1, (int)(R)); })
   2164 
   2165 #define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \
   2166   (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
   2167                                         (__v8df)(__m512d)(B), \
   2168                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
   2169                                         (int)(R)); })
   2170 
   2171 #define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \
   2172   (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
   2173                                         (__v8df)(__m512d)(B), \
   2174                                         (__v8df)_mm512_setzero_pd(), \
   2175                                         (__mmask8)(U), (int)(R)); })
   2176 
   2177 #define _mm512_sub_round_ps(A, B, R) __extension__ ({ \
   2178   (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
   2179                                        (__v16sf)(__m512)(B), \
   2180                                        (__v16sf)_mm512_setzero_ps(), \
   2181                                        (__mmask16)-1, (int)(R)); })
   2182 
   2183 #define _mm512_mask_sub_round_ps(W, U, A, B, R)  __extension__ ({ \
   2184   (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
   2185                                        (__v16sf)(__m512)(B), \
   2186                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   2187                                        (int)(R)); });
   2188 
   /* Zero-mask form of the 512-bit packed float subtract with rounding
    * operand R: forwards A, B, a zero vector and mask U to
    * __builtin_ia32_subps512_mask.
    *
    * The expansion is a statement expression and must not carry its own
    * trailing ';' -- otherwise the macro cannot be used as a sub-expression
    * (function argument, operand, initializer). */
   #define _mm512_maskz_sub_round_ps(U, A, B, R)  __extension__ ({ \
     (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })
   2194 
   2195 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2196 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   2197   return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
   2198                 (__v4sf) __B,
   2199                 (__v4sf) __W,
   2200                 (__mmask8) __U,
   2201                 _MM_FROUND_CUR_DIRECTION);
   2202 }
   2203 
   2204 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2205 _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   2206   return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
   2207                 (__v4sf) __B,
   2208                 (__v4sf)  _mm_setzero_ps (),
   2209                 (__mmask8) __U,
   2210                 _MM_FROUND_CUR_DIRECTION);
   2211 }
   /* Scalar float multiply with a caller-supplied rounding operand R.
    * Every variant forwards A and B to __builtin_ia32_mulss_round_mask; they
    * differ only in operands three and four:
    *   _mm_mul_round_ss        zero vector, all-ones mask
    *   _mm_mask_mul_round_ss   W,           mask U
    *   _mm_maskz_mul_round_ss  zero vector, mask U
    */
   #define _mm_mul_round_ss(A, B, R) __extension__ ({ \
     (__m128)__builtin_ia32_mulss_round_mask( \
         (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
         (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)); })

   #define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \
     (__m128)__builtin_ia32_mulss_round_mask( \
         (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
         (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)); })

   #define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \
     (__m128)__builtin_ia32_mulss_round_mask( \
         (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
         (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)); })
   2229 
   2230 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2231 _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   2232   return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
   2233                 (__v2df) __B,
   2234                 (__v2df) __W,
   2235                 (__mmask8) __U,
   2236                 _MM_FROUND_CUR_DIRECTION);
   2237 }
   2238 
   2239 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2240 _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   2241   return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
   2242                 (__v2df) __B,
   2243                 (__v2df)  _mm_setzero_pd (),
   2244                 (__mmask8) __U,
   2245                 _MM_FROUND_CUR_DIRECTION);
   2246 }
   2247 
   /* Scalar double multiply with a caller-supplied rounding operand R.
    * Every variant forwards A and B to __builtin_ia32_mulsd_round_mask; they
    * differ only in operands three and four:
    *   _mm_mul_round_sd        zero vector, all-ones mask
    *   _mm_mask_mul_round_sd   W,           mask U
    *   _mm_maskz_mul_round_sd  zero vector, mask U
    */
   #define _mm_mul_round_sd(A, B, R) __extension__ ({ \
     (__m128d)__builtin_ia32_mulsd_round_mask( \
         (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
         (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)); })

   #define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \
     (__m128d)__builtin_ia32_mulsd_round_mask( \
         (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
         (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)); })

   #define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \
     (__m128d)__builtin_ia32_mulsd_round_mask( \
         (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
         (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)); })
   2265 
   2266 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2267 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   2268   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
   2269              (__v8df) __B,
   2270              (__v8df) __W,
   2271              (__mmask8) __U,
   2272              _MM_FROUND_CUR_DIRECTION);
   2273 }
   2274 
   2275 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2276 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   2277   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
   2278              (__v8df) __B,
   2279              (__v8df)
   2280              _mm512_setzero_pd (),
   2281              (__mmask8) __U,
   2282              _MM_FROUND_CUR_DIRECTION);
   2283 }
   2284 
   2285 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2286 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   2287   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
   2288             (__v16sf) __B,
   2289             (__v16sf) __W,
   2290             (__mmask16) __U,
   2291             _MM_FROUND_CUR_DIRECTION);
   2292 }
   2293 
   2294 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2295 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   2296   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
   2297             (__v16sf) __B,
   2298             (__v16sf)
   2299             _mm512_setzero_ps (),
   2300             (__mmask16) __U,
   2301             _MM_FROUND_CUR_DIRECTION);
   2302 }
   2303 
   /* 512-bit packed double multiply with a caller-supplied rounding operand R.
    * Every variant forwards A and B to __builtin_ia32_mulpd512_mask; they
    * differ only in operands three and four:
    *   _mm512_mul_round_pd        zero vector, all-ones mask
    *   _mm512_mask_mul_round_pd   W,           mask U
    *   _mm512_maskz_mul_round_pd  zero vector, mask U
    */
   #define _mm512_mul_round_pd(A, B, R) __extension__ ({ \
     (__m512d)__builtin_ia32_mulpd512_mask( \
         (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
         (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (int)(R)); })

   #define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \
     (__m512d)__builtin_ia32_mulpd512_mask( \
         (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
         (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)); })

   #define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \
     (__m512d)__builtin_ia32_mulpd512_mask( \
         (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
         (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)); })
   2321 
   /* 512-bit packed float multiply with a caller-supplied rounding operand R:
    * forwards A and B to __builtin_ia32_mulps512_mask with a zero vector as the
    * third operand and an all-ones mask. */
   #define _mm512_mul_round_ps(A, B, R) __extension__ ({ \
     (__m512)__builtin_ia32_mulps512_mask( \
         (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
         (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (int)(R)); })
   2327 
   /* Mask form of the 512-bit packed float multiply with rounding operand R:
    * forwards A, B, merge source W and mask U to __builtin_ia32_mulps512_mask.
    *
    * The expansion is a statement expression and must not carry its own
    * trailing ';' -- otherwise the macro cannot be used as a sub-expression
    * (function argument, operand, initializer). */
   #define _mm512_mask_mul_round_ps(W, U, A, B, R)  __extension__ ({ \
     (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(W), (__mmask16)(U), \
                                          (int)(R)); })
   2333 
   /* Zero-mask form of the 512-bit packed float multiply with rounding
    * operand R: forwards A, B, a zero vector and mask U to
    * __builtin_ia32_mulps512_mask.
    *
    * The expansion is a statement expression and must not carry its own
    * trailing ';' -- otherwise the macro cannot be used as a sub-expression
    * (function argument, operand, initializer). */
   #define _mm512_maskz_mul_round_ps(U, A, B, R)  __extension__ ({ \
     (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })
   2339 
   2340 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2341 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   2342   return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
   2343                 (__v4sf) __B,
   2344                 (__v4sf) __W,
   2345                 (__mmask8) __U,
   2346                 _MM_FROUND_CUR_DIRECTION);
   2347 }
   2348 
   2349 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2350 _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   2351   return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
   2352                 (__v4sf) __B,
   2353                 (__v4sf)  _mm_setzero_ps (),
   2354                 (__mmask8) __U,
   2355                 _MM_FROUND_CUR_DIRECTION);
   2356 }
   2357 
   /* Scalar float divide with a caller-supplied rounding operand R.
    * Every variant forwards A and B to __builtin_ia32_divss_round_mask; they
    * differ only in operands three and four:
    *   _mm_div_round_ss        zero vector, all-ones mask
    *   _mm_mask_div_round_ss   W,           mask U
    *   _mm_maskz_div_round_ss  zero vector, mask U
    */
   #define _mm_div_round_ss(A, B, R) __extension__ ({ \
     (__m128)__builtin_ia32_divss_round_mask( \
         (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
         (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)); })

   #define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \
     (__m128)__builtin_ia32_divss_round_mask( \
         (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
         (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)); })

   #define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \
     (__m128)__builtin_ia32_divss_round_mask( \
         (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
         (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)); })
   2375 
   2376 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2377 _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   2378   return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
   2379                 (__v2df) __B,
   2380                 (__v2df) __W,
   2381                 (__mmask8) __U,
   2382                 _MM_FROUND_CUR_DIRECTION);
   2383 }
   2384 
   2385 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2386 _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   2387   return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
   2388                 (__v2df) __B,
   2389                 (__v2df)  _mm_setzero_pd (),
   2390                 (__mmask8) __U,
   2391                 _MM_FROUND_CUR_DIRECTION);
   2392 }
   2393 
   /* Scalar double divide with a caller-supplied rounding operand R.
    * Every variant forwards A and B to __builtin_ia32_divsd_round_mask; they
    * differ only in operands three and four:
    *   _mm_div_round_sd        zero vector, all-ones mask
    *   _mm_mask_div_round_sd   W,           mask U
    *   _mm_maskz_div_round_sd  zero vector, mask U
    */
   #define _mm_div_round_sd(A, B, R) __extension__ ({ \
     (__m128d)__builtin_ia32_divsd_round_mask( \
         (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
         (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)); })

   #define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \
     (__m128d)__builtin_ia32_divsd_round_mask( \
         (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
         (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)); })

   #define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \
     (__m128d)__builtin_ia32_divsd_round_mask( \
         (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
         (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)); })
   2411 
   2412 static __inline __m512d __DEFAULT_FN_ATTRS
   2413 _mm512_div_pd(__m512d __a, __m512d __b)
   2414 {
   2415   return (__m512d)((__v8df)__a/(__v8df)__b);
   2416 }
   2417 
   2418 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2419 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   2420   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
   2421              (__v8df) __B,
   2422              (__v8df) __W,
   2423              (__mmask8) __U,
   2424              _MM_FROUND_CUR_DIRECTION);
   2425 }
   2426 
   2427 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2428 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   2429   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
   2430              (__v8df) __B,
   2431              (__v8df)
   2432              _mm512_setzero_pd (),
   2433              (__mmask8) __U,
   2434              _MM_FROUND_CUR_DIRECTION);
   2435 }
   2436 
   2437 static __inline __m512 __DEFAULT_FN_ATTRS
   2438 _mm512_div_ps(__m512 __a, __m512 __b)
   2439 {
   2440   return (__m512)((__v16sf)__a/(__v16sf)__b);
   2441 }
   2442 
   2443 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2444 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   2445   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
   2446             (__v16sf) __B,
   2447             (__v16sf) __W,
   2448             (__mmask16) __U,
   2449             _MM_FROUND_CUR_DIRECTION);
   2450 }
   2451 
   2452 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2453 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   2454   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
   2455             (__v16sf) __B,
   2456             (__v16sf)
   2457             _mm512_setzero_ps (),
   2458             (__mmask16) __U,
   2459             _MM_FROUND_CUR_DIRECTION);
   2460 }
   2461 
   /* 512-bit packed double divide with a caller-supplied rounding operand R.
    * Every variant forwards A and B to __builtin_ia32_divpd512_mask; they
    * differ only in operands three and four:
    *   _mm512_div_round_pd        zero vector, all-ones mask
    *   _mm512_mask_div_round_pd   W,           mask U
    *   _mm512_maskz_div_round_pd  zero vector, mask U
    */
   #define _mm512_div_round_pd(A, B, R) __extension__ ({ \
     (__m512d)__builtin_ia32_divpd512_mask( \
         (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
         (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (int)(R)); })

   #define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \
     (__m512d)__builtin_ia32_divpd512_mask( \
         (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
         (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)); })

   #define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \
     (__m512d)__builtin_ia32_divpd512_mask( \
         (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
         (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)); })
   2479 
   /* 512-bit packed float divide with a caller-supplied rounding operand R:
    * forwards A and B to __builtin_ia32_divps512_mask with a zero vector as the
    * third operand and an all-ones mask. */
   #define _mm512_div_round_ps(A, B, R) __extension__ ({ \
     (__m512)__builtin_ia32_divps512_mask( \
         (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
         (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (int)(R)); })
   2485 
   /* Mask form of the 512-bit packed float divide with rounding operand R:
    * forwards A, B, merge source W and mask U to __builtin_ia32_divps512_mask.
    *
    * The expansion is a statement expression and must not carry its own
    * trailing ';' -- otherwise the macro cannot be used as a sub-expression
    * (function argument, operand, initializer). */
   #define _mm512_mask_div_round_ps(W, U, A, B, R)  __extension__ ({ \
     (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(W), (__mmask16)(U), \
                                          (int)(R)); })
   2491 
   /* Zero-mask form of the 512-bit packed float divide with rounding
    * operand R: forwards A, B, a zero vector and mask U to
    * __builtin_ia32_divps512_mask.
    *
    * The expansion is a statement expression and must not carry its own
    * trailing ';' -- otherwise the macro cannot be used as a sub-expression
    * (function argument, operand, initializer). */
   #define _mm512_maskz_div_round_ps(U, A, B, R)  __extension__ ({ \
     (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })
   2497 
   /* Unmasked 512-bit packed float round-scale: forwards A and the immediate B
    * to __builtin_ia32_rndscaleps_mask with an all-ones mask and
    * _MM_FROUND_CUR_DIRECTION.
    *
    * The merge source is _mm512_undefined_ps() rather than a second copy of A,
    * so the macro argument A is expanded (and therefore evaluated) only once.
    * With an all-ones mask no lane should be taken from the merge source, so
    * the result is unchanged; this also matches _mm512_roundscale_round_ps. */
   #define _mm512_roundscale_ps(A, B) __extension__ ({ \
     (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION); })
   2502 
   /* Masked and explicit-rounding forms of the 512-bit packed float
    * round-scale. All forward to __builtin_ia32_rndscaleps_mask as
    * (data, immediate, merge source, mask, rounding operand).
    *
    * Note the parameter naming in the mask/maskz forms: for "mask", A is the
    * merge source, B the mask and C the data; for "maskz", A is the mask and B
    * the data. The non-"_round" forms pass _MM_FROUND_CUR_DIRECTION; the
    * "_round" forms pass the caller's R.
    */
   #define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({ \
     (__m512)__builtin_ia32_rndscaleps_mask( \
         (__v16sf)(__m512)(C), (int)(imm), \
         (__v16sf)(__m512)(A), (__mmask16)(B), \
         _MM_FROUND_CUR_DIRECTION); })

   #define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({ \
     (__m512)__builtin_ia32_rndscaleps_mask( \
         (__v16sf)(__m512)(B), (int)(imm), \
         (__v16sf)_mm512_setzero_ps(), (__mmask16)(A), \
         _MM_FROUND_CUR_DIRECTION); })

   #define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \
     (__m512)__builtin_ia32_rndscaleps_mask( \
         (__v16sf)(__m512)(C), (int)(imm), \
         (__v16sf)(__m512)(A), (__mmask16)(B), \
         (int)(R)); })

   #define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \
     (__m512)__builtin_ia32_rndscaleps_mask( \
         (__v16sf)(__m512)(B), (int)(imm), \
         (__v16sf)_mm512_setzero_ps(), (__mmask16)(A), \
         (int)(R)); })

   #define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \
     (__m512)__builtin_ia32_rndscaleps_mask( \
         (__v16sf)(__m512)(A), (int)(imm), \
         (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, \
         (int)(R)); })
   2528 
   /* Unmasked 512-bit packed double round-scale: forwards A and the immediate
    * B to __builtin_ia32_rndscalepd_mask with an all-ones mask and
    * _MM_FROUND_CUR_DIRECTION.
    *
    * The merge source is _mm512_undefined_pd() rather than a second copy of A,
    * so the macro argument A is expanded (and therefore evaluated) only once.
    * With an all-ones mask no lane should be taken from the merge source, so
    * the result is unchanged; this also matches _mm512_roundscale_round_pd. */
   #define _mm512_roundscale_pd(A, B) __extension__ ({ \
     (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION); })
   2533 
   /* Masked and explicit-rounding forms of the 512-bit packed double
    * round-scale. All forward to __builtin_ia32_rndscalepd_mask as
    * (data, immediate, merge source, mask, rounding operand).
    *
    * Note the parameter naming in the mask/maskz forms: for "mask", A is the
    * merge source, B the mask and C the data; for "maskz", A is the mask and B
    * the data. The non-"_round" forms pass _MM_FROUND_CUR_DIRECTION; the
    * "_round" forms pass the caller's R.
    */
   #define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({ \
     (__m512d)__builtin_ia32_rndscalepd_mask( \
         (__v8df)(__m512d)(C), (int)(imm), \
         (__v8df)(__m512d)(A), (__mmask8)(B), \
         _MM_FROUND_CUR_DIRECTION); })

   #define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({ \
     (__m512d)__builtin_ia32_rndscalepd_mask( \
         (__v8df)(__m512d)(B), (int)(imm), \
         (__v8df)_mm512_setzero_pd(), (__mmask8)(A), \
         _MM_FROUND_CUR_DIRECTION); })

   #define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \
     (__m512d)__builtin_ia32_rndscalepd_mask( \
         (__v8df)(__m512d)(C), (int)(imm), \
         (__v8df)(__m512d)(A), (__mmask8)(B), \
         (int)(R)); })

   #define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \
     (__m512d)__builtin_ia32_rndscalepd_mask( \
         (__v8df)(__m512d)(B), (int)(imm), \
         (__v8df)_mm512_setzero_pd(), (__mmask8)(A), \
         (int)(R)); })

   #define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \
     (__m512d)__builtin_ia32_rndscalepd_mask( \
         (__v8df)(__m512d)(A), (int)(imm), \
         (__v8df)_mm512_undefined_pd(), (__mmask8)-1, \
         (int)(R)); })
   2559 
   /* 512-bit packed double fused multiply-add with a caller-supplied rounding
    * operand R. A, B and C are forwarded unchanged; the variants differ in the
    * mask and in which builtin flavour they call:
    *   _mm512_fmadd_round_pd        ..._mask,  all-ones mask
    *   _mm512_mask_fmadd_round_pd   ..._mask,  mask U
    *   _mm512_mask3_fmadd_round_pd  ..._mask3, mask U
    *   _mm512_maskz_fmadd_round_pd  ..._maskz, mask U
    * NOTE(review): the _mask/_mask3/_maskz builtins presumably take masked-off
    * lanes from A, C and zero respectively -- confirm against the Intel guide.
    */
   #define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_mask( \
         (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
         (__mmask8)-1, (int)(R)); })

   #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_mask( \
         (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
         (__mmask8)(U), (int)(R)); })

   #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_mask3( \
         (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
         (__mmask8)(U), (int)(R)); })

   #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
         (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
         (__mmask8)(U), (int)(R)); })
   2586 
   2587 
   /* 512-bit packed double fused multiply-subtract with a caller-supplied
    * rounding operand R. Implemented on top of the fmadd builtins by negating
    * the addend C before it is passed in:
    *   _mm512_fmsub_round_pd        ..._mask,  all-ones mask
    *   _mm512_mask_fmsub_round_pd   ..._mask,  mask U
    *   _mm512_maskz_fmsub_round_pd  ..._maskz, mask U
    */
   #define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_mask( \
         (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
         (__mmask8)-1, (int)(R)); })

   #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_mask( \
         (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
         (__mmask8)(U), (int)(R)); })

   #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
         (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
         (__mmask8)(U), (int)(R)); })
   2607 
   2608 
   /* 512-bit packed double negated fused multiply-add with a caller-supplied
    * rounding operand R. Implemented on top of the fmadd builtins by negating
    * the first multiplicand A before it is passed in:
    *   _mm512_fnmadd_round_pd        ..._mask,  all-ones mask
    *   _mm512_mask3_fnmadd_round_pd  ..._mask3, mask U
    *   _mm512_maskz_fnmadd_round_pd  ..._maskz, mask U
    */
   #define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_mask( \
         -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
         (__mmask8)-1, (int)(R)); })

   #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_mask3( \
         -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
         (__mmask8)(U), (int)(R)); })

   #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
         -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
         (__mmask8)(U), (int)(R)); })
   2628 
   2629 
   /* 512-bit packed double negated fused multiply-subtract with a
    * caller-supplied rounding operand R. Implemented on top of the fmadd
    * builtins by negating both the first multiplicand A and the addend C:
    *   _mm512_fnmsub_round_pd        ..._mask,  all-ones mask
    *   _mm512_maskz_fnmsub_round_pd  ..._maskz, mask U
    */
   #define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_mask( \
         -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
         (__mmask8)-1, (int)(R)); })

   #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
         -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
         (__mmask8)(U), (int)(R)); })
   2642 
   2643 
   2644 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2645 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
   2646 {
   2647   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   2648                                                     (__v8df) __B,
   2649                                                     (__v8df) __C,
   2650                                                     (__mmask8) -1,
   2651                                                     _MM_FROUND_CUR_DIRECTION);
   2652 }
   2653 
   2654 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2655 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   2656 {
   2657   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   2658                                                     (__v8df) __B,
   2659                                                     (__v8df) __C,
   2660                                                     (__mmask8) __U,
   2661                                                     _MM_FROUND_CUR_DIRECTION);
   2662 }
   2663 
   2664 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2665 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   2666 {
   2667   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
   2668                                                      (__v8df) __B,
   2669                                                      (__v8df) __C,
   2670                                                      (__mmask8) __U,
   2671                                                      _MM_FROUND_CUR_DIRECTION);
   2672 }
   2673 
   2674 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2675 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   2676 {
   2677   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
   2678                                                      (__v8df) __B,
   2679                                                      (__v8df) __C,
   2680                                                      (__mmask8) __U,
   2681                                                      _MM_FROUND_CUR_DIRECTION);
   2682 }
   2683 
   2684 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2685 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
   2686 {
   2687   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   2688                                                     (__v8df) __B,
   2689                                                     -(__v8df) __C,
   2690                                                     (__mmask8) -1,
   2691                                                     _MM_FROUND_CUR_DIRECTION);
   2692 }
   2693 
   2694 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2695 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   2696 {
   2697   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   2698                                                     (__v8df) __B,
   2699                                                     -(__v8df) __C,
   2700                                                     (__mmask8) __U,
   2701                                                     _MM_FROUND_CUR_DIRECTION);
   2702 }
   2703 
   2704 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2705 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   2706 {
   2707   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
   2708                                                      (__v8df) __B,
   2709                                                      -(__v8df) __C,
   2710                                                      (__mmask8) __U,
   2711                                                      _MM_FROUND_CUR_DIRECTION);
   2712 }
   2713 
   2714 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2715 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
   2716 {
   2717   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
   2718                                                     (__v8df) __B,
   2719                                                     (__v8df) __C,
   2720                                                     (__mmask8) -1,
   2721                                                     _MM_FROUND_CUR_DIRECTION);
   2722 }
   2723 
   2724 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2725 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   2726 {
   2727   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
   2728                                                      (__v8df) __B,
   2729                                                      (__v8df) __C,
   2730                                                      (__mmask8) __U,
   2731                                                      _MM_FROUND_CUR_DIRECTION);
   2732 }
   2733 
   2734 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2735 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   2736 {
   2737   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
   2738                                                      (__v8df) __B,
   2739                                                      (__v8df) __C,
   2740                                                      (__mmask8) __U,
   2741                                                      _MM_FROUND_CUR_DIRECTION);
   2742 }
   2743 
   2744 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2745 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
   2746 {
   2747   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
   2748                                                     (__v8df) __B,
   2749                                                     -(__v8df) __C,
   2750                                                     (__mmask8) -1,
   2751                                                     _MM_FROUND_CUR_DIRECTION);
   2752 }
   2753 
   2754 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2755 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   2756 {
   2757   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
   2758                                                      (__v8df) __B,
   2759                                                      -(__v8df) __C,
   2760                                                      (__mmask8) __U,
   2761                                                      _MM_FROUND_CUR_DIRECTION);
   2762 }
   2763 
   /* 512-bit packed float fused multiply-add with a caller-supplied rounding
    * operand R. A, B and C are forwarded unchanged; the variants differ in the
    * mask and in which builtin flavour they call:
    *   _mm512_fmadd_round_ps        ..._mask,  all-ones mask
    *   _mm512_mask_fmadd_round_ps   ..._mask,  mask U
    *   _mm512_mask3_fmadd_round_ps  ..._mask3, mask U
    *   _mm512_maskz_fmadd_round_ps  ..._maskz, mask U
    * NOTE(review): the _mask/_mask3/_maskz builtins presumably take masked-off
    * lanes from A, C and zero respectively -- confirm against the Intel guide.
    */
   #define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddps512_mask( \
         (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
         (__mmask16)-1, (int)(R)); })

   #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddps512_mask( \
         (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
         (__mmask16)(U), (int)(R)); })

   #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddps512_mask3( \
         (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
         (__mmask16)(U), (int)(R)); })

   #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_vfmaddps512_maskz( \
         (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
         (__mmask16)(U), (int)(R)); })
   2790 
   2791 
   2792 #define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
   2793   (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
   2794                                           (__v16sf)(__m512)(B), \
   2795                                           -(__v16sf)(__m512)(C), \
   2796                                           (__mmask16)-1, (int)(R)); })
   2797 
   2798 
   2799 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
   2800   (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
   2801                                           (__v16sf)(__m512)(B), \
   2802                                           -(__v16sf)(__m512)(C), \
   2803                                           (__mmask16)(U), (int)(R)); })
   2804 
   2805 
   2806 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
   2807   (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
   2808                                            (__v16sf)(__m512)(B), \
   2809                                            -(__v16sf)(__m512)(C), \
   2810                                            (__mmask16)(U), (int)(R)); })
   2811 
   2812 
   2813 #define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
   2814   (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
   2815                                           (__v16sf)(__m512)(B), \
   2816                                           (__v16sf)(__m512)(C), (__mmask16)-1, \
   2817                                           (int)(R)); })
   2818 
   2819 
   2820 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
   2821   (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
   2822                                            (__v16sf)(__m512)(B), \
   2823                                            (__v16sf)(__m512)(C), \
   2824                                            (__mmask16)(U), (int)(R)); })
   2825 
   2826 
   2827 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
   2828   (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
   2829                                            (__v16sf)(__m512)(B), \
   2830                                            (__v16sf)(__m512)(C), \
   2831                                            (__mmask16)(U), (int)(R)); })
   2832 
   2833 
   2834 #define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
   2835   (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
   2836                                           (__v16sf)(__m512)(B), \
   2837                                           -(__v16sf)(__m512)(C), \
   2838                                           (__mmask16)-1, (int)(R)); })
   2839 
   2840 
   2841 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
   2842   (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
   2843                                            (__v16sf)(__m512)(B), \
   2844                                            -(__v16sf)(__m512)(C), \
   2845                                            (__mmask16)(U), (int)(R)); })
   2846 
   2847 
   2848 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2849 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
   2850 {
   2851   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2852                                                    (__v16sf) __B,
   2853                                                    (__v16sf) __C,
   2854                                                    (__mmask16) -1,
   2855                                                    _MM_FROUND_CUR_DIRECTION);
   2856 }
   2857 
   2858 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2859 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   2860 {
   2861   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2862                                                    (__v16sf) __B,
   2863                                                    (__v16sf) __C,
   2864                                                    (__mmask16) __U,
   2865                                                    _MM_FROUND_CUR_DIRECTION);
   2866 }
   2867 
   2868 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2869 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   2870 {
   2871   return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
   2872                                                     (__v16sf) __B,
   2873                                                     (__v16sf) __C,
   2874                                                     (__mmask16) __U,
   2875                                                     _MM_FROUND_CUR_DIRECTION);
   2876 }
   2877 
   2878 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2879 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   2880 {
   2881   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
   2882                                                     (__v16sf) __B,
   2883                                                     (__v16sf) __C,
   2884                                                     (__mmask16) __U,
   2885                                                     _MM_FROUND_CUR_DIRECTION);
   2886 }
   2887 
   2888 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2889 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
   2890 {
   2891   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2892                                                    (__v16sf) __B,
   2893                                                    -(__v16sf) __C,
   2894                                                    (__mmask16) -1,
   2895                                                    _MM_FROUND_CUR_DIRECTION);
   2896 }
   2897 
   2898 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2899 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   2900 {
   2901   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2902                                                    (__v16sf) __B,
   2903                                                    -(__v16sf) __C,
   2904                                                    (__mmask16) __U,
   2905                                                    _MM_FROUND_CUR_DIRECTION);
   2906 }
   2907 
   2908 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2909 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   2910 {
   2911   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
   2912                                                     (__v16sf) __B,
   2913                                                     -(__v16sf) __C,
   2914                                                     (__mmask16) __U,
   2915                                                     _MM_FROUND_CUR_DIRECTION);
   2916 }
   2917 
   2918 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2919 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
   2920 {
   2921   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
   2922                                                    (__v16sf) __B,
   2923                                                    (__v16sf) __C,
   2924                                                    (__mmask16) -1,
   2925                                                    _MM_FROUND_CUR_DIRECTION);
   2926 }
   2927 
   2928 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2929 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   2930 {
   2931   return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
   2932                                                     (__v16sf) __B,
   2933                                                     (__v16sf) __C,
   2934                                                     (__mmask16) __U,
   2935                                                     _MM_FROUND_CUR_DIRECTION);
   2936 }
   2937 
   2938 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2939 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   2940 {
   2941   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
   2942                                                     (__v16sf) __B,
   2943                                                     (__v16sf) __C,
   2944                                                     (__mmask16) __U,
   2945                                                     _MM_FROUND_CUR_DIRECTION);
   2946 }
   2947 
   2948 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2949 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
   2950 {
   2951   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
   2952                                                    (__v16sf) __B,
   2953                                                    -(__v16sf) __C,
   2954                                                    (__mmask16) -1,
   2955                                                    _MM_FROUND_CUR_DIRECTION);
   2956 }
   2957 
   2958 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2959 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   2960 {
   2961   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
   2962                                                     (__v16sf) __B,
   2963                                                     -(__v16sf) __C,
   2964                                                     (__mmask16) __U,
   2965                                                     _MM_FROUND_CUR_DIRECTION);
   2966 }
   2967 
   2968 #define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
   2969   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
   2970                                               (__v8df)(__m512d)(B), \
   2971                                               (__v8df)(__m512d)(C), \
   2972                                               (__mmask8)-1, (int)(R)); })
   2973 
   2974 
   2975 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
   2976   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
   2977                                               (__v8df)(__m512d)(B), \
   2978                                               (__v8df)(__m512d)(C), \
   2979                                               (__mmask8)(U), (int)(R)); })
   2980 
   2981 
   2982 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
   2983   (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
   2984                                                (__v8df)(__m512d)(B), \
   2985                                                (__v8df)(__m512d)(C), \
   2986                                                (__mmask8)(U), (int)(R)); })
   2987 
   2988 
   2989 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
   2990   (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
   2991                                                (__v8df)(__m512d)(B), \
   2992                                                (__v8df)(__m512d)(C), \
   2993                                                (__mmask8)(U), (int)(R)); })
   2994 
   2995 
   2996 #define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
   2997   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
   2998                                               (__v8df)(__m512d)(B), \
   2999                                               -(__v8df)(__m512d)(C), \
   3000                                               (__mmask8)-1, (int)(R)); })
   3001 
   3002 
   3003 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
   3004   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
   3005                                               (__v8df)(__m512d)(B), \
   3006                                               -(__v8df)(__m512d)(C), \
   3007                                               (__mmask8)(U), (int)(R)); })
   3008 
   3009 
   3010 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
   3011   (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
   3012                                                (__v8df)(__m512d)(B), \
   3013                                                -(__v8df)(__m512d)(C), \
   3014                                                (__mmask8)(U), (int)(R)); })
   3015 
   3016 
   3017 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3018 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
   3019 {
   3020   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   3021                                                        (__v8df) __B,
   3022                                                        (__v8df) __C,
   3023                                                        (__mmask8) -1,
   3024                                                        _MM_FROUND_CUR_DIRECTION);
   3025 }
   3026 
   3027 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3028 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   3029 {
   3030   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   3031                                                        (__v8df) __B,
   3032                                                        (__v8df) __C,
   3033                                                        (__mmask8) __U,
   3034                                                        _MM_FROUND_CUR_DIRECTION);
   3035 }
   3036 
   3037 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3038 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   3039 {
   3040   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
   3041                                                         (__v8df) __B,
   3042                                                         (__v8df) __C,
   3043                                                         (__mmask8) __U,
   3044                                                         _MM_FROUND_CUR_DIRECTION);
   3045 }
   3046 
   3047 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3048 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   3049 {
   3050   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
   3051                                                         (__v8df) __B,
   3052                                                         (__v8df) __C,
   3053                                                         (__mmask8) __U,
   3054                                                         _MM_FROUND_CUR_DIRECTION);
   3055 }
   3056 
   3057 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3058 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
   3059 {
   3060   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   3061                                                        (__v8df) __B,
   3062                                                        -(__v8df) __C,
   3063                                                        (__mmask8) -1,
   3064                                                        _MM_FROUND_CUR_DIRECTION);
   3065 }
   3066 
   3067 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3068 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   3069 {
   3070   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   3071                                                        (__v8df) __B,
   3072                                                        -(__v8df) __C,
   3073                                                        (__mmask8) __U,
   3074                                                        _MM_FROUND_CUR_DIRECTION);
   3075 }
   3076 
   3077 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3078 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   3079 {
   3080   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
   3081                                                         (__v8df) __B,
   3082                                                         -(__v8df) __C,
   3083                                                         (__mmask8) __U,
   3084                                                         _MM_FROUND_CUR_DIRECTION);
   3085 }
   3086 
   3087 #define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
   3088   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
   3089                                              (__v16sf)(__m512)(B), \
   3090                                              (__v16sf)(__m512)(C), \
   3091                                              (__mmask16)-1, (int)(R)); })
   3092 
   3093 
   3094 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
   3095   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
   3096                                              (__v16sf)(__m512)(B), \
   3097                                              (__v16sf)(__m512)(C), \
   3098                                              (__mmask16)(U), (int)(R)); })
   3099 
   3100 
   3101 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
   3102   (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
   3103                                               (__v16sf)(__m512)(B), \
   3104                                               (__v16sf)(__m512)(C), \
   3105                                               (__mmask16)(U), (int)(R)); })
   3106 
   3107 
   3108 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
   3109   (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
   3110                                               (__v16sf)(__m512)(B), \
   3111                                               (__v16sf)(__m512)(C), \
   3112                                               (__mmask16)(U), (int)(R)); })
   3113 
   3114 
   3115 #define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
   3116   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
   3117                                              (__v16sf)(__m512)(B), \
   3118                                              -(__v16sf)(__m512)(C), \
   3119                                              (__mmask16)-1, (int)(R)); })
   3120 
   3121 
   3122 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
   3123   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
   3124                                              (__v16sf)(__m512)(B), \
   3125                                              -(__v16sf)(__m512)(C), \
   3126                                              (__mmask16)(U), (int)(R)); })
   3127 
   3128 
   3129 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
   3130   (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
   3131                                               (__v16sf)(__m512)(B), \
   3132                                               -(__v16sf)(__m512)(C), \
   3133                                               (__mmask16)(U), (int)(R)); })
   3134 
   3135 
   3136 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3137 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
   3138 {
   3139   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   3140                                                       (__v16sf) __B,
   3141                                                       (__v16sf) __C,
   3142                                                       (__mmask16) -1,
   3143                                                       _MM_FROUND_CUR_DIRECTION);
   3144 }
   3145 
   3146 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3147 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   3148 {
   3149   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   3150                                                       (__v16sf) __B,
   3151                                                       (__v16sf) __C,
   3152                                                       (__mmask16) __U,
   3153                                                       _MM_FROUND_CUR_DIRECTION);
   3154 }
   3155 
   3156 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3157 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   3158 {
   3159   return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
   3160                                                        (__v16sf) __B,
   3161                                                        (__v16sf) __C,
   3162                                                        (__mmask16) __U,
   3163                                                        _MM_FROUND_CUR_DIRECTION);
   3164 }
   3165 
   3166 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3167 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   3168 {
   3169   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
   3170                                                        (__v16sf) __B,
   3171                                                        (__v16sf) __C,
   3172                                                        (__mmask16) __U,
   3173                                                        _MM_FROUND_CUR_DIRECTION);
   3174 }
   3175 
   3176 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3177 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
   3178 {
   3179   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   3180                                                       (__v16sf) __B,
   3181                                                       -(__v16sf) __C,
   3182                                                       (__mmask16) -1,
   3183                                                       _MM_FROUND_CUR_DIRECTION);
   3184 }
   3185 
   3186 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3187 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   3188 {
   3189   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   3190                                                       (__v16sf) __B,
   3191                                                       -(__v16sf) __C,
   3192                                                       (__mmask16) __U,
   3193                                                       _MM_FROUND_CUR_DIRECTION);
   3194 }
   3195 
   3196 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3197 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   3198 {
   3199   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
   3200                                                        (__v16sf) __B,
   3201                                                        -(__v16sf) __C,
   3202                                                        (__mmask16) __U,
   3203                                                        _MM_FROUND_CUR_DIRECTION);
   3204 }
   3205 
   3206 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
   3207   (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
   3208                                             (__v8df)(__m512d)(B), \
   3209                                             (__v8df)(__m512d)(C), \
   3210                                             (__mmask8)(U), (int)(R)); })
   3211 
   3212 
   3213 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3214 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   3215 {
   3216   return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
   3217                                                      (__v8df) __B,
   3218                                                      (__v8df) __C,
   3219                                                      (__mmask8) __U,
   3220                                                      _MM_FROUND_CUR_DIRECTION);
   3221 }
   3222 
   3223 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
   3224   (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
   3225                                            (__v16sf)(__m512)(B), \
   3226                                            (__v16sf)(__m512)(C), \
   3227                                            (__mmask16)(U), (int)(R)); })
   3228 
   3229 
   3230 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3231 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   3232 {
   3233   return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
   3234                                                     (__v16sf) __B,
   3235                                                     (__v16sf) __C,
   3236                                                     (__mmask16) __U,
   3237                                                     _MM_FROUND_CUR_DIRECTION);
   3238 }
   3239 
   3240 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
   3241   (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
   3242                                                (__v8df)(__m512d)(B), \
   3243                                                (__v8df)(__m512d)(C), \
   3244                                                (__mmask8)(U), (int)(R)); })
   3245 
   3246 
   3247 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3248 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   3249 {
   3250   return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
   3251                                                         (__v8df) __B,
   3252                                                         (__v8df) __C,
   3253                                                         (__mmask8) __U,
   3254                                                         _MM_FROUND_CUR_DIRECTION);
   3255 }
   3256 
   3257 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
   3258   (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
   3259                                               (__v16sf)(__m512)(B), \
   3260                                               (__v16sf)(__m512)(C), \
   3261                                               (__mmask16)(U), (int)(R)); })
   3262 
   3263 
   3264 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3265 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   3266 {
   3267   return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
   3268                                                        (__v16sf) __B,
   3269                                                        (__v16sf) __C,
   3270                                                        (__mmask16) __U,
   3271                                                        _MM_FROUND_CUR_DIRECTION);
   3272 }
   3273 
   3274 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
   3275   (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \
   3276                                             (__v8df)(__m512d)(B), \
   3277                                             (__v8df)(__m512d)(C), \
   3278                                             (__mmask8)(U), (int)(R)); })
   3279 
   3280 
   3281 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3282 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   3283 {
   3284   return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
   3285                                                      (__v8df) __B,
   3286                                                      (__v8df) __C,
   3287                                                      (__mmask8) __U,
   3288                                                      _MM_FROUND_CUR_DIRECTION);
   3289 }
   3290 
   3291 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
   3292   (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \
   3293                                            (__v16sf)(__m512)(B), \
   3294                                            (__v16sf)(__m512)(C), \
   3295                                            (__mmask16)(U), (int)(R)); })
   3296 
   3297 
   3298 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3299 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   3300 {
   3301   return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
   3302                                                     (__v16sf) __B,
   3303                                                     (__v16sf) __C,
   3304                                                     (__mmask16) __U,
   3305                                                     _MM_FROUND_CUR_DIRECTION);
   3306 }
   3307 
   3308 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
   3309   (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \
   3310                                             (__v8df)(__m512d)(B), \
   3311                                             (__v8df)(__m512d)(C), \
   3312                                             (__mmask8)(U), (int)(R)); })
   3313 
   3314 
   3315 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
   3316   (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \
   3317                                              (__v8df)(__m512d)(B), \
   3318                                              (__v8df)(__m512d)(C), \
   3319                                              (__mmask8)(U), (int)(R)); })
   3320 
   3321 
   3322 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3323 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   3324 {
   3325   return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
   3326                                                      (__v8df) __B,
   3327                                                      (__v8df) __C,
   3328                                                      (__mmask8) __U,
   3329                                                      _MM_FROUND_CUR_DIRECTION);
   3330 }
   3331 
   3332 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3333 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   3334 {
   3335   return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
   3336                                                       (__v8df) __B,
   3337                                                       (__v8df) __C,
   3338                                                       (__mmask8) __U,
   3339                                                       _MM_FROUND_CUR_DIRECTION);
   3340 }
   3341 
   /* Single-precision form: forwards A, B and C (viewed as __v16sf) to
    * __builtin_ia32_vfnmsubps512_mask with the 16-bit mask U and the
    * rounding argument R. */
   #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
     __extension__ ({ \
       (__m512)__builtin_ia32_vfnmsubps512_mask( \
           (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
           (__mmask16)(U), (int)(R)); \
     })
   3347 
   3348 
   /* Single-precision _mask3 form: forwards A, B and C (viewed as __v16sf) to
    * __builtin_ia32_vfnmsubps512_mask3 with the 16-bit mask U and the
    * rounding argument R. */
   #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
     __extension__ ({ \
       (__m512)__builtin_ia32_vfnmsubps512_mask3( \
           (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
           (__mmask16)(U), (int)(R)); \
     })
   3354 
   3355 
   3356 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3357 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   3358 {
   3359   return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
   3360                                                     (__v16sf) __B,
   3361                                                     (__v16sf) __C,
   3362                                                     (__mmask16) __U,
   3363                                                     _MM_FROUND_CUR_DIRECTION);
   3364 }
   3365 
   3366 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3367 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   3368 {
   3369   return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
   3370                                                      (__v16sf) __B,
   3371                                                      (__v16sf) __C,
   3372                                                      (__mmask16) __U,
   3373                                                      _MM_FROUND_CUR_DIRECTION);
   3374 }
   3375 
   3376 
   3377 
   3378 /* Vector permutations */
   3379 
   3380 static __inline __m512i __DEFAULT_FN_ATTRS
   3381 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
   3382 {
   3383   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
   3384                                                        /* idx */ ,
   3385                                                        (__v16si) __A,
   3386                                                        (__v16si) __B,
   3387                                                        (__mmask16) -1);
   3388 }
   3389 
   3390 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3391 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
   3392                                 __m512i __I, __m512i __B)
   3393 {
   3394   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
   3395                                                         /* idx */ ,
   3396                                                         (__v16si) __A,
   3397                                                         (__v16si) __B,
   3398                                                         (__mmask16) __U);
   3399 }
   3400 
   3401 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3402 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
   3403                                  __m512i __I, __m512i __B)
   3404 {
   3405   return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
   3406                                                         /* idx */ ,
   3407                                                         (__v16si) __A,
   3408                                                         (__v16si) __B,
   3409                                                         (__mmask16) __U);
   3410 }
   3411 
   3412 static __inline __m512i __DEFAULT_FN_ATTRS
   3413 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
   3414 {
   3415   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
   3416                                                        /* idx */ ,
   3417                                                        (__v8di) __A,
   3418                                                        (__v8di) __B,
   3419                                                        (__mmask8) -1);
   3420 }
   3421 
   3422 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3423 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
   3424                                 __m512i __B)
   3425 {
   3426   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
   3427                                                        /* idx */ ,
   3428                                                        (__v8di) __A,
   3429                                                        (__v8di) __B,
   3430                                                        (__mmask8) __U);
   3431 }
   3432 
   3433 
   3434 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3435 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
   3436          __m512i __I, __m512i __B)
   3437 {
   3438   return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
   3439                                                         /* idx */ ,
   3440                                                         (__v8di) __A,
   3441                                                         (__v8di) __B,
   3442                                                         (__mmask8) __U);
   3443 }
   3444 
   /* Unmasked form: forwards A, B (as __v8di) and the immediate I to
    * __builtin_ia32_alignq512_mask with a zero source vector and an all-ones
    * 8-bit mask. */
   #define _mm512_alignr_epi64(A, B, I) \
     __extension__ ({ \
       (__m512i)__builtin_ia32_alignq512_mask( \
           (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (int)(I), \
           (__v8di)_mm512_setzero_si512(), (__mmask8)-1); \
     })
   3450 
   /* Masked form: forwards A, B (as __v8di) and the immediate imm to
    * __builtin_ia32_alignq512_mask with W as the source vector and U as the
    * 8-bit mask. */
   #define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
     __extension__ ({ \
       (__m512i)__builtin_ia32_alignq512_mask( \
           (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (int)(imm), \
           (__v8di)(__m512i)(W), (__mmask8)(U)); \
     })
   3456 
   /* maskz form: forwards A, B (as __v8di) and the immediate imm to
    * __builtin_ia32_alignq512_mask with a zero source vector and U as the
    * 8-bit mask. */
   #define _mm512_maskz_alignr_epi64(U, A, B, imm) \
     __extension__ ({ \
       (__m512i)__builtin_ia32_alignq512_mask( \
           (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (int)(imm), \
           (__v8di)_mm512_setzero_si512(), (__mmask8)(U)); \
     })
   3462 
   /* Unmasked form: forwards A, B (as __v16si) and the immediate I to
    * __builtin_ia32_alignd512_mask with a zero source vector and an all-ones
    * 16-bit mask. */
   #define _mm512_alignr_epi32(A, B, I) \
     __extension__ ({ \
       (__m512i)__builtin_ia32_alignd512_mask( \
           (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (int)(I), \
           (__v16si)_mm512_setzero_si512(), (__mmask16)-1); \
     })
   3468 
   /* Masked form: forwards A, B (as __v16si) and the immediate imm to
    * __builtin_ia32_alignd512_mask with W as the source vector and U as the
    * 16-bit mask. */
   #define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
     __extension__ ({ \
       (__m512i)__builtin_ia32_alignd512_mask( \
           (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (int)(imm), \
           (__v16si)(__m512i)(W), (__mmask16)(U)); \
     })
   3474 
   /* maskz form: forwards A, B (as __v16si) and the immediate imm to
    * __builtin_ia32_alignd512_mask with a zero source vector and U as the
    * 16-bit mask. */
   #define _mm512_maskz_alignr_epi32(U, A, B, imm) \
     __extension__ ({ \
       (__m512i)__builtin_ia32_alignd512_mask( \
           (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (int)(imm), \
           (__v16si)_mm512_setzero_si512(), (__mmask16)(U)); \
     })
   3480 /* Vector Extract */
   3481 
   /* Unmasked extract: forwards A (as __v8df) and the immediate I to
    * __builtin_ia32_extractf64x4_mask with a zero source vector and an
    * all-ones 8-bit mask; the result is typed __m256d.
    * The zero vector is built with _mm256_setzero_pd() so its type matches
    * the (__v4df) operand, as _mm512_maskz_extractf64x4_pd already does,
    * instead of reinterpreting an integer zero vector. */
   #define _mm512_extractf64x4_pd(A, I) \
     __extension__ ({ \
       (__m256d)__builtin_ia32_extractf64x4_mask( \
           (__v8df)(__m512d)(A), (int)(I), \
           (__v4df)_mm256_setzero_pd(), (__mmask8)-1); \
     })
   3486 
   /* Masked extract: forwards A (as __v8df) and the immediate imm to
    * __builtin_ia32_extractf64x4_mask with W as the __v4df source vector and
    * U as the 8-bit mask. */
   #define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
     __extension__ ({ \
       (__m256d)__builtin_ia32_extractf64x4_mask( \
           (__v8df)(__m512d)(A), (int)(imm), \
           (__v4df)(__m256d)(W), (__mmask8)(U)); \
     })
   3491 
   /* maskz extract: forwards A (as __v8df) and the immediate imm to
    * __builtin_ia32_extractf64x4_mask with a zero __v4df source vector and U
    * as the 8-bit mask. */
   #define _mm512_maskz_extractf64x4_pd(U, A, imm) \
     __extension__ ({ \
       (__m256d)__builtin_ia32_extractf64x4_mask( \
           (__v8df)(__m512d)(A), (int)(imm), \
           (__v4df)_mm256_setzero_pd(), (__mmask8)(U)); \
     })
   3496 
   /* Unmasked extract: forwards A (as __v16sf) and the immediate I to
    * __builtin_ia32_extractf32x4_mask with a zero __v4sf source vector and an
    * all-ones 8-bit mask; the result is typed __m128. */
   #define _mm512_extractf32x4_ps(A, I) \
     __extension__ ({ \
       (__m128)__builtin_ia32_extractf32x4_mask( \
           (__v16sf)(__m512)(A), (int)(I), \
           (__v4sf)_mm_setzero_ps(), (__mmask8)-1); \
     })
   3501 
   /* Masked extract: forwards A (as __v16sf) and the immediate imm to
    * __builtin_ia32_extractf32x4_mask with W as the __v4sf source vector and
    * U as the 8-bit mask. */
   #define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
     __extension__ ({ \
       (__m128)__builtin_ia32_extractf32x4_mask( \
           (__v16sf)(__m512)(A), (int)(imm), \
           (__v4sf)(__m128)(W), (__mmask8)(U)); \
     })
   3506 
   /* maskz extract: forwards A (as __v16sf) and the immediate imm to
    * __builtin_ia32_extractf32x4_mask with a zero __v4sf source vector and U
    * as the 8-bit mask. */
   #define _mm512_maskz_extractf32x4_ps(U, A, imm) \
     __extension__ ({ \
       (__m128)__builtin_ia32_extractf32x4_mask( \
           (__v16sf)(__m512)(A), (int)(imm), \
           (__v4sf)_mm_setzero_ps(), (__mmask8)(U)); \
     })
   3511 /* Vector Blend */
   3512 
   3513 static __inline __m512d __DEFAULT_FN_ATTRS
   3514 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
   3515 {
   3516   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
   3517                  (__v8df) __W,
   3518                  (__v8df) __A);
   3519 }
   3520 
   3521 static __inline __m512 __DEFAULT_FN_ATTRS
   3522 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
   3523 {
   3524   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
   3525                 (__v16sf) __W,
   3526                 (__v16sf) __A);
   3527 }
   3528 
   3529 static __inline __m512i __DEFAULT_FN_ATTRS
   3530 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
   3531 {
   3532   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
   3533                 (__v8di) __W,
   3534                 (__v8di) __A);
   3535 }
   3536 
   3537 static __inline __m512i __DEFAULT_FN_ATTRS
   3538 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
   3539 {
   3540   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
   3541                 (__v16si) __W,
   3542                 (__v16si) __A);
   3543 }
   3544 
   3545 /* Compare */
   3546 
   /* Compares A and B (as __v16sf) with predicate P through
    * __builtin_ia32_cmpps512_mask, using an all-ones 16-bit mask and the
    * rounding argument R; the result is typed __mmask16. */
   #define _mm512_cmp_round_ps_mask(A, B, P, R) \
     __extension__ ({ \
       (__mmask16)__builtin_ia32_cmpps512_mask( \
           (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(P), \
           (__mmask16)-1, (int)(R)); \
     })
   3551 
   /* Compares A and B (as __v16sf) with predicate P through
    * __builtin_ia32_cmpps512_mask, passing U as the 16-bit mask and R as the
    * rounding argument; the result is typed __mmask16. */
   #define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
     __extension__ ({ \
       (__mmask16)__builtin_ia32_cmpps512_mask( \
           (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(P), \
           (__mmask16)(U), (int)(R)); \
     })
   3556 
   /* Shorthand for _mm512_cmp_round_ps_mask with _MM_FROUND_CUR_DIRECTION as
    * the rounding argument. */
   #define _mm512_cmp_ps_mask(A, B, P) \
     _mm512_cmp_round_ps_mask((A), (B), (P), \
                              _MM_FROUND_CUR_DIRECTION)
   3559 
   /* Shorthand for _mm512_mask_cmp_round_ps_mask with
    * _MM_FROUND_CUR_DIRECTION as the rounding argument. */
   #define _mm512_mask_cmp_ps_mask(U, A, B, P) \
     _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), \
                                   _MM_FROUND_CUR_DIRECTION)
   3562 
   /* Compares A and B (as __v8df) with predicate P through
    * __builtin_ia32_cmppd512_mask, using an all-ones 8-bit mask and the
    * rounding argument R; the result is typed __mmask8. */
   #define _mm512_cmp_round_pd_mask(A, B, P, R) \
     __extension__ ({ \
       (__mmask8)__builtin_ia32_cmppd512_mask( \
           (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(P), \
           (__mmask8)-1, (int)(R)); \
     })
   3567 
   /* Compares A and B (as __v8df) with predicate P through
    * __builtin_ia32_cmppd512_mask, passing U as the 8-bit mask and R as the
    * rounding argument; the result is typed __mmask8. */
   #define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
     __extension__ ({ \
       (__mmask8)__builtin_ia32_cmppd512_mask( \
           (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(P), \
           (__mmask8)(U), (int)(R)); \
     })
   3572 
   /* Shorthand for _mm512_cmp_round_pd_mask with _MM_FROUND_CUR_DIRECTION as
    * the rounding argument. */
   #define _mm512_cmp_pd_mask(A, B, P) \
     _mm512_cmp_round_pd_mask((A), (B), (P), \
                              _MM_FROUND_CUR_DIRECTION)
   3575 
   /* Shorthand for _mm512_mask_cmp_round_pd_mask with
    * _MM_FROUND_CUR_DIRECTION as the rounding argument. */
   #define _mm512_mask_cmp_pd_mask(U, A, B, P) \
     _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), \
                                   _MM_FROUND_CUR_DIRECTION)
   3578 
   3579 /* Conversion */
   3580 
   /* Unmasked form: forwards A (as __v16sf) to
    * __builtin_ia32_cvttps2udq512_mask with an undefined source vector, an
    * all-ones 16-bit mask and the rounding argument R. */
   #define _mm512_cvtt_roundps_epu32(A, R) \
     __extension__ ({ \
       (__m512i)__builtin_ia32_cvttps2udq512_mask( \
           (__v16sf)(__m512)(A), \
           (__v16si)_mm512_undefined_epi32(), \
           (__mmask16)-1, (int)(R)); \
     })
   3585 
   /* Masked form: forwards A (as __v16sf) to
    * __builtin_ia32_cvttps2udq512_mask with W as the source vector, U as the
    * 16-bit mask and R as the rounding argument. */
   #define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
     __extension__ ({ \
       (__m512i)__builtin_ia32_cvttps2udq512_mask( \
           (__v16sf)(__m512)(A), \
           (__v16si)(__m512i)(W), \
           (__mmask16)(U), (int)(R)); \
     })
   3590 
   /* maskz form: forwards A (as __v16sf) to
    * __builtin_ia32_cvttps2udq512_mask with a zero source vector, U as the
    * 16-bit mask and R as the rounding argument. */
   #define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
     __extension__ ({ \
       (__m512i)__builtin_ia32_cvttps2udq512_mask( \
           (__v16sf)(__m512)(A), \
           (__v16si)_mm512_setzero_si512(), \
           (__mmask16)(U), (int)(R)); \
     })
   3595 
   3596 
   3597 static __inline __m512i __DEFAULT_FN_ATTRS
   3598 _mm512_cvttps_epu32(__m512 __A)
   3599 {
   3600   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   3601                   (__v16si)
   3602                   _mm512_setzero_si512 (),
   3603                   (__mmask16) -1,
   3604                   _MM_FROUND_CUR_DIRECTION);
   3605 }
   3606 
   3607 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3608 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
   3609 {
   3610   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   3611                    (__v16si) __W,
   3612                    (__mmask16) __U,
   3613                    _MM_FROUND_CUR_DIRECTION);
   3614 }
   3615 
   3616 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3617 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
   3618 {
   3619   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   3620                    (__v16si) _mm512_setzero_si512 (),
   3621                    (__mmask16) __U,
   3622                    _MM_FROUND_CUR_DIRECTION);
   3623 }
   3624 
   /* Unmasked form: forwards A (as __v16si) to
    * __builtin_ia32_cvtdq2ps512_mask with a zero source vector, an all-ones
    * 16-bit mask and the rounding argument R. */
   #define _mm512_cvt_roundepi32_ps(A, R) \
     __extension__ ({ \
       (__m512)__builtin_ia32_cvtdq2ps512_mask( \
           (__v16si)(__m512i)(A), \
           (__v16sf)_mm512_setzero_ps(), \
           (__mmask16)-1, (int)(R)); \
     })
   3629 
   /* Masked form: forwards A (as __v16si) to
    * __builtin_ia32_cvtdq2ps512_mask with W as the source vector, U as the
    * 16-bit mask and R as the rounding argument. */
   #define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
     __extension__ ({ \
       (__m512)__builtin_ia32_cvtdq2ps512_mask( \
           (__v16si)(__m512i)(A), \
           (__v16sf)(__m512)(W), \
           (__mmask16)(U), (int)(R)); \
     })
   3634 
   /* maskz form: forwards A (as __v16si) to
    * __builtin_ia32_cvtdq2ps512_mask with a zero source vector, U as the
    * 16-bit mask and R as the rounding argument. */
   #define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
     __extension__ ({ \
       (__m512)__builtin_ia32_cvtdq2ps512_mask( \
           (__v16si)(__m512i)(A), \
           (__v16sf)_mm512_setzero_ps(), \
           (__mmask16)(U), (int)(R)); \
     })
   3639 
   /* Unmasked form: forwards A (as __v16si) to
    * __builtin_ia32_cvtudq2ps512_mask with a zero source vector, an all-ones
    * 16-bit mask and the rounding argument R. */
   #define _mm512_cvt_roundepu32_ps(A, R) \
     __extension__ ({ \
       (__m512)__builtin_ia32_cvtudq2ps512_mask( \
           (__v16si)(__m512i)(A), \
           (__v16sf)_mm512_setzero_ps(), \
           (__mmask16)-1, (int)(R)); \
     })
   3644 
   /* Masked form: forwards A (as __v16si) to
    * __builtin_ia32_cvtudq2ps512_mask with W as the source vector, U as the
    * 16-bit mask and R as the rounding argument. */
   #define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
     __extension__ ({ \
       (__m512)__builtin_ia32_cvtudq2ps512_mask( \
           (__v16si)(__m512i)(A), \
           (__v16sf)(__m512)(W), \
           (__mmask16)(U), (int)(R)); \
     })
   3649 
   /* maskz form: forwards A (as __v16si) to
    * __builtin_ia32_cvtudq2ps512_mask with a zero source vector, U as the
    * 16-bit mask and R as the rounding argument. */
   #define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
     __extension__ ({ \
       (__m512)__builtin_ia32_cvtudq2ps512_mask( \
           (__v16si)(__m512i)(A), \
           (__v16sf)_mm512_setzero_ps(), \
           (__mmask16)(U), (int)(R)); \
     })
   3654 
   3655 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3656 _mm512_cvtepu32_ps (__m512i __A)
   3657 {
   3658   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
   3659                  (__v16sf) _mm512_undefined_ps (),
   3660                  (__mmask16) -1,
   3661                  _MM_FROUND_CUR_DIRECTION);
   3662 }
   3663 
   3664 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3665 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
   3666 {
   3667   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
   3668                  (__v16sf) __W,
   3669                  (__mmask16) __U,
   3670                  _MM_FROUND_CUR_DIRECTION);
   3671 }
   3672 
   3673 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3674 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
   3675 {
   3676   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
   3677                  (__v16sf) _mm512_setzero_ps (),
   3678                  (__mmask16) __U,
   3679                  _MM_FROUND_CUR_DIRECTION);
   3680 }
   3681 
   3682 static __inline __m512d __DEFAULT_FN_ATTRS
   3683 _mm512_cvtepi32_pd(__m256i __A)
   3684 {
   3685   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
   3686                 (__v8df)
   3687                 _mm512_setzero_pd (),
   3688                 (__mmask8) -1);
   3689 }
   3690 
   3691 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3692 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
   3693 {
   3694   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
   3695                 (__v8df) __W,
   3696                 (__mmask8) __U);
   3697 }
   3698 
   3699 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3700 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
   3701 {
   3702   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
   3703                 (__v8df) _mm512_setzero_pd (),
   3704                 (__mmask8) __U);
   3705 }
   3706 
   3707 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3708 _mm512_cvtepi32_ps (__m512i __A)
   3709 {
   3710   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
   3711                 (__v16sf) _mm512_undefined_ps (),
   3712                 (__mmask16) -1,
   3713                 _MM_FROUND_CUR_DIRECTION);
   3714 }
   3715 
   3716 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3717 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
   3718 {
   3719   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
   3720                 (__v16sf) __W,
   3721                 (__mmask16) __U,
   3722                 _MM_FROUND_CUR_DIRECTION);
   3723 }
   3724 
   3725 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3726 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
   3727 {
   3728   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
   3729                 (__v16sf) _mm512_setzero_ps (),
   3730                 (__mmask16) __U,
   3731                 _MM_FROUND_CUR_DIRECTION);
   3732 }
   3733 
   3734 static __inline __m512d __DEFAULT_FN_ATTRS
   3735 _mm512_cvtepu32_pd(__m256i __A)
   3736 {
   3737   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
   3738                 (__v8df)
   3739                 _mm512_setzero_pd (),
   3740                 (__mmask8) -1);
   3741 }
   3742 
   3743 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3744 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
   3745 {
   3746   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
   3747                   (__v8df) __W,
   3748                   (__mmask8) __U);
   3749 }
   3750 
   3751 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3752 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
   3753 {
   3754   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
   3755                   (__v8df) _mm512_setzero_pd (),
   3756                   (__mmask8) __U);
   3757 }
   3758 
   /* Unmasked form: forwards A (as __v8df) to
    * __builtin_ia32_cvtpd2ps512_mask with a zero __v8sf source vector, an
    * all-ones 8-bit mask and the rounding argument R; result is __m256. */
   #define _mm512_cvt_roundpd_ps(A, R) \
     __extension__ ({ \
       (__m256)__builtin_ia32_cvtpd2ps512_mask( \
           (__v8df)(__m512d)(A), \
           (__v8sf)_mm256_setzero_ps(), \
           (__mmask8)-1, (int)(R)); \
     })
   3763 
   /* Masked form: forwards A (as __v8df) to
    * __builtin_ia32_cvtpd2ps512_mask with W as the __v8sf source vector, U as
    * the 8-bit mask and R as the rounding argument. */
   #define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
     __extension__ ({ \
       (__m256)__builtin_ia32_cvtpd2ps512_mask( \
           (__v8df)(__m512d)(A), \
           (__v8sf)(__m256)(W), \
           (__mmask8)(U), (int)(R)); \
     })
   3768 
   /* maskz form: forwards A (as __v8df) to
    * __builtin_ia32_cvtpd2ps512_mask with a zero __v8sf source vector, U as
    * the 8-bit mask and R as the rounding argument. */
   #define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
     __extension__ ({ \
       (__m256)__builtin_ia32_cvtpd2ps512_mask( \
           (__v8df)(__m512d)(A), \
           (__v8sf)_mm256_setzero_ps(), \
           (__mmask8)(U), (int)(R)); \
     })
   3773 
   3774 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3775 _mm512_cvtpd_ps (__m512d __A)
   3776 {
   3777   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
   3778                 (__v8sf) _mm256_undefined_ps (),
   3779                 (__mmask8) -1,
   3780                 _MM_FROUND_CUR_DIRECTION);
   3781 }
   3782 
   3783 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3784 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
   3785 {
   3786   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
   3787                 (__v8sf) __W,
   3788                 (__mmask8) __U,
   3789                 _MM_FROUND_CUR_DIRECTION);
   3790 }
   3791 
   3792 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3793 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
   3794 {
   3795   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
   3796                 (__v8sf) _mm256_setzero_ps (),
   3797                 (__mmask8) __U,
   3798                 _MM_FROUND_CUR_DIRECTION);
   3799 }
   3800 
   /* Unmasked form: forwards A (as __v16sf) and the integer argument I to
    * __builtin_ia32_vcvtps2ph512_mask with an undefined __v16hi source vector
    * and an all-ones 16-bit mask; the result is typed __m256i. */
   #define _mm512_cvt_roundps_ph(A, I) \
     __extension__ ({ \
       (__m256i)__builtin_ia32_vcvtps2ph512_mask( \
           (__v16sf)(__m512)(A), (int)(I), \
           (__v16hi)_mm256_undefined_si256(), \
           (__mmask16)-1); \
     })
   3805 
   /* Masked form: forwards A (as __v16sf) and the integer argument I to
    * __builtin_ia32_vcvtps2ph512_mask with W as the __v16hi source vector and
    * U as the 16-bit mask.
    * The first two parameters are named (W, U) to match the other masked
    * macros in this header; argument positions are unchanged (first argument
    * is the __m256i source vector, second is the mask). */
   #define _mm512_mask_cvt_roundps_ph(W, U, A, I) \
     __extension__ ({ \
       (__m256i)__builtin_ia32_vcvtps2ph512_mask( \
           (__v16sf)(__m512)(A), (int)(I), \
           (__v16hi)(__m256i)(W), \
           (__mmask16)(U)); \
     })
   3810 
   /* maskz form: forwards A (as __v16sf) and the integer argument I to
    * __builtin_ia32_vcvtps2ph512_mask with a zero __v16hi source vector and U
    * as the 16-bit mask.
    * The mask parameter is named U, as in the other maskz macros in this
    * header; argument positions are unchanged. */
   #define _mm512_maskz_cvt_roundps_ph(U, A, I) \
     __extension__ ({ \
       (__m256i)__builtin_ia32_vcvtps2ph512_mask( \
           (__v16sf)(__m512)(A), (int)(I), \
           (__v16hi)_mm256_setzero_si256(), \
           (__mmask16)(U)); \
     })
   3815 
   /* Unmasked form: forwards A (as __v16sf) and the integer argument I to
    * __builtin_ia32_vcvtps2ph512_mask with a zero __v16hi source vector and
    * an all-ones 16-bit mask. */
   #define _mm512_cvtps_ph(A, I) \
     __extension__ ({ \
       (__m256i)__builtin_ia32_vcvtps2ph512_mask( \
           (__v16sf)(__m512)(A), (int)(I), \
           (__v16hi)_mm256_setzero_si256(), \
           (__mmask16)-1); \
     })
   3820 
   /* Masked form: forwards A (as __v16sf) and the integer argument I to
    * __builtin_ia32_vcvtps2ph512_mask with W as the __v16hi source vector and
    * U as the 16-bit mask.
    * The first two parameters are named (W, U) to match the other masked
    * macros in this header; argument positions are unchanged (first argument
    * is the __m256i source vector, second is the mask). */
   #define _mm512_mask_cvtps_ph(W, U, A, I) \
     __extension__ ({ \
       (__m256i)__builtin_ia32_vcvtps2ph512_mask( \
           (__v16sf)(__m512)(A), (int)(I), \
           (__v16hi)(__m256i)(W), \
           (__mmask16)(U)); \
     })
   3825 
   /* maskz form: forwards A (as __v16sf) and the integer argument I to
    * __builtin_ia32_vcvtps2ph512_mask with a zero __v16hi source vector and U
    * as the 16-bit mask.
    * The mask parameter is named U, as in the other maskz macros in this
    * header; argument positions are unchanged. */
   #define _mm512_maskz_cvtps_ph(U, A, I) \
     __extension__ ({ \
       (__m256i)__builtin_ia32_vcvtps2ph512_mask( \
           (__v16sf)(__m512)(A), (int)(I), \
           (__v16hi)_mm256_setzero_si256(), \
           (__mmask16)(U)); \
     })
   3830 
   /* Unmasked form: forwards the 256-bit input A (as __v16hi) to
    * __builtin_ia32_vcvtph2ps512_mask with an undefined __v16sf source vector,
    * an all-ones 16-bit mask and the rounding argument R. */
   #define _mm512_cvt_roundph_ps(A, R) \
     __extension__ ({ \
       (__m512)__builtin_ia32_vcvtph2ps512_mask( \
           (__v16hi)(__m256i)(A), \
           (__v16sf)_mm512_undefined_ps(), \
           (__mmask16)-1, (int)(R)); \
     })
   3835 
   /* Masked form: forwards A (as __v16hi) to
    * __builtin_ia32_vcvtph2ps512_mask with W as the __v16sf source vector, U
    * as the 16-bit mask and R as the rounding argument. */
   #define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
     __extension__ ({ \
       (__m512)__builtin_ia32_vcvtph2ps512_mask( \
           (__v16hi)(__m256i)(A), \
           (__v16sf)(__m512)(W), \
           (__mmask16)(U), (int)(R)); \
     })
   3840 
   /* maskz form: forwards A (as __v16hi) to
    * __builtin_ia32_vcvtph2ps512_mask with a zero __v16sf source vector, U as
    * the 16-bit mask and R as the rounding argument. */
   #define _mm512_maskz_cvt_roundph_ps(U, A, R) \
     __extension__ ({ \
       (__m512)__builtin_ia32_vcvtph2ps512_mask( \
           (__v16hi)(__m256i)(A), \
           (__v16sf)_mm512_setzero_ps(), \
           (__mmask16)(U), (int)(R)); \
     })
   3845 
   3846 
   3847 static  __inline __m512 __DEFAULT_FN_ATTRS
   3848 _mm512_cvtph_ps(__m256i __A)
   3849 {
   3850   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
   3851                 (__v16sf)
   3852                 _mm512_setzero_ps (),
   3853                 (__mmask16) -1,
   3854                 _MM_FROUND_CUR_DIRECTION);
   3855 }
   3856 
   3857 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3858 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
   3859 {
   3860   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
   3861                  (__v16sf) __W,
   3862                  (__mmask16) __U,
   3863                  _MM_FROUND_CUR_DIRECTION);
   3864 }
   3865 
   3866 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3867 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
   3868 {
   3869   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
   3870                  (__v16sf) _mm512_setzero_ps (),
   3871                  (__mmask16) __U,
   3872                  _MM_FROUND_CUR_DIRECTION);
   3873 }
   3874 
   /* Unmasked form: forwards A (as __v8df) to
    * __builtin_ia32_cvttpd2dq512_mask with a zero __v8si source vector, an
    * all-ones 8-bit mask and the rounding argument R; result is __m256i. */
   #define _mm512_cvtt_roundpd_epi32(A, R) \
     __extension__ ({ \
       (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
           (__v8df)(__m512d)(A), \
           (__v8si)_mm256_setzero_si256(), \
           (__mmask8)-1, (int)(R)); \
     })
   3879 
   /* Masked form: forwards A (as __v8df) to
    * __builtin_ia32_cvttpd2dq512_mask with W as the __v8si source vector, U
    * as the 8-bit mask and R as the rounding argument. */
   #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
     __extension__ ({ \
       (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
           (__v8df)(__m512d)(A), \
           (__v8si)(__m256i)(W), \
           (__mmask8)(U), (int)(R)); \
     })
   3884 
   /* maskz form: forwards A (as __v8df) to
    * __builtin_ia32_cvttpd2dq512_mask with a zero __v8si source vector, U as
    * the 8-bit mask and R as the rounding argument. */
   #define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
     __extension__ ({ \
       (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
           (__v8df)(__m512d)(A), \
           (__v8si)_mm256_setzero_si256(), \
           (__mmask8)(U), (int)(R)); \
     })
   3889 
   3890 static __inline __m256i __DEFAULT_FN_ATTRS
   3891 _mm512_cvttpd_epi32(__m512d __a)
   3892 {
   3893   return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
   3894                                                    (__v8si)_mm256_setzero_si256(),
   3895                                                    (__mmask8) -1,
   3896                                                     _MM_FROUND_CUR_DIRECTION);
   3897 }
   3898 
   3899 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3900 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
   3901 {
   3902   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
   3903                   (__v8si) __W,
   3904                   (__mmask8) __U,
   3905                   _MM_FROUND_CUR_DIRECTION);
   3906 }
   3907 
   3908 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3909 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
   3910 {
   3911   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
   3912                   (__v8si) _mm256_setzero_si256 (),
   3913                   (__mmask8) __U,
   3914                   _MM_FROUND_CUR_DIRECTION);
   3915 }
   3916 
   /* Unmasked form: forwards A (as __v16sf) to
    * __builtin_ia32_cvttps2dq512_mask with a zero source vector, an all-ones
    * 16-bit mask and the rounding argument R. */
   #define _mm512_cvtt_roundps_epi32(A, R) \
     __extension__ ({ \
       (__m512i)__builtin_ia32_cvttps2dq512_mask( \
           (__v16sf)(__m512)(A), \
           (__v16si)_mm512_setzero_si512(), \
           (__mmask16)-1, (int)(R)); \
     })
   3921 
   /* Masked form: forwards A (as __v16sf) to
    * __builtin_ia32_cvttps2dq512_mask with W as the source vector, U as the
    * 16-bit mask and R as the rounding argument. */
   #define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
     __extension__ ({ \
       (__m512i)__builtin_ia32_cvttps2dq512_mask( \
           (__v16sf)(__m512)(A), \
           (__v16si)(__m512i)(W), \
           (__mmask16)(U), (int)(R)); \
     })
   3926 
   /* maskz form: forwards A (as __v16sf) to
    * __builtin_ia32_cvttps2dq512_mask with a zero source vector, U as the
    * 16-bit mask and R as the rounding argument. */
   #define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
     __extension__ ({ \
       (__m512i)__builtin_ia32_cvttps2dq512_mask( \
           (__v16sf)(__m512)(A), \
           (__v16si)_mm512_setzero_si512(), \
           (__mmask16)(U), (int)(R)); \
     })
   3931 
   3932 static __inline __m512i __DEFAULT_FN_ATTRS
   3933 _mm512_cvttps_epi32(__m512 __a)
   3934 {
   3935   return (__m512i)
   3936     __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
   3937                                      (__v16si) _mm512_setzero_si512 (),
   3938                                      (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
   3939 }
   3940 
   3941 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3942 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
   3943 {
   3944   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
   3945                   (__v16si) __W,
   3946                   (__mmask16) __U,
   3947                   _MM_FROUND_CUR_DIRECTION);
   3948 }
   3949 
   3950 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3951 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
   3952 {
   3953   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
   3954                   (__v16si) _mm512_setzero_si512 (),
   3955                   (__mmask16) __U,
   3956                   _MM_FROUND_CUR_DIRECTION);
   3957 }
   3958 
   /* Unmasked form: forwards A (as __v16sf) to
    * __builtin_ia32_cvtps2dq512_mask with a zero source vector, an all-ones
    * 16-bit mask and the rounding argument R. */
   #define _mm512_cvt_roundps_epi32(A, R) \
     __extension__ ({ \
       (__m512i)__builtin_ia32_cvtps2dq512_mask( \
           (__v16sf)(__m512)(A), \
           (__v16si)_mm512_setzero_si512(), \
           (__mmask16)-1, (int)(R)); \
     })
   3963 
   /* Masked form: forwards A (as __v16sf) to
    * __builtin_ia32_cvtps2dq512_mask with W as the source vector, U as the
    * 16-bit mask and R as the rounding argument. */
   #define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
     __extension__ ({ \
       (__m512i)__builtin_ia32_cvtps2dq512_mask( \
           (__v16sf)(__m512)(A), \
           (__v16si)(__m512i)(W), \
           (__mmask16)(U), (int)(R)); \
     })
   3968 
   /* maskz form: forwards A (as __v16sf) to
    * __builtin_ia32_cvtps2dq512_mask with a zero source vector, U as the
    * 16-bit mask and R as the rounding argument. */
   #define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
     __extension__ ({ \
       (__m512i)__builtin_ia32_cvtps2dq512_mask( \
           (__v16sf)(__m512)(A), \
           (__v16si)_mm512_setzero_si512(), \
           (__mmask16)(U), (int)(R)); \
     })
   3973 
   3974 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3975 _mm512_cvtps_epi32 (__m512 __A)
   3976 {
   3977   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
   3978                  (__v16si) _mm512_undefined_epi32 (),
   3979                  (__mmask16) -1,
   3980                  _MM_FROUND_CUR_DIRECTION);
   3981 }
   3982 
   3983 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3984 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
   3985 {
   3986   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
   3987                  (__v16si) __W,
   3988                  (__mmask16) __U,
   3989                  _MM_FROUND_CUR_DIRECTION);
   3990 }
   3991 
   3992 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3993 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
   3994 {
   3995   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
   3996                  (__v16si)
   3997                  _mm512_setzero_si512 (),
   3998                  (__mmask16) __U,
   3999                  _MM_FROUND_CUR_DIRECTION);
   4000 }
   4001 
   4002 #define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
   4003   (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
   4004                                            (__v8si)_mm256_setzero_si256(), \
   4005                                            (__mmask8)-1, (int)(R)); })
   4006 
   4007 #define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \
   4008   (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
   4009                                            (__v8si)(__m256i)(W), \
   4010                                            (__mmask8)(U), (int)(R)); })
   4011 
   4012 #define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \
   4013   (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
   4014                                            (__v8si)_mm256_setzero_si256(), \
   4015                                            (__mmask8)(U), (int)(R)); })
   4016 
   4017 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4018 _mm512_cvtpd_epi32 (__m512d __A)
   4019 {
   4020   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
   4021                  (__v8si)
   4022                  _mm256_undefined_si256 (),
   4023                  (__mmask8) -1,
   4024                  _MM_FROUND_CUR_DIRECTION);
   4025 }
   4026 
   4027 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4028 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
   4029 {
   4030   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
   4031                  (__v8si) __W,
   4032                  (__mmask8) __U,
   4033                  _MM_FROUND_CUR_DIRECTION);
   4034 }
   4035 
   4036 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4037 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
   4038 {
   4039   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
   4040                  (__v8si)
   4041                  _mm256_setzero_si256 (),
   4042                  (__mmask8) __U,
   4043                  _MM_FROUND_CUR_DIRECTION);
   4044 }
   4045 
   4046 #define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
   4047   (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
   4048                                             (__v16si)_mm512_setzero_si512(), \
   4049                                             (__mmask16)-1, (int)(R)); })
   4050 
   4051 #define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \
   4052   (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
   4053                                             (__v16si)(__m512i)(W), \
   4054                                             (__mmask16)(U), (int)(R)); })
   4055 
   4056 #define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \
   4057   (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
   4058                                             (__v16si)_mm512_setzero_si512(), \
   4059                                             (__mmask16)(U), (int)(R)); })
   4060 
   4061 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4062 _mm512_cvtps_epu32 ( __m512 __A)
   4063 {
   4064   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
   4065                   (__v16si)\
   4066                   _mm512_undefined_epi32 (),\
   4067                   (__mmask16) -1,\
   4068                   _MM_FROUND_CUR_DIRECTION);\
   4069 }
   4070 
   4071 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4072 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
   4073 {
   4074   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
   4075                   (__v16si) __W,
   4076                   (__mmask16) __U,
   4077                   _MM_FROUND_CUR_DIRECTION);
   4078 }
   4079 
   4080 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4081 _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
   4082 {
   4083   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
   4084                   (__v16si)
   4085                   _mm512_setzero_si512 (),
   4086                   (__mmask16) __U ,
   4087                   _MM_FROUND_CUR_DIRECTION);
   4088 }
   4089 
   4090 #define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
   4091   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
   4092                                             (__v8si)_mm256_setzero_si256(), \
   4093                                             (__mmask8)-1, (int)(R)); })
   4094 
   4095 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \
   4096   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
   4097                                             (__v8si)(W), \
   4098                                             (__mmask8)(U), (int)(R)); })
   4099 
   4100 #define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \
   4101   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
   4102                                             (__v8si)_mm256_setzero_si256(), \
   4103                                             (__mmask8)(U), (int)(R)); })
   4104 
   4105 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4106 _mm512_cvtpd_epu32 (__m512d __A)
   4107 {
   4108   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
   4109                   (__v8si)
   4110                   _mm256_undefined_si256 (),
   4111                   (__mmask8) -1,
   4112                   _MM_FROUND_CUR_DIRECTION);
   4113 }
   4114 
   4115 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4116 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
   4117 {
   4118   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
   4119                   (__v8si) __W,
   4120                   (__mmask8) __U,
   4121                   _MM_FROUND_CUR_DIRECTION);
   4122 }
   4123 
   4124 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4125 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
   4126 {
   4127   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
   4128                   (__v8si)
   4129                   _mm256_setzero_si256 (),
   4130                   (__mmask8) __U,
   4131                   _MM_FROUND_CUR_DIRECTION);
   4132 }
   4133 
   4134 /* Unpack and Interleave */
   4135 
   4136 static __inline __m512d __DEFAULT_FN_ATTRS
   4137 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
   4138 {
   4139   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
   4140                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
   4141 }
   4142 
   4143 static __inline__ __m512d __DEFAULT_FN_ATTRS
   4144 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   4145 {
   4146   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   4147                                            (__v8df)_mm512_unpackhi_pd(__A, __B),
   4148                                            (__v8df)__W);
   4149 }
   4150 
   4151 static __inline__ __m512d __DEFAULT_FN_ATTRS
   4152 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
   4153 {
   4154   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   4155                                            (__v8df)_mm512_unpackhi_pd(__A, __B),
   4156                                            (__v8df)_mm512_setzero_pd());
   4157 }
   4158 
   4159 static __inline __m512d __DEFAULT_FN_ATTRS
   4160 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
   4161 {
   4162   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
   4163                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
   4164 }
   4165 
   4166 static __inline__ __m512d __DEFAULT_FN_ATTRS
   4167 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   4168 {
   4169   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   4170                                            (__v8df)_mm512_unpacklo_pd(__A, __B),
   4171                                            (__v8df)__W);
   4172 }
   4173 
   4174 static __inline__ __m512d __DEFAULT_FN_ATTRS
   4175 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
   4176 {
   4177   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   4178                                            (__v8df)_mm512_unpacklo_pd(__A, __B),
   4179                                            (__v8df)_mm512_setzero_pd());
   4180 }
   4181 
   4182 static __inline __m512 __DEFAULT_FN_ATTRS
   4183 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
   4184 {
   4185   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
   4186                                          2,    18,    3,    19,
   4187                                          2+4,  18+4,  3+4,  19+4,
   4188                                          2+8,  18+8,  3+8,  19+8,
   4189                                          2+12, 18+12, 3+12, 19+12);
   4190 }
   4191 
   4192 static __inline__ __m512 __DEFAULT_FN_ATTRS
   4193 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   4194 {
   4195   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
   4196                                           (__v16sf)_mm512_unpackhi_ps(__A, __B),
   4197                                           (__v16sf)__W);
   4198 }
   4199 
   4200 static __inline__ __m512 __DEFAULT_FN_ATTRS
   4201 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
   4202 {
   4203   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
   4204                                           (__v16sf)_mm512_unpackhi_ps(__A, __B),
   4205                                           (__v16sf)_mm512_setzero_ps());
   4206 }
   4207 
   4208 static __inline __m512 __DEFAULT_FN_ATTRS
   4209 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
   4210 {
   4211   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
   4212                                          0,    16,    1,    17,
   4213                                          0+4,  16+4,  1+4,  17+4,
   4214                                          0+8,  16+8,  1+8,  17+8,
   4215                                          0+12, 16+12, 1+12, 17+12);
   4216 }
   4217 
   4218 static __inline__ __m512 __DEFAULT_FN_ATTRS
   4219 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   4220 {
   4221   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
   4222                                           (__v16sf)_mm512_unpacklo_ps(__A, __B),
   4223                                           (__v16sf)__W);
   4224 }
   4225 
   4226 static __inline__ __m512 __DEFAULT_FN_ATTRS
   4227 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
   4228 {
   4229   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
   4230                                           (__v16sf)_mm512_unpacklo_ps(__A, __B),
   4231                                           (__v16sf)_mm512_setzero_ps());
   4232 }
   4233 
   4234 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4235 _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
   4236 {
   4237   return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
   4238                                           2,    18,    3,    19,
   4239                                           2+4,  18+4,  3+4,  19+4,
   4240                                           2+8,  18+8,  3+8,  19+8,
   4241                                           2+12, 18+12, 3+12, 19+12);
   4242 }
   4243 
   4244 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4245 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   4246 {
   4247   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
   4248                                        (__v16si)_mm512_unpackhi_epi32(__A, __B),
   4249                                        (__v16si)__W);
   4250 }
   4251 
   4252 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4253 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
   4254 {
   4255   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
   4256                                        (__v16si)_mm512_unpackhi_epi32(__A, __B),
   4257                                        (__v16si)_mm512_setzero_si512());
   4258 }
   4259 
   4260 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4261 _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
   4262 {
   4263   return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
   4264                                           0,    16,    1,    17,
   4265                                           0+4,  16+4,  1+4,  17+4,
   4266                                           0+8,  16+8,  1+8,  17+8,
   4267                                           0+12, 16+12, 1+12, 17+12);
   4268 }
   4269 
   4270 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4271 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   4272 {
   4273   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
   4274                                        (__v16si)_mm512_unpacklo_epi32(__A, __B),
   4275                                        (__v16si)__W);
   4276 }
   4277 
   4278 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4279 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
   4280 {
   4281   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
   4282                                        (__v16si)_mm512_unpacklo_epi32(__A, __B),
   4283                                        (__v16si)_mm512_setzero_si512());
   4284 }
   4285 
   4286 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4287 _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
   4288 {
   4289   return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
   4290                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
   4291 }
   4292 
   4293 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4294 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   4295 {
   4296   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
   4297                                         (__v8di)_mm512_unpackhi_epi64(__A, __B),
   4298                                         (__v8di)__W);
   4299 }
   4300 
   4301 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4302 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
   4303 {
   4304   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
   4305                                         (__v8di)_mm512_unpackhi_epi64(__A, __B),
   4306                                         (__v8di)_mm512_setzero_si512());
   4307 }
   4308 
   4309 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4310 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
   4311 {
   4312   return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
   4313                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
   4314 }
   4315 
   4316 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4317 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   4318 {
   4319   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
   4320                                         (__v8di)_mm512_unpacklo_epi64(__A, __B),
   4321                                         (__v8di)__W);
   4322 }
   4323 
   4324 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4325 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
   4326 {
   4327   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
   4328                                         (__v8di)_mm512_unpacklo_epi64(__A, __B),
   4329                                         (__v8di)_mm512_setzero_si512());
   4330 }
   4331 
   4332 /* Bit Test */
   4333 
   4334 static __inline __mmask16 __DEFAULT_FN_ATTRS
   4335 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
   4336 {
   4337   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
   4338             (__v16si) __B,
   4339             (__mmask16) -1);
   4340 }
   4341 
   4342 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4343 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
   4344 {
   4345   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
   4346                  (__v16si) __B, __U);
   4347 }
   4348 
   4349 static __inline __mmask8 __DEFAULT_FN_ATTRS
   4350 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
   4351 {
   4352   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
   4353                  (__v8di) __B,
   4354                  (__mmask8) -1);
   4355 }
   4356 
   4357 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4358 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
   4359 {
   4360   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
   4361 }
   4362 
   4363 
   4364 /* SIMD load ops */
   4365 
   4366 static __inline __m512i __DEFAULT_FN_ATTRS
   4367 _mm512_loadu_si512 (void const *__P)
   4368 {
   4369   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
   4370                   (__v16si)
   4371                   _mm512_setzero_si512 (),
   4372                   (__mmask16) -1);
   4373 }
   4374 
   4375 static __inline __m512i __DEFAULT_FN_ATTRS
   4376 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
   4377 {
   4378   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
   4379                   (__v16si) __W,
   4380                   (__mmask16) __U);
   4381 }
   4382 
   4383 
   4384 static __inline __m512i __DEFAULT_FN_ATTRS
   4385 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
   4386 {
   4387   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
   4388                                                      (__v16si)
   4389                                                      _mm512_setzero_si512 (),
   4390                                                      (__mmask16) __U);
   4391 }
   4392 
   4393 static __inline __m512i __DEFAULT_FN_ATTRS
   4394 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
   4395 {
   4396   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
   4397                   (__v8di) __W,
   4398                   (__mmask8) __U);
   4399 }
   4400 
   4401 static __inline __m512i __DEFAULT_FN_ATTRS
   4402 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
   4403 {
   4404   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
   4405                                                      (__v8di)
   4406                                                      _mm512_setzero_si512 (),
   4407                                                      (__mmask8) __U);
   4408 }
   4409 
   4410 static __inline __m512 __DEFAULT_FN_ATTRS
   4411 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
   4412 {
   4413   return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
   4414                    (__v16sf) __W,
   4415                    (__mmask16) __U);
   4416 }
   4417 
   4418 static __inline __m512 __DEFAULT_FN_ATTRS
   4419 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
   4420 {
   4421   return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
   4422                                                   (__v16sf)
   4423                                                   _mm512_setzero_ps (),
   4424                                                   (__mmask16) __U);
   4425 }
   4426 
   4427 static __inline __m512d __DEFAULT_FN_ATTRS
   4428 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
   4429 {
   4430   return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
   4431                 (__v8df) __W,
   4432                 (__mmask8) __U);
   4433 }
   4434 
   4435 static __inline __m512d __DEFAULT_FN_ATTRS
   4436 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
   4437 {
   4438   return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
   4439                                                    (__v8df)
   4440                                                    _mm512_setzero_pd (),
   4441                                                    (__mmask8) __U);
   4442 }
   4443 
   4444 static __inline __m512d __DEFAULT_FN_ATTRS
   4445 _mm512_loadu_pd(double const *__p)
   4446 {
   4447   struct __loadu_pd {
   4448     __m512d __v;
   4449   } __attribute__((__packed__, __may_alias__));
   4450   return ((struct __loadu_pd*)__p)->__v;
   4451 }
   4452 
   4453 static __inline __m512 __DEFAULT_FN_ATTRS
   4454 _mm512_loadu_ps(float const *__p)
   4455 {
   4456   struct __loadu_ps {
   4457     __m512 __v;
   4458   } __attribute__((__packed__, __may_alias__));
   4459   return ((struct __loadu_ps*)__p)->__v;
   4460 }
   4461 
   4462 static __inline __m512 __DEFAULT_FN_ATTRS
   4463 _mm512_load_ps(float const *__p)
   4464 {
   4465   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
   4466                                                   (__v16sf)
   4467                                                   _mm512_setzero_ps (),
   4468                                                   (__mmask16) -1);
   4469 }
   4470 
   4471 static __inline __m512 __DEFAULT_FN_ATTRS
   4472 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
   4473 {
   4474   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
   4475                    (__v16sf) __W,
   4476                    (__mmask16) __U);
   4477 }
   4478 
   4479 static __inline __m512 __DEFAULT_FN_ATTRS
   4480 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
   4481 {
   4482   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
   4483                                                   (__v16sf)
   4484                                                   _mm512_setzero_ps (),
   4485                                                   (__mmask16) __U);
   4486 }
   4487 
   4488 static __inline __m512d __DEFAULT_FN_ATTRS
   4489 _mm512_load_pd(double const *__p)
   4490 {
   4491   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
   4492                                                    (__v8df)
   4493                                                    _mm512_setzero_pd (),
   4494                                                    (__mmask8) -1);
   4495 }
   4496 
   4497 static __inline __m512d __DEFAULT_FN_ATTRS
   4498 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
   4499 {
   4500   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
   4501                           (__v8df) __W,
   4502                           (__mmask8) __U);
   4503 }
   4504 
   4505 static __inline __m512d __DEFAULT_FN_ATTRS
   4506 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
   4507 {
   4508   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
   4509                                                    (__v8df)
   4510                                                    _mm512_setzero_pd (),
   4511                                                    (__mmask8) __U);
   4512 }
   4513 
   4514 static __inline __m512i __DEFAULT_FN_ATTRS
   4515 _mm512_load_si512 (void const *__P)
   4516 {
   4517   return *(__m512i *) __P;
   4518 }
   4519 
   4520 static __inline __m512i __DEFAULT_FN_ATTRS
   4521 _mm512_load_epi32 (void const *__P)
   4522 {
   4523   return *(__m512i *) __P;
   4524 }
   4525 
   4526 static __inline __m512i __DEFAULT_FN_ATTRS
   4527 _mm512_load_epi64 (void const *__P)
   4528 {
   4529   return *(__m512i *) __P;
   4530 }
   4531 
   4532 /* SIMD store ops */
   4533 
   4534 static __inline void __DEFAULT_FN_ATTRS
   4535 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
   4536 {
   4537   __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
   4538                                      (__mmask8) __U);
   4539 }
   4540 
   4541 static __inline void __DEFAULT_FN_ATTRS
   4542 _mm512_storeu_si512 (void *__P, __m512i __A)
   4543 {
   4544   __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
   4545             (__mmask16) -1);
   4546 }
   4547 
   4548 static __inline void __DEFAULT_FN_ATTRS
   4549 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
   4550 {
   4551   __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
   4552                                      (__mmask16) __U);
   4553 }
   4554 
   4555 static __inline void __DEFAULT_FN_ATTRS
   4556 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
   4557 {
   4558   __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
   4559 }
   4560 
   4561 static __inline void __DEFAULT_FN_ATTRS
   4562 _mm512_storeu_pd(void *__P, __m512d __A)
   4563 {
   4564   __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1);
   4565 }
   4566 
   4567 static __inline void __DEFAULT_FN_ATTRS
   4568 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
   4569 {
   4570   __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
   4571                                    (__mmask16) __U);
   4572 }
   4573 
   4574 static __inline void __DEFAULT_FN_ATTRS
   4575 _mm512_storeu_ps(void *__P, __m512 __A)
   4576 {
   4577   __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1);
   4578 }
   4579 
   4580 static __inline void __DEFAULT_FN_ATTRS
   4581 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
   4582 {
   4583   __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
   4584 }
   4585 
   4586 static __inline void __DEFAULT_FN_ATTRS
   4587 _mm512_store_pd(void *__P, __m512d __A)
   4588 {
   4589   *(__m512d*)__P = __A;
   4590 }
   4591 
   4592 static __inline void __DEFAULT_FN_ATTRS
   4593 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
   4594 {
   4595   __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
   4596                                    (__mmask16) __U);
   4597 }
   4598 
   4599 static __inline void __DEFAULT_FN_ATTRS
   4600 _mm512_store_ps(void *__P, __m512 __A)
   4601 {
   4602   *(__m512*)__P = __A;
   4603 }
   4604 
   4605 static __inline void __DEFAULT_FN_ATTRS
   4606 _mm512_store_si512 (void *__P, __m512i __A)
   4607 {
   4608   *(__m512i *) __P = __A;
   4609 }
   4610 
   4611 static __inline void __DEFAULT_FN_ATTRS
   4612 _mm512_store_epi32 (void *__P, __m512i __A)
   4613 {
   4614   *(__m512i *) __P = __A;
   4615 }
   4616 
   4617 static __inline void __DEFAULT_FN_ATTRS
   4618 _mm512_store_epi64 (void *__P, __m512i __A)
   4619 {
   4620   *(__m512i *) __P = __A;
   4621 }
   4622 
   4623 /* Mask ops */
   4624 
   4625 static __inline __mmask16 __DEFAULT_FN_ATTRS
   4626 _mm512_knot(__mmask16 __M)
   4627 {
   4628   return __builtin_ia32_knothi(__M);
   4629 }
   4630 
   4631 /* Integer compare */
   4632 
   4633 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4634 _mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
   4635   return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
   4636                                                    (__mmask16)-1);
   4637 }
   4638 
   4639 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4640 _mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4641   return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
   4642                                                    __u);
   4643 }
   4644 
   4645 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4646 _mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
   4647   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
   4648                                                  (__mmask16)-1);
   4649 }
   4650 
   4651 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4652 _mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4653   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
   4654                                                  __u);
   4655 }
   4656 
   4657 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4658 _mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4659   return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
   4660                                                   __u);
   4661 }
   4662 
   4663 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4664 _mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
   4665   return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
   4666                                                   (__mmask8)-1);
   4667 }
   4668 
   4669 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4670 _mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
   4671   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
   4672                                                 (__mmask8)-1);
   4673 }
   4674 
   4675 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4676 _mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4677   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
   4678                                                 __u);
   4679 }
   4680 
   4681 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4682 _mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
   4683   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
   4684                                                 (__mmask16)-1);
   4685 }
   4686 
   4687 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4688 _mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4689   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
   4690                                                 __u);
   4691 }
   4692 
   4693 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4694 _mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
   4695   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
   4696                                                  (__mmask16)-1);
   4697 }
   4698 
   4699 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4700 _mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4701   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
   4702                                                  __u);
   4703 }
   4704 
   4705 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4706 _mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
   4707   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
   4708                                                (__mmask8)-1);
   4709 }
   4710 
   4711 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4712 _mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4713   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
   4714                                                __u);
   4715 }
   4716 
   4717 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4718 _mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
   4719   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
   4720                                                 (__mmask8)-1);
   4721 }
   4722 
   4723 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4724 _mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4725   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
   4726                                                 __u);
   4727 }
   4728 
   4729 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4730 _mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
   4731   return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
   4732                                                    (__mmask16)-1);
   4733 }
   4734 
   4735 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4736 _mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4737   return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
   4738                                                    __u);
   4739 }
   4740 
   4741 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4742 _mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
   4743   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
   4744                                                  (__mmask16)-1);
   4745 }
   4746 
   4747 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4748 _mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4749   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
   4750                                                  __u);
   4751 }
   4752 
   4753 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4754 _mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4755   return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
   4756                                                   __u);
   4757 }
   4758 
   4759 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4760 _mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
   4761   return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
   4762                                                   (__mmask8)-1);
   4763 }
   4764 
   4765 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4766 _mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
   4767   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
   4768                                                 (__mmask8)-1);
   4769 }
   4770 
   4771 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4772 _mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4773   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
   4774                                                 __u);
   4775 }
   4776 
   4777 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4778 _mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
   4779   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
   4780                                                 (__mmask16)-1);
   4781 }
   4782 
   4783 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4784 _mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4785   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
   4786                                                 __u);
   4787 }
   4788 
   4789 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4790 _mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
   4791   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
   4792                                                  (__mmask16)-1);
   4793 }
   4794 
   4795 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4796 _mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4797   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
   4798                                                  __u);
   4799 }
   4800 
   4801 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4802 _mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
   4803   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
   4804                                                (__mmask8)-1);
   4805 }
   4806 
   4807 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4808 _mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4809   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
   4810                                                __u);
   4811 }
   4812 
   4813 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4814 _mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
   4815   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
   4816                                                 (__mmask8)-1);
   4817 }
   4818 
   4819 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4820 _mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4821   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
   4822                                                 __u);
   4823 }
   4824 
   4825 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4826 _mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
   4827   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
   4828                                                 (__mmask16)-1);
   4829 }
   4830 
   4831 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4832 _mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4833   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
   4834                                                 __u);
   4835 }
   4836 
   4837 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4838 _mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
   4839   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
   4840                                                  (__mmask16)-1);
   4841 }
   4842 
   4843 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4844 _mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4845   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
   4846                                                  __u);
   4847 }
   4848 
   4849 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4850 _mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
   4851   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
   4852                                                (__mmask8)-1);
   4853 }
   4854 
   4855 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4856 _mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4857   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
   4858                                                __u);
   4859 }
   4860 
   4861 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4862 _mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
   4863   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
   4864                                                 (__mmask8)-1);
   4865 }
   4866 
   4867 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4868 _mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4869   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
   4870                                                 __u);
   4871 }
   4872 
   4873 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4874 _mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
   4875   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
   4876                                                 (__mmask16)-1);
   4877 }
   4878 
   4879 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4880 _mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4881   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
   4882                                                 __u);
   4883 }
   4884 
   4885 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4886 _mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
   4887   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
   4888                                                  (__mmask16)-1);
   4889 }
   4890 
   4891 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4892 _mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4893   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
   4894                                                  __u);
   4895 }
   4896 
   4897 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4898 _mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
   4899   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
   4900                                                (__mmask8)-1);
   4901 }
   4902 
   4903 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4904 _mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4905   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
   4906                                                __u);
   4907 }
   4908 
   4909 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4910 _mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
   4911   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
   4912                                                 (__mmask8)-1);
   4913 }
   4914 
   4915 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4916 _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4917   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
   4918                                                 __u);
   4919 }
   4920 
   4921 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4922 _mm512_cvtepi8_epi32 (__m128i __A)
   4923 {
   4924   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
   4925                 (__v16si)
   4926                 _mm512_setzero_si512 (),
   4927                 (__mmask16) -1);
   4928 }
   4929 
   4930 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4931 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
   4932 {
   4933   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
   4934                 (__v16si) __W,
   4935                 (__mmask16) __U);
   4936 }
   4937 
   4938 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4939 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
   4940 {
   4941   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
   4942                 (__v16si)
   4943                 _mm512_setzero_si512 (),
   4944                 (__mmask16) __U);
   4945 }
   4946 
   4947 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4948 _mm512_cvtepi8_epi64 (__m128i __A)
   4949 {
   4950   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
   4951                 (__v8di)
   4952                 _mm512_setzero_si512 (),
   4953                 (__mmask8) -1);
   4954 }
   4955 
   4956 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4957 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
   4958 {
   4959   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
   4960                 (__v8di) __W,
   4961                 (__mmask8) __U);
   4962 }
   4963 
   4964 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4965 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
   4966 {
   4967   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
   4968                 (__v8di)
   4969                 _mm512_setzero_si512 (),
   4970                 (__mmask8) __U);
   4971 }
   4972 
   4973 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4974 _mm512_cvtepi32_epi64 (__m256i __X)
   4975 {
   4976   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
   4977                 (__v8di)
   4978                 _mm512_setzero_si512 (),
   4979                 (__mmask8) -1);
   4980 }
   4981 
   4982 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4983 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
   4984 {
   4985   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
   4986                 (__v8di) __W,
   4987                 (__mmask8) __U);
   4988 }
   4989 
   4990 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4991 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
   4992 {
   4993   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
   4994                 (__v8di)
   4995                 _mm512_setzero_si512 (),
   4996                 (__mmask8) __U);
   4997 }
   4998 
   4999 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5000 _mm512_cvtepi16_epi32 (__m256i __A)
   5001 {
   5002   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
   5003                 (__v16si)
   5004                 _mm512_setzero_si512 (),
   5005                 (__mmask16) -1);
   5006 }
   5007 
   5008 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5009 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
   5010 {
   5011   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
   5012                 (__v16si) __W,
   5013                 (__mmask16) __U);
   5014 }
   5015 
   5016 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5017 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
   5018 {
   5019   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
   5020                 (__v16si)
   5021                 _mm512_setzero_si512 (),
   5022                 (__mmask16) __U);
   5023 }
   5024 
   5025 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5026 _mm512_cvtepi16_epi64 (__m128i __A)
   5027 {
   5028   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
   5029                 (__v8di)
   5030                 _mm512_setzero_si512 (),
   5031                 (__mmask8) -1);
   5032 }
   5033 
   5034 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5035 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
   5036 {
   5037   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
   5038                 (__v8di) __W,
   5039                 (__mmask8) __U);
   5040 }
   5041 
   5042 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5043 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
   5044 {
   5045   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
   5046                 (__v8di)
   5047                 _mm512_setzero_si512 (),
   5048                 (__mmask8) __U);
   5049 }
   5050 
   5051 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5052 _mm512_cvtepu8_epi32 (__m128i __A)
   5053 {
   5054   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
   5055                 (__v16si)
   5056                 _mm512_setzero_si512 (),
   5057                 (__mmask16) -1);
   5058 }
   5059 
   5060 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5061 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
   5062 {
   5063   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
   5064                 (__v16si) __W,
   5065                 (__mmask16) __U);
   5066 }
   5067 
   5068 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5069 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
   5070 {
   5071   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
   5072                 (__v16si)
   5073                 _mm512_setzero_si512 (),
   5074                 (__mmask16) __U);
   5075 }
   5076 
   5077 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5078 _mm512_cvtepu8_epi64 (__m128i __A)
   5079 {
   5080   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
   5081                 (__v8di)
   5082                 _mm512_setzero_si512 (),
   5083                 (__mmask8) -1);
   5084 }
   5085 
   5086 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5087 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
   5088 {
   5089   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
   5090                 (__v8di) __W,
   5091                 (__mmask8) __U);
   5092 }
   5093 
   5094 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5095 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
   5096 {
   5097   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
   5098                 (__v8di)
   5099                 _mm512_setzero_si512 (),
   5100                 (__mmask8) __U);
   5101 }
   5102 
   5103 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5104 _mm512_cvtepu32_epi64 (__m256i __X)
   5105 {
   5106   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
   5107                 (__v8di)
   5108                 _mm512_setzero_si512 (),
   5109                 (__mmask8) -1);
   5110 }
   5111 
   5112 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5113 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
   5114 {
   5115   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
   5116                 (__v8di) __W,
   5117                 (__mmask8) __U);
   5118 }
   5119 
   5120 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5121 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
   5122 {
   5123   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
   5124                 (__v8di)
   5125                 _mm512_setzero_si512 (),
   5126                 (__mmask8) __U);
   5127 }
   5128 
   5129 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5130 _mm512_cvtepu16_epi32 (__m256i __A)
   5131 {
   5132   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
   5133                 (__v16si)
   5134                 _mm512_setzero_si512 (),
   5135                 (__mmask16) -1);
   5136 }
   5137 
   5138 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5139 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
   5140 {
   5141   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
   5142                 (__v16si) __W,
   5143                 (__mmask16) __U);
   5144 }
   5145 
   5146 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5147 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
   5148 {
   5149   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
   5150                 (__v16si)
   5151                 _mm512_setzero_si512 (),
   5152                 (__mmask16) __U);
   5153 }
   5154 
   5155 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5156 _mm512_cvtepu16_epi64 (__m128i __A)
   5157 {
   5158   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
   5159                 (__v8di)
   5160                 _mm512_setzero_si512 (),
   5161                 (__mmask8) -1);
   5162 }
   5163 
   5164 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5165 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
   5166 {
   5167   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
   5168                 (__v8di) __W,
   5169                 (__mmask8) __U);
   5170 }
   5171 
   5172 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5173 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
   5174 {
   5175   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
   5176                 (__v8di)
   5177                 _mm512_setzero_si512 (),
   5178                 (__mmask8) __U);
   5179 }
   5180 
   5181 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5182 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
   5183 {
   5184   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
   5185               (__v16si) __B,
   5186               (__v16si)
   5187               _mm512_setzero_si512 (),
   5188               (__mmask16) -1);
   5189 }
   5190 
   5191 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5192 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   5193 {
   5194   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
   5195               (__v16si) __B,
   5196               (__v16si) __W,
   5197               (__mmask16) __U);
   5198 }
   5199 
   5200 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5201 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
   5202 {
   5203   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
   5204               (__v16si) __B,
   5205               (__v16si)
   5206               _mm512_setzero_si512 (),
   5207               (__mmask16) __U);
   5208 }
   5209 
   5210 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5211 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
   5212 {
   5213   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
   5214               (__v8di) __B,
   5215               (__v8di)
   5216               _mm512_setzero_si512 (),
   5217               (__mmask8) -1);
   5218 }
   5219 
   5220 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5221 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   5222 {
   5223   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
   5224               (__v8di) __B,
   5225               (__v8di) __W,
   5226               (__mmask8) __U);
   5227 }
   5228 
   5229 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5230 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
   5231 {
   5232   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
   5233               (__v8di) __B,
   5234               (__v8di)
   5235               _mm512_setzero_si512 (),
   5236               (__mmask8) __U);
   5237 }
   5238 
   5239 
   5240 
   /* Expands to a GNU statement expression calling __builtin_ia32_cmpd512_mask
    * on A and B (each cast via __m512i to __v16si) with (int)(P) as the
    * predicate operand and an all-ones __mmask16; yields an __mmask16. */
   #define _mm512_cmp_epi32_mask(A, B, P) __extension__ ({ \
     (__mmask16)__builtin_ia32_cmpd512_mask( \
         (__v16si)(__m512i)(A), \
         (__v16si)(__m512i)(B), \
         (int)(P), \
         (__mmask16)-1); })
   5245 
   /* Expands to a GNU statement expression calling __builtin_ia32_ucmpd512_mask
    * on A and B (each cast via __m512i to __v16si) with (int)(P) as the
    * predicate operand and an all-ones __mmask16; yields an __mmask16. */
   #define _mm512_cmp_epu32_mask(A, B, P) __extension__ ({ \
     (__mmask16)__builtin_ia32_ucmpd512_mask( \
         (__v16si)(__m512i)(A), \
         (__v16si)(__m512i)(B), \
         (int)(P), \
         (__mmask16)-1); })
   5250 
   /* Expands to a GNU statement expression calling __builtin_ia32_cmpq512_mask
    * on A and B (each cast via __m512i to __v8di) with (int)(P) as the
    * predicate operand and an all-ones __mmask8; yields an __mmask8. */
   #define _mm512_cmp_epi64_mask(A, B, P) __extension__ ({ \
     (__mmask8)__builtin_ia32_cmpq512_mask( \
         (__v8di)(__m512i)(A), \
         (__v8di)(__m512i)(B), \
         (int)(P), \
         (__mmask8)-1); })
   5255 
   /* Expands to a GNU statement expression calling __builtin_ia32_ucmpq512_mask
    * on A and B (each cast via __m512i to __v8di) with (int)(P) as the
    * predicate operand and an all-ones __mmask8; yields an __mmask8. */
   #define _mm512_cmp_epu64_mask(A, B, P) __extension__ ({ \
     (__mmask8)__builtin_ia32_ucmpq512_mask( \
         (__v8di)(__m512i)(A), \
         (__v8di)(__m512i)(B), \
         (int)(P), \
         (__mmask8)-1); })
   5260 
   /* Expands to a GNU statement expression calling __builtin_ia32_cmpd512_mask
    * on A and B (each cast via __m512i to __v16si) with (int)(P) as the
    * predicate operand and (__mmask16)(M) as the mask; yields an __mmask16. */
   #define _mm512_mask_cmp_epi32_mask(M, A, B, P) __extension__ ({ \
     (__mmask16)__builtin_ia32_cmpd512_mask( \
         (__v16si)(__m512i)(A), \
         (__v16si)(__m512i)(B), \
         (int)(P), \
         (__mmask16)(M)); })
   5265 
   /* Expands to a GNU statement expression calling __builtin_ia32_ucmpd512_mask
    * on A and B (each cast via __m512i to __v16si) with (int)(P) as the
    * predicate operand and (__mmask16)(M) as the mask; yields an __mmask16. */
   #define _mm512_mask_cmp_epu32_mask(M, A, B, P) __extension__ ({ \
     (__mmask16)__builtin_ia32_ucmpd512_mask( \
         (__v16si)(__m512i)(A), \
         (__v16si)(__m512i)(B), \
         (int)(P), \
         (__mmask16)(M)); })
   5270 
   /* Expands to a GNU statement expression calling __builtin_ia32_cmpq512_mask
    * on A and B (each cast via __m512i to __v8di) with (int)(P) as the
    * predicate operand and (__mmask8)(M) as the mask; yields an __mmask8. */
   #define _mm512_mask_cmp_epi64_mask(M, A, B, P) __extension__ ({ \
     (__mmask8)__builtin_ia32_cmpq512_mask( \
         (__v8di)(__m512i)(A), \
         (__v8di)(__m512i)(B), \
         (int)(P), \
         (__mmask8)(M)); })
   5275 
   /* Expands to a GNU statement expression calling __builtin_ia32_ucmpq512_mask
    * on A and B (each cast via __m512i to __v8di) with (int)(P) as the
    * predicate operand and (__mmask8)(M) as the mask; yields an __mmask8. */
   #define _mm512_mask_cmp_epu64_mask(M, A, B, P) __extension__ ({ \
     (__mmask8)__builtin_ia32_ucmpq512_mask( \
         (__v8di)(__m512i)(A), \
         (__v8di)(__m512i)(B), \
         (int)(P), \
         (__mmask8)(M)); })
   5280 
   /* Expands to a GNU statement expression calling __builtin_ia32_prold512_mask
    * with A (cast via __m512i to __v16si), (int)(B), the _mm512_setzero_si512()
    * vector (cast to __v16si) and an all-ones __mmask16; yields an __m512i. */
   #define _mm512_rol_epi32(A, B) __extension__ ({ \
     (__m512i)__builtin_ia32_prold512_mask( \
         (__v16si)(__m512i)(A), \
         (int)(B), \
         (__v16si)_mm512_setzero_si512(), \
         (__mmask16)-1); })
   5285 
   /* Expands to a GNU statement expression calling __builtin_ia32_prold512_mask
    * with A (cast via __m512i to __v16si), (int)(B), W (cast via __m512i to
    * __v16si) and (__mmask16)(U); yields an __m512i. */
   #define _mm512_mask_rol_epi32(W, U, A, B) __extension__ ({ \
     (__m512i)__builtin_ia32_prold512_mask( \
         (__v16si)(__m512i)(A), \
         (int)(B), \
         (__v16si)(__m512i)(W), \
         (__mmask16)(U)); })
   5290 
   /* Expands to a GNU statement expression calling __builtin_ia32_prold512_mask
    * with A (cast via __m512i to __v16si), (int)(B), the _mm512_setzero_si512()
    * vector (cast to __v16si) and (__mmask16)(U); yields an __m512i. */
   #define _mm512_maskz_rol_epi32(U, A, B) __extension__ ({ \
     (__m512i)__builtin_ia32_prold512_mask( \
         (__v16si)(__m512i)(A), \
         (int)(B), \
         (__v16si)_mm512_setzero_si512(), \
         (__mmask16)(U)); })
   5295 
   /* Expands to a GNU statement expression calling __builtin_ia32_prolq512_mask
    * with A (cast via __m512i to __v8di), (int)(B), the _mm512_setzero_si512()
    * vector (cast to __v8di) and an all-ones __mmask8; yields an __m512i. */
   #define _mm512_rol_epi64(A, B) __extension__ ({ \
     (__m512i)__builtin_ia32_prolq512_mask( \
         (__v8di)(__m512i)(A), \
         (int)(B), \
         (__v8di)_mm512_setzero_si512(), \
         (__mmask8)-1); })
   5300 
   /* Expands to a GNU statement expression calling __builtin_ia32_prolq512_mask
    * with A (cast via __m512i to __v8di), (int)(B), W (cast via __m512i to
    * __v8di) and (__mmask8)(U); yields an __m512i. */
   #define _mm512_mask_rol_epi64(W, U, A, B) __extension__ ({ \
     (__m512i)__builtin_ia32_prolq512_mask( \
         (__v8di)(__m512i)(A), \
         (int)(B), \
         (__v8di)(__m512i)(W), \
         (__mmask8)(U)); })
   5304 
   /* Expands to a GNU statement expression calling __builtin_ia32_prolq512_mask
    * with A (cast via __m512i to __v8di), (int)(B), the _mm512_setzero_si512()
    * vector (cast to __v8di) and (__mmask8)(U); yields an __m512i. */
   #define _mm512_maskz_rol_epi64(U, A, B) __extension__ ({ \
     (__m512i)__builtin_ia32_prolq512_mask( \
         (__v8di)(__m512i)(A), \
         (int)(B), \
         (__v8di)_mm512_setzero_si512(), \
         (__mmask8)(U)); })
   5309 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5310 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
   5311 {
   5312   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
   5313               (__v16si) __B,
   5314               (__v16si)
   5315               _mm512_setzero_si512 (),
   5316               (__mmask16) -1);
   5317 }
   5318 
   5319 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5320 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   5321 {
   5322   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
   5323               (__v16si) __B,
   5324               (__v16si) __W,
   5325               (__mmask16) __U);
   5326 }
   5327 
   5328 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5329 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
   5330 {
   5331   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
   5332               (__v16si) __B,
   5333               (__v16si)
   5334               _mm512_setzero_si512 (),
   5335               (__mmask16) __U);
   5336 }
   5337 
   5338 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5339 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
   5340 {
   5341   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
   5342               (__v8di) __B,
   5343               (__v8di)
   5344               _mm512_setzero_si512 (),
   5345               (__mmask8) -1);
   5346 }
   5347 
   5348 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5349 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   5350 {
   5351   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
   5352               (__v8di) __B,
   5353               (__v8di) __W,
   5354               (__mmask8) __U);
   5355 }
   5356 
   5357 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5358 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
   5359 {
   5360   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
   5361               (__v8di) __B,
   5362               (__v8di)
   5363               _mm512_setzero_si512 (),
   5364               (__mmask8) __U);
   5365 }
   5366 
   /* Expands to a GNU statement expression calling __builtin_ia32_prord512_mask
    * with A (cast via __m512i to __v16si), (int)(B), the _mm512_setzero_si512()
    * vector (cast to __v16si) and an all-ones __mmask16; yields an __m512i. */
   #define _mm512_ror_epi32(A, B) __extension__ ({ \
     (__m512i)__builtin_ia32_prord512_mask( \
         (__v16si)(__m512i)(A), \
         (int)(B), \
         (__v16si)_mm512_setzero_si512(), \
         (__mmask16)-1); })
   5371 
   /* Expands to a GNU statement expression calling __builtin_ia32_prord512_mask
    * with A (cast via __m512i to __v16si), (int)(B), W (cast via __m512i to
    * __v16si) and (__mmask16)(U); yields an __m512i. */
   #define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \
     (__m512i)__builtin_ia32_prord512_mask( \
         (__v16si)(__m512i)(A), \
         (int)(B), \
         (__v16si)(__m512i)(W), \
         (__mmask16)(U)); })
   5376 
   /* Expands to a GNU statement expression calling __builtin_ia32_prord512_mask
    * with A (cast via __m512i to __v16si), (int)(B), the _mm512_setzero_si512()
    * vector (cast to __v16si) and (__mmask16)(U); yields an __m512i. */
   #define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \
     (__m512i)__builtin_ia32_prord512_mask( \
         (__v16si)(__m512i)(A), \
         (int)(B), \
         (__v16si)_mm512_setzero_si512(), \
         (__mmask16)(U)); })
   5381 
   /* Expands to a GNU statement expression calling __builtin_ia32_prorq512_mask
    * with A (cast via __m512i to __v8di), (int)(B), the _mm512_setzero_si512()
    * vector (cast to __v8di) and an all-ones __mmask8; yields an __m512i. */
   #define _mm512_ror_epi64(A, B) __extension__ ({ \
     (__m512i)__builtin_ia32_prorq512_mask( \
         (__v8di)(__m512i)(A), \
         (int)(B), \
         (__v8di)_mm512_setzero_si512(), \
         (__mmask8)-1); })
   5386 
   /* Expands to a GNU statement expression calling __builtin_ia32_prorq512_mask
    * with A (cast via __m512i to __v8di), (int)(B), W (cast via __m512i to
    * __v8di) and (__mmask8)(U); yields an __m512i. */
   #define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \
     (__m512i)__builtin_ia32_prorq512_mask( \
         (__v8di)(__m512i)(A), \
         (int)(B), \
         (__v8di)(__m512i)(W), \
         (__mmask8)(U)); })
   5390 
   /* Expands to a GNU statement expression calling __builtin_ia32_prorq512_mask
    * with A (cast via __m512i to __v8di), (int)(B), the _mm512_setzero_si512()
    * vector (cast to __v8di) and (__mmask8)(U); yields an __m512i. */
   #define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \
     (__m512i)__builtin_ia32_prorq512_mask( \
         (__v8di)(__m512i)(A), \
         (int)(B), \
         (__v8di)_mm512_setzero_si512(), \
         (__mmask8)(U)); })
   5395 
   /* Expands to a GNU statement expression calling
    * __builtin_ia32_pslldi512_mask with A (cast via __m512i to __v16si),
    * (int)(B), the _mm512_setzero_si512() vector (cast to __v16si) and an
    * all-ones __mmask16; yields an __m512i. */
   #define _mm512_slli_epi32(A, B) __extension__ ({ \
     (__m512i)__builtin_ia32_pslldi512_mask( \
         (__v16si)(__m512i)(A), \
         (int)(B), \
         (__v16si)_mm512_setzero_si512(), \
         (__mmask16)-1); })
   5400 
   /* Expands to a GNU statement expression calling
    * __builtin_ia32_pslldi512_mask with A (cast via __m512i to __v16si),
    * (int)(B), W (cast via __m512i to __v16si) and (__mmask16)(U); yields an
    * __m512i. */
   #define _mm512_mask_slli_epi32(W, U, A, B) __extension__ ({ \
     (__m512i)__builtin_ia32_pslldi512_mask( \
         (__v16si)(__m512i)(A), \
         (int)(B), \
         (__v16si)(__m512i)(W), \
         (__mmask16)(U)); })
   5405 
   /* Expands to a GNU statement expression calling
    * __builtin_ia32_pslldi512_mask with A (cast via __m512i to __v16si),
    * (int)(B), the _mm512_setzero_si512() vector (cast to __v16si) and
    * (__mmask16)(U); yields an __m512i. */
   #define _mm512_maskz_slli_epi32(U, A, B) __extension__ ({ \
     (__m512i)__builtin_ia32_pslldi512_mask( \
         (__v16si)(__m512i)(A), \
         (int)(B), \
         (__v16si)_mm512_setzero_si512(), \
         (__mmask16)(U)); })
   5410 
   /* Expands to a GNU statement expression calling
    * __builtin_ia32_psllqi512_mask with A (cast via __m512i to __v8di),
    * (int)(B), the _mm512_setzero_si512() vector (cast to __v8di) and an
    * all-ones __mmask8; yields an __m512i. */
   #define _mm512_slli_epi64(A, B) __extension__ ({ \
     (__m512i)__builtin_ia32_psllqi512_mask( \
         (__v8di)(__m512i)(A), \
         (int)(B), \
         (__v8di)_mm512_setzero_si512(), \
         (__mmask8)-1); })
   5415 
   5416 #define _mm512_mask_slli_epi64(W, U, A, B) __extension__ ({ \
   5417   (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(__m512i)(A), (int)(B), \
   5418                                          (__v8di)(__m512i)(W), \
   5419                                          (__mmask8)(U)); })
   5420 
   5421 #define _mm512_maskz_slli_epi64(U, A, B) __extension__ ({ \
   5422   (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(__m512i)(A), (int)(B), \
   5423                                          (__v8di)_mm512_setzero_si512(), \
   5424                                          (__mmask8)(U)); })
   5425 
   5426 
   5427 
   5428 #define _mm512_srli_epi32(A, B) __extension__ ({ \
   5429   (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(__m512i)(A), (int)(B), \
   5430                                          (__v16si)_mm512_setzero_si512(), \
   5431                                          (__mmask16)-1); })
   5432 
   5433 #define _mm512_mask_srli_epi32(W, U, A, B) __extension__ ({ \
   5434   (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(__m512i)(A), (int)(B), \
   5435                                          (__v16si)(__m512i)(W), \
   5436                                          (__mmask16)(U)); })
   5437 
   5438 #define _mm512_maskz_srli_epi32(U, A, B) __extension__ ({ \
   5439   (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(__m512i)(A), (int)(B), \
   5440                                          (__v16si)_mm512_setzero_si512(), \
   5441                                          (__mmask16)(U)); })
   5442 
   5443 #define _mm512_srli_epi64(A, B) __extension__ ({ \
   5444   (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(__m512i)(A), (int)(B), \
   5445                                          (__v8di)_mm512_setzero_si512(), \
   5446                                          (__mmask8)-1); })
   5447 
   5448 #define _mm512_mask_srli_epi64(W, U, A, B) __extension__ ({ \
   5449   (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(__m512i)(A), (int)(B), \
   5450                                          (__v8di)(__m512i)(W), \
   5451                                          (__mmask8)(U)); })
   5452 
   5453 #define _mm512_maskz_srli_epi64(U, A, B) __extension__ ({ \
   5454   (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(__m512i)(A), (int)(B), \
   5455                                          (__v8di)_mm512_setzero_si512(), \
   5456                                          (__mmask8)(U)); })
   5457 
   5458 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5459 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
   5460 {
   5461   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
   5462               (__v16si) __W,
   5463               (__mmask16) __U);
   5464 }
   5465 
   5466 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5467 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
   5468 {
   5469   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
   5470               (__v16si)
   5471               _mm512_setzero_si512 (),
   5472               (__mmask16) __U);
   5473 }
   5474 
   5475 static __inline__ void __DEFAULT_FN_ATTRS
   5476 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
   5477 {
   5478   __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
   5479           (__mmask16) __U);
   5480 }
   5481 
   5482 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5483 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
   5484 {
   5485   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
   5486                  (__v16si) __A,
   5487                  (__v16si) __W);
   5488 }
   5489 
   5490 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5491 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
   5492 {
   5493   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
   5494                  (__v16si) __A,
   5495                  (__v16si) _mm512_setzero_si512 ());
   5496 }
   5497 
   5498 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5499 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
   5500 {
   5501   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
   5502                  (__v8di) __A,
   5503                  (__v8di) __W);
   5504 }
   5505 
   5506 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5507 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
   5508 {
   5509   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
   5510                  (__v8di) __A,
   5511                  (__v8di) _mm512_setzero_si512 ());
   5512 }
   5513 
   5514 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5515 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
   5516 {
   5517   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
   5518               (__v8di) __W,
   5519               (__mmask8) __U);
   5520 }
   5521 
   5522 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5523 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
   5524 {
   5525   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
   5526               (__v8di)
   5527               _mm512_setzero_si512 (),
   5528               (__mmask8) __U);
   5529 }
   5530 
   5531 static __inline__ void __DEFAULT_FN_ATTRS
   5532 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
   5533 {
   5534   __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
   5535           (__mmask8) __U);
   5536 }
   5537 
   5538 static __inline__ __m512d __DEFAULT_FN_ATTRS
   5539 _mm512_movedup_pd (__m512d __A)
   5540 {
   5541   return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
   5542                                           0, 0, 2, 2, 4, 4, 6, 6);
   5543 }
   5544 
   5545 static __inline__ __m512d __DEFAULT_FN_ATTRS
   5546 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
   5547 {
   5548   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
   5549                                               (__v8df)_mm512_movedup_pd(__A),
   5550                                               (__v8df)__W);
   5551 }
   5552 
   5553 static __inline__ __m512d __DEFAULT_FN_ATTRS
   5554 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
   5555 {
   5556   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
   5557                                               (__v8df)_mm512_movedup_pd(__A),
   5558                                               (__v8df)_mm512_setzero_pd());
   5559 }
   5560 
   5561 #define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \
   5562   (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
   5563                                              (__v8df)(__m512d)(B), \
   5564                                              (__v8di)(__m512i)(C), (int)(imm), \
   5565                                              (__mmask8)-1, (int)(R)); })
   5566 
   5567 #define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \
   5568   (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
   5569                                              (__v8df)(__m512d)(B), \
   5570                                              (__v8di)(__m512i)(C), (int)(imm), \
   5571                                              (__mmask8)(U), (int)(R)); })
   5572 
   5573 #define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \
   5574   (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
   5575                                              (__v8df)(__m512d)(B), \
   5576                                              (__v8di)(__m512i)(C), (int)(imm), \
   5577                                              (__mmask8)-1, \
   5578                                              _MM_FROUND_CUR_DIRECTION); })
   5579 
   5580 #define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
   5581   (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
   5582                                              (__v8df)(__m512d)(B), \
   5583                                              (__v8di)(__m512i)(C), (int)(imm), \
   5584                                              (__mmask8)(U), \
   5585                                              _MM_FROUND_CUR_DIRECTION); })
   5586 
   5587 #define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \
   5588   (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
   5589                                               (__v8df)(__m512d)(B), \
   5590                                               (__v8di)(__m512i)(C), \
   5591                                               (int)(imm), (__mmask8)(U), \
   5592                                               (int)(R)); })
   5593 
   5594 #define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
   5595   (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
   5596                                               (__v8df)(__m512d)(B), \
   5597                                               (__v8di)(__m512i)(C), \
   5598                                               (int)(imm), (__mmask8)(U), \
   5599                                               _MM_FROUND_CUR_DIRECTION); })
   5600 
   5601 #define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \
   5602   (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
   5603                                             (__v16sf)(__m512)(B), \
   5604                                             (__v16si)(__m512i)(C), (int)(imm), \
   5605                                             (__mmask16)-1, (int)(R)); })
   5606 
   5607 #define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \
   5608   (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
   5609                                             (__v16sf)(__m512)(B), \
   5610                                             (__v16si)(__m512i)(C), (int)(imm), \
   5611                                             (__mmask16)(U), (int)(R)); })
   5612 
   5613 #define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \
   5614   (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
   5615                                             (__v16sf)(__m512)(B), \
   5616                                             (__v16si)(__m512i)(C), (int)(imm), \
   5617                                             (__mmask16)-1, \
   5618                                             _MM_FROUND_CUR_DIRECTION); })
   5619 
   5620 #define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
   5621   (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
   5622                                             (__v16sf)(__m512)(B), \
   5623                                             (__v16si)(__m512i)(C), (int)(imm), \
   5624                                             (__mmask16)(U), \
   5625                                             _MM_FROUND_CUR_DIRECTION); })
   5626 
   5627 #define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
   5628   (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
   5629                                              (__v16sf)(__m512)(B), \
   5630                                              (__v16si)(__m512i)(C), \
   5631                                              (int)(imm), (__mmask16)(U), \
   5632                                              (int)(R)); })
   5633 
   5634 #define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
   5635   (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
   5636                                              (__v16sf)(__m512)(B), \
   5637                                              (__v16si)(__m512i)(C), \
   5638                                              (int)(imm), (__mmask16)(U), \
   5639                                              _MM_FROUND_CUR_DIRECTION); })
   5640 
   5641 #define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \
   5642   (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
   5643                                           (__v2df)(__m128d)(B), \
   5644                                           (__v2di)(__m128i)(C), (int)(imm), \
   5645                                           (__mmask8)-1, (int)(R)); })
   5646 
   5647 #define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \
   5648   (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
   5649                                           (__v2df)(__m128d)(B), \
   5650                                           (__v2di)(__m128i)(C), (int)(imm), \
   5651                                           (__mmask8)(U), (int)(R)); })
   5652 
   5653 #define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \
   5654   (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
   5655                                           (__v2df)(__m128d)(B), \
   5656                                           (__v2di)(__m128i)(C), (int)(imm), \
   5657                                           (__mmask8)-1, \
   5658                                           _MM_FROUND_CUR_DIRECTION); })
   5659 
   5660 #define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \
   5661   (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
   5662                                           (__v2df)(__m128d)(B), \
   5663                                           (__v2di)(__m128i)(C), (int)(imm), \
   5664                                           (__mmask8)(U), \
   5665                                           _MM_FROUND_CUR_DIRECTION); })
   5666 
   5667 #define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \
   5668   (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
   5669                                            (__v2df)(__m128d)(B), \
   5670                                            (__v2di)(__m128i)(C), (int)(imm), \
   5671                                            (__mmask8)(U), (int)(R)); })
   5672 
   5673 #define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \
   5674   (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
   5675                                            (__v2df)(__m128d)(B), \
   5676                                            (__v2di)(__m128i)(C), (int)(imm), \
   5677                                            (__mmask8)(U), \
   5678                                            _MM_FROUND_CUR_DIRECTION); })
   5679 
   5680 #define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \
   5681   (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
   5682                                          (__v4sf)(__m128)(B), \
   5683                                          (__v4si)(__m128i)(C), (int)(imm), \
   5684                                          (__mmask8)-1, (int)(R)); })
   5685 
   5686 #define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \
   5687   (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
   5688                                          (__v4sf)(__m128)(B), \
   5689                                          (__v4si)(__m128i)(C), (int)(imm), \
   5690                                          (__mmask8)(U), (int)(R)); })
   5691 
   5692 #define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \
   5693   (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
   5694                                          (__v4sf)(__m128)(B), \
   5695                                          (__v4si)(__m128i)(C), (int)(imm), \
   5696                                          (__mmask8)-1, \
   5697                                          _MM_FROUND_CUR_DIRECTION); })
   5698 
   5699 #define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \
   5700   (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
   5701                                          (__v4sf)(__m128)(B), \
   5702                                          (__v4si)(__m128i)(C), (int)(imm), \
   5703                                          (__mmask8)(U), \
   5704                                          _MM_FROUND_CUR_DIRECTION); })
   5705 
   5706 #define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \
   5707   (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
   5708                                           (__v4sf)(__m128)(B), \
   5709                                           (__v4si)(__m128i)(C), (int)(imm), \
   5710                                           (__mmask8)(U), (int)(R)); })
   5711 
   5712 #define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \
   5713   (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
   5714                                           (__v4sf)(__m128)(B), \
   5715                                           (__v4si)(__m128i)(C), (int)(imm), \
   5716                                           (__mmask8)(U), \
   5717                                           _MM_FROUND_CUR_DIRECTION); })
   5718 
   5719 #define _mm_getexp_round_sd(A, B, R) __extension__ ({ \
   5720   (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
   5721                                                  (__v2df)(__m128d)(B), \
   5722                                                  (__v2df)_mm_setzero_pd(), \
   5723                                                  (__mmask8)-1, (int)(R)); })
   5724 
   5725 
   5726 static __inline__ __m128d __DEFAULT_FN_ATTRS
   5727 _mm_getexp_sd (__m128d __A, __m128d __B)
   5728 {
   5729   return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
   5730                  (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
   5731 }
   5732 
   5733 static __inline__ __m128d __DEFAULT_FN_ATTRS
   5734 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   5735 {
   5736  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
   5737           (__v2df) __B,
   5738           (__v2df) __W,
   5739           (__mmask8) __U,
   5740           _MM_FROUND_CUR_DIRECTION);
   5741 }
   5742 
   5743 #define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({\
   5744   (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
   5745                                                  (__v2df)(__m128d)(B), \
   5746                                                  (__v2df)(__m128d)(W), \
   5747                                                  (__mmask8)(U), (int)(R)); })
   5748 
   5749 static __inline__ __m128d __DEFAULT_FN_ATTRS
   5750 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
   5751 {
   5752  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
   5753           (__v2df) __B,
   5754           (__v2df) _mm_setzero_pd (),
   5755           (__mmask8) __U,
   5756           _MM_FROUND_CUR_DIRECTION);
   5757 }
   5758 
   5759 #define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({\
   5760   (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
   5761                                                  (__v2df)(__m128d)(B), \
   5762                                                  (__v2df)_mm_setzero_pd(), \
   5763                                                  (__mmask8)(U), (int)(R)); })
   5764 
   5765 #define _mm_getexp_round_ss(A, B, R) __extension__ ({ \
   5766   (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
   5767                                                 (__v4sf)(__m128)(B), \
   5768                                                 (__v4sf)_mm_setzero_ps(), \
   5769                                                 (__mmask8)-1, (int)(R)); })
   5770 
   5771 static __inline__ __m128 __DEFAULT_FN_ATTRS
   5772 _mm_getexp_ss (__m128 __A, __m128 __B)
   5773 {
   5774   return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
   5775                 (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
   5776 }
   5777 
   5778 static __inline__ __m128 __DEFAULT_FN_ATTRS
   5779 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   5780 {
   5781  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
   5782           (__v4sf) __B,
   5783           (__v4sf) __W,
   5784           (__mmask8) __U,
   5785           _MM_FROUND_CUR_DIRECTION);
   5786 }
   5787 
   5788 #define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({\
   5789   (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
   5790                                                 (__v4sf)(__m128)(B), \
   5791                                                 (__v4sf)(__m128)(W), \
   5792                                                 (__mmask8)(U), (int)(R)); })
   5793 
   5794 static __inline__ __m128 __DEFAULT_FN_ATTRS
   5795 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
   5796 {
   5797  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
   5798           (__v4sf) __B,
   5799           (__v4sf) _mm_setzero_pd (),
   5800           (__mmask8) __U,
   5801           _MM_FROUND_CUR_DIRECTION);
   5802 }
   5803 
   5804 #define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({\
   5805   (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
   5806                                                 (__v4sf)(__m128)(B), \
   5807                                                 (__v4sf)_mm_setzero_ps(), \
   5808                                                 (__mmask8)(U), (int)(R)); })
   5809 
   5810 #define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \
   5811   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   5812                                                (__v2df)(__m128d)(B), \
   5813                                                (int)(((D)<<2) | (C)), \
   5814                                                (__v2df)_mm_setzero_pd(), \
   5815                                                (__mmask8)-1, (int)(R)); })
   5816 
   5817 #define _mm_getmant_sd(A, B, C, D)  __extension__ ({ \
   5818   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   5819                                                (__v2df)(__m128d)(B), \
   5820                                                (int)(((D)<<2) | (C)), \
   5821                                                (__v2df)_mm_setzero_pd(), \
   5822                                                (__mmask8)-1, \
   5823                                                _MM_FROUND_CUR_DIRECTION); })
   5824 
   5825 #define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({\
   5826   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   5827                                                (__v2df)(__m128d)(B), \
   5828                                                (int)(((D)<<2) | (C)), \
   5829                                                (__v2df)(__m128d)(W), \
   5830                                                (__mmask8)(U), \
   5831                                                _MM_FROUND_CUR_DIRECTION); })
   5832 
   5833 #define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R)({\
   5834   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   5835                                                (__v2df)(__m128d)(B), \
   5836                                                (int)(((D)<<2) | (C)), \
   5837                                                (__v2df)(__m128d)(W), \
   5838                                                (__mmask8)(U), (int)(R)); })
   5839 
   5840 #define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({\
   5841   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   5842                                                (__v2df)(__m128d)(B), \
   5843                                                (int)(((D)<<2) | (C)), \
   5844                                                (__v2df)_mm_setzero_pd(), \
   5845                                                (__mmask8)(U), \
   5846                                                _MM_FROUND_CUR_DIRECTION); })
   5847 
   5848 #define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({\
   5849   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   5850                                                (__v2df)(__m128d)(B), \
   5851                                                (int)(((D)<<2) | (C)), \
   5852                                                (__v2df)_mm_setzero_pd(), \
   5853                                                (__mmask8)(U), (int)(R)); })
   5854 
   5855 #define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \
   5856   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   5857                                               (__v4sf)(__m128)(B), \
   5858                                               (int)(((D)<<2) | (C)), \
   5859                                               (__v4sf)_mm_setzero_ps(), \
   5860                                               (__mmask8)-1, (int)(R)); })
   5861 
   5862 #define _mm_getmant_ss(A, B, C, D) __extension__ ({ \
   5863   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   5864                                               (__v4sf)(__m128)(B), \
   5865                                               (int)(((D)<<2) | (C)), \
   5866                                               (__v4sf)_mm_setzero_ps(), \
   5867                                               (__mmask8)-1, \
   5868                                               _MM_FROUND_CUR_DIRECTION); })
   5869 
   5870 #define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({\
   5871   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   5872                                               (__v4sf)(__m128)(B), \
   5873                                               (int)(((D)<<2) | (C)), \
   5874                                               (__v4sf)(__m128)(W), \
   5875                                               (__mmask8)(U), \
   5876                                               _MM_FROUND_CUR_DIRECTION); })
   5877 
   5878 #define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R)({\
   5879   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   5880                                               (__v4sf)(__m128)(B), \
   5881                                               (int)(((D)<<2) | (C)), \
   5882                                               (__v4sf)(__m128)(W), \
   5883                                               (__mmask8)(U), (int)(R)); })
   5884 
   5885 #define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\
   5886   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   5887                                               (__v4sf)(__m128)(B), \
   5888                                               (int)(((D)<<2) | (C)), \
   5889                                               (__v4sf)_mm_setzero_pd(), \
   5890                                               (__mmask8)(U), \
   5891                                               _MM_FROUND_CUR_DIRECTION); })
   5892 
   5893 #define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({\
   5894   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   5895                                               (__v4sf)(__m128)(B), \
   5896                                               (int)(((D)<<2) | (C)), \
   5897                                               (__v4sf)_mm_setzero_ps(), \
   5898                                               (__mmask8)(U), (int)(R)); })
   5899 
   5900 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   5901 _mm512_kmov (__mmask16 __A)
   5902 {
   5903   return  __A;
   5904 }
   5905 
   5906 #define _mm_comi_round_sd(A, B, P, R) __extension__ ({\
   5907   (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
   5908                               (int)(P), (int)(R)); })
   5909 
   5910 #define _mm_comi_round_ss(A, B, P, R) __extension__ ({\
   5911   (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
   5912                               (int)(P), (int)(R)); })
   5913 
   5914 #define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
   5915   (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
   5916 
   5917 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5918 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
   5919          __mmask16 __U, __m512i __B)
   5920 {
   5921   return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
   5922                    (__v16si) __I
   5923                    /* idx */ ,
   5924                    (__v16si) __B,
   5925                    (__mmask16) __U);
   5926 }
   5927 
   5928 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5929 _mm512_sll_epi32 (__m512i __A, __m128i __B)
   5930 {
   5931   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
   5932              (__v4si) __B,
   5933              (__v16si)
   5934              _mm512_setzero_si512 (),
   5935              (__mmask16) -1);
   5936 }
   5937 
   5938 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5939 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
   5940 {
   5941   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
   5942              (__v4si) __B,
   5943              (__v16si) __W,
   5944              (__mmask16) __U);
   5945 }
   5946 
   5947 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5948 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
   5949 {
   5950   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
   5951              (__v4si) __B,
   5952              (__v16si)
   5953              _mm512_setzero_si512 (),
   5954              (__mmask16) __U);
   5955 }
   5956 
   5957 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5958 _mm512_sll_epi64 (__m512i __A, __m128i __B)
   5959 {
   5960   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
   5961              (__v2di) __B,
   5962              (__v8di)
   5963              _mm512_setzero_si512 (),
   5964              (__mmask8) -1);
   5965 }
   5966 
   5967 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5968 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
   5969 {
   5970   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
   5971              (__v2di) __B,
   5972              (__v8di) __W,
   5973              (__mmask8) __U);
   5974 }
   5975 
   5976 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5977 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
   5978 {
   5979   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
   5980              (__v2di) __B,
   5981              (__v8di)
   5982              _mm512_setzero_si512 (),
   5983              (__mmask8) __U);
   5984 }
   5985 
   5986 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5987 _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
   5988 {
   5989   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
   5990               (__v16si) __Y,
   5991               (__v16si)
   5992               _mm512_setzero_si512 (),
   5993               (__mmask16) -1);
   5994 }
   5995 
   5996 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5997 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
   5998 {
   5999   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
   6000               (__v16si) __Y,
   6001               (__v16si) __W,
   6002               (__mmask16) __U);
   6003 }
   6004 
   6005 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6006 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
   6007 {
   6008   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
   6009               (__v16si) __Y,
   6010               (__v16si)
   6011               _mm512_setzero_si512 (),
   6012               (__mmask16) __U);
   6013 }
   6014 
   6015 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6016 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
   6017 {
   6018   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
   6019              (__v8di) __Y,
   6020              (__v8di)
   6021              _mm512_undefined_pd (),
   6022              (__mmask8) -1);
   6023 }
   6024 
   6025 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6026 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
   6027 {
   6028   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
   6029              (__v8di) __Y,
   6030              (__v8di) __W,
   6031              (__mmask8) __U);
   6032 }
   6033 
   6034 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6035 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
   6036 {
   6037   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
   6038              (__v8di) __Y,
   6039              (__v8di)
   6040              _mm512_setzero_si512 (),
   6041              (__mmask8) __U);
   6042 }
   6043 
   6044 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6045 _mm512_sra_epi32 (__m512i __A, __m128i __B)
   6046 {
   6047   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
   6048              (__v4si) __B,
   6049              (__v16si)
   6050              _mm512_setzero_si512 (),
   6051              (__mmask16) -1);
   6052 }
   6053 
   6054 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6055 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
   6056 {
   6057   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
   6058              (__v4si) __B,
   6059              (__v16si) __W,
   6060              (__mmask16) __U);
   6061 }
   6062 
   6063 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6064 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
   6065 {
   6066   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
   6067              (__v4si) __B,
   6068              (__v16si)
   6069              _mm512_setzero_si512 (),
   6070              (__mmask16) __U);
   6071 }
   6072 
   6073 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6074 _mm512_sra_epi64 (__m512i __A, __m128i __B)
   6075 {
   6076   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
   6077              (__v2di) __B,
   6078              (__v8di)
   6079              _mm512_setzero_si512 (),
   6080              (__mmask8) -1);
   6081 }
   6082 
   6083 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6084 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
   6085 {
   6086   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
   6087              (__v2di) __B,
   6088              (__v8di) __W,
   6089              (__mmask8) __U);
   6090 }
   6091 
   6092 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6093 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
   6094 {
   6095   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
   6096              (__v2di) __B,
   6097              (__v8di)
   6098              _mm512_setzero_si512 (),
   6099              (__mmask8) __U);
   6100 }
   6101 
   6102 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6103 _mm512_srav_epi32 (__m512i __X, __m512i __Y)
   6104 {
   6105   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
   6106               (__v16si) __Y,
   6107               (__v16si)
   6108               _mm512_setzero_si512 (),
   6109               (__mmask16) -1);
   6110 }
   6111 
   6112 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6113 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
   6114 {
   6115   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
   6116               (__v16si) __Y,
   6117               (__v16si) __W,
   6118               (__mmask16) __U);
   6119 }
   6120 
   6121 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6122 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
   6123 {
   6124   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
   6125               (__v16si) __Y,
   6126               (__v16si)
   6127               _mm512_setzero_si512 (),
   6128               (__mmask16) __U);
   6129 }
   6130 
   6131 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6132 _mm512_srav_epi64 (__m512i __X, __m512i __Y)
   6133 {
   6134   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
   6135              (__v8di) __Y,
   6136              (__v8di)
   6137              _mm512_setzero_si512 (),
   6138              (__mmask8) -1);
   6139 }
   6140 
   6141 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6142 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
   6143 {
   6144   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
   6145              (__v8di) __Y,
   6146              (__v8di) __W,
   6147              (__mmask8) __U);
   6148 }
   6149 
   6150 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6151 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
   6152 {
   6153   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
   6154              (__v8di) __Y,
   6155              (__v8di)
   6156              _mm512_setzero_si512 (),
   6157              (__mmask8) __U);
   6158 }
   6159 
   6160 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6161 _mm512_srl_epi32 (__m512i __A, __m128i __B)
   6162 {
   6163   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
   6164              (__v4si) __B,
   6165              (__v16si)
   6166              _mm512_setzero_si512 (),
   6167              (__mmask16) -1);
   6168 }
   6169 
   6170 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6171 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
   6172 {
   6173   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
   6174              (__v4si) __B,
   6175              (__v16si) __W,
   6176              (__mmask16) __U);
   6177 }
   6178 
   6179 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6180 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
   6181 {
   6182   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
   6183              (__v4si) __B,
   6184              (__v16si)
   6185              _mm512_setzero_si512 (),
   6186              (__mmask16) __U);
   6187 }
   6188 
   6189 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6190 _mm512_srl_epi64 (__m512i __A, __m128i __B)
   6191 {
   6192   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
   6193              (__v2di) __B,
   6194              (__v8di)
   6195              _mm512_setzero_si512 (),
   6196              (__mmask8) -1);
   6197 }
   6198 
   6199 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6200 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
   6201 {
   6202   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
   6203              (__v2di) __B,
   6204              (__v8di) __W,
   6205              (__mmask8) __U);
   6206 }
   6207 
   6208 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6209 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
   6210 {
   6211   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
   6212              (__v2di) __B,
   6213              (__v8di)
   6214              _mm512_setzero_si512 (),
   6215              (__mmask8) __U);
   6216 }
   6217 
   6218 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6219 _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
   6220 {
   6221   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
   6222               (__v16si) __Y,
   6223               (__v16si)
   6224               _mm512_setzero_si512 (),
   6225               (__mmask16) -1);
   6226 }
   6227 
   6228 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6229 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
   6230 {
   6231   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
   6232               (__v16si) __Y,
   6233               (__v16si) __W,
   6234               (__mmask16) __U);
   6235 }
   6236 
   6237 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6238 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
   6239 {
   6240   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
   6241               (__v16si) __Y,
   6242               (__v16si)
   6243               _mm512_setzero_si512 (),
   6244               (__mmask16) __U);
   6245 }
   6246 
   6247 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6248 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
   6249 {
   6250   return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
   6251              (__v8di) __Y,
   6252              (__v8di)
   6253              _mm512_setzero_si512 (),
   6254              (__mmask8) -1);
   6255 }
   6256 
   6257 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6258 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
   6259 {
   6260   return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
   6261              (__v8di) __Y,
   6262              (__v8di) __W,
   6263              (__mmask8) __U);
   6264 }
   6265 
   6266 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6267 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
   6268 {
   6269   return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
   6270              (__v8di) __Y,
   6271              (__v8di)
   6272              _mm512_setzero_si512 (),
   6273              (__mmask8) __U);
   6274 }
   6275 
   6276 #define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
   6277   (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
   6278                                             (__v16si)(__m512i)(B), \
   6279                                             (__v16si)(__m512i)(C), (int)(imm), \
   6280                                             (__mmask16)-1); })
   6281 
   6282 #define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
   6283   (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
   6284                                             (__v16si)(__m512i)(B), \
   6285                                             (__v16si)(__m512i)(C), (int)(imm), \
   6286                                             (__mmask16)(U)); })
   6287 
   6288 #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
   6289   (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
   6290                                              (__v16si)(__m512i)(B), \
   6291                                              (__v16si)(__m512i)(C), \
   6292                                              (int)(imm), (__mmask16)(U)); })
   6293 
   6294 #define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
   6295   (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
   6296                                             (__v8di)(__m512i)(B), \
   6297                                             (__v8di)(__m512i)(C), (int)(imm), \
   6298                                             (__mmask8)-1); })
   6299 
   6300 #define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
   6301   (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
   6302                                             (__v8di)(__m512i)(B), \
   6303                                             (__v8di)(__m512i)(C), (int)(imm), \
   6304                                             (__mmask8)(U)); })
   6305 
   6306 #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
   6307   (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
   6308                                              (__v8di)(__m512i)(B), \
   6309                                              (__v8di)(__m512i)(C), (int)(imm), \
   6310                                              (__mmask8)(U)); })
   6311 
   6312 #define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \
   6313   (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
   6314 
   6315 #define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \
   6316   (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })
   6317 
   6318 #define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \
   6319   (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })
   6320 
   6321 #define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \
   6322   (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)); })
   6323 
   6324 static __inline__ unsigned __DEFAULT_FN_ATTRS
   6325 _mm_cvtsd_u32 (__m128d __A)
   6326 {
   6327   return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
   6328              _MM_FROUND_CUR_DIRECTION);
   6329 }
   6330 
   6331 #define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \
   6332   (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
   6333                                                   (int)(R)); })
   6334 
   6335 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
   6336 _mm_cvtsd_u64 (__m128d __A)
   6337 {
   6338   return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
   6339                  __A,
   6340                  _MM_FROUND_CUR_DIRECTION);
   6341 }
   6342 
   6343 #define _mm_cvt_roundss_si32(A, R) __extension__ ({ \
   6344   (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })
   6345 
   6346 #define _mm_cvt_roundss_i32(A, R) __extension__ ({ \
   6347   (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })
   6348 
   6349 #define _mm_cvt_roundss_si64(A, R) __extension__ ({ \
   6350   (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
   6351 
   6352 #define _mm_cvt_roundss_i64(A, R) __extension__ ({ \
   6353   (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
   6354 
   6355 #define _mm_cvt_roundss_u32(A, R) __extension__ ({ \
   6356   (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); })
   6357 
   6358 static __inline__ unsigned __DEFAULT_FN_ATTRS
   6359 _mm_cvtss_u32 (__m128 __A)
   6360 {
   6361   return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
   6362              _MM_FROUND_CUR_DIRECTION);
   6363 }
   6364 
   6365 #define _mm_cvt_roundss_u64(A, R) __extension__ ({ \
   6366   (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
   6367                                                   (int)(R)); })
   6368 
   6369 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
   6370 _mm_cvtss_u64 (__m128 __A)
   6371 {
   6372   return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
   6373                  __A,
   6374                  _MM_FROUND_CUR_DIRECTION);
   6375 }
   6376 
   6377 #define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \
   6378   (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })
   6379 
   6380 #define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \
   6381   (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })
   6382 
   6383 static __inline__ int __DEFAULT_FN_ATTRS
   6384 _mm_cvttsd_i32 (__m128d __A)
   6385 {
   6386   return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
   6387               _MM_FROUND_CUR_DIRECTION);
   6388 }
   6389 
   6390 #define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \
   6391   (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
   6392 
   6393 #define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \
   6394   (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
   6395 
   6396 static __inline__ long long __DEFAULT_FN_ATTRS
   6397 _mm_cvttsd_i64 (__m128d __A)
   6398 {
   6399   return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
   6400               _MM_FROUND_CUR_DIRECTION);
   6401 }
   6402 
   6403 #define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \
   6404   (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); })
   6405 
   6406 static __inline__ unsigned __DEFAULT_FN_ATTRS
   6407 _mm_cvttsd_u32 (__m128d __A)
   6408 {
   6409   return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
   6410               _MM_FROUND_CUR_DIRECTION);
   6411 }
   6412 
   6413 #define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \
   6414   (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
   6415                                                    (int)(R)); })
   6416 
   6417 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
   6418 _mm_cvttsd_u64 (__m128d __A)
   6419 {
   6420   return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
   6421                   __A,
   6422                   _MM_FROUND_CUR_DIRECTION);
   6423 }
   6424 
   6425 #define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \
   6426   (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })
   6427 
   6428 #define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \
   6429   (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })
   6430 
   6431 static __inline__ int __DEFAULT_FN_ATTRS
   6432 _mm_cvttss_i32 (__m128 __A)
   6433 {
   6434   return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
   6435               _MM_FROUND_CUR_DIRECTION);
   6436 }
   6437 
   6438 #define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \
   6439   (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
   6440 
   6441 #define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \
   6442   (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
   6443 
   6444 static __inline__ long long __DEFAULT_FN_ATTRS
   6445 _mm_cvttss_i64 (__m128 __A)
   6446 {
   6447   return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
   6448               _MM_FROUND_CUR_DIRECTION);
   6449 }
   6450 
   6451 #define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \
   6452   (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); })
   6453 
   6454 static __inline__ unsigned __DEFAULT_FN_ATTRS
   6455 _mm_cvttss_u32 (__m128 __A)
   6456 {
   6457   return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
   6458               _MM_FROUND_CUR_DIRECTION);
   6459 }
   6460 
   6461 #define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \
   6462   (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
   6463                                                    (int)(R)); })
   6464 
   6465 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
   6466 _mm_cvttss_u64 (__m128 __A)
   6467 {
   6468   return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
   6469                   __A,
   6470                   _MM_FROUND_CUR_DIRECTION);
   6471 }
   6472 
   6473 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6474 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
   6475             __m512d __B)
   6476 {
   6477   return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
   6478               (__v8di) __I
   6479               /* idx */ ,
   6480               (__v8df) __B,
   6481               (__mmask8) __U);
   6482 }
   6483 
   6484 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6485 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
   6486             __m512 __B)
   6487 {
   6488   return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
   6489                    (__v16si) __I
   6490                    /* idx */ ,
   6491                    (__v16sf) __B,
   6492                    (__mmask16) __U);
   6493 }
   6494 
   6495 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6496 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
   6497          __mmask8 __U, __m512i __B)
   6498 {
   6499   return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
   6500                    (__v8di) __I
   6501                    /* idx */ ,
   6502                    (__v8di) __B,
   6503                    (__mmask8) __U);
   6504 }
   6505 
   6506 #define _mm512_permute_pd(X, C) __extension__ ({ \
   6507   (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
   6508                                    (__v8df)_mm512_undefined_pd(), \
   6509                                    0 + (((C) >> 0) & 0x1), \
   6510                                    0 + (((C) >> 1) & 0x1), \
   6511                                    2 + (((C) >> 2) & 0x1), \
   6512                                    2 + (((C) >> 3) & 0x1), \
   6513                                    4 + (((C) >> 4) & 0x1), \
   6514                                    4 + (((C) >> 5) & 0x1), \
   6515                                    6 + (((C) >> 6) & 0x1), \
   6516                                    6 + (((C) >> 7) & 0x1)); })
   6517 
   6518 #define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \
   6519   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
   6520                                        (__v8df)_mm512_permute_pd((X), (C)), \
   6521                                        (__v8df)(__m512d)(W)); })
   6522 
   6523 #define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \
   6524   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
   6525                                        (__v8df)_mm512_permute_pd((X), (C)), \
   6526                                        (__v8df)_mm512_setzero_pd()); })
   6527 
   6528 #define _mm512_permute_ps(X, C) __extension__ ({ \
   6529   (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \
   6530                                   (__v16sf)_mm512_undefined_ps(), \
   6531                                    0  + (((C) >> 0) & 0x3), \
   6532                                    0  + (((C) >> 2) & 0x3), \
   6533                                    0  + (((C) >> 4) & 0x3), \
   6534                                    0  + (((C) >> 6) & 0x3), \
   6535                                    4  + (((C) >> 0) & 0x3), \
   6536                                    4  + (((C) >> 2) & 0x3), \
   6537                                    4  + (((C) >> 4) & 0x3), \
   6538                                    4  + (((C) >> 6) & 0x3), \
   6539                                    8  + (((C) >> 0) & 0x3), \
   6540                                    8  + (((C) >> 2) & 0x3), \
   6541                                    8  + (((C) >> 4) & 0x3), \
   6542                                    8  + (((C) >> 6) & 0x3), \
   6543                                    12 + (((C) >> 0) & 0x3), \
   6544                                    12 + (((C) >> 2) & 0x3), \
   6545                                    12 + (((C) >> 4) & 0x3), \
   6546                                    12 + (((C) >> 6) & 0x3)); })
   6547 
   6548 #define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \
   6549   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
   6550                                       (__v16sf)_mm512_permute_ps((X), (C)), \
   6551                                       (__v16sf)(__m512)(W)); })
   6552 
   6553 #define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \
   6554   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
   6555                                       (__v16sf)_mm512_permute_ps((X), (C)), \
   6556                                       (__v16sf)_mm512_setzero_ps()); })
   6557 
   6558 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6559 _mm512_permutevar_pd (__m512d __A, __m512i __C)
   6560 {
   6561   return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
   6562               (__v8di) __C,
   6563               (__v8df)
   6564               _mm512_undefined_pd (),
   6565               (__mmask8) -1);
   6566 }
   6567 
   6568 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6569 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
   6570 {
   6571   return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
   6572               (__v8di) __C,
   6573               (__v8df) __W,
   6574               (__mmask8) __U);
   6575 }
   6576 
   6577 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6578 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
   6579 {
   6580   return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
   6581               (__v8di) __C,
   6582               (__v8df)
   6583               _mm512_setzero_pd (),
   6584               (__mmask8) __U);
   6585 }
   6586 
   6587 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6588 _mm512_permutevar_ps (__m512 __A, __m512i __C)
   6589 {
   6590   return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
   6591                    (__v16si) __C,
   6592                    (__v16sf)
   6593                    _mm512_undefined_ps (),
   6594                    (__mmask16) -1);
   6595 }
   6596 
   6597 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6598 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
   6599 {
   6600   return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
   6601                    (__v16si) __C,
   6602                    (__v16sf) __W,
   6603                    (__mmask16) __U);
   6604 }
   6605 
   6606 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6607 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
   6608 {
   6609   return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
   6610                    (__v16si) __C,
   6611                    (__v16sf)
   6612                    _mm512_setzero_ps (),
   6613                    (__mmask16) __U);
   6614 }
   6615 
   6616 static __inline __m512d __DEFAULT_FN_ATTRS
   6617 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
   6618 {
   6619   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
   6620                     /* idx */ ,
   6621                     (__v8df) __A,
   6622                     (__v8df) __B,
   6623                     (__mmask8) -1);
   6624 }
   6625 
   6626 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6627 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
   6628 {
   6629   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
   6630                     /* idx */ ,
   6631                     (__v8df) __A,
   6632                     (__v8df) __B,
   6633                     (__mmask8) __U);
   6634 }
   6635 
   6636 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6637 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
   6638             __m512d __B)
   6639 {
   6640   return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
   6641                                                          /* idx */ ,
   6642                                                          (__v8df) __A,
   6643                                                          (__v8df) __B,
   6644                                                          (__mmask8) __U);
   6645 }
   6646 
   6647 static __inline __m512 __DEFAULT_FN_ATTRS
   6648 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
   6649 {
   6650   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
   6651                                                          /* idx */ ,
   6652                                                          (__v16sf) __A,
   6653                                                          (__v16sf) __B,
   6654                                                          (__mmask16) -1);
   6655 }
   6656 
   6657 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6658 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
   6659 {
   6660   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
   6661                                                          /* idx */ ,
   6662                                                          (__v16sf) __A,
   6663                                                          (__v16sf) __B,
   6664                                                          (__mmask16) __U);
   6665 }
   6666 
   6667 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6668 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
   6669             __m512 __B)
   6670 {
   6671   return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
   6672                                                         /* idx */ ,
   6673                                                         (__v16sf) __A,
   6674                                                         (__v16sf) __B,
   6675                                                         (__mmask16) __U);
   6676 }
   6677 
   6678 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   6679 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
   6680 {
   6681   return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
   6682              (__v16si) __B,
   6683              (__mmask16) -1);
   6684 }
   6685 
   6686 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   6687 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
   6688 {
   6689   return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
   6690              (__v16si) __B, __U);
   6691 }
   6692 
   6693 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6694 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
   6695 {
   6696   return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
   6697             (__v8di) __B,
   6698             (__mmask8) -1);
   6699 }
   6700 
   6701 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6702 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
   6703 {
   6704   return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
   6705             (__v8di) __B, __U);
   6706 }
   6707 
   6708 #define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \
   6709   (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
   6710                                              (__v8si)_mm256_undefined_si256(), \
   6711                                              (__mmask8)-1, (int)(R)); })
   6712 
   6713 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \
   6714   (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
   6715                                              (__v8si)(__m256i)(W), \
   6716                                              (__mmask8)(U), (int)(R)); })
   6717 
   6718 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \
   6719   (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
   6720                                              (__v8si)_mm256_setzero_si256(), \
   6721                                              (__mmask8)(U), (int)(R)); })
   6722 
   6723 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6724 _mm512_cvttpd_epu32 (__m512d __A)
   6725 {
   6726   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
   6727                   (__v8si)
   6728                   _mm256_undefined_si256 (),
   6729                   (__mmask8) -1,
   6730                   _MM_FROUND_CUR_DIRECTION);
   6731 }
   6732 
   /* Calls __builtin_ia32_cvttpd2udq512_mask on __A (as __v8df) with __W
    * (as __v8si) as the second vector operand, __U as the mask, and
    * _MM_FROUND_CUR_DIRECTION; returns the result as __m256i. */
   6733 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6734 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
   6735 {
   6736   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
   6737                   (__v8si) __W,
   6738                   (__mmask8) __U,
   6739                   _MM_FROUND_CUR_DIRECTION);
   6740 }
   6741 
   /* Calls __builtin_ia32_cvttpd2udq512_mask on __A (as __v8df) with an
    * all-zero vector (_mm256_setzero_si256 ()) as the second vector operand,
    * __U as the mask, and _MM_FROUND_CUR_DIRECTION; returns __m256i. */
   6742 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6743 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
   6744 {
   6745   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
   6746                   (__v8si)
   6747                   _mm256_setzero_si256 (),
   6748                   (__mmask8) __U,
   6749                   _MM_FROUND_CUR_DIRECTION);
   6750 }
   6751 
   /* Expands to __builtin_ia32_rndscalesd_round_mask(A, B, zero vector,
    * all-ones mask, imm, R) with A and B viewed as __v2df; cast to __m128d.
    * NOTE(review): imm/R presumably select the scale/rounding mode - confirm. */
   6752 #define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \
   6753   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
   6754                                                 (__v2df)(__m128d)(B), \
   6755                                                 (__v2df)_mm_setzero_pd(), \
   6756                                                 (__mmask8)-1, (int)(imm), \
   6757                                                 (int)(R)); })
   6758 
   /* Expands to __builtin_ia32_rndscalesd_round_mask(A, B, zero vector,
    * all-ones mask, imm, _MM_FROUND_CUR_DIRECTION) with A and B viewed as
    * __v2df; the result is cast to __m128d. */
   6759 #define _mm_roundscale_sd(A, B, imm) __extension__ ({ \
   6760   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
   6761                                                 (__v2df)(__m128d)(B), \
   6762                                                 (__v2df)_mm_setzero_pd(), \
   6763                                                 (__mmask8)-1, (int)(imm), \
   6764                                                 _MM_FROUND_CUR_DIRECTION); })
   6765 
   /* Expands to __builtin_ia32_rndscalesd_round_mask(A, B, W, U, imm,
    * _MM_FROUND_CUR_DIRECTION) with A, B and W viewed as __v2df and U as
    * __mmask8; the result is cast to __m128d. */
   6766 #define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \
   6767   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
   6768                                                 (__v2df)(__m128d)(B), \
   6769                                                 (__v2df)(__m128d)(W), \
   6770                                                 (__mmask8)(U), (int)(imm), \
   6771                                                 _MM_FROUND_CUR_DIRECTION); })
   6772 
   /* Expands to __builtin_ia32_rndscalesd_round_mask(A, B, W, U, I, R) with
    * A, B and W viewed as __v2df, U as __mmask8, and I and R as int; the
    * result is cast to __m128d. */
   6773 #define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \
   6774   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
   6775                                                 (__v2df)(__m128d)(B), \
   6776                                                 (__v2df)(__m128d)(W), \
   6777                                                 (__mmask8)(U), (int)(I), \
   6778                                                 (int)(R)); })
   6779 
   /* Expands to __builtin_ia32_rndscalesd_round_mask(A, B, zero vector, U, I,
    * _MM_FROUND_CUR_DIRECTION) with A and B viewed as __v2df and U as
    * __mmask8; the result is cast to __m128d. */
   6780 #define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \
   6781   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
   6782                                                 (__v2df)(__m128d)(B), \
   6783                                                 (__v2df)_mm_setzero_pd(), \
   6784                                                 (__mmask8)(U), (int)(I), \
   6785                                                 _MM_FROUND_CUR_DIRECTION); })
   6786 
   /* Expands to __builtin_ia32_rndscalesd_round_mask(A, B, zero vector, U, I,
    * R) with A and B viewed as __v2df, U as __mmask8, and I and R as int; the
    * result is cast to __m128d. */
   6787 #define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \
   6788   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
   6789                                                 (__v2df)(__m128d)(B), \
   6790                                                 (__v2df)_mm_setzero_pd(), \
   6791                                                 (__mmask8)(U), (int)(I), \
   6792                                                 (int)(R)); })
   6793 
   /* Expands to __builtin_ia32_rndscaless_round_mask(A, B, zero vector,
    * all-ones mask, imm, R) with A and B viewed as __v4sf; cast to __m128.
    * NOTE(review): imm/R presumably select the scale/rounding mode - confirm. */
   6794 #define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \
   6795   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
   6796                                                (__v4sf)(__m128)(B), \
   6797                                                (__v4sf)_mm_setzero_ps(), \
   6798                                                (__mmask8)-1, (int)(imm), \
   6799                                                (int)(R)); })
   6800 
   /* Expands to __builtin_ia32_rndscaless_round_mask(A, B, zero vector,
    * all-ones mask, imm, _MM_FROUND_CUR_DIRECTION) with A and B viewed as
    * __v4sf; the result is cast to __m128. */
   6801 #define _mm_roundscale_ss(A, B, imm) __extension__ ({ \
   6802   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
   6803                                                (__v4sf)(__m128)(B), \
   6804                                                (__v4sf)_mm_setzero_ps(), \
   6805                                                (__mmask8)-1, (int)(imm), \
   6806                                                _MM_FROUND_CUR_DIRECTION); })
   6807 
   /* Expands to __builtin_ia32_rndscaless_round_mask(A, B, W, U, I,
    * _MM_FROUND_CUR_DIRECTION) with A, B and W viewed as __v4sf and U as
    * __mmask8; the result is cast to __m128. */
   6808 #define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \
   6809   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
   6810                                                (__v4sf)(__m128)(B), \
   6811                                                (__v4sf)(__m128)(W), \
   6812                                                (__mmask8)(U), (int)(I), \
   6813                                                _MM_FROUND_CUR_DIRECTION); })
   6814 
   /* Expands to __builtin_ia32_rndscaless_round_mask(A, B, W, U, I, R) with
    * A, B and W viewed as __v4sf, U as __mmask8, and I and R as int; the
    * result is cast to __m128. */
   6815 #define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \
   6816   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
   6817                                                (__v4sf)(__m128)(B), \
   6818                                                (__v4sf)(__m128)(W), \
   6819                                                (__mmask8)(U), (int)(I), \
   6820                                                (int)(R)); })
   6821 
   /* Expands to __builtin_ia32_rndscaless_round_mask(A, B, zero vector, U, I,
    * _MM_FROUND_CUR_DIRECTION) with A and B viewed as __v4sf and U as
    * __mmask8; the result is cast to __m128. */
   6822 #define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \
   6823   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
   6824                                                (__v4sf)(__m128)(B), \
   6825                                                (__v4sf)_mm_setzero_ps(), \
   6826                                                (__mmask8)(U), (int)(I), \
   6827                                                _MM_FROUND_CUR_DIRECTION); })
   6828 
   /* Expands to __builtin_ia32_rndscaless_round_mask(A, B, zero vector, U, I,
    * R) with A and B viewed as __v4sf, U as __mmask8, and I and R as int; the
    * result is cast to __m128. */
   6829 #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \
   6830   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
   6831                                                (__v4sf)(__m128)(B), \
   6832                                                (__v4sf)_mm_setzero_ps(), \
   6833                                                (__mmask8)(U), (int)(I), \
   6834                                                (int)(R)); })
   6835 
   /* Expands to __builtin_ia32_scalefpd512_mask(A, B, _mm512_undefined_pd(),
    * all-ones mask, R) with A and B viewed as __v8df; cast to __m512d.
    * NOTE(review): R is presumably a rounding control - confirm. */
   6836 #define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \
   6837   (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
   6838                                            (__v8df)(__m512d)(B), \
   6839                                            (__v8df)_mm512_undefined_pd(), \
   6840                                            (__mmask8)-1, (int)(R)); })
   6841 
   /* Expands to __builtin_ia32_scalefpd512_mask(A, B, W, U, R) with A, B and
    * W viewed as __v8df and U as __mmask8; the result is cast to __m512d. */
   6842 #define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \
   6843   (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
   6844                                            (__v8df)(__m512d)(B), \
   6845                                            (__v8df)(__m512d)(W), \
   6846                                            (__mmask8)(U), (int)(R)); })
   6847 
   /* Expands to __builtin_ia32_scalefpd512_mask(A, B, zero vector, U, R) with
    * A and B viewed as __v8df and U as __mmask8; cast to __m512d. */
   6848 #define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \
   6849   (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
   6850                                            (__v8df)(__m512d)(B), \
   6851                                            (__v8df)_mm512_setzero_pd(), \
   6852                                            (__mmask8)(U), (int)(R)); })
   6853 
   /* Calls __builtin_ia32_scalefpd512_mask on __A and __B (as __v8df) with
    * _mm512_undefined_pd () as the third vector operand, an all-ones mask,
    * and _MM_FROUND_CUR_DIRECTION; returns the result as __m512d. */
   6854 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6855 _mm512_scalef_pd (__m512d __A, __m512d __B)
   6856 {
   6857   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
   6858                 (__v8df) __B,
   6859                 (__v8df)
   6860                 _mm512_undefined_pd (),
   6861                 (__mmask8) -1,
   6862                 _MM_FROUND_CUR_DIRECTION);
   6863 }
   6864 
   /* Calls __builtin_ia32_scalefpd512_mask on __A and __B (as __v8df) with
    * __W as the third vector operand, __U as the mask, and
    * _MM_FROUND_CUR_DIRECTION; returns the result as __m512d. */
   6865 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6866 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   6867 {
   6868   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
   6869                 (__v8df) __B,
   6870                 (__v8df) __W,
   6871                 (__mmask8) __U,
   6872                 _MM_FROUND_CUR_DIRECTION);
   6873 }
   6874 
   /* Calls __builtin_ia32_scalefpd512_mask on __A and __B (as __v8df) with a
    * zero vector (_mm512_setzero_pd ()) as the third vector operand, __U as
    * the mask, and _MM_FROUND_CUR_DIRECTION; returns the result as __m512d. */
   6875 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6876 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
   6877 {
   6878   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
   6879                 (__v8df) __B,
   6880                 (__v8df)
   6881                 _mm512_setzero_pd (),
   6882                 (__mmask8) __U,
   6883                 _MM_FROUND_CUR_DIRECTION);
   6884 }
   6885 
   /* Expands to __builtin_ia32_scalefps512_mask(A, B, _mm512_undefined_ps(),
    * all-ones 16-bit mask, R) with A and B viewed as __v16sf; cast to __m512.
    * NOTE(review): R is presumably a rounding control - confirm. */
   6886 #define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \
   6887   (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
   6888                                           (__v16sf)(__m512)(B), \
   6889                                           (__v16sf)_mm512_undefined_ps(), \
   6890                                           (__mmask16)-1, (int)(R)); })
   6891 
   /* Expands to __builtin_ia32_scalefps512_mask(A, B, W, U, R) with A, B and
    * W viewed as __v16sf and U as __mmask16; the result is cast to __m512. */
   6892 #define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \
   6893   (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
   6894                                           (__v16sf)(__m512)(B), \
   6895                                           (__v16sf)(__m512)(W), \
   6896                                           (__mmask16)(U), (int)(R)); })
   6897 
   /* Expands to __builtin_ia32_scalefps512_mask(A, B, zero vector, U, R) with
    * A and B viewed as __v16sf and U as __mmask16; cast to __m512. */
   6898 #define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \
   6899   (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
   6900                                           (__v16sf)(__m512)(B), \
   6901                                           (__v16sf)_mm512_setzero_ps(), \
   6902                                           (__mmask16)(U), (int)(R)); })
   6903 
   /* Calls __builtin_ia32_scalefps512_mask on __A and __B (as __v16sf) with
    * _mm512_undefined_ps () as the third vector operand, an all-ones mask,
    * and _MM_FROUND_CUR_DIRECTION; returns the result as __m512. */
   6904 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6905 _mm512_scalef_ps (__m512 __A, __m512 __B)
   6906 {
   6907   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
   6908                (__v16sf) __B,
   6909                (__v16sf)
   6910                _mm512_undefined_ps (),
   6911                (__mmask16) -1,
   6912                _MM_FROUND_CUR_DIRECTION);
   6913 }
   6914 
   /* Calls __builtin_ia32_scalefps512_mask on __A and __B (as __v16sf) with
    * __W as the third vector operand, __U as the mask, and
    * _MM_FROUND_CUR_DIRECTION; returns the result as __m512. */
   6915 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6916 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   6917 {
   6918   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
   6919                (__v16sf) __B,
   6920                (__v16sf) __W,
   6921                (__mmask16) __U,
   6922                _MM_FROUND_CUR_DIRECTION);
   6923 }
   6924 
   /* Calls __builtin_ia32_scalefps512_mask on __A and __B (as __v16sf) with a
    * zero vector (_mm512_setzero_ps ()) as the third vector operand, __U as
    * the mask, and _MM_FROUND_CUR_DIRECTION; returns the result as __m512. */
   6925 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6926 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
   6927 {
   6928   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
   6929                (__v16sf) __B,
   6930                (__v16sf)
   6931                _mm512_setzero_ps (),
   6932                (__mmask16) __U,
   6933                _MM_FROUND_CUR_DIRECTION);
   6934 }
   6935 
   /* Expands to __builtin_ia32_scalefsd_round_mask(A, B, zero vector,
    * all-ones mask, R) with A and B viewed as __v2df; cast to __m128d. */
   6936 #define _mm_scalef_round_sd(A, B, R) __extension__ ({ \
   6937   (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
   6938                                               (__v2df)(__m128d)(B), \
   6939                                               (__v2df)_mm_setzero_pd(), \
   6940                                               (__mmask8)-1, (int)(R)); })
   6941 
   /* Calls __builtin_ia32_scalefsd_round_mask on __A and __B (as __v2df) with
    * a zero vector as the third operand, an all-ones mask, and
    * _MM_FROUND_CUR_DIRECTION; returns the result as __m128d. */
   6942 static __inline__ __m128d __DEFAULT_FN_ATTRS
   6943 _mm_scalef_sd (__m128d __A, __m128d __B)
   6944 {
   6945   return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
   6946               (__v2df)( __B), (__v2df) _mm_setzero_pd(),
   6947               (__mmask8) -1,
   6948               _MM_FROUND_CUR_DIRECTION);
   6949 }
   6950 
   /* Calls __builtin_ia32_scalefsd_round_mask on __A and __B (as __v2df) with
    * __W as the third vector operand, __U as the mask, and
    * _MM_FROUND_CUR_DIRECTION; returns the result as __m128d. */
   6951 static __inline__ __m128d __DEFAULT_FN_ATTRS
   6952 _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   6953 {
   6954  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
   6955                  (__v2df) __B,
   6956                 (__v2df) __W,
   6957                 (__mmask8) __U,
   6958                 _MM_FROUND_CUR_DIRECTION);
   6959 }
   6960 
   /* Expands to __builtin_ia32_scalefsd_round_mask(A, B, W, U, R) with A, B
    * and W viewed as __v2df and U as __mmask8; cast to __m128d. */
   6961 #define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \
   6962   (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
   6963                                               (__v2df)(__m128d)(B), \
   6964                                               (__v2df)(__m128d)(W), \
   6965                                               (__mmask8)(U), (int)(R)); })
   6966 
   /* Calls __builtin_ia32_scalefsd_round_mask on __A and __B (as __v2df) with
    * a zero vector (_mm_setzero_pd ()) as the third operand, __U as the mask,
    * and _MM_FROUND_CUR_DIRECTION; returns the result as __m128d. */
   6967 static __inline__ __m128d __DEFAULT_FN_ATTRS
   6968 _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
   6969 {
   6970  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
   6971                  (__v2df) __B,
   6972                 (__v2df) _mm_setzero_pd (),
   6973                 (__mmask8) __U,
   6974                 _MM_FROUND_CUR_DIRECTION);
   6975 }
   6976 
   /* Expands to __builtin_ia32_scalefsd_round_mask(A, B, zero vector, U, R)
    * with A and B viewed as __v2df and U as __mmask8; cast to __m128d. */
   6977 #define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \
   6978   (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
   6979                                               (__v2df)(__m128d)(B), \
   6980                                               (__v2df)_mm_setzero_pd(), \
   6981                                               (__mmask8)(U), (int)(R)); })
   6982 
   /* Expands to __builtin_ia32_scalefss_round_mask(A, B, zero vector,
    * all-ones mask, R) with A and B viewed as __v4sf; cast to __m128. */
   6983 #define _mm_scalef_round_ss(A, B, R) __extension__ ({ \
   6984   (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
   6985                                              (__v4sf)(__m128)(B), \
   6986                                              (__v4sf)_mm_setzero_ps(), \
   6987                                              (__mmask8)-1, (int)(R)); })
   6988 
   /* Calls __builtin_ia32_scalefss_round_mask on __A and __B (as __v4sf) with
    * a zero vector as the third operand, an all-ones mask, and
    * _MM_FROUND_CUR_DIRECTION; returns the result as __m128. */
   6989 static __inline__ __m128 __DEFAULT_FN_ATTRS
   6990 _mm_scalef_ss (__m128 __A, __m128 __B)
   6991 {
   6992   return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
   6993              (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
   6994              (__mmask8) -1,
   6995              _MM_FROUND_CUR_DIRECTION);
   6996 }
   6997 
   /* Calls __builtin_ia32_scalefss_round_mask on __A and __B (as __v4sf) with
    * __W as the third vector operand, __U as the mask, and
    * _MM_FROUND_CUR_DIRECTION; returns the result as __m128. */
   6998 static __inline__ __m128 __DEFAULT_FN_ATTRS
   6999 _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   7000 {
   7001  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
   7002                 (__v4sf) __B,
   7003                 (__v4sf) __W,
   7004                 (__mmask8) __U,
   7005                 _MM_FROUND_CUR_DIRECTION);
   7006 }
   7007 
   /* Expands to __builtin_ia32_scalefss_round_mask(A, B, W, U, R) with A, B
    * and W viewed as __v4sf and U as __mmask8; cast to __m128. */
   7008 #define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \
   7009   (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
   7010                                              (__v4sf)(__m128)(B), \
   7011                                              (__v4sf)(__m128)(W), \
   7012                                              (__mmask8)(U), (int)(R)); })
   7013 
   /* Calls __builtin_ia32_scalefss_round_mask on __A and __B (as __v4sf) with
    * a zero vector (_mm_setzero_ps ()) as the third operand, __U as the mask,
    * and _MM_FROUND_CUR_DIRECTION; returns the result as __m128. */
   7014 static __inline__ __m128 __DEFAULT_FN_ATTRS
   7015 _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
   7016 {
   7017  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
   7018                  (__v4sf) __B,
   7019                 (__v4sf) _mm_setzero_ps (),
   7020                 (__mmask8) __U,
   7021                 _MM_FROUND_CUR_DIRECTION);
   7022 }
   7023 
   /* Expands to __builtin_ia32_scalefss_round_mask(A, B, zero vector, U, R)
    * with A and B viewed as __v4sf and U as __mmask8; cast to __m128.
    * The caller's R is forwarded as the final int argument; previously this
    * macro accepted R but always passed _MM_FROUND_CUR_DIRECTION, silently
    * discarding the requested value. */
   7024 #define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \
   7025   (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
   7026                                              (__v4sf)(__m128)(B), \
   7027                                              (__v4sf)_mm_setzero_ps(), \
   7028                                              (__mmask8)(U), \
   7029                                              (int)(R)); })
   7030 
   /* Expands to __builtin_ia32_psradi512_mask(A as __v16si, B as int, zero
    * vector, all-ones 16-bit mask); the result is cast to __m512i.
    * NOTE(review): B is presumably the shift count - confirm. */
   7031 #define _mm512_srai_epi32(A, B) __extension__ ({ \
   7032   (__m512i)__builtin_ia32_psradi512_mask((__v16si)(__m512i)(A), (int)(B), \
   7033                                          (__v16si)_mm512_setzero_si512(), \
   7034                                          (__mmask16)-1); })
   7035 
   /* Expands to __builtin_ia32_psradi512_mask(A as __v16si, B as int, W as
    * __v16si, U as __mmask16); the result is cast to __m512i. */
   7036 #define _mm512_mask_srai_epi32(W, U, A, B) __extension__ ({ \
   7037   (__m512i)__builtin_ia32_psradi512_mask((__v16si)(__m512i)(A), (int)(B), \
   7038                                          (__v16si)(__m512i)(W), \
   7039                                          (__mmask16)(U)); })
   7040 
   /* Expands to __builtin_ia32_psradi512_mask(A as __v16si, B as int, zero
    * vector, U as __mmask16); the result is cast to __m512i. */
   7041 #define _mm512_maskz_srai_epi32(U, A, B) __extension__ ({ \
   7042   (__m512i)__builtin_ia32_psradi512_mask((__v16si)(__m512i)(A), (int)(B), \
   7043                                          (__v16si)_mm512_setzero_si512(), \
   7044                                          (__mmask16)(U)); })
   7045 
   /* Expands to __builtin_ia32_psraqi512_mask(A as __v8di, B as int, zero
    * vector, all-ones 8-bit mask); the result is cast to __m512i.
    * NOTE(review): B is presumably the shift count - confirm. */
   7046 #define _mm512_srai_epi64(A, B) __extension__ ({ \
   7047   (__m512i)__builtin_ia32_psraqi512_mask((__v8di)(__m512i)(A), (int)(B), \
   7048                                          (__v8di)_mm512_setzero_si512(), \
   7049                                          (__mmask8)-1); })
   7050 
   /* Expands to __builtin_ia32_psraqi512_mask(A as __v8di, B as int, W as
    * __v8di, U as __mmask8); the result is cast to __m512i. */
   7051 #define _mm512_mask_srai_epi64(W, U, A, B) __extension__ ({ \
   7052   (__m512i)__builtin_ia32_psraqi512_mask((__v8di)(__m512i)(A), (int)(B), \
   7053                                          (__v8di)(__m512i)(W), \
   7054                                          (__mmask8)(U)); })
   7055 
   /* Expands to __builtin_ia32_psraqi512_mask(A as __v8di, B as int, zero
    * vector, U as __mmask8); the result is cast to __m512i. */
   7056 #define _mm512_maskz_srai_epi64(U, A, B) __extension__ ({ \
   7057   (__m512i)__builtin_ia32_psraqi512_mask((__v8di)(__m512i)(A), (int)(B), \
   7058                                          (__v8di)_mm512_setzero_si512(), \
   7059                                          (__mmask8)(U)); })
   7060 
   /* Expands to __builtin_ia32_shuf_f32x4_mask(A, B, imm,
    * _mm512_undefined_ps(), all-ones 16-bit mask) with A and B viewed as
    * __v16sf; the result is cast to __m512.
    * NOTE(review): imm presumably selects 128-bit lanes - confirm. */
   7061 #define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
   7062   (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
   7063                                          (__v16sf)(__m512)(B), (int)(imm), \
   7064                                          (__v16sf)_mm512_undefined_ps(), \
   7065                                          (__mmask16)-1); })
   7066 
   /* Expands to __builtin_ia32_shuf_f32x4_mask(A, B, imm, W, U) with A, B and
    * W viewed as __v16sf and U as __mmask16; the result is cast to __m512. */
   7067 #define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
   7068   (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
   7069                                          (__v16sf)(__m512)(B), (int)(imm), \
   7070                                          (__v16sf)(__m512)(W), \
   7071                                          (__mmask16)(U)); })
   7072 
   /* Expands to __builtin_ia32_shuf_f32x4_mask(A, B, imm, zero vector, U)
    * with A and B viewed as __v16sf and U as __mmask16; cast to __m512. */
   7073 #define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
   7074   (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
   7075                                          (__v16sf)(__m512)(B), (int)(imm), \
   7076                                          (__v16sf)_mm512_setzero_ps(), \
   7077                                          (__mmask16)(U)); })
   7078 
   /* Expands to __builtin_ia32_shuf_f64x2_mask(A, B, imm,
    * _mm512_undefined_pd(), all-ones 8-bit mask) with A and B viewed as
    * __v8df; the result is cast to __m512d. */
   7079 #define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
   7080   (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
   7081                                           (__v8df)(__m512d)(B), (int)(imm), \
   7082                                           (__v8df)_mm512_undefined_pd(), \
   7083                                           (__mmask8)-1); })
   7084 
   /* Expands to __builtin_ia32_shuf_f64x2_mask(A, B, imm, W, U) with A, B and
    * W viewed as __v8df and U as __mmask8; the result is cast to __m512d. */
   7085 #define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
   7086   (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
   7087                                           (__v8df)(__m512d)(B), (int)(imm), \
   7088                                           (__v8df)(__m512d)(W), \
   7089                                           (__mmask8)(U)); })
   7090 
   /* Expands to __builtin_ia32_shuf_f64x2_mask(A, B, imm, zero vector, U)
    * with A and B viewed as __v8df and U as __mmask8; cast to __m512d. */
   7091 #define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
   7092   (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
   7093                                           (__v8df)(__m512d)(B), (int)(imm), \
   7094                                           (__v8df)_mm512_setzero_pd(), \
   7095                                           (__mmask8)(U)); })
   7096 
   /* Expands to __builtin_ia32_shuf_i32x4_mask(A, B, imm, zero vector,
    * all-ones 16-bit mask) with A and B viewed as __v16si; cast to __m512i. */
   7097 #define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
   7098   (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
   7099                                           (__v16si)(__m512i)(B), (int)(imm), \
   7100                                           (__v16si)_mm512_setzero_si512(), \
   7101                                           (__mmask16)-1); })
   7102 
   /* Expands to __builtin_ia32_shuf_i32x4_mask(A, B, imm, W, U) with A, B and
    * W viewed as __v16si and U as __mmask16; the result is cast to __m512i. */
   7103 #define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
   7104   (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
   7105                                           (__v16si)(__m512i)(B), (int)(imm), \
   7106                                           (__v16si)(__m512i)(W), \
   7107                                           (__mmask16)(U)); })
   7108 
   /* Expands to __builtin_ia32_shuf_i32x4_mask(A, B, imm, zero vector, U)
    * with A and B viewed as __v16si and U as __mmask16; cast to __m512i. */
   7109 #define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
   7110   (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
   7111                                           (__v16si)(__m512i)(B), (int)(imm), \
   7112                                           (__v16si)_mm512_setzero_si512(), \
   7113                                           (__mmask16)(U)); })
   7114 
   /* Expands to __builtin_ia32_shuf_i64x2_mask(A, B, imm, zero vector,
    * all-ones 8-bit mask) with A and B viewed as __v8di; cast to __m512i. */
   7115 #define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
   7116   (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
   7117                                           (__v8di)(__m512i)(B), (int)(imm), \
   7118                                           (__v8di)_mm512_setzero_si512(), \
   7119                                           (__mmask8)-1); })
   7120 
   /* Expands to __builtin_ia32_shuf_i64x2_mask(A, B, imm, W, U) with A, B and
    * W viewed as __v8di and U as __mmask8; the result is cast to __m512i. */
   7121 #define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
   7122   (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
   7123                                           (__v8di)(__m512i)(B), (int)(imm), \
   7124                                           (__v8di)(__m512i)(W), \
   7125                                           (__mmask8)(U)); })
   7126 
   /* Expands to __builtin_ia32_shuf_i64x2_mask(A, B, imm, zero vector, U)
    * with A and B viewed as __v8di and U as __mmask8; cast to __m512i. */
   7127 #define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
   7128   (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
   7129                                           (__v8di)(__m512i)(B), (int)(imm), \
   7130                                           (__v8di)_mm512_setzero_si512(), \
   7131                                           (__mmask8)(U)); })
   7132 
   /* Expands to __builtin_shufflevector over A and B (both as __v8df), cast
    * to __m512d.  Result element i uses index base[i] + bit i of M, where the
    * bases are 0, 8, 2, 10, 4, 12, 6, 14.  M is evaluated once per element.
    * NOTE(review): indices 8..15 address the second vector (B) under
    * __builtin_shufflevector's concatenated indexing - confirm in Clang docs. */
   7133 #define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
   7134   (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
   7135                                    (__v8df)(__m512d)(B), \
   7136                                    0  + (((M) >> 0) & 0x1), \
   7137                                    8  + (((M) >> 1) & 0x1), \
   7138                                    2  + (((M) >> 2) & 0x1), \
   7139                                    10 + (((M) >> 3) & 0x1), \
   7140                                    4  + (((M) >> 4) & 0x1), \
   7141                                    12 + (((M) >> 5) & 0x1), \
   7142                                    6  + (((M) >> 6) & 0x1), \
   7143                                    14 + (((M) >> 7) & 0x1)); })
   7144 
   /* Expands to __builtin_ia32_selectpd_512 with U as the mask,
    * _mm512_shuffle_pd(A, B, M) as the second operand and W as the third
    * (both as __v8df); the result is cast to __m512d. */
   7145 #define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
   7146   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
   7147                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
   7148                                        (__v8df)(__m512d)(W)); })
   7149 
   /* Expands to __builtin_ia32_selectpd_512 with U as the mask,
    * _mm512_shuffle_pd(A, B, M) as the second operand and a zero vector
    * (_mm512_setzero_pd()) as the third; the result is cast to __m512d. */
   7150 #define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
   7151   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
   7152                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
   7153                                        (__v8df)_mm512_setzero_pd()); })
   7154 
   /* Expands to __builtin_shufflevector over A and B (both as __v16sf).
    * For each group of four result elements with base g (0, 4, 8, 12) the
    * indices are g + M[1:0], g + M[3:2], 16 + g + M[5:4], 16 + g + M[7:6].
    * M is evaluated once per element.
    * The result is cast to __m512 to match the float inputs; the earlier
    * (__m512d) cast gave this macro a double-vector type that disagreed with
    * its operands and with the __v16sf casts in the mask/maskz variants.
    * NOTE(review): indices 16..31 address the second vector (B) under
    * __builtin_shufflevector's concatenated indexing - confirm in Clang docs. */
   7155 #define _mm512_shuffle_ps(A, B, M) __extension__ ({ \
   7156   (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
   7157                                    (__v16sf)(__m512)(B), \
   7158                                    0  + (((M) >> 0) & 0x3), \
   7159                                    0  + (((M) >> 2) & 0x3), \
   7160                                    16 + (((M) >> 4) & 0x3), \
   7161                                    16 + (((M) >> 6) & 0x3), \
   7162                                    4  + (((M) >> 0) & 0x3), \
   7163                                    4  + (((M) >> 2) & 0x3), \
   7164                                    20 + (((M) >> 4) & 0x3), \
   7165                                    20 + (((M) >> 6) & 0x3), \
   7166                                    8  + (((M) >> 0) & 0x3), \
   7167                                    8  + (((M) >> 2) & 0x3), \
   7168                                    24 + (((M) >> 4) & 0x3), \
   7169                                    24 + (((M) >> 6) & 0x3), \
   7170                                    12 + (((M) >> 0) & 0x3), \
   7171                                    12 + (((M) >> 2) & 0x3), \
   7172                                    28 + (((M) >> 4) & 0x3), \
   7173                                    28 + (((M) >> 6) & 0x3)); })
   7174 
   /* Expands to __builtin_ia32_selectps_512 with U as the mask,
    * _mm512_shuffle_ps(A, B, M) as the second operand and W as the third
    * (both as __v16sf); the result is cast to __m512. */
   7175 #define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
   7176   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
   7177                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
   7178                                       (__v16sf)(__m512)(W)); })
   7179 
   /* Expands to __builtin_ia32_selectps_512 with U as the mask,
    * _mm512_shuffle_ps(A, B, M) as the second operand and a zero vector
    * (_mm512_setzero_ps()) as the third; the result is cast to __m512. */
   7180 #define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
   7181   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
   7182                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
   7183                                       (__v16sf)_mm512_setzero_ps()); })
   7184 
   /* Expands to __builtin_ia32_sqrtsd_round_mask(A, B, zero vector, all-ones
    * mask, R) with A and B viewed as __v2df; the result is cast to __m128d.
    * NOTE(review): R is presumably a rounding control - confirm. */
   7185 #define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
   7186   (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
   7187                                             (__v2df)(__m128d)(B), \
   7188                                             (__v2df)_mm_setzero_pd(), \
   7189                                             (__mmask8)-1, (int)(R)); })
   7190 
   /* Calls __builtin_ia32_sqrtsd_round_mask on __A and __B (as __v2df) with
    * __W as the third vector operand, __U as the mask, and
    * _MM_FROUND_CUR_DIRECTION; returns the result as __m128d. */
   7191 static __inline__ __m128d __DEFAULT_FN_ATTRS
   7192 _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   7193 {
   7194  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
   7195                  (__v2df) __B,
   7196                 (__v2df) __W,
   7197                 (__mmask8) __U,
   7198                 _MM_FROUND_CUR_DIRECTION);
   7199 }
   7200 
   /* Expands to __builtin_ia32_sqrtsd_round_mask(A, B, W, U, R) with A, B and
    * W viewed as __v2df and U as __mmask8; the result is cast to __m128d. */
   7201 #define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \
   7202   (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
   7203                                             (__v2df)(__m128d)(B), \
   7204                                             (__v2df)(__m128d)(W), \
   7205                                             (__mmask8)(U), (int)(R)); })
   7206 
   /* Calls __builtin_ia32_sqrtsd_round_mask on __A and __B (as __v2df) with a
    * zero vector (_mm_setzero_pd ()) as the third operand, __U as the mask,
    * and _MM_FROUND_CUR_DIRECTION; returns the result as __m128d. */
   7207 static __inline__ __m128d __DEFAULT_FN_ATTRS
   7208 _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
   7209 {
   7210  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
   7211                  (__v2df) __B,
   7212                 (__v2df) _mm_setzero_pd (),
   7213                 (__mmask8) __U,
   7214                 _MM_FROUND_CUR_DIRECTION);
   7215 }
   7216 
   /* Expands to __builtin_ia32_sqrtsd_round_mask(A, B, zero vector, U, R)
    * with A and B viewed as __v2df and U as __mmask8; cast to __m128d. */
   7217 #define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \
   7218   (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
   7219                                             (__v2df)(__m128d)(B), \
   7220                                             (__v2df)_mm_setzero_pd(), \
   7221                                             (__mmask8)(U), (int)(R)); })
   7222 
   /* Expands to __builtin_ia32_sqrtss_round_mask(A, B, zero vector, all-ones
    * mask, R) with A and B viewed as __v4sf; the result is cast to __m128.
    * NOTE(review): R is presumably a rounding control - confirm. */
   7223 #define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \
   7224   (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
   7225                                            (__v4sf)(__m128)(B), \
   7226                                            (__v4sf)_mm_setzero_ps(), \
   7227                                            (__mmask8)-1, (int)(R)); })
   7228 
   /* Calls __builtin_ia32_sqrtss_round_mask on __A and __B (as __v4sf) with
    * __W as the third vector operand, __U as the mask, and
    * _MM_FROUND_CUR_DIRECTION; returns the result as __m128. */
   7229 static __inline__ __m128 __DEFAULT_FN_ATTRS
   7230 _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   7231 {
   7232  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
   7233                  (__v4sf) __B,
   7234                 (__v4sf) __W,
   7235                 (__mmask8) __U,
   7236                 _MM_FROUND_CUR_DIRECTION);
   7237 }
   7238 
   /* Expands to __builtin_ia32_sqrtss_round_mask(A, B, W, U, R) with A, B and
    * W viewed as __v4sf and U as __mmask8; the result is cast to __m128. */
   7239 #define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \
   7240   (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
   7241                                            (__v4sf)(__m128)(B), \
   7242                                            (__v4sf)(__m128)(W), (__mmask8)(U), \
   7243                                            (int)(R)); })
   7244 
   /* Calls __builtin_ia32_sqrtss_round_mask on __A and __B (as __v4sf) with a
    * zero vector (_mm_setzero_ps ()) as the third operand, __U as the mask,
    * and _MM_FROUND_CUR_DIRECTION; returns the result as __m128. */
   7245 static __inline__ __m128 __DEFAULT_FN_ATTRS
   7246 _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
   7247 {
   7248  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
   7249                  (__v4sf) __B,
   7250                 (__v4sf) _mm_setzero_ps (),
   7251                 (__mmask8) __U,
   7252                 _MM_FROUND_CUR_DIRECTION);
   7253 }
   7254 
   /* Expands to __builtin_ia32_sqrtss_round_mask(A, B, zero vector, U, R)
    * with A and B viewed as __v4sf and U as __mmask8; cast to __m128. */
   7255 #define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \
   7256   (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
   7257                                            (__v4sf)(__m128)(B), \
   7258                                            (__v4sf)_mm_setzero_ps(), \
   7259                                            (__mmask8)(U), (int)(R)); })
   7260 
   /* Calls __builtin_ia32_broadcastf32x4_512 on __A (as __v4sf) with
    * _mm512_undefined_ps () as the second vector operand and an all-ones
    * 16-bit mask; returns the result as __m512. */
   7261 static __inline__ __m512 __DEFAULT_FN_ATTRS
   7262 _mm512_broadcast_f32x4 (__m128 __A)
   7263 {
   7264   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
   7265                  (__v16sf)
   7266                  _mm512_undefined_ps (),
   7267                  (__mmask16) -1);
   7268 }
   7269 
   /* Calls __builtin_ia32_broadcastf32x4_512 on __A (as __v4sf) with __O
    * (as __v16sf) as the second vector operand and __M as the mask; returns
    * the result as __m512. */
   7270 static __inline__ __m512 __DEFAULT_FN_ATTRS
   7271 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
   7272 {
   7273   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
   7274                  (__v16sf) __O,
   7275                  __M);
   7276 }
   7277 
   /* Calls __builtin_ia32_broadcastf32x4_512 on __A (as __v4sf) with a zero
    * vector (_mm512_setzero_ps ()) as the second vector operand and __M as
    * the mask; returns the result as __m512. */
   7278 static __inline__ __m512 __DEFAULT_FN_ATTRS
   7279 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
   7280 {
   7281   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
   7282                  (__v16sf)
   7283                  _mm512_setzero_ps (),
   7284                  __M);
   7285 }
   7286 
   /* Calls __builtin_ia32_broadcastf64x4_512 on __A (as __v4df) with
    * _mm512_undefined_pd () as the second vector operand and an all-ones
    * 8-bit mask; returns the result as __m512d. */
   7287 static __inline__ __m512d __DEFAULT_FN_ATTRS
   7288 _mm512_broadcast_f64x4 (__m256d __A)
   7289 {
   7290   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
   7291                   (__v8df)
   7292                   _mm512_undefined_pd (),
   7293                   (__mmask8) -1);
   7294 }
   7295 
   /* Calls __builtin_ia32_broadcastf64x4_512 on __A (as __v4df) with __O
    * (as __v8df) as the second vector operand and __M as the mask; returns
    * the result as __m512d. */
   7296 static __inline__ __m512d __DEFAULT_FN_ATTRS
   7297 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
   7298 {
   7299   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
   7300                   (__v8df) __O,
   7301                   __M);
   7302 }
   7303 
   /* Calls __builtin_ia32_broadcastf64x4_512 on __A (as __v4df) with a zero
    * vector (_mm512_setzero_pd ()) as the second vector operand and __M as
    * the mask; returns the result as __m512d. */
   7304 static __inline__ __m512d __DEFAULT_FN_ATTRS
   7305 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
   7306 {
   7307   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
   7308                   (__v8df)
   7309                   _mm512_setzero_pd (),
   7310                   __M);
   7311 }
   7312 
   /* Calls __builtin_ia32_broadcasti32x4_512 on __A (as __v4si) with
    * _mm512_undefined_epi32 () as the second vector operand and an all-ones
    * 16-bit mask; returns the result as __m512i. */
   7313 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7314 _mm512_broadcast_i32x4 (__m128i __A)
   7315 {
   7316   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
   7317                   (__v16si)
   7318                   _mm512_undefined_epi32 (),
   7319                   (__mmask16) -1);
   7320 }
   7321 
   7322 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7323 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
   7324 {
   7325   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
   7326                   (__v16si) __O,
   7327                   __M);
   7328 }
   7329 
   7330 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7331 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
   7332 {
   7333   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
   7334                   (__v16si)
   7335                   _mm512_setzero_si512 (),
   7336                   __M);
   7337 }
   7338 
   7339 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7340 _mm512_broadcast_i64x4 (__m256i __A)
   7341 {
   7342   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
   7343                   (__v8di)
   7344                   _mm512_undefined_epi32 (),
   7345                   (__mmask8) -1);
   7346 }
   7347 
   7348 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7349 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
   7350 {
   7351   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
   7352                   (__v8di) __O,
   7353                   __M);
   7354 }
   7355 
   7356 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7357 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
   7358 {
   7359   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
   7360                   (__v8di)
   7361                   _mm512_setzero_si512 (),
   7362                   __M);
   7363 }
   7364 
   7365 static __inline__ __m512d __DEFAULT_FN_ATTRS
   7366 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
   7367 {
   7368   return (__m512d)__builtin_ia32_selectpd_512(__M,
   7369                                               (__v8df) _mm512_broadcastsd_pd(__A),
   7370                                               (__v8df) __O);
   7371 }
   7372 
   7373 static __inline__ __m512d __DEFAULT_FN_ATTRS
   7374 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
   7375 {
   7376   return (__m512d)__builtin_ia32_selectpd_512(__M,
   7377                                               (__v8df) _mm512_broadcastsd_pd(__A),
   7378                                               (__v8df) _mm512_setzero_pd());
   7379 }
   7380 
   7381 static __inline__ __m512 __DEFAULT_FN_ATTRS
   7382 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
   7383 {
   7384   return (__m512)__builtin_ia32_selectps_512(__M,
   7385                                              (__v16sf) _mm512_broadcastss_ps(__A),
   7386                                              (__v16sf) __O);
   7387 }
   7388 
   7389 static __inline__ __m512 __DEFAULT_FN_ATTRS
   7390 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
   7391 {
   7392   return (__m512)__builtin_ia32_selectps_512(__M,
   7393                                              (__v16sf) _mm512_broadcastss_ps(__A),
   7394                                              (__v16sf) _mm512_setzero_ps());
   7395 }
   7396 
   7397 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7398 _mm512_cvtsepi32_epi8 (__m512i __A)
   7399 {
   7400   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
   7401                (__v16qi) _mm_undefined_si128 (),
   7402                (__mmask16) -1);
   7403 }
   7404 
   7405 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7406 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
   7407 {
   7408   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
   7409                (__v16qi) __O, __M);
   7410 }
   7411 
   7412 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7413 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
   7414 {
   7415   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
   7416                (__v16qi) _mm_setzero_si128 (),
   7417                __M);
   7418 }
   7419 
   7420 static __inline__ void __DEFAULT_FN_ATTRS
   7421 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
   7422 {
   7423   __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
   7424 }
   7425 
   7426 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7427 _mm512_cvtsepi32_epi16 (__m512i __A)
   7428 {
   7429   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
   7430                (__v16hi) _mm256_undefined_si256 (),
   7431                (__mmask16) -1);
   7432 }
   7433 
   7434 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7435 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
   7436 {
   7437   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
   7438                (__v16hi) __O, __M);
   7439 }
   7440 
   7441 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7442 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
   7443 {
   7444   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
   7445                (__v16hi) _mm256_setzero_si256 (),
   7446                __M);
   7447 }
   7448 
   7449 static __inline__ void __DEFAULT_FN_ATTRS
   7450 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
   7451 {
   7452   __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
   7453 }
   7454 
   7455 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7456 _mm512_cvtsepi64_epi8 (__m512i __A)
   7457 {
   7458   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
   7459                (__v16qi) _mm_undefined_si128 (),
   7460                (__mmask8) -1);
   7461 }
   7462 
   7463 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7464 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
   7465 {
   7466   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
   7467                (__v16qi) __O, __M);
   7468 }
   7469 
   7470 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7471 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
   7472 {
   7473   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
   7474                (__v16qi) _mm_setzero_si128 (),
   7475                __M);
   7476 }
   7477 
   7478 static __inline__ void __DEFAULT_FN_ATTRS
   7479 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
   7480 {
   7481   __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
   7482 }
   7483 
   7484 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7485 _mm512_cvtsepi64_epi32 (__m512i __A)
   7486 {
   7487   __v8si __O;
   7488   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
   7489                (__v8si) _mm256_undefined_si256 (),
   7490                (__mmask8) -1);
   7491 }
   7492 
   7493 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7494 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
   7495 {
   7496   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
   7497                (__v8si) __O, __M);
   7498 }
   7499 
   7500 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7501 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
   7502 {
   7503   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
   7504                (__v8si) _mm256_setzero_si256 (),
   7505                __M);
   7506 }
   7507 
   7508 static __inline__ void __DEFAULT_FN_ATTRS
   7509 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
   7510 {
   7511   __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
   7512 }
   7513 
   7514 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7515 _mm512_cvtsepi64_epi16 (__m512i __A)
   7516 {
   7517   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
   7518                (__v8hi) _mm_undefined_si128 (),
   7519                (__mmask8) -1);
   7520 }
   7521 
   7522 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7523 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
   7524 {
   7525   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
   7526                (__v8hi) __O, __M);
   7527 }
   7528 
   7529 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7530 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
   7531 {
   7532   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
   7533                (__v8hi) _mm_setzero_si128 (),
   7534                __M);
   7535 }
   7536 
   7537 static __inline__ void __DEFAULT_FN_ATTRS
   7538 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
   7539 {
   7540   __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
   7541 }
   7542 
   7543 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7544 _mm512_cvtusepi32_epi8 (__m512i __A)
   7545 {
   7546   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
   7547                 (__v16qi) _mm_undefined_si128 (),
   7548                 (__mmask16) -1);
   7549 }
   7550 
   7551 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7552 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
   7553 {
   7554   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
   7555                 (__v16qi) __O,
   7556                 __M);
   7557 }
   7558 
   7559 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7560 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
   7561 {
   7562   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
   7563                 (__v16qi) _mm_setzero_si128 (),
   7564                 __M);
   7565 }
   7566 
   7567 static __inline__ void __DEFAULT_FN_ATTRS
   7568 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
   7569 {
   7570   __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
   7571 }
   7572 
   7573 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7574 _mm512_cvtusepi32_epi16 (__m512i __A)
   7575 {
   7576   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
   7577                 (__v16hi) _mm256_undefined_si256 (),
   7578                 (__mmask16) -1);
   7579 }
   7580 
   7581 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7582 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
   7583 {
   7584   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
   7585                 (__v16hi) __O,
   7586                 __M);
   7587 }
   7588 
   7589 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7590 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
   7591 {
   7592   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
   7593                 (__v16hi) _mm256_setzero_si256 (),
   7594                 __M);
   7595 }
   7596 
   7597 static __inline__ void __DEFAULT_FN_ATTRS
   7598 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
   7599 {
   7600   __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
   7601 }
   7602 
   7603 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7604 _mm512_cvtusepi64_epi8 (__m512i __A)
   7605 {
   7606   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
   7607                 (__v16qi) _mm_undefined_si128 (),
   7608                 (__mmask8) -1);
   7609 }
   7610 
   7611 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7612 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
   7613 {
   7614   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
   7615                 (__v16qi) __O,
   7616                 __M);
   7617 }
   7618 
   7619 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7620 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
   7621 {
   7622   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
   7623                 (__v16qi) _mm_setzero_si128 (),
   7624                 __M);
   7625 }
   7626 
   7627 static __inline__ void __DEFAULT_FN_ATTRS
   7628 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
   7629 {
   7630   __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
   7631 }
   7632 
   7633 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7634 _mm512_cvtusepi64_epi32 (__m512i __A)
   7635 {
   7636   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
   7637                 (__v8si) _mm256_undefined_si256 (),
   7638                 (__mmask8) -1);
   7639 }
   7640 
   7641 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7642 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
   7643 {
   7644   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
   7645                 (__v8si) __O, __M);
   7646 }
   7647 
   7648 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7649 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
   7650 {
   7651   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
   7652                 (__v8si) _mm256_setzero_si256 (),
   7653                 __M);
   7654 }
   7655 
   7656 static __inline__ void __DEFAULT_FN_ATTRS
   7657 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
   7658 {
   7659   __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
   7660 }
   7661 
   7662 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7663 _mm512_cvtusepi64_epi16 (__m512i __A)
   7664 {
   7665   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
   7666                 (__v8hi) _mm_undefined_si128 (),
   7667                 (__mmask8) -1);
   7668 }
   7669 
   7670 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7671 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
   7672 {
   7673   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
   7674                 (__v8hi) __O, __M);
   7675 }
   7676 
   7677 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7678 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
   7679 {
   7680   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
   7681                 (__v8hi) _mm_setzero_si128 (),
   7682                 __M);
   7683 }
   7684 
   7685 static __inline__ void __DEFAULT_FN_ATTRS
   7686 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
   7687 {
   7688   __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
   7689 }
   7690 
   7691 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7692 _mm512_cvtepi32_epi8 (__m512i __A)
   7693 {
   7694   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
   7695               (__v16qi) _mm_undefined_si128 (),
   7696               (__mmask16) -1);
   7697 }
   7698 
   7699 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7700 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
   7701 {
   7702   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
   7703               (__v16qi) __O, __M);
   7704 }
   7705 
   7706 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7707 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
   7708 {
   7709   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
   7710               (__v16qi) _mm_setzero_si128 (),
   7711               __M);
   7712 }
   7713 
   7714 static __inline__ void __DEFAULT_FN_ATTRS
   7715 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
   7716 {
   7717   __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
   7718 }
   7719 
   7720 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7721 _mm512_cvtepi32_epi16 (__m512i __A)
   7722 {
   7723   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
   7724               (__v16hi) _mm256_undefined_si256 (),
   7725               (__mmask16) -1);
   7726 }
   7727 
   7728 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7729 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
   7730 {
   7731   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
   7732               (__v16hi) __O, __M);
   7733 }
   7734 
   7735 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7736 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
   7737 {
   7738   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
   7739               (__v16hi) _mm256_setzero_si256 (),
   7740               __M);
   7741 }
   7742 
   7743 static __inline__ void __DEFAULT_FN_ATTRS
   7744 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
   7745 {
   7746   __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
   7747 }
   7748 
   7749 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7750 _mm512_cvtepi64_epi8 (__m512i __A)
   7751 {
   7752   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
   7753               (__v16qi) _mm_undefined_si128 (),
   7754               (__mmask8) -1);
   7755 }
   7756 
   7757 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7758 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
   7759 {
   7760   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
   7761               (__v16qi) __O, __M);
   7762 }
   7763 
   7764 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7765 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
   7766 {
   7767   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
   7768               (__v16qi) _mm_setzero_si128 (),
   7769               __M);
   7770 }
   7771 
   7772 static __inline__ void __DEFAULT_FN_ATTRS
   7773 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
   7774 {
   7775   __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
   7776 }
   7777 
   7778 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7779 _mm512_cvtepi64_epi32 (__m512i __A)
   7780 {
   7781   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
   7782               (__v8si) _mm256_undefined_si256 (),
   7783               (__mmask8) -1);
   7784 }
   7785 
   7786 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7787 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
   7788 {
   7789   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
   7790               (__v8si) __O, __M);
   7791 }
   7792 
   7793 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7794 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
   7795 {
   7796   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
   7797               (__v8si) _mm256_setzero_si256 (),
   7798               __M);
   7799 }
   7800 
   7801 static __inline__ void __DEFAULT_FN_ATTRS
   7802 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
   7803 {
   7804   __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
   7805 }
   7806 
   7807 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7808 _mm512_cvtepi64_epi16 (__m512i __A)
   7809 {
   7810   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
   7811               (__v8hi) _mm_undefined_si128 (),
   7812               (__mmask8) -1);
   7813 }
   7814 
   7815 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7816 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
   7817 {
   7818   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
   7819               (__v8hi) __O, __M);
   7820 }
   7821 
   7822 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7823 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
   7824 {
   7825   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
   7826               (__v8hi) _mm_setzero_si128 (),
   7827               __M);
   7828 }
   7829 
   7830 static __inline__ void __DEFAULT_FN_ATTRS
   7831 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
   7832 {
   7833   __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
   7834 }
   7835 
   /* Unmasked 128-bit extract from the 512-bit integer vector A; imm is
      forwarded as the builtin's selector. Kept as a macro so imm reaches
      __builtin_ia32_extracti32x4_mask unchanged. All mask bits are set and the
      pass-through operand is left undefined. */
   #define _mm512_extracti32x4_epi32(A, imm) __extension__ ({ \
     (__m128i)__builtin_ia32_extracti32x4_mask( \
         (__v16si)(__m512i)(A), \
         (int)(imm), \
         (__v4si)_mm_undefined_si128(), \
         (__mmask8)-1); })
   7840 
   /* Merge-masked 128-bit extract from A: W is the pass-through vector, U the
      write mask, imm the selector forwarded to the builtin. */
   #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
     (__m128i)__builtin_ia32_extracti32x4_mask( \
         (__v16si)(__m512i)(A), \
         (int)(imm), \
         (__v4si)(__m128i)(W), \
         (__mmask8)(U)); })
   7845 
   /* Zero-masked 128-bit extract from A: an all-zero vector is the pass-through
      operand, U the write mask, imm the selector forwarded to the builtin. */
   #define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
     (__m128i)__builtin_ia32_extracti32x4_mask( \
         (__v16si)(__m512i)(A), \
         (int)(imm), \
         (__v4si)_mm_setzero_si128(), \
         (__mmask8)(U)); })
   7850 
   /* Unmasked 256-bit extract from the 512-bit integer vector A; imm is
      forwarded as the builtin's selector. All mask bits are set and the
      pass-through operand is left undefined. */
   #define _mm512_extracti64x4_epi64(A, imm) __extension__ ({ \
     (__m256i)__builtin_ia32_extracti64x4_mask( \
         (__v8di)(__m512i)(A), \
         (int)(imm), \
         (__v4di)_mm256_undefined_si256(), \
         (__mmask8)-1); })
   7855 
   /* Merge-masked 256-bit extract from A: W is the pass-through vector, U the
      write mask, imm the selector forwarded to the builtin. */
   #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \
     (__m256i)__builtin_ia32_extracti64x4_mask( \
         (__v8di)(__m512i)(A), \
         (int)(imm), \
         (__v4di)(__m256i)(W), \
         (__mmask8)(U)); })
   7860 
   /* Zero-masked 256-bit extract from A: an all-zero vector is the pass-through
      operand, U the write mask, imm the selector forwarded to the builtin. */
   #define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \
     (__m256i)__builtin_ia32_extracti64x4_mask( \
         (__v8di)(__m512i)(A), \
         (int)(imm), \
         (__v4di)_mm256_setzero_si256(), \
         (__mmask8)(U)); })
   7865 
   /* Unmasked insert of the 256-bit double vector B into A at the position
      given by imm, via __builtin_ia32_insertf64x4_mask. All mask bits are set
      and the pass-through operand is left undefined. */
   #define _mm512_insertf64x4(A, B, imm) __extension__ ({ \
     (__m512d)__builtin_ia32_insertf64x4_mask( \
         (__v8df)(__m512d)(A), \
         (__v4df)(__m256d)(B), \
         (int)(imm), \
         (__v8df)_mm512_undefined_pd(), \
         (__mmask8)-1); })
   7871 
   /* Merge-masked insert of the 256-bit double vector B into A (position imm):
      W is the pass-through vector and U the write mask. */
   #define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \
     (__m512d)__builtin_ia32_insertf64x4_mask( \
         (__v8df)(__m512d)(A), \
         (__v4df)(__m256d)(B), \
         (int)(imm), \
         (__v8df)(__m512d)(W), \
         (__mmask8)(U)); })
   7877 
   /* Zero-masked insert of the 256-bit double vector B into A (position imm):
      an all-zero vector is the pass-through operand and U the write mask. */
   #define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \
     (__m512d)__builtin_ia32_insertf64x4_mask( \
         (__v8df)(__m512d)(A), \
         (__v4df)(__m256d)(B), \
         (int)(imm), \
         (__v8df)_mm512_setzero_pd(), \
         (__mmask8)(U)); })
   7883 
   /* Unmasked insert of the 256-bit integer vector B into A at the position
      given by imm, via __builtin_ia32_inserti64x4_mask.

      The mask has every bit set, so the pass-through operand is passed as an
      undefined vector, matching _mm512_insertf64x4 and _mm512_insertf32x4,
      rather than materialising a zero vector for it. */
   #define _mm512_inserti64x4(A, B, imm) __extension__ ({ \
     (__m512i)__builtin_ia32_inserti64x4_mask((__v8di)(__m512i)(A), \
                                              (__v4di)(__m256i)(B), (int)(imm), \
                                              (__v8di)_mm512_undefined_epi32(), \
                                              (__mmask8)-1); })
   7889 
   /* Merge-masked insert of the 256-bit integer vector B into A (position
      imm): W is the pass-through vector and U the write mask. */
   #define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \
     (__m512i)__builtin_ia32_inserti64x4_mask( \
         (__v8di)(__m512i)(A), \
         (__v4di)(__m256i)(B), \
         (int)(imm), \
         (__v8di)(__m512i)(W), \
         (__mmask8)(U)); })
   7895 
   /* Zero-masked insert of the 256-bit integer vector B into A (position imm):
      an all-zero vector is the pass-through operand and U the write mask. */
   #define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \
     (__m512i)__builtin_ia32_inserti64x4_mask( \
         (__v8di)(__m512i)(A), \
         (__v4di)(__m256i)(B), \
         (int)(imm), \
         (__v8di)_mm512_setzero_si512(), \
         (__mmask8)(U)); })
   7901 
   /* Unmasked insert of the 128-bit float vector B into A at the position
      given by imm, via __builtin_ia32_insertf32x4_mask. All mask bits are set
      and the pass-through operand is left undefined. */
   #define _mm512_insertf32x4(A, B, imm) __extension__ ({ \
     (__m512)__builtin_ia32_insertf32x4_mask( \
         (__v16sf)(__m512)(A), \
         (__v4sf)(__m128)(B), \
         (int)(imm), \
         (__v16sf)_mm512_undefined_ps(), \
         (__mmask16)-1); })
   7907 
   /* Merge-masked insert of the 128-bit float vector B into A (position imm):
      W is the pass-through vector and U the write mask. */
   #define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
     (__m512)__builtin_ia32_insertf32x4_mask( \
         (__v16sf)(__m512)(A), \
         (__v4sf)(__m128)(B), \
         (int)(imm), \
         (__v16sf)(__m512)(W), \
         (__mmask16)(U)); })
   7913 
   /* Zero-masked insert of the 128-bit float vector B into A (position imm):
      an all-zero vector is the pass-through operand and U the write mask. */
   #define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
     (__m512)__builtin_ia32_insertf32x4_mask( \
         (__v16sf)(__m512)(A), \
         (__v4sf)(__m128)(B), \
         (int)(imm), \
         (__v16sf)_mm512_setzero_ps(), \
         (__mmask16)(U)); })
   7919 
   /* Unmasked insert of the 128-bit integer vector B into A at the position
      given by imm, via __builtin_ia32_inserti32x4_mask.

      The mask has every bit set, so the pass-through operand is passed as an
      undefined vector, matching _mm512_insertf32x4 and _mm512_insertf64x4,
      rather than materialising a zero vector for it. */
   #define _mm512_inserti32x4(A, B, imm) __extension__ ({ \
     (__m512i)__builtin_ia32_inserti32x4_mask((__v16si)(__m512i)(A), \
                                              (__v4si)(__m128i)(B), (int)(imm), \
                                              (__v16si)_mm512_undefined_epi32(), \
                                              (__mmask16)-1); })
   7925 
   /* Merge-masked insert of the 128-bit integer vector B into A (position
      imm): W is the pass-through vector and U the write mask. */
   #define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
     (__m512i)__builtin_ia32_inserti32x4_mask( \
         (__v16si)(__m512i)(A), \
         (__v4si)(__m128i)(B), \
         (int)(imm), \
         (__v16si)(__m512i)(W), \
         (__mmask16)(U)); })
   7931 
   /* Zero-masked insert of the 128-bit integer vector B into A (position imm):
      an all-zero vector is the pass-through operand and U the write mask. */
   #define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
     (__m512i)__builtin_ia32_inserti32x4_mask( \
         (__v16si)(__m512i)(A), \
         (__v4si)(__m128i)(B), \
         (int)(imm), \
         (__v16si)_mm512_setzero_si512(), \
         (__mmask16)(U)); })
   7937 
   /* Unmasked getmant on the doubles of A with explicit rounding control R.
      B and C are packed into the builtin's immediate as ((C) << 2) | (B).
      All mask bits are set and the pass-through operand is left undefined. */
   #define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_getmantpd512_mask( \
         (__v8df)(__m512d)(A), \
         (int)(((C)<<2) | (B)), \
         (__v8df)_mm512_undefined_pd(), \
         (__mmask8)-1, \
         (int)(R)); })
   7943 
   /* Merge-masked getmant on the doubles of A with rounding control R: W is
      the pass-through vector, U the write mask, and the immediate is
      ((C) << 2) | (B). */
   #define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_getmantpd512_mask( \
         (__v8df)(__m512d)(A), \
         (int)(((C)<<2) | (B)), \
         (__v8df)(__m512d)(W), \
         (__mmask8)(U), \
         (int)(R)); })
   7949 
   /* Zero-masked getmant on the doubles of A with rounding control R: an
      all-zero vector is the pass-through operand, U the write mask, and the
      immediate is ((C) << 2) | (B). */
   #define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \
     (__m512d)__builtin_ia32_getmantpd512_mask( \
         (__v8df)(__m512d)(A), \
         (int)(((C)<<2) | (B)), \
         (__v8df)_mm512_setzero_pd(), \
         (__mmask8)(U), \
         (int)(R)); })
   7955 
   /* Unmasked getmant on the doubles of A using the current rounding
      direction. B and C are packed into the builtin's immediate as
      ((C) << 2) | (B).

      The mask has every bit set, so the pass-through operand is passed as an
      undefined vector, matching _mm512_getmant_round_pd and
      _mm512_getmant_ps, rather than materialising a zero vector for it. */
   #define _mm512_getmant_pd(A, B, C) __extension__ ({ \
     (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                               (int)(((C)<<2) | (B)), \
                                               (__v8df)_mm512_undefined_pd(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION); })
   7962 
   /* Merge-masked getmant on the doubles of A using the current rounding
      direction: W is the pass-through vector, U the write mask, and the
      immediate is ((C) << 2) | (B). */
   #define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
     (__m512d)__builtin_ia32_getmantpd512_mask( \
         (__v8df)(__m512d)(A), \
         (int)(((C)<<2) | (B)), \
         (__v8df)(__m512d)(W), \
         (__mmask8)(U), \
         _MM_FROUND_CUR_DIRECTION); })
   7969 
   /* Zero-masked getmant on the doubles of A using the current rounding
      direction: an all-zero vector is the pass-through operand, U the write
      mask, and the immediate is ((C) << 2) | (B). */
   #define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
     (__m512d)__builtin_ia32_getmantpd512_mask( \
         (__v8df)(__m512d)(A), \
         (int)(((C)<<2) | (B)), \
         (__v8df)_mm512_setzero_pd(), \
         (__mmask8)(U), \
         _MM_FROUND_CUR_DIRECTION); })
   7976 
   /* Unmasked getmant on the floats of A with explicit rounding control R.
      B and C are packed into the builtin's immediate as ((C) << 2) | (B).
      All mask bits are set and the pass-through operand is left undefined. */
   #define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_getmantps512_mask( \
         (__v16sf)(__m512)(A), \
         (int)(((C)<<2) | (B)), \
         (__v16sf)_mm512_undefined_ps(), \
         (__mmask16)-1, \
         (int)(R)); })
   7982 
   /* Merge-masked getmant on the floats of A with rounding control R: W is the
      pass-through vector, U the write mask, and the immediate is
      ((C) << 2) | (B). */
   #define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_getmantps512_mask( \
         (__v16sf)(__m512)(A), \
         (int)(((C)<<2) | (B)), \
         (__v16sf)(__m512)(W), \
         (__mmask16)(U), \
         (int)(R)); })
   7988 
   /* Zero-masked getmant on the floats of A with rounding control R: an
      all-zero vector is the pass-through operand, U the write mask, and the
      immediate is ((C) << 2) | (B). */
   #define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \
     (__m512)__builtin_ia32_getmantps512_mask( \
         (__v16sf)(__m512)(A), \
         (int)(((C)<<2) | (B)), \
         (__v16sf)_mm512_setzero_ps(), \
         (__mmask16)(U), \
         (int)(R)); })
   7994 
   /* Unmasked getmant on the floats of A using the current rounding direction.
      B and C are packed into the builtin's immediate as ((C) << 2) | (B).
      All mask bits are set and the pass-through operand is left undefined. */
   #define _mm512_getmant_ps(A, B, C) __extension__ ({ \
     (__m512)__builtin_ia32_getmantps512_mask( \
         (__v16sf)(__m512)(A), \
         (int)(((C)<<2)|(B)), \
         (__v16sf)_mm512_undefined_ps(), \
         (__mmask16)-1, \
         _MM_FROUND_CUR_DIRECTION); })
   8001 
   /* Merge-masked getmant on the floats of A using the current rounding
      direction: W is the pass-through vector, U the write mask, and the
      immediate is ((C) << 2) | (B). */
   #define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
     (__m512)__builtin_ia32_getmantps512_mask( \
         (__v16sf)(__m512)(A), \
         (int)(((C)<<2)|(B)), \
         (__v16sf)(__m512)(W), \
         (__mmask16)(U), \
         _MM_FROUND_CUR_DIRECTION); })
   8008 
   /* Zero-masked getmant on the floats of A using the current rounding
      direction: an all-zero vector is the pass-through operand, U the write
      mask, and the immediate is ((C) << 2) | (B). */
   #define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
     (__m512)__builtin_ia32_getmantps512_mask( \
         (__v16sf)(__m512)(A), \
         (int)(((C)<<2)|(B)), \
         (__v16sf)_mm512_setzero_ps(), \
         (__mmask16)(U), \
         _MM_FROUND_CUR_DIRECTION); })
   8015 
   8016 #define _mm512_getexp_round_pd(A, R) __extension__ ({ \
   8017   (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
   8018                                            (__v8df)_mm512_undefined_pd(), \
   8019                                            (__mmask8)-1, (int)(R)); })
   8020 
   8021 #define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \
   8022   (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
   8023                                            (__v8df)(__m512d)(W), \
   8024                                            (__mmask8)(U), (int)(R)); })
   8025 
   8026 #define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \
   8027   (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
   8028                                            (__v8df)_mm512_setzero_pd(), \
   8029                                            (__mmask8)(U), (int)(R)); })
   8030 
   8031 static __inline__ __m512d __DEFAULT_FN_ATTRS
   8032 _mm512_getexp_pd (__m512d __A)
   8033 {
   8034   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
   8035                 (__v8df) _mm512_undefined_pd (),
   8036                 (__mmask8) -1,
   8037                 _MM_FROUND_CUR_DIRECTION);
   8038 }
   8039 
   8040 static __inline__ __m512d __DEFAULT_FN_ATTRS
   8041 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
   8042 {
   8043   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
   8044                 (__v8df) __W,
   8045                 (__mmask8) __U,
   8046                 _MM_FROUND_CUR_DIRECTION);
   8047 }
   8048 
   8049 static __inline__ __m512d __DEFAULT_FN_ATTRS
   8050 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
   8051 {
   8052   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
   8053                 (__v8df) _mm512_setzero_pd (),
   8054                 (__mmask8) __U,
   8055                 _MM_FROUND_CUR_DIRECTION);
   8056 }
   8057 
   8058 #define _mm512_getexp_round_ps(A, R) __extension__ ({ \
   8059   (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
   8060                                           (__v16sf)_mm512_undefined_ps(), \
   8061                                           (__mmask16)-1, (int)(R)); })
   8062 
   8063 #define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \
   8064   (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
   8065                                           (__v16sf)(__m512)(W), \
   8066                                           (__mmask16)(U), (int)(R)); })
   8067 
   8068 #define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \
   8069   (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
   8070                                           (__v16sf)_mm512_setzero_ps(), \
   8071                                           (__mmask16)(U), (int)(R)); })
   8072 
   8073 static __inline__ __m512 __DEFAULT_FN_ATTRS
   8074 _mm512_getexp_ps (__m512 __A)
   8075 {
   8076   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
   8077                (__v16sf) _mm512_undefined_ps (),
   8078                (__mmask16) -1,
   8079                _MM_FROUND_CUR_DIRECTION);
   8080 }
   8081 
   8082 static __inline__ __m512 __DEFAULT_FN_ATTRS
   8083 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
   8084 {
   8085   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
   8086                (__v16sf) __W,
   8087                (__mmask16) __U,
   8088                _MM_FROUND_CUR_DIRECTION);
   8089 }
   8090 
   8091 static __inline__ __m512 __DEFAULT_FN_ATTRS
   8092 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
   8093 {
   8094   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
   8095                (__v16sf) _mm512_setzero_ps (),
   8096                (__mmask16) __U,
   8097                _MM_FROUND_CUR_DIRECTION);
   8098 }
   8099 
   8100 #define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \
   8101   (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
   8102                                        (float const *)(addr), \
   8103                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
   8104                                        (int)(scale)); })
   8105 
   8106 #define _mm512_mask_i64gather_ps( __v1_old, __mask, __index,\
   8107                                   __addr, __scale) __extension__({\
   8108 __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,\
   8109                               __addr,(__v8di) __index, __mask, __scale);\
   8110 })
   8111 
   8112 #define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\
   8113   (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_ps(), \
   8114                                         (int const *)(addr), \
   8115                                         (__v8di)(__m512i)(index), \
   8116                                         (__mmask8)-1, (int)(scale)); })
   8117 
   8118 #define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
   8119   (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
   8120                                         (int const *)(addr), \
   8121                                         (__v8di)(__m512i)(index), \
   8122                                         (__mmask8)(mask), (int)(scale)); })
   8123 
   8124 #define _mm512_i64gather_pd(index, addr, scale) __extension__ ({\
   8125   (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
   8126                                        (double const *)(addr), \
   8127                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
   8128                                        (int)(scale)); })
   8129 
   8130 #define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
   8131   (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
   8132                                        (double const *)(addr), \
   8133                                        (__v8di)(__m512i)(index), \
   8134                                        (__mmask8)(mask), (int)(scale)); })
   8135 
   8136 #define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\
   8137   (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_pd(), \
   8138                                        (long long const *)(addr), \
   8139                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
   8140                                        (int)(scale)); })
   8141 
   8142 #define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
   8143   (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
   8144                                        (long long const *)(addr), \
   8145                                        (__v8di)(__m512i)(index), \
   8146                                        (__mmask8)(mask), (int)(scale)); })
   8147 
   8148 #define _mm512_i32gather_ps(index, addr, scale) __extension__ ({\
   8149   (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
   8150                                        (float const *)(addr), \
   8151                                        (__v16sf)(__m512)(index), \
   8152                                        (__mmask16)-1, (int)(scale)); })
   8153 
   8154 #define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
   8155   (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
   8156                                        (float const *)(addr), \
   8157                                        (__v16sf)(__m512)(index), \
   8158                                        (__mmask16)(mask), (int)(scale)); })
   8159 
   8160 #define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({\
   8161   (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
   8162                                         (int const *)(addr), \
   8163                                         (__v16si)(__m512i)(index), \
   8164                                         (__mmask16)-1, (int)(scale)); })
   8165 
   8166 #define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
   8167   (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
   8168                                         (int const *)(addr), \
   8169                                         (__v16si)(__m512i)(index), \
   8170                                         (__mmask16)(mask), (int)(scale)); })
   8171 
   8172 #define _mm512_i32gather_pd(index, addr, scale) __extension__ ({\
   8173   (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
   8174                                        (double const *)(addr), \
   8175                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
   8176                                        (int)(scale)); })
   8177 
   8178 #define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
   8179   (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
   8180                                        (double const *)(addr), \
   8181                                        (__v8si)(__m256i)(index), \
   8182                                        (__mmask8)(mask), (int)(scale)); })
   8183 
   8184 #define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({\
   8185   (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
   8186                                        (long long const *)(addr), \
   8187                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
   8188                                        (int)(scale)); })
   8189 
   8190 #define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
   8191   (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
   8192                                        (long long const *)(addr), \
   8193                                        (__v8si)(__m256i)(index), \
   8194                                        (__mmask8)(mask), (int)(scale)); })
   8195 
   8196 #define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({\
   8197   __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \
   8198                                 (__v8di)(__m512i)(index), \
   8199                                 (__v8sf)(__m256)(v1), (int)(scale)); })
   8200 
   8201 #define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
   8202   __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \
   8203                                 (__v8di)(__m512i)(index), \
   8204                                 (__v8sf)(__m256)(v1), (int)(scale)); })
   8205 
   8206 #define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({\
   8207   __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \
   8208                                 (__v8di)(__m512i)(index), \
   8209                                 (__v8si)(__m256i)(v1), (int)(scale)); })
   8210 
   8211 #define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
   8212   __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \
   8213                                 (__v8di)(__m512i)(index), \
   8214                                 (__v8si)(__m256i)(v1), (int)(scale)); })
   8215 
   8216 #define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({\
   8217   __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \
   8218                                (__v8di)(__m512i)(index), \
   8219                                (__v8df)(__m512d)(v1), (int)(scale)); })
   8220 
   8221 #define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
   8222   __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \
   8223                                (__v8di)(__m512i)(index), \
   8224                                (__v8df)(__m512d)(v1), (int)(scale)); })
   8225 
   8226 #define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\
   8227   __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \
   8228                                (__v8di)(__m512i)(index), \
   8229                                (__v8di)(__m512i)(v1), (int)(scale)); })
   8230 
   8231 #define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
   8232   __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \
   8233                                (__v8di)(__m512i)(index), \
   8234                                (__v8di)(__m512i)(v1), (int)(scale)); })
   8235 
   8236 #define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({\
   8237   __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \
   8238                                 (__v16si)(__m512i)(index), \
   8239                                 (__v16sf)(__m512)(v1), (int)(scale)); })
   8240 
   8241 #define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
   8242   __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \
   8243                                 (__v16si)(__m512i)(index), \
   8244                                 (__v16sf)(__m512)(v1), (int)(scale)); })
   8245 
   8246 #define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\
   8247   __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \
   8248                                 (__v16si)(__m512i)(index), \
   8249                                 (__v16si)(__m512i)(v1), (int)(scale)); })
   8250 
   8251 #define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
   8252   __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \
   8253                                 (__v16si)(__m512i)(index), \
   8254                                 (__v16si)(__m512i)(v1), (int)(scale)); })
   8255 
   8256 #define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({\
   8257   __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \
   8258                                (__v8si)(__m256i)(index), \
   8259                                (__v8df)(__m512d)(v1), (int)(scale)); })
   8260 
   8261 #define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
   8262   __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \
   8263                                (__v8si)(__m256i)(index), \
   8264                                (__v8df)(__m512d)(v1), (int)(scale)); })
   8265 
   8266 #define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\
   8267   __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \
   8268                                (__v8si)(__m256i)(index), \
   8269                                (__v8di)(__m512i)(v1), (int)(scale)); })
   8270 
   8271 #define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
   8272   __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \
   8273                                (__v8si)(__m256i)(index), \
   8274                                (__v8di)(__m512i)(v1), (int)(scale)); })
   8275 
   8276 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8277 _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   8278 {
   8279  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __A,
   8280           (__v4sf) __B,
   8281           (__v4sf) __W,
   8282           (__mmask8) __U,
   8283           _MM_FROUND_CUR_DIRECTION);
   8284 }
   8285 
   8286 #define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\
   8287   (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
   8288                                         (__v4sf)(__m128)(B), \
   8289                                         (__v4sf)(__m128)(W), (__mmask8)(U), \
   8290                                         (int)(R)); })
   8291 
   8292 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8293 _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   8294 {
   8295  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
   8296           (__v4sf) __B,
   8297           (__v4sf) __C,
   8298           (__mmask8) __U,
   8299           _MM_FROUND_CUR_DIRECTION);
   8300 }
   8301 
   8302 #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\
   8303   (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
   8304                                          (__v4sf)(__m128)(B), \
   8305                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
   8306                                          _MM_FROUND_CUR_DIRECTION); })
   8307 
   8308 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8309 _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
   8310 {
   8311  return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
   8312           (__v4sf) __X,
   8313           (__v4sf) __Y,
   8314           (__mmask8) __U,
   8315           _MM_FROUND_CUR_DIRECTION);
   8316 }
   8317 
   8318 #define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\
   8319   (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
   8320                                          (__v4sf)(__m128)(X), \
   8321                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
   8322                                          (int)(R)); })
   8323 
   8324 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8325 _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   8326 {
   8327  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __A,
   8328           -(__v4sf) __B,
   8329           (__v4sf) __W,
   8330           (__mmask8) __U,
   8331           _MM_FROUND_CUR_DIRECTION);
   8332 }
   8333 
   8334 #define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\
   8335   (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
   8336                                         -(__v4sf)(__m128)(B), \
   8337                                         (__v4sf)(__m128)(W), (__mmask8)(U), \
   8338                                         (int)(R)); })
   8339 
   8340 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8341 _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   8342 {
   8343  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
   8344           (__v4sf) __B,
   8345           -(__v4sf) __C,
   8346           (__mmask8) __U,
   8347           _MM_FROUND_CUR_DIRECTION);
   8348 }
   8349 
   8350 #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\
   8351   (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
   8352                                          (__v4sf)(__m128)(B), \
   8353                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
   8354                                          (int)(R)); })
   8355 
   8356 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8357 _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
   8358 {
   8359  return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
   8360           (__v4sf) __X,
   8361           -(__v4sf) __Y,
   8362           (__mmask8) __U,
   8363           _MM_FROUND_CUR_DIRECTION);
   8364 }
   8365 
   8366 #define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\
   8367   (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
   8368                                          (__v4sf)(__m128)(X), \
   8369                                          -(__v4sf)(__m128)(Y), (__mmask8)(U), \
   8370                                          (int)(R)); })
   8371 
   8372 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8373 _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   8374 {
   8375  return (__m128) __builtin_ia32_vfmaddss3_mask (-(__v4sf) __A,
   8376           (__v4sf) __B,
   8377           (__v4sf) __W,
   8378           (__mmask8) __U,
   8379           _MM_FROUND_CUR_DIRECTION);
   8380 }
   8381 
   8382 #define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\
   8383   (__m128)__builtin_ia32_vfmaddss3_mask(-(__v4sf)(__m128)(A), \
   8384                                         (__v4sf)(__m128)(B), \
   8385                                         (__v4sf)(__m128)(W), (__mmask8)(U), \
   8386                                         (int)(R)); })
   8387 
   8388 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8389 _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   8390 {
   8391  return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
   8392           (__v4sf) __B,
   8393           (__v4sf) __C,
   8394           (__mmask8) __U,
   8395           _MM_FROUND_CUR_DIRECTION);
   8396 }
   8397 
   8398 #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\
   8399   (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
   8400                                          (__v4sf)(__m128)(B), \
   8401                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
   8402                                          (int)(R)); })
   8403 
   8404 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8405 _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
   8406 {
   8407  return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
   8408           (__v4sf) __X,
   8409           (__v4sf) __Y,
   8410           (__mmask8) __U,
   8411           _MM_FROUND_CUR_DIRECTION);
   8412 }
   8413 
   8414 #define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\
   8415   (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \
   8416                                          (__v4sf)(__m128)(X), \
   8417                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
   8418                                          (int)(R)); })
   8419 
   8420 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8421 _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   8422 {
   8423  return (__m128) __builtin_ia32_vfmaddss3_mask (-(__v4sf) __A,
   8424           -(__v4sf) __B,
   8425           (__v4sf) __W,
   8426           (__mmask8) __U,
   8427           _MM_FROUND_CUR_DIRECTION);
   8428 }
   8429 
   8430 #define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\
   8431   (__m128)__builtin_ia32_vfmaddss3_mask(-(__v4sf)(__m128)(A), \
   8432                                         -(__v4sf)(__m128)(B), \
   8433                                         (__v4sf)(__m128)(W), (__mmask8)(U), \
   8434                                         (int)(R)); })
   8435 
   8436 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8437 _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   8438 {
   8439  return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
   8440           (__v4sf) __B,
   8441           -(__v4sf) __C,
   8442           (__mmask8) __U,
   8443           _MM_FROUND_CUR_DIRECTION);
   8444 }
   8445 
   8446 #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\
   8447   (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
   8448                                          (__v4sf)(__m128)(B), \
   8449                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
   8450                                          _MM_FROUND_CUR_DIRECTION); })
   8451 
   8452 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8453 _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
   8454 {
   8455  return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
   8456           (__v4sf) __X,
   8457           -(__v4sf) __Y,
   8458           (__mmask8) __U,
   8459           _MM_FROUND_CUR_DIRECTION);
   8460 }
   8461 
   8462 #define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\
   8463   (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \
   8464                                          (__v4sf)(__m128)(X), \
   8465                                          -(__v4sf)(__m128)(Y), (__mmask8)(U), \
   8466                                          (int)(R)); })
   8467 
   8468 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8469 _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   8470 {
   8471  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __A,
   8472           (__v2df) __B,
   8473           (__v2df) __W,
   8474           (__mmask8) __U,
   8475           _MM_FROUND_CUR_DIRECTION);
   8476 }
   8477 
   8478 #define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\
   8479   (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
   8480                                          (__v2df)(__m128d)(B), \
   8481                                          (__v2df)(__m128d)(W), (__mmask8)(U), \
   8482                                          (int)(R)); })
   8483 
   8484 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8485 _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
   8486 {
   8487  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
   8488           (__v2df) __B,
   8489           (__v2df) __C,
   8490           (__mmask8) __U,
   8491           _MM_FROUND_CUR_DIRECTION);
   8492 }
   8493 
   8494 #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\
   8495   (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
   8496                                           (__v2df)(__m128d)(B), \
   8497                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
   8498                                           _MM_FROUND_CUR_DIRECTION); })
   8499 
   8500 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8501 _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
   8502 {
   8503  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
   8504           (__v2df) __X,
   8505           (__v2df) __Y,
   8506           (__mmask8) __U,
   8507           _MM_FROUND_CUR_DIRECTION);
   8508 }
   8509 
   8510 #define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\
   8511   (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
   8512                                           (__v2df)(__m128d)(X), \
   8513                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
   8514                                           (int)(R)); })
   8515 
   8516 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8517 _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   8518 {
   8519  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __A,
   8520           -(__v2df) __B,
   8521           (__v2df) __W,
   8522           (__mmask8) __U,
   8523           _MM_FROUND_CUR_DIRECTION);
   8524 }
   8525 
   8526 #define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\
   8527   (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
   8528                                          -(__v2df)(__m128d)(B), \
   8529                                          (__v2df)(__m128d)(W), (__mmask8)(U), \
   8530                                          (int)(R)); })
   8531 
   8532 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8533 _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
   8534 {
   8535  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
   8536           (__v2df) __B,
   8537           -(__v2df) __C,
   8538           (__mmask8) __U,
   8539           _MM_FROUND_CUR_DIRECTION);
   8540 }
   8541 
   8542 #define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\
   8543   (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
   8544                                           (__v2df)(__m128d)(B), \
   8545                                           -(__v2df)(__m128d)(C), \
   8546                                           (__mmask8)(U), (int)(R)); })
   8547 
   8548 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8549 _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
   8550 {
   8551  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
   8552           (__v2df) __X,
   8553           -(__v2df) __Y,
   8554           (__mmask8) __U,
   8555           _MM_FROUND_CUR_DIRECTION);
   8556 }
   8557 
   8558 #define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\
   8559   (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
   8560                                           (__v2df)(__m128d)(X), \
   8561                                           -(__v2df)(__m128d)(Y), \
   8562                                           (__mmask8)(U), (int)(R)); })
   8563 
   8564 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8565 _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   8566 {
   8567  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( -(__v2df) __A,
   8568           (__v2df) __B,
   8569           (__v2df) __W,
   8570           (__mmask8) __U,
   8571           _MM_FROUND_CUR_DIRECTION);
   8572 }
   8573 
   8574 #define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\
   8575   (__m128d)__builtin_ia32_vfmaddsd3_mask(-(__v2df)(__m128d)(A), \
   8576                                          (__v2df)(__m128d)(B), \
   8577                                          (__v2df)(__m128d)(W), (__mmask8)(U), \
   8578                                          (int)(R)); })
   8579 
   8580 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8581 _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
   8582 {
   8583  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
   8584           (__v2df) __B,
   8585           (__v2df) __C,
   8586           (__mmask8) __U,
   8587           _MM_FROUND_CUR_DIRECTION);
   8588 }
   8589 
   8590 #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\
   8591   (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
   8592                                           (__v2df)(__m128d)(B), \
   8593                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
   8594                                           (int)(R)); })
   8595 
   8596 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8597 _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
   8598 {
   8599  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
   8600           (__v2df) __X,
   8601           (__v2df) __Y,
   8602           (__mmask8) __U,
   8603           _MM_FROUND_CUR_DIRECTION);
   8604 }
   8605 
   8606 #define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\
   8607   (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \
   8608                                           (__v2df)(__m128d)(X), \
   8609                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
   8610                                           (int)(R)); })
   8611 
   8612 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8613 _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   8614 {
   8615  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( -(__v2df) __A,
   8616           -(__v2df) __B,
   8617           (__v2df) __W,
   8618           (__mmask8) __U,
   8619           _MM_FROUND_CUR_DIRECTION);
   8620 }
   8621 
   8622 #define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\
   8623   (__m128d)__builtin_ia32_vfmaddsd3_mask(-(__v2df)(__m128d)(A), \
   8624                                          -(__v2df)(__m128d)(B), \
   8625                                          (__v2df)(__m128d)(W), (__mmask8)(U), \
   8626                                          (int)(R)); })
   8627 
   8628 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8629 _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
   8630 {
   8631  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
   8632           (__v2df) __B,
   8633           -(__v2df) __C,
   8634           (__mmask8) __U,
   8635           _MM_FROUND_CUR_DIRECTION);
   8636 }
   8637 
   8638 #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\
   8639   (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
   8640                                           (__v2df)(__m128d)(B), \
   8641                                           -(__v2df)(__m128d)(C), \
   8642                                           (__mmask8)(U), \
   8643                                           _MM_FROUND_CUR_DIRECTION); })
   8644 
   8645 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8646 _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
   8647 {
   8648  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) (__W),
   8649           (__v2df) __X,
   8650           -(__v2df) (__Y),
   8651           (__mmask8) __U,
   8652           _MM_FROUND_CUR_DIRECTION);
   8653 }
   8654 
   8655 #define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\
   8656   (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \
   8657                                           (__v2df)(__m128d)(X), \
   8658                                           -(__v2df)(__m128d)(Y), \
   8659                                           (__mmask8)(U), (int)(R)); })
   8660 
   8661 #define _mm512_permutex_pd(X, C) __extension__ ({ \
   8662   (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
   8663                                    (__v8df)_mm512_undefined_pd(), \
   8664                                    0 + (((C) >> 0) & 0x3), \
   8665                                    0 + (((C) >> 2) & 0x3), \
   8666                                    0 + (((C) >> 4) & 0x3), \
   8667                                    0 + (((C) >> 6) & 0x3), \
   8668                                    4 + (((C) >> 0) & 0x3), \
   8669                                    4 + (((C) >> 2) & 0x3), \
   8670                                    4 + (((C) >> 4) & 0x3), \
   8671                                    4 + (((C) >> 6) & 0x3)); })
   8672 
   8673 #define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \
   8674   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
   8675                                        (__v8df)_mm512_permutex_pd((X), (C)), \
   8676                                        (__v8df)(__m512d)(W)); })
   8677 
   8678 #define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \
   8679   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
   8680                                        (__v8df)_mm512_permutex_pd((X), (C)), \
   8681                                        (__v8df)_mm512_setzero_pd()); })
   8682 
   8683 #define _mm512_permutex_epi64(X, C) __extension__ ({ \
   8684   (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \
   8685                                    (__v8di)_mm512_undefined_epi32(), \
   8686                                    0 + (((C) >> 0) & 0x3), \
   8687                                    0 + (((C) >> 2) & 0x3), \
   8688                                    0 + (((C) >> 4) & 0x3), \
   8689                                    0 + (((C) >> 6) & 0x3), \
   8690                                    4 + (((C) >> 0) & 0x3), \
   8691                                    4 + (((C) >> 2) & 0x3), \
   8692                                    4 + (((C) >> 4) & 0x3), \
   8693                                    4 + (((C) >> 6) & 0x3)); })
   8694 
   8695 #define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
   8696   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
   8697                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
   8698                                       (__v8di)(__m512i)(W)); })
   8699 
   8700 #define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \
   8701   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
   8702                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
   8703                                       (__v8di)_mm512_setzero_si512()); })
   8704 
   8705 static __inline__ __m512d __DEFAULT_FN_ATTRS
   8706 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
   8707 {
   8708   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
   8709                  (__v8di) __X,
   8710                  (__v8df) _mm512_undefined_pd (),
   8711                  (__mmask8) -1);
   8712 }
   8713 
   8714 static __inline__ __m512d __DEFAULT_FN_ATTRS
   8715 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
   8716 {
   8717   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
   8718                  (__v8di) __X,
   8719                  (__v8df) __W,
   8720                  (__mmask8) __U);
   8721 }
   8722 
   8723 static __inline__ __m512d __DEFAULT_FN_ATTRS
   8724 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
   8725 {
   8726   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
   8727                  (__v8di) __X,
   8728                  (__v8df) _mm512_setzero_pd (),
   8729                  (__mmask8) __U);
   8730 }
   8731 
   8732 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8733 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
   8734 {
   8735   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
   8736                  (__v8di) __X,
   8737                  (__v8di) _mm512_setzero_si512 (),
   8738                  __M);
   8739 }
   8740 
   8741 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8742 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
   8743 {
   8744   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
   8745                  (__v8di) __X,
   8746                  (__v8di) _mm512_undefined_epi32 (),
   8747                  (__mmask8) -1);
   8748 }
   8749 
   8750 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8751 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
   8752              __m512i __Y)
   8753 {
   8754   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
   8755                  (__v8di) __X,
   8756                  (__v8di) __W,
   8757                  __M);
   8758 }
   8759 
   8760 static __inline__ __m512 __DEFAULT_FN_ATTRS
   8761 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
   8762 {
   8763   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
   8764                 (__v16si) __X,
   8765                 (__v16sf) _mm512_undefined_ps (),
   8766                 (__mmask16) -1);
   8767 }
   8768 
   8769 static __inline__ __m512 __DEFAULT_FN_ATTRS
   8770 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
   8771 {
   8772   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
   8773                 (__v16si) __X,
   8774                 (__v16sf) __W,
   8775                 (__mmask16) __U);
   8776 }
   8777 
   8778 static __inline__ __m512 __DEFAULT_FN_ATTRS
   8779 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
   8780 {
   8781   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
   8782                 (__v16si) __X,
   8783                 (__v16sf) _mm512_setzero_ps (),
   8784                 (__mmask16) __U);
   8785 }
   8786 
   8787 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8788 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
   8789 {
   8790   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
   8791                  (__v16si) __X,
   8792                  (__v16si) _mm512_setzero_si512 (),
   8793                  __M);
   8794 }
   8795 
   8796 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8797 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
   8798 {
   8799   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
   8800                  (__v16si) __X,
   8801                  (__v16si) _mm512_undefined_epi32 (),
   8802                  (__mmask16) -1);
   8803 }
   8804 
   8805 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8806 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
   8807              __m512i __Y)
   8808 {
   8809   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
   8810                  (__v16si) __X,
   8811                  (__v16si) __W,
   8812                  __M);
   8813 }
   8814 
   8815 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   8816 _mm512_kand (__mmask16 __A, __mmask16 __B)
   8817 {
   8818   return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
   8819 }
   8820 
   8821 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   8822 _mm512_kandn (__mmask16 __A, __mmask16 __B)
   8823 {
   8824   return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
   8825 }
   8826 
   8827 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   8828 _mm512_kor (__mmask16 __A, __mmask16 __B)
   8829 {
   8830   return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
   8831 }
   8832 
   8833 static __inline__ int __DEFAULT_FN_ATTRS
   8834 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
   8835 {
   8836   return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
   8837 }
   8838 
   8839 static __inline__ int __DEFAULT_FN_ATTRS
   8840 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
   8841 {
   8842   return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
   8843 }
   8844 
   8845 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   8846 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
   8847 {
   8848   return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
   8849 }
   8850 
   8851 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   8852 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
   8853 {
   8854   return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
   8855 }
   8856 
   8857 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   8858 _mm512_kxor (__mmask16 __A, __mmask16 __B)
   8859 {
   8860   return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
   8861 }
   8862 
   8863 static __inline__ void __DEFAULT_FN_ATTRS
   8864 _mm512_stream_si512 (__m512i * __P, __m512i __A)
   8865 {
   8866   __builtin_nontemporal_store((__v8di)__A, (__v8di*)__P);
   8867 }
   8868 
   8869 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8870 _mm512_stream_load_si512 (void *__P)
   8871 {
   8872   return __builtin_ia32_movntdqa512 ((__v8di *)__P);
   8873 }
   8874 
   8875 static __inline__ void __DEFAULT_FN_ATTRS
   8876 _mm512_stream_pd (double *__P, __m512d __A)
   8877 {
   8878   __builtin_nontemporal_store((__v8df)__A, (__v8df*)__P);
   8879 }
   8880 
   8881 static __inline__ void __DEFAULT_FN_ATTRS
   8882 _mm512_stream_ps (float *__P, __m512 __A)
   8883 {
   8884   __builtin_nontemporal_store((__v16sf)__A, (__v16sf*)__P);
   8885 }
   8886 
   8887 static __inline__ __m512d __DEFAULT_FN_ATTRS
   8888 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
   8889 {
   8890   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
   8891                   (__v8df) __W,
   8892                   (__mmask8) __U);
   8893 }
   8894 
   8895 static __inline__ __m512d __DEFAULT_FN_ATTRS
   8896 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
   8897 {
   8898   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
   8899                   (__v8df)
   8900                   _mm512_setzero_pd (),
   8901                   (__mmask8) __U);
   8902 }
   8903 
   8904 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8905 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
   8906 {
   8907   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
   8908                   (__v8di) __W,
   8909                   (__mmask8) __U);
   8910 }
   8911 
   8912 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8913 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
   8914 {
   8915   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
   8916                   (__v8di)
   8917                   _mm512_setzero_si512 (),
   8918                   (__mmask8) __U);
   8919 }
   8920 
   8921 static __inline__ __m512 __DEFAULT_FN_ATTRS
   8922 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
   8923 {
   8924   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
   8925                  (__v16sf) __W,
   8926                  (__mmask16) __U);
   8927 }
   8928 
   8929 static __inline__ __m512 __DEFAULT_FN_ATTRS
   8930 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
   8931 {
   8932   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
   8933                  (__v16sf)
   8934                  _mm512_setzero_ps (),
   8935                  (__mmask16) __U);
   8936 }
   8937 
   8938 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8939 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
   8940 {
   8941   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
   8942                   (__v16si) __W,
   8943                   (__mmask16) __U);
   8944 }
   8945 
   8946 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8947 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
   8948 {
   8949   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
   8950                   (__v16si)
   8951                   _mm512_setzero_si512 (),
   8952                   (__mmask16) __U);
   8953 }
   8954 
   8955 #define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \
   8956   (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
   8957                                       (__v4sf)(__m128)(Y), (int)(P), \
   8958                                       (__mmask8)-1, (int)(R)); })
   8959 
   8960 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \
   8961   (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
   8962                                       (__v4sf)(__m128)(Y), (int)(P), \
   8963                                       (__mmask8)(M), (int)(R)); })
   8964 
   8965 #define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \
   8966   (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
   8967                                       (__v4sf)(__m128)(Y), (int)(P), \
   8968                                       (__mmask8)-1, \
   8969                                       _MM_FROUND_CUR_DIRECTION); })
   8970 
   8971 #define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \
   8972   (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
   8973                                       (__v4sf)(__m128)(Y), (int)(P), \
   8974                                       (__mmask8)(M), \
   8975                                       _MM_FROUND_CUR_DIRECTION); })
   8976 
   8977 #define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \
   8978   (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
   8979                                       (__v2df)(__m128d)(Y), (int)(P), \
   8980                                       (__mmask8)-1, (int)(R)); })
   8981 
   8982 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \
   8983   (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
   8984                                       (__v2df)(__m128d)(Y), (int)(P), \
   8985                                       (__mmask8)(M), (int)(R)); })
   8986 
   8987 #define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \
   8988   (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
   8989                                       (__v2df)(__m128d)(Y), (int)(P), \
   8990                                       (__mmask8)-1, \
   8991                                       _MM_FROUND_CUR_DIRECTION); })
   8992 
   8993 #define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \
   8994   (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
   8995                                       (__v2df)(__m128d)(Y), (int)(P), \
   8996                                       (__mmask8)(M), \
   8997                                       _MM_FROUND_CUR_DIRECTION); })
   8998 
   8999 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9000 _mm512_movehdup_ps (__m512 __A)
   9001 {
   9002   return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
   9003                          1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
   9004 }
   9005 
   9006 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9007 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
   9008 {
   9009   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
   9010                                              (__v16sf)_mm512_movehdup_ps(__A),
   9011                                              (__v16sf)__W);
   9012 }
   9013 
   9014 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9015 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
   9016 {
   9017   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
   9018                                              (__v16sf)_mm512_movehdup_ps(__A),
   9019                                              (__v16sf)_mm512_setzero_ps());
   9020 }
   9021 
   9022 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9023 _mm512_moveldup_ps (__m512 __A)
   9024 {
   9025   return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
   9026                          0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
   9027 }
   9028 
   9029 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9030 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
   9031 {
   9032   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
   9033                                              (__v16sf)_mm512_moveldup_ps(__A),
   9034                                              (__v16sf)__W);
   9035 }
   9036 
   9037 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9038 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
   9039 {
   9040   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
   9041                                              (__v16sf)_mm512_moveldup_ps(__A),
   9042                                              (__v16sf)_mm512_setzero_ps());
   9043 }
   9044 
   9045 #define _mm512_shuffle_epi32(A, I) __extension__ ({ \
   9046   (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
   9047                                    (__v16si)_mm512_undefined_epi32(), \
   9048                                    0  + (((I) >> 0) & 0x3), \
   9049                                    0  + (((I) >> 2) & 0x3), \
   9050                                    0  + (((I) >> 4) & 0x3), \
   9051                                    0  + (((I) >> 6) & 0x3), \
   9052                                    4  + (((I) >> 0) & 0x3), \
   9053                                    4  + (((I) >> 2) & 0x3), \
   9054                                    4  + (((I) >> 4) & 0x3), \
   9055                                    4  + (((I) >> 6) & 0x3), \
   9056                                    8  + (((I) >> 0) & 0x3), \
   9057                                    8  + (((I) >> 2) & 0x3), \
   9058                                    8  + (((I) >> 4) & 0x3), \
   9059                                    8  + (((I) >> 6) & 0x3), \
   9060                                    12 + (((I) >> 0) & 0x3), \
   9061                                    12 + (((I) >> 2) & 0x3), \
   9062                                    12 + (((I) >> 4) & 0x3), \
   9063                                    12 + (((I) >> 6) & 0x3)); })
   9064 
   9065 #define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
   9066   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
   9067                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
   9068                                       (__v16si)(__m512i)(W)); })
   9069 
   9070 #define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
   9071   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
   9072                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
   9073                                       (__v16si)_mm512_setzero_si512()); })
   9074 
   9075 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9076 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
   9077 {
   9078   return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
   9079                 (__v8df) __W,
   9080                 (__mmask8) __U);
   9081 }
   9082 
   9083 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9084 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
   9085 {
   9086   return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
   9087                 (__v8df) _mm512_setzero_pd (),
   9088                 (__mmask8) __U);
   9089 }
   9090 
   9091 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9092 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
   9093 {
   9094   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
   9095                 (__v8di) __W,
   9096                 (__mmask8) __U);
   9097 }
   9098 
   9099 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9100 _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
   9101 {
   9102   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
   9103                 (__v8di) _mm512_setzero_pd (),
   9104                 (__mmask8) __U);
   9105 }
   9106 
   9107 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9108 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
   9109 {
   9110   return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
   9111               (__v8df) __W,
   9112               (__mmask8) __U);
   9113 }
   9114 
   9115 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9116 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
   9117 {
   9118   return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
   9119               (__v8df) _mm512_setzero_pd(),
   9120               (__mmask8) __U);
   9121 }
   9122 
   9123 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9124 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
   9125 {
   9126   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
   9127               (__v8di) __W,
   9128               (__mmask8) __U);
   9129 }
   9130 
   9131 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9132 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
   9133 {
   9134   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
   9135               (__v8di) _mm512_setzero_pd(),
   9136               (__mmask8) __U);
   9137 }
   9138 
   9139 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9140 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
   9141 {
   9142   return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
   9143                    (__v16sf) __W,
   9144                    (__mmask16) __U);
   9145 }
   9146 
   9147 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9148 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
   9149 {
   9150   return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
   9151                    (__v16sf) _mm512_setzero_ps(),
   9152                    (__mmask16) __U);
   9153 }
   9154 
   9155 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9156 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
   9157 {
   9158   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
   9159               (__v16si) __W,
   9160               (__mmask16) __U);
   9161 }
   9162 
   9163 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9164 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
   9165 {
   9166   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
   9167               (__v16si) _mm512_setzero_ps(),
   9168               (__mmask16) __U);
   9169 }
   9170 
   9171 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9172 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
   9173 {
   9174   return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
   9175                (__v16sf) __W,
   9176                (__mmask16) __U);
   9177 }
   9178 
   9179 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9180 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
   9181 {
   9182   return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
   9183                (__v16sf) _mm512_setzero_ps(),
   9184                (__mmask16) __U);
   9185 }
   9186 
   9187 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9188 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
   9189 {
   9190   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
   9191                 (__v16si) __W,
   9192                 (__mmask16) __U);
   9193 }
   9194 
   9195 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9196 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
   9197 {
   9198   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
   9199                 (__v16si) _mm512_setzero_ps(),
   9200                 (__mmask16) __U);
   9201 }
   9202 
   9203 #define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \
   9204   (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
   9205                                            (__v8df)_mm512_undefined_pd(), \
   9206                                            (__mmask8)-1, (int)(R)); })
   9207 
   9208 #define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \
   9209   (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
   9210                                            (__v8df)(__m512d)(W), \
   9211                                            (__mmask8)(U), (int)(R)); })
   9212 
   9213 #define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \
   9214   (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
   9215                                            (__v8df)_mm512_setzero_pd(), \
   9216                                            (__mmask8)(U), (int)(R)); })
   9217 
   9218 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9219 _mm512_cvtps_pd (__m256 __A)
   9220 {
   9221   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
   9222                 (__v8df)
   9223                 _mm512_undefined_pd (),
   9224                 (__mmask8) -1,
   9225                 _MM_FROUND_CUR_DIRECTION);
   9226 }
   9227 
   9228 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9229 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
   9230 {
   9231   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
   9232                 (__v8df) __W,
   9233                 (__mmask8) __U,
   9234                 _MM_FROUND_CUR_DIRECTION);
   9235 }
   9236 
   9237 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9238 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
   9239 {
   9240   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
   9241                 (__v8df)
   9242                 _mm512_setzero_pd (),
   9243                 (__mmask8) __U,
   9244                 _MM_FROUND_CUR_DIRECTION);
   9245 }
   9246 
   9247 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9248 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
   9249 {
   9250   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
   9251               (__v8df) __A,
   9252               (__v8df) __W);
   9253 }
   9254 
   9255 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9256 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
   9257 {
   9258   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
   9259               (__v8df) __A,
   9260               (__v8df) _mm512_setzero_pd ());
   9261 }
   9262 
   9263 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9264 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
   9265 {
   9266   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
   9267              (__v16sf) __A,
   9268              (__v16sf) __W);
   9269 }
   9270 
   9271 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9272 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
   9273 {
   9274   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
   9275              (__v16sf) __A,
   9276              (__v16sf) _mm512_setzero_ps ());
   9277 }
   9278 
   9279 static __inline__ void __DEFAULT_FN_ATTRS
   9280 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
   9281 {
   9282   __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
   9283             (__mmask8) __U);
   9284 }
   9285 
   9286 static __inline__ void __DEFAULT_FN_ATTRS
   9287 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
   9288 {
   9289   __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
   9290             (__mmask8) __U);
   9291 }
   9292 
   9293 static __inline__ void __DEFAULT_FN_ATTRS
   9294 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
   9295 {
   9296   __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
   9297             (__mmask16) __U);
   9298 }
   9299 
   9300 static __inline__ void __DEFAULT_FN_ATTRS
   9301 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
   9302 {
   9303   __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
   9304             (__mmask16) __U);
   9305 }
   9306 
   9307 #define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \
   9308   (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
   9309                                              (__v2df)(__m128d)(B), \
   9310                                              (__v4sf)_mm_undefined_ps(), \
   9311                                              (__mmask8)-1, (int)(R)); })
   9312 
   9313 #define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \
   9314   (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
   9315                                              (__v2df)(__m128d)(B), \
   9316                                              (__v4sf)(__m128)(W), \
   9317                                              (__mmask8)(U), (int)(R)); })
   9318 
   9319 #define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \
   9320   (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
   9321                                              (__v2df)(__m128d)(B), \
   9322                                              (__v4sf)_mm_setzero_ps(), \
   9323                                              (__mmask8)(U), (int)(R)); })
   9324 
   9325 static __inline__ __m128 __DEFAULT_FN_ATTRS
   9326 _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
   9327 {
   9328   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
   9329                                              (__v2df)(__B),
   9330                                              (__v4sf)(__W),
   9331                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
   9332 }
   9333 
   9334 static __inline__ __m128 __DEFAULT_FN_ATTRS
   9335 _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
   9336 {
   9337   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
   9338                                              (__v2df)(__B),
   9339                                              (__v4sf)_mm_setzero_ps(),
   9340                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
   9341 }
   9342 
   9343 #define _mm_cvtss_i32 _mm_cvtss_si32
   9344 #define _mm_cvtss_i64 _mm_cvtss_si64
   9345 #define _mm_cvtsd_i32 _mm_cvtsd_si32
   9346 #define _mm_cvtsd_i64 _mm_cvtsd_si64
   9347 #define _mm_cvti32_sd _mm_cvtsi32_sd
   9348 #define _mm_cvti64_sd _mm_cvtsi64_sd
   9349 #define _mm_cvti32_ss _mm_cvtsi32_ss
   9350 #define _mm_cvti64_ss _mm_cvtsi64_ss
   9351 
   9352 #define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \
   9353   (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
   9354                                      (int)(R)); })
   9355 
   9356 #define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \
   9357   (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
   9358                                      (int)(R)); })
   9359 
   9360 #define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \
   9361   (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })
   9362 
   9363 #define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \
   9364   (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })
   9365 
   9366 #define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \
   9367   (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
   9368                                     (int)(R)); })
   9369 
   9370 #define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \
   9371   (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
   9372                                     (int)(R)); })
   9373 
   9374 #define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \
   9375   (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
   9376                                               (__v4sf)(__m128)(B), \
   9377                                               (__v2df)_mm_undefined_pd(), \
   9378                                               (__mmask8)-1, (int)(R)); })
   9379 
   9380 #define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \
   9381   (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
   9382                                               (__v4sf)(__m128)(B), \
   9383                                               (__v2df)(__m128d)(W), \
   9384                                               (__mmask8)(U), (int)(R)); })
   9385 
   9386 #define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \
   9387   (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
   9388                                               (__v4sf)(__m128)(B), \
   9389                                               (__v2df)_mm_setzero_pd(), \
   9390                                               (__mmask8)(U), (int)(R)); })
   9391 
   9392 static __inline__ __m128d __DEFAULT_FN_ATTRS
   9393 _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
   9394 {
   9395   return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
   9396                                               (__v4sf)(__B),
   9397                                               (__v2df)(__W),
   9398                                               (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
   9399 }
   9400 
   9401 static __inline__ __m128d __DEFAULT_FN_ATTRS
   9402 _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
   9403 {
   9404   return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
   9405                                               (__v4sf)(__B),
   9406                                               (__v2df)_mm_setzero_pd(),
   9407                                               (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
   9408 }
   9409 
   9410 static __inline__ __m128d __DEFAULT_FN_ATTRS
   9411 _mm_cvtu32_sd (__m128d __A, unsigned __B)
   9412 {
   9413   return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
   9414 }
   9415 
   9416 #define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \
   9417   (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
   9418                                       (unsigned long long)(B), (int)(R)); })
   9419 
   9420 static __inline__ __m128d __DEFAULT_FN_ATTRS
   9421 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
   9422 {
   9423   return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
   9424                  _MM_FROUND_CUR_DIRECTION);
   9425 }
   9426 
   9427 #define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \
   9428   (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
   9429                                      (int)(R)); })
   9430 
   9431 static __inline__ __m128 __DEFAULT_FN_ATTRS
   9432 _mm_cvtu32_ss (__m128 __A, unsigned __B)
   9433 {
   9434   return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
   9435                 _MM_FROUND_CUR_DIRECTION);
   9436 }
   9437 
   9438 #define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \
   9439   (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
   9440                                      (unsigned long long)(B), (int)(R)); })
   9441 
   9442 static __inline__ __m128 __DEFAULT_FN_ATTRS
   9443 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
   9444 {
   9445   return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
   9446                 _MM_FROUND_CUR_DIRECTION);
   9447 }
   9448 
   9449 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9450 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
   9451 {
   9452   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
   9453                  __M);
   9454 }
   9455 
   9456 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9457 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
   9458 {
   9459   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
   9460                  __M);
   9461 }
   9462 
   9463 static __inline __m512i __DEFAULT_FN_ATTRS
   9464 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
   9465      int __E, int __F, int __G, int __H,
   9466      int __I, int __J, int __K, int __L,
   9467      int __M, int __N, int __O, int __P)
   9468 {
   9469   return __extension__ (__m512i)(__v16si)
   9470   { __P, __O, __N, __M, __L, __K, __J, __I,
   9471     __H, __G, __F, __E, __D, __C, __B, __A };
   9472 }
   9473 
   9474 #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,           \
   9475        e8,e9,e10,e11,e12,e13,e14,e15)          \
   9476   _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
   9477                    (e5),(e4),(e3),(e2),(e1),(e0))
   9478 
   9479 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9480 _mm512_set_epi64 (long long __A, long long __B, long long __C,
   9481      long long __D, long long __E, long long __F,
   9482      long long __G, long long __H)
   9483 {
   9484   return __extension__ (__m512i) (__v8di)
   9485   { __H, __G, __F, __E, __D, __C, __B, __A };
   9486 }
   9487 
   9488 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)           \
   9489   _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
   9490 
   9491 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9492 _mm512_set_pd (double __A, double __B, double __C, double __D,
   9493         double __E, double __F, double __G, double __H)
   9494 {
   9495   return __extension__ (__m512d)
   9496   { __H, __G, __F, __E, __D, __C, __B, __A };
   9497 }
   9498 
   9499 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)              \
   9500   _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
   9501 
   9502 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9503 _mm512_set_ps (float __A, float __B, float __C, float __D,
   9504         float __E, float __F, float __G, float __H,
   9505         float __I, float __J, float __K, float __L,
   9506         float __M, float __N, float __O, float __P)
   9507 {
   9508   return __extension__ (__m512)
   9509   { __P, __O, __N, __M, __L, __K, __J, __I,
   9510     __H, __G, __F, __E, __D, __C, __B, __A };
   9511 }
   9512 
   9513 #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
   9514   _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
   9515                 (e4),(e3),(e2),(e1),(e0))
   9516 
   9517 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9518 _mm512_abs_ps(__m512 A)
   9519 {
   9520   return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)A) ;
   9521 }
   9522 
   9523 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9524 _mm512_mask_abs_ps(__m512 W, __mmask16 K, __m512 A)
   9525 {
   9526   return (__m512)_mm512_mask_and_epi32((__m512i)W, K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)A) ;
   9527 }
   9528 
   9529 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9530 _mm512_abs_pd(__m512d A)
   9531 {
   9532   return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)A) ;
   9533 }
   9534 
   9535 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9536 _mm512_mask_abs_pd(__m512d W, __mmask8 K, __m512d A)
   9537 {
   9538   return (__m512d)_mm512_mask_and_epi64((__v8di)W, K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)A);
   9539 }
   9540 
   9541 #undef __DEFAULT_FN_ATTRS
   9542 
   9543 #endif // __AVX512FINTRIN_H
   9544