/* Home | History | Annotate | Download | only in include */
      1 /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 #ifndef __IMMINTRIN_H
     24 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
     25 #endif
     26 
     27 #ifndef __AVX512FINTRIN_H
     28 #define __AVX512FINTRIN_H
     29 
/* Internal 512-bit (64-byte) vector types; the element type fixes the lane
   count (64 x i8, 32 x i16, 8 x f64, 16 x f32, 8 x i64, 16 x i32).  */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* Public 512-bit vector types exposed to users of <immintrin.h>.  */
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Write-mask types: one bit per lane (8 lanes of 64 bits / 16 of 32 bits). */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
     49 
/* Rounding mode macros.  */
#define _MM_FROUND_TO_NEAREST_INT   0x00  /* round to nearest */
#define _MM_FROUND_TO_NEG_INF       0x01  /* round down */
#define _MM_FROUND_TO_POS_INF       0x02  /* round up */
#define _MM_FROUND_TO_ZERO          0x03  /* truncate */
#define _MM_FROUND_CUR_DIRECTION    0x04  /* use the current rounding mode */
     56 
     57 /* Constants for integer comparison predicates */
/* Constants for integer comparison predicates.  The enumerator values are
   the immediate encodings consumed by the masked integer-compare intrinsics;
   GE/GT are spelled as aliases of the NLT/NLE encodings.  */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,  /* Encoding 3 is reserved/unused */
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;
     69 
/* Four-way element-selector encodings, two bits per position with A=0,
   B=1, C=2, D=3, so _MM_PERM_WXYZ == (W<<6)|(X<<4)|(Y<<2)|Z.  The letter
   order mirrors the selector from the most- to least-significant pair.  */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
    159 
/* Normalization-interval selector for the getmant (get-mantissa)
   intrinsics: chooses the range the extracted mantissa is mapped into.  */
typedef enum
{
  _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
    167 
/* Sign-control selector for the getmant (get-mantissa) intrinsics.  */
typedef enum
{
  _MM_MANT_SIGN_src,    /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero,   /* sign = 0             */
  _MM_MANT_SIGN_nan   /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
    174 
/* Define the default attributes for the functions in this file:
   force inlining, suppress debug info, and require the avx512f target
   feature so each intrinsic can be compiled regardless of the global
   target settings of the translation unit.  */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
    177 
    178 /* Create vectors with repeated elements */
    179 
/* Return a 512-bit integer vector with all bits cleared.  */
static  __inline __m512i __DEFAULT_FN_ATTRS
_mm512_setzero_si512(void)
{
  return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}

/* Alias kept for source compatibility.  */
#define _mm512_setzero_epi32 _mm512_setzero_si512
    187 
/* Return a 512-bit vector of [8 x double] with undefined contents.
   The value must be written before it is read.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_undefined_pd(void)
{
  return (__m512d)__builtin_ia32_undef512();
}

/* Return a 512-bit vector of [16 x float] with undefined contents.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_undefined(void)
{
  return (__m512)__builtin_ia32_undef512();
}

/* Same as _mm512_undefined, under the conventional _ps suffix.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_undefined_ps(void)
{
  return (__m512)__builtin_ia32_undef512();
}

/* Return a 512-bit integer vector with undefined contents.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_undefined_epi32(void)
{
  return (__m512i)__builtin_ia32_undef512();
}
    211 
/* Broadcast the low 32-bit element of __A into all 16 lanes of the
   result (all shuffle indices select element 0; the second shuffle
   operand is unused/undefined).  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcastd_epi32 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si) __A,
                                          (__v4si)_mm_undefined_si128(),
                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}

/* Merge-masked broadcast: lane i is the broadcast value when bit i of
   __M is set, otherwise the corresponding lane of __O.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si) _mm512_broadcastd_epi32(__A),
                                             (__v16si) __O);
}

/* Zero-masked broadcast: lanes with a clear mask bit are zeroed.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si) _mm512_broadcastd_epi32(__A),
                                             (__v16si) _mm512_setzero_si512());
}

/* Broadcast the low 64-bit element of __A into all 8 lanes.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcastq_epi64 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v2di) __A,
                                          (__v2di) _mm_undefined_si128(),
                                          0, 0, 0, 0, 0, 0, 0, 0);
}

/* Merge-masked 64-bit broadcast (unselected lanes come from __O).  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512(__M,
                                             (__v8di) _mm512_broadcastq_epi64(__A),
                                             (__v8di) __O);

}

/* Zero-masked 64-bit broadcast (unselected lanes are zeroed).  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512(__M,
                                             (__v8di) _mm512_broadcastq_epi64(__A),
                                             (__v8di) _mm512_setzero_si512());
}
    260 
/* Zero-masked broadcast of the scalar __A to all 16 x 32-bit lanes;
   lanes with a clear bit in __M become zero.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
{
  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
                 (__v16si)
                 _mm512_setzero_si512 (),
                 __M);
}

/* Zero-masked broadcast of the 64-bit scalar __A to all 8 lanes.  On
   32-bit targets there is no 64-bit GPR, so a different builtin
   (_mem_mask variant) is used for the same operation.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
{
#ifdef __x86_64__
  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
                 (__v8di)
                 _mm512_setzero_si512 (),
                 __M);
#else
  return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
                 (__v8di)
                 _mm512_setzero_si512 (),
                 __M);
#endif
}
    285 
/* Return a 512-bit vector of [16 x float] with all elements 0.0f.  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_setzero_ps(void)
{
  return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

/* Alias kept for source compatibility.  */
#define _mm512_setzero _mm512_setzero_ps

/* Return a 512-bit vector of [8 x double] with all elements 0.0.  */
static  __inline __m512d __DEFAULT_FN_ATTRS
_mm512_setzero_pd(void)
{
  return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}
    300 
/* Broadcast the float __w to all 16 lanes.  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_set1_ps(float __w)
{
  return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
                   __w, __w, __w, __w, __w, __w, __w, __w  };
}

/* Broadcast the double __w to all 8 lanes.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_set1_pd(double __w)
{
  return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
}

/* Broadcast the 8-bit value __w to all 64 lanes.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi8(char __w)
{
  return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w  };
}

/* Broadcast the 16-bit value __w to all 32 lanes.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi16(short __w)
{
  return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w };
}

/* Broadcast the 32-bit value __s to all 16 lanes.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi32(int __s)
{
  return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
                             __s, __s, __s, __s, __s, __s, __s, __s };
}

/* Broadcast the 64-bit value __d to all 8 lanes.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi64(long long __d)
{
  return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
}
    348 
/* Broadcast the low single-precision element of __A to all 16 lanes
   (every shuffle index selects element 0).  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcastss_ps(__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf) __A,
                                         (__v4sf)_mm_undefined_ps(),
                                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
    356 
/* Repeat the 4-element pattern {__D,__C,__B,__A} across the vector.
   __D lands in the lowest lane; the _mm512_setr4_* macros below take
   the same arguments in ascending (memory) order instead.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
{
  return  (__m512i)(__v16si)
   { __D, __C, __B, __A, __D, __C, __B, __A,
     __D, __C, __B, __A, __D, __C, __B, __A };
}

/* 64-bit variant: the 4-element pattern fills the 8 lanes twice.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set4_epi64 (long long __A, long long __B, long long __C,
       long long __D)
{
  return  (__m512i) (__v8di)
   { __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Double-precision variant.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_set4_pd (double __A, double __B, double __C, double __D)
{
  return  (__m512d)
   { __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Single-precision variant: the pattern fills the 16 lanes four times. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_set4_ps (float __A, float __B, float __C, float __D)
{
  return  (__m512)
   { __D, __C, __B, __A, __D, __C, __B, __A,
     __D, __C, __B, __A, __D, __C, __B, __A };
}

/* "setr" = arguments in ascending lane order; implemented by reversing
   the arguments and delegating to the set4 functions above.  */
#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3)                \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3)                \
  _mm512_set4_ps((e3),(e2),(e1),(e0))
    399 
/* Broadcast the low double-precision element of __A to all 8 lanes.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_broadcastsd_pd(__m128d __A)
{
  return (__m512d)__builtin_shufflevector((__v2df) __A,
                                          (__v2df) _mm_undefined_pd(),
                                          0, 0, 0, 0, 0, 0, 0, 0);
}
    407 
/* Cast between vector types.
   Widening casts (128/256 -> 512) leave the upper elements undefined
   (the -1 shuffle indices).  Narrowing casts (512 -> 128/256) return
   the low elements.  Same-width casts only reinterpret the bits.  */

/* Widen [4 x double] to [8 x double]; upper 4 elements undefined.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castpd256_pd512(__m256d __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
}

/* Widen [8 x float] to [16 x float]; upper 8 elements undefined.  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castps256_ps512(__m256 __a)
{
  return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
                                          -1, -1, -1, -1, -1, -1, -1, -1);
}

/* Return the low 2 doubles of a 512-bit vector.  */
static __inline __m128d __DEFAULT_FN_ATTRS
_mm512_castpd512_pd128(__m512d __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1);
}

/* Return the low 4 doubles of a 512-bit vector.  */
static __inline __m256d __DEFAULT_FN_ATTRS
_mm512_castpd512_pd256 (__m512d __A)
{
  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
}

/* Return the low 4 floats of a 512-bit vector.  */
static __inline __m128 __DEFAULT_FN_ATTRS
_mm512_castps512_ps128(__m512 __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
}

/* Return the low 8 floats of a 512-bit vector.  */
static __inline __m256 __DEFAULT_FN_ATTRS
_mm512_castps512_ps256 (__m512 __A)
{
  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
}

/* Reinterpret the bits of [8 x double] as [16 x float].  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castpd_ps (__m512d __A)
{
  return (__m512) (__A);
}

/* Reinterpret the bits of [8 x double] as a 512-bit integer vector.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_castpd_si512 (__m512d __A)
{
  return (__m512i) (__A);
}

/* Widen [2 x double] to [8 x double]; upper 6 elements undefined.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_castpd128_pd512 (__m128d __A)
{
  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
}

/* Reinterpret the bits of [16 x float] as [8 x double].  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castps_pd (__m512 __A)
{
  return (__m512d) (__A);
}

/* Reinterpret the bits of [16 x float] as a 512-bit integer vector.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_castps_si512 (__m512 __A)
{
  return (__m512i) (__A);
}

/* Widen [4 x float] to [16 x float]; upper 12 elements undefined.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_castps128_ps512 (__m128 __A)
{
    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
}

/* Widen a 128-bit integer vector to 512 bits; upper bits undefined.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_castsi128_si512 (__m128i __A)
{
   return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
}

/* Widen a 256-bit integer vector to 512 bits; upper bits undefined.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_castsi256_si512 (__m256i __A)
{
   return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
}

/* Reinterpret a 512-bit integer vector as [16 x float].  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castsi512_ps (__m512i __A)
{
  return (__m512) (__A);
}

/* Reinterpret a 512-bit integer vector as [8 x double].  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castsi512_pd (__m512i __A)
{
  return (__m512d) (__A);
}

/* Return the low 128 bits of a 512-bit integer vector.  */
static __inline __m128i __DEFAULT_FN_ATTRS
_mm512_castsi512_si128 (__m512i __A)
{
  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
}

/* Return the low 256 bits of a 512-bit integer vector.  */
static __inline __m256i __DEFAULT_FN_ATTRS
_mm512_castsi512_si256 (__m512i __A)
{
  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
}
    518 
/* Convert the low 16 bits of an int into a 16-bit write mask.  */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_int2mask(int __a)
{
  return (__mmask16)__a;
}

/* Convert a 16-bit write mask into an int (zero-extended).  */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask2int(__mmask16 __a)
{
  return (int)__a;
}
    530 
/// \brief Constructs a 512-bit floating-point vector of [8 x double] from a
///    128-bit floating-point vector of [2 x double]. The lower 128 bits
///    contain the value of the source vector. The upper 384 bits are set
///    to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
///    contain the value of the parameter. The upper 384 bits are set to zero.
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_zextpd128_pd512(__m128d __a)
{
  /* Indices >= 2 select elements of the zero vector (second operand).  */
  return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
}
    549 
/// \brief Constructs a 512-bit floating-point vector of [8 x double] from a
///    256-bit floating-point vector of [4 x double]. The lower 256 bits
///    contain the value of the source vector. The upper 256 bits are set
///    to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 256-bit vector of [4 x double].
/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
///    contain the value of the parameter. The upper 256 bits are set to zero.
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_zextpd256_pd512(__m256d __a)
{
  /* Indices 4..7 select elements of the zero vector (second operand).  */
  return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
}
    568 
/// \brief Constructs a 512-bit floating-point vector of [16 x float] from a
///    128-bit floating-point vector of [4 x float]. The lower 128 bits contain
///    the value of the source vector. The upper 384 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 128-bit vector of [4 x float].
/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
///    contain the value of the parameter. The upper 384 bits are set to zero.
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_zextps128_ps512(__m128 __a)
{
  /* Indices >= 4 select elements of the zero vector (second operand).  */
  return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
}
    586 
/// \brief Constructs a 512-bit floating-point vector of [16 x float] from a
///    256-bit floating-point vector of [8 x float]. The lower 256 bits contain
///    the value of the source vector. The upper 256 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 256-bit vector of [8 x float].
/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
///    contain the value of the parameter. The upper 256 bits are set to zero.
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_zextps256_ps512(__m256 __a)
{
  /* Indices 8..15 select elements of the zero vector (second operand).  */
  return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}
    604 
/// \brief Constructs a 512-bit integer vector from a 128-bit integer vector.
///    The lower 128 bits contain the value of the source vector. The upper
///    384 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 128-bit integer vector.
/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
///    the parameter. The upper 384 bits are set to zero.
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_zextsi128_si512(__m128i __a)
{
  /* Indices >= 2 select elements of the zero vector (second operand).  */
  return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
}
    622 
/// \brief Constructs a 512-bit integer vector from a 256-bit integer vector.
///    The lower 256 bits contain the value of the source vector. The upper
///    256 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 256-bit integer vector.
/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
///    the parameter. The upper 256 bits are set to zero.
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_zextsi256_si512(__m256i __a)
{
  /* Indices 4..7 select elements of the zero vector (second operand).  */
  return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
}
    640 
/* Bitwise operators */

/* Bitwise AND of two 512-bit vectors, viewed as 16 x 32-bit lanes.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a & (__v16su)__b);
}
    647 
/* Merge-masked AND: lane i is (__a & __b) when bit i of __k is set,
   otherwise the corresponding lane of __src.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                (__v16si) _mm512_and_epi32(__a, __b),
                (__v16si) __src);
}
    655 
    656 static __inline__ __m512i __DEFAULT_FN_ATTRS
    657 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
    658 {
    659   return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
    660                                          __k, __a, __b);
    661 }
    662 
/* Bitwise AND of two 512-bit vectors, viewed as 8 x 64-bit lanes.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}
    668 
/* Merge-masked 64-bit AND: unselected lanes come from __src.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
    return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
                (__v8di) _mm512_and_epi64(__a, __b),
                (__v8di) __src);
}
    676 
    677 static __inline__ __m512i __DEFAULT_FN_ATTRS
    678 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
    679 {
    680   return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
    681                                          __k, __a, __b);
    682 }
    683 
/* Bitwise (~__A) & __B over the full 512 bits.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_si512 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
}
    689 
/* Bitwise (~__A) & __B, viewed as 16 x 32-bit lanes.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
}
    695 
/* Merge-masked AND-NOT: unselected lanes come from __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_andnot_epi32(__A, __B),
                                         (__v16si)__W);
}
    703 
    704 static __inline__ __m512i __DEFAULT_FN_ATTRS
    705 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
    706 {
    707   return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
    708                                            __U, __A, __B);
    709 }
    710 
/* Bitwise (~__A) & __B, viewed as 8 x 64-bit lanes.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
}
    716 
/* Merge-masked 64-bit AND-NOT: unselected lanes come from __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_andnot_epi64(__A, __B),
                                          (__v8di)__W);
}
    724 
    725 static __inline__ __m512i __DEFAULT_FN_ATTRS
    726 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
    727 {
    728   return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
    729                                            __U, __A, __B);
    730 }
    731 
/* Bitwise OR over sixteen 32-bit lanes, expressed with the C `|` operator on
   unsigned vector types so clang can pattern-match it freely. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a | (__v16su)__b);
}

/* Merge-masking OR: lane i = (__a | __b)[i] when bit i of __k is set, else
   taken unchanged from __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si)_mm512_or_epi32(__a, __b),
                                             (__v16si)__src);
}

/* Zero-masking OR: unselected lanes become 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
}

/* Bitwise OR over eight 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}

/* Merge-masking 64-bit OR: unselected lanes come from __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_or_epi64(__a, __b),
                                             (__v8di)__src);
}

/* Zero-masking 64-bit OR: unselected lanes become 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
}

/* Bitwise XOR over sixteen 32-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a ^ (__v16su)__b);
}

/* Merge-masking 32-bit XOR: unselected lanes come from __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                            (__v16si)_mm512_xor_epi32(__a, __b),
                                            (__v16si)__src);
}

/* Zero-masking 32-bit XOR: unselected lanes become 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
}

/* Bitwise XOR over eight 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}

/* Merge-masking 64-bit XOR: unselected lanes come from __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_xor_epi64(__a, __b),
                                             (__v8di)__src);
}

/* Zero-masking 64-bit XOR: unselected lanes become 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
}
    811 
/* Whole-register (lane-width-agnostic) bitwise AND of two 512-bit vectors.
   The 64-bit unsigned lane view is arbitrary; the result is identical for
   any lane width. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}

/* Whole-register bitwise OR of two 512-bit vectors. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}

/* Whole-register bitwise XOR of two 512-bit vectors. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}
    829 
    830 /* Arithmetic */
    831 
/* Element-wise double-precision add over eight lanes, written with the plain
   C `+` operator (rounding follows the current FP environment). */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_add_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a + (__v8df)__b);
}

/* Element-wise single-precision add over sixteen lanes. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_add_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a + (__v16sf)__b);
}

/* Element-wise double-precision multiply over eight lanes. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mul_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a * (__v8df)__b);
}

/* Element-wise single-precision multiply over sixteen lanes. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mul_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a * (__v16sf)__b);
}

/* Element-wise double-precision subtract (__a - __b) over eight lanes. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_sub_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a - (__v8df)__b);
}

/* Element-wise single-precision subtract (__a - __b) over sixteen lanes. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_sub_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a - (__v16sf)__b);
}
    867 
/* Element-wise 64-bit integer add.  The unsigned lane view (__v8du) makes
   overflow wrap modulo 2^64 without C undefined behavior. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A + (__v8du) __B);
}

/* Merge-masking 64-bit add: lane i = (__A + __B)[i] when bit i of __U is
   set, else taken unchanged from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masking 64-bit add: unselected lanes become 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Element-wise 64-bit integer subtract (__A - __B), wrapping. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A - (__v8du) __B);
}

/* Merge-masking 64-bit subtract: unselected lanes come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masking 64-bit subtract: unselected lanes become 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Element-wise 32-bit integer add over sixteen lanes, wrapping. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A + (__v16su) __B);
}

/* Merge-masking 32-bit add: unselected lanes come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masking 32-bit add: unselected lanes become 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Element-wise 32-bit integer subtract (__A - __B), wrapping. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A - (__v16su) __B);
}

/* Merge-masking 32-bit subtract: unselected lanes come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masking 32-bit subtract: unselected lanes become 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
    955 
/* Packed double max with explicit rounding/SAE control R.  These must be
   macros because R (and for the unmasked form, the -1 mask) has to be a
   compile-time constant for the builtin.  Merge-masking form: unselected
   lanes come from W. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

/* Zero-masking form: unselected lanes become 0. */
#define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

/* Unmasked form: the all-ones mask never selects the source operand, so an
   undefined vector is passed to avoid a needless zeroing. */
#define _mm512_max_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_undefined_pd(), \
                                        (__mmask8)-1, (int)(R)); })

/* Packed double max at the current rounding mode; the zero source is inert
   under the all-ones mask. */
static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_max_pd(__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
             (__v8df) __B,
             (__v8df)
             _mm512_setzero_pd (),
             (__mmask8) -1,
             _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking max: unselected lanes come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
                  (__v8df) __B,
                  (__v8df) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking max: unselected lanes become 0. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
                  (__v8df) __B,
                  (__v8df)
                  _mm512_setzero_pd (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
   1005 
/* Packed single max with explicit rounding/SAE control R (macro because R
   must be a compile-time constant).  Merge-masking form. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

/* Zero-masking form: unselected lanes become 0. */
#define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })

/* Unmasked form; undefined source is inert under the all-ones mask. */
#define _mm512_max_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_undefined_ps(), \
                                       (__mmask16)-1, (int)(R)); })

/* Packed single max at the current rounding mode. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_max_ps(__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
            (__v16sf) __B,
            (__v16sf)
            _mm512_setzero_ps (),
            (__mmask16) -1,
            _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking max: unselected lanes come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
                 (__v16sf) __B,
                 (__v16sf) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking max: unselected lanes become 0. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
                 (__v16sf) __B,
                 (__v16sf)
                 _mm512_setzero_ps (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
   1055 
/* Scalar single-precision max: only element 0 is computed (masked by bit 0
   of __U, falling back to __W[0]); upper elements pass through per the
   builtin's scalar semantics. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking scalar max: element 0 becomes 0 when bit 0 of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Scalar max with explicit rounding/SAE control R (macro: R must be a
   compile-time constant). */
#define _mm_max_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

/* Merge-masking rounded scalar max. */
#define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

/* Zero-masking rounded scalar max. */
#define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
   1091 
/* Scalar double-precision max: only element 0 is computed (masked by bit 0
   of __U, falling back to __W[0]). */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking scalar max: element 0 becomes 0 when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Scalar double max with explicit rounding/SAE control R (macro: R must be
   a compile-time constant). */
#define _mm_max_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masking rounded scalar max. */
#define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masking rounded scalar max. */
#define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
   1127 
/* Signed 32-bit per-lane maximum.  The zero source vector is inert under the
   all-ones mask. */
static __inline __m512i
__DEFAULT_FN_ATTRS
_mm512_max_epi32(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) -1);
}

/* Merge-masking signed 32-bit max: unselected lanes come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
                   (__v16si) __B,
                   (__v16si) __W, __M);
}

/* Zero-masking signed 32-bit max: unselected lanes become 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
                   (__v16si) __B,
                   (__v16si)
                   _mm512_setzero_si512 (),
                   __M);
}

/* Unsigned 32-bit per-lane maximum (lanes still passed as __v16si; the
   builtin interprets them unsigned). */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epu32(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) -1);
}

/* Merge-masking unsigned 32-bit max. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
                   (__v16si) __B,
                   (__v16si) __W, __M);
}

/* Zero-masking unsigned 32-bit max. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
                   (__v16si) __B,
                   (__v16si)
                   _mm512_setzero_si512 (),
                   __M);
}

/* Signed 64-bit per-lane maximum. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epi64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) -1);
}

/* Merge-masking signed 64-bit max. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di) __W, __M);
}

/* Zero-masking signed 64-bit max. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di)
                   _mm512_setzero_si512 (),
                   __M);
}

/* Unsigned 64-bit per-lane maximum. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epu64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) -1);
}

/* Merge-masking unsigned 64-bit max. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di) __W, __M);
}

/* Zero-masking unsigned 64-bit max. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di)
                   _mm512_setzero_si512 (),
                   __M);
}
   1240 
/* Packed double min with explicit rounding/SAE control R (macro: R must be a
   compile-time constant).  Merge-masking form. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

/* Zero-masking form: unselected lanes become 0. */
#define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

/* Unmasked form; undefined source is inert under the all-ones mask. */
#define _mm512_min_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_undefined_pd(), \
                                        (__mmask8)-1, (int)(R)); })

/* Packed double min at the current rounding mode. */
static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_min_pd(__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
             (__v8df) __B,
             (__v8df)
             _mm512_setzero_pd (),
             (__mmask8) -1,
             _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking min: unselected lanes come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
                  (__v8df) __B,
                  (__v8df) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
   1279 
/* Packed single min with explicit rounding/SAE control R (macro: R must be a
   compile-time constant).  Merge-masking form. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

/* Zero-masking form: unselected lanes become 0. */
#define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })

/* Unmasked form; undefined source is inert under the all-ones mask. */
#define _mm512_min_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_undefined_ps(), \
                                       (__mmask16)-1, (int)(R)); })

/* Zero-masking packed double min (placed here between the ps macros and the
   ps functions in the original layout). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
                  (__v8df) __B,
                  (__v8df)
                  _mm512_setzero_pd (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Packed single min at the current rounding mode. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_min_ps(__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
            (__v16sf) __B,
            (__v16sf)
            _mm512_setzero_ps (),
            (__mmask16) -1,
            _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking min: unselected lanes come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
                 (__v16sf) __B,
                 (__v16sf) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking min: unselected lanes become 0. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
                 (__v16sf) __B,
                 (__v16sf)
                 _mm512_setzero_ps (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
   1340 
/* Scalar single-precision min: only element 0 is computed (masked by bit 0
   of __U, falling back to __W[0]). */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking scalar min: element 0 becomes 0 when bit 0 of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Scalar min with explicit rounding/SAE control R (macro: R must be a
   compile-time constant). */
#define _mm_min_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

/* Merge-masking rounded scalar min. */
#define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

/* Zero-masking rounded scalar min. */
#define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
   1376 
/* Scalar double-precision min: only element 0 is computed (masked by bit 0
   of __U, falling back to __W[0]). */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking scalar min: element 0 becomes 0 when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Scalar double min with explicit rounding/SAE control R (macro: R must be
   a compile-time constant). */
#define _mm_min_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masking rounded scalar min. */
#define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masking rounded scalar min. */
#define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
   1412 
   1413 static __inline __m512i
   1414 __DEFAULT_FN_ATTRS
   1415 _mm512_min_epi32(__m512i __A, __m512i __B)
   1416 {
   1417   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
   1418               (__v16si) __B,
   1419               (__v16si)
   1420               _mm512_setzero_si512 (),
   1421               (__mmask16) -1);
   1422 }
   1423 
   1424 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1425 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   1426 {
   1427   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
   1428                    (__v16si) __B,
   1429                    (__v16si) __W, __M);
   1430 }
   1431 
   1432 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1433 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
   1434 {
   1435   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
   1436                    (__v16si) __B,
   1437                    (__v16si)
   1438                    _mm512_setzero_si512 (),
   1439                    __M);
   1440 }
   1441 
/* Elementwise unsigned 32-bit minimum (VPMINUD); plain, merge-masked and
   zero-masked forms.  The lanes are passed as __v16si to match the builtin
   signature; the builtin itself treats them as unsigned. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epu32(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) -1);
}

/* Merge-masked: lanes with a clear bit in __M come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
                   (__v16si) __B,
                   (__v16si) __W, __M);
}

/* Zero-masked: lanes with a clear bit in __M are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
                   (__v16si) __B,
                   (__v16si)
                   _mm512_setzero_si512 (),
                   __M);
}
   1469 
/* Elementwise signed 64-bit minimum of the 8 lanes (VPMINSQ);
   plain, merge-masked and zero-masked forms. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epi64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) -1);
}

/* Merge-masked: lanes with a clear bit in __M come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di) __W, __M);
}

/* Zero-masked: lanes with a clear bit in __M are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di)
                   _mm512_setzero_si512 (),
                   __M);
}
   1497 
/* Elementwise unsigned 64-bit minimum of the 8 lanes (VPMINUQ);
   plain, merge-masked and zero-masked forms. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epu64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) -1);
}

/* Merge-masked: lanes with a clear bit in __M come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di) __W, __M);
}

/* Zero-masked: lanes with a clear bit in __M are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di)
                   _mm512_setzero_si512 (),
                   __M);
}
   1525 
/* Signed widening multiply (VPMULDQ): multiplies the even-indexed 32-bit
   lanes of __X and __Y, producing eight 64-bit products. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
}

/* Merge-masked: masking is applied after the multiply via a generic
   64-bit-lane select, blending with __W. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epi32(__X, __Y),
                                             (__v8di)__W);
}

/* Zero-masked: lanes with a clear bit in __M are zeroed by the select. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epi32(__X, __Y),
                                             (__v8di)_mm512_setzero_si512 ());
}
   1547 
/* Unsigned widening multiply (VPMULUDQ): multiplies the even-indexed 32-bit
   lanes of __X and __Y, producing eight 64-bit products. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epu32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
}

/* Merge-masked: 64-bit-lane select blends the product with __W. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epu32(__X, __Y),
                                             (__v8di)__W);
}

/* Zero-masked: lanes with a clear bit in __M are zeroed by the select. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epu32(__X, __Y),
                                             (__v8di)_mm512_setzero_si512 ());
}
   1569 
/* Low 32 bits of the elementwise 32-bit multiply.  Implemented as a plain C
   vector multiply on *unsigned* lanes (__v16su) so that wraparound is
   well-defined instead of signed-overflow UB; the truncated low half is
   identical either way. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A * (__v16su) __B);
}

/* Zero-masked: lanes with a clear bit in __M are zeroed by the select. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_mullo_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Merge-masked: lanes with a clear bit in __M come from __W. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_mullo_epi32(__A, __B),
                                             (__v16si)__W);
}
   1591 
/* Packed double-precision square root with explicit rounding mode R.
   Merge-masked: lanes with a clear bit in U come from W. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
                                         (int)(R)); })

/* Zero-masked: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(U), (int)(R)); })

/* Unmasked: the all-ones mask makes the undefined passthrough irrelevant. */
#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_undefined_pd(), \
                                         (__mmask8)-1, (int)(R)); })
   1606 
/* Packed double-precision square root using the current MXCSR rounding mode.
   NOTE(review): uses a setzero passthrough here while _mm512_sqrt_round_pd
   uses _mm512_undefined_pd(); both are dead with the all-ones mask, but the
   undefined form avoids materializing a zero vector. */
static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_sqrt_pd(__m512d __a)
{
  return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
                                                (__v8df) _mm512_setzero_pd (),
                                                (__mmask8) -1,
                                                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: lanes with a clear bit in __U come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
                   (__v8df) __W,
                   (__mmask8) __U,
                   _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
                   (__v8df)
                   _mm512_setzero_pd (),
                   (__mmask8) __U,
                   _MM_FROUND_CUR_DIRECTION);
}
   1634 
/* Packed single-precision square root with explicit rounding mode R.
   Merge-masked: lanes with a clear bit in U come from W. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
                                        (int)(R)); })

/* Zero-masked: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(U), (int)(R)); })

/* Unmasked: the all-ones mask makes the undefined passthrough irrelevant. */
#define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_undefined_ps(), \
                                        (__mmask16)-1, (int)(R)); })
   1649 
/* Packed single-precision square root using the current MXCSR rounding mode.
   All-ones mask, so the setzero passthrough is never selected. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_sqrt_ps(__m512 __a)
{
  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
                                               (__v16sf) _mm512_setzero_ps (),
                                               (__mmask16) -1,
                                               _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: lanes with a clear bit in __U come from __W. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
                                               (__v16sf) __W,
                                               (__mmask16) __U,
                                               _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: lanes with a clear bit in __U are zeroed. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
                                               (__v16sf) _mm512_setzero_ps (),
                                               (__mmask16) __U,
                                               _MM_FROUND_CUR_DIRECTION);
}
   1676 
/* Approximate reciprocal square root, packed double (VRSQRT14PD; per
   Intel's documentation the relative error is bounded by 2^-14). */
static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rsqrt14_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                 (__v8df)
                 _mm512_setzero_pd (),
                 (__mmask8) -1);}

/* Merge-masked: lanes with a clear bit in __U come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                  (__v8df) __W,
                  (__mmask8) __U);
}

/* Zero-masked: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                  (__v8df)
                  _mm512_setzero_pd (),
                  (__mmask8) __U);
}

/* Approximate reciprocal square root, packed single (VRSQRT14PS). */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_rsqrt14_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                (__v16sf)
                _mm512_setzero_ps (),
                (__mmask16) -1);
}

/* Merge-masked: lanes with a clear bit in __U come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                 (__v16sf) __W,
                 (__mmask16) __U);
}

/* Zero-masked: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                 (__v16sf)
                 _mm512_setzero_ps (),
                 (__mmask16) __U);
}
   1727 
/* Scalar approximate reciprocal square root of the low float of __B
   (VRSQRT14SS); the upper lanes of the result come from __A. */
static  __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rsqrt14_ss(__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
             (__v4sf) __B,
             (__v4sf)
             _mm_setzero_ps (),
             (__mmask8) -1);
}

/* Merge-masked: low lane comes from __W when bit 0 of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __W,
          (__mmask8) __U);
}

/* Zero-masked: low lane becomes 0.0 when bit 0 of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) _mm_setzero_ps (),
          (__mmask8) __U);
}

/* Scalar approximate reciprocal square root of the low double of __B
   (VRSQRT14SD); the upper lane of the result comes from __A. */
static  __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rsqrt14_sd(__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
              (__v2df) __B,
              (__v2df)
              _mm_setzero_pd (),
              (__mmask8) -1);
}

/* Merge-masked: low lane comes from __W when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) __W,
          (__mmask8) __U);
}

/* Zero-masked: low lane becomes 0.0 when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) _mm_setzero_pd (),
          (__mmask8) __U);
}
   1783 
/* Approximate reciprocal, packed double (VRCP14PD; per Intel's
   documentation the relative error is bounded by 2^-14). */
static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rcp14_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
               (__v8df)
               _mm512_setzero_pd (),
               (__mmask8) -1);
}

/* Merge-masked: lanes with a clear bit in __U come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
                (__v8df) __W,
                (__mmask8) __U);
}

/* Zero-masked: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
                (__v8df)
                _mm512_setzero_pd (),
                (__mmask8) __U);
}

/* Approximate reciprocal, packed single (VRCP14PS). */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_rcp14_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
              (__v16sf)
              _mm512_setzero_ps (),
              (__mmask16) -1);
}

/* Merge-masked: lanes with a clear bit in __U come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* Zero-masked: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
                   (__v16sf)
                   _mm512_setzero_ps (),
                   (__mmask16) __U);
}
   1835 
/* Scalar approximate reciprocal of the low float of __B (VRCP14SS);
   the upper lanes of the result come from __A. */
static  __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rcp14_ss(__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
                 (__v4sf) __B,
                 (__v4sf)
                 _mm_setzero_ps (),
                 (__mmask8) -1);
}

/* Merge-masked: low lane comes from __W when bit 0 of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __W,
          (__mmask8) __U);
}

/* Zero-masked: low lane becomes 0.0 when bit 0 of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) _mm_setzero_ps (),
          (__mmask8) __U);
}

/* Scalar approximate reciprocal of the low double of __B (VRCP14SD);
   the upper lane of the result comes from __A. */
static  __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rcp14_sd(__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
            (__v2df) __B,
            (__v2df)
            _mm_setzero_pd (),
            (__mmask8) -1);
}

/* Merge-masked: low lane comes from __W when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) __W,
          (__mmask8) __U);
}

/* Zero-masked: low lane becomes 0.0 when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) _mm_setzero_pd (),
          (__mmask8) __U);
}
   1891 
/* Round each lane toward negative infinity, implemented with VRNDSCALE and
   the _MM_FROUND_FLOOR immediate.  The unmasked form passes __A with an
   all-ones mask, so the passthrough operand is never selected. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_floor_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
                                                  _MM_FROUND_FLOOR,
                                                  (__v16sf) __A, -1,
                                                  _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked floor: lanes with a clear bit in __U come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
                   _MM_FROUND_FLOOR,
                   (__v16sf) __W, __U,
                   _MM_FROUND_CUR_DIRECTION);
}

/* Packed-double floor; same structure as the single-precision form. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_floor_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
                                                   _MM_FROUND_FLOOR,
                                                   (__v8df) __A, -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked packed-double floor. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
                _MM_FROUND_FLOOR,
                (__v8df) __W, __U,
                _MM_FROUND_CUR_DIRECTION);
}
   1927 
/* Merge-masked ceiling (round toward positive infinity) via VRNDSCALE with
   the _MM_FROUND_CEIL immediate; lanes with a clear bit in __U come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
                   _MM_FROUND_CEIL,
                   (__v16sf) __W, __U,
                   _MM_FROUND_CUR_DIRECTION);
}

/* Unmasked packed-single ceiling; all-ones mask, passthrough unused. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_ceil_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
                                                  _MM_FROUND_CEIL,
                                                  (__v16sf) __A, -1,
                                                  _MM_FROUND_CUR_DIRECTION);
}

/* Unmasked packed-double ceiling. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_ceil_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
                                                   _MM_FROUND_CEIL,
                                                   (__v8df) __A, -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked packed-double ceiling. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
                _MM_FROUND_CEIL,
                (__v8df) __W, __U,
                _MM_FROUND_CUR_DIRECTION);
}
   1963 
/* Elementwise absolute value of eight signed 64-bit lanes (VPABSQ). */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_abs_epi64(__m512i __A)
{
  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
             (__v8di)
             _mm512_setzero_si512 (),
             (__mmask8) -1);
}

/* Merge-masked: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
                  (__v8di) __W,
                  (__mmask8) __U);
}

/* Zero-masked: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
                  (__v8di)
                  _mm512_setzero_si512 (),
                  (__mmask8) __U);
}

/* Elementwise absolute value of sixteen signed 32-bit lanes (VPABSD). */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_abs_epi32(__m512i __A)
{
  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
             (__v16si)
             _mm512_setzero_si512 (),
             (__mmask16) -1);
}

/* Merge-masked: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
                  (__v16si) __W,
                  (__mmask16) __U);
}

/* Zero-masked: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
                  (__v16si)
                  _mm512_setzero_si512 (),
                  (__mmask16) __U);
}
   2015 
/* Merge-masked scalar float add: low lane is __A[0]+__B[0] when bit 0 of
   __U is set, otherwise __W[0]; rounds per the current MXCSR mode. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar float add: low lane becomes 0.0 when masked off. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control variants: R is an explicit _MM_FROUND_* immediate. */
#define _mm_add_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
   2051 
/* Merge-masked scalar double add: low lane is __A[0]+__B[0] when bit 0 of
   __U is set, otherwise __W[0]; rounds per the current MXCSR mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar double add: low lane becomes 0.0 when masked off. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
/* Rounding-control variants: R is an explicit _MM_FROUND_* immediate. */
#define _mm_add_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
   2086 
/* Merge-masked packed-double add: the sum is computed for all lanes via
   _mm512_add_pd, then a generic select blends with __W where __U is clear. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_add_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked packed-double add: masked-off lanes become 0.0. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_add_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Merge-masked packed-single add; same select-after-compute structure. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_add_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked packed-single add: masked-off lanes become 0.0f. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_add_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
   2114 
/* Packed add with an explicit rounding-mode immediate R; unmasked,
   merge-masked (W) and zero-masked variants for pd and ps. */
#define _mm512_add_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

#define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

#define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

#define _mm512_add_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })

#define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

#define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
   2150 
/* Merge-masked scalar float subtract: low lane is __A[0]-__B[0] when bit 0
   of __U is set, otherwise __W[0]; rounds per the current MXCSR mode. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar float subtract: low lane becomes 0.0 when masked off. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
/* Rounding-control variants: R is an explicit _MM_FROUND_* immediate. */
#define _mm_sub_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
   2185 
/* Scalar double-precision subtract (__A - __B in lane 0).  If bit 0 of __U
   is clear, lane 0 of the result comes from __W.  Uses the current MXCSR
   rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar subtract: lane 0 is zeroed when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   2203 
/* Scalar double-precision subtract with explicit rounding mode R
   (unmasked: mask is all-ones). */
#define _mm_sub_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked variant: lane 0 falls back to W when mask bit 0 is clear. */
#define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked variant: lane 0 is zeroed when mask bit 0 is clear. */
#define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
   2221 
/* Masked 512-bit packed-double subtract: compute A - B in every lane, then
   per-lane select between the result and __W under mask __U. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_sub_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked packed-double subtract: masked-off lanes become 0.0. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_sub_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Masked 512-bit packed-float subtract: select between A - B and __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_sub_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked packed-float subtract: masked-off lanes become 0.0f. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_sub_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
   2249 
/* 512-bit packed-double subtract with explicit rounding mode R (unmasked). */
#define _mm512_sub_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

/* Merge-masked variant: masked-off lanes come from W. */
#define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

/* Zero-masked variant: masked-off lanes become 0.0. */
#define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

/* 512-bit packed-float subtract with explicit rounding mode R (unmasked). */
#define _mm512_sub_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })
   2273 
   2274 #define _mm512_mask_sub_round_ps(W, U, A, B, R)  __extension__ ({ \
   2275   (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
   2276                                        (__v16sf)(__m512)(B), \
   2277                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   2278                                        (int)(R)); });
   2279 
   2280 #define _mm512_maskz_sub_round_ps(U, A, B, R)  __extension__ ({ \
   2281   (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
   2282                                        (__v16sf)(__m512)(B), \
   2283                                        (__v16sf)_mm512_setzero_ps(), \
   2284                                        (__mmask16)(U), (int)(R)); });
   2285 
/* Scalar single-precision multiply (__A * __B in lane 0).  If bit 0 of __U
   is clear, lane 0 of the result comes from __W.  Uses the current MXCSR
   rounding mode. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar multiply: lane 0 is zeroed when bit 0 of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
/* Scalar single-precision multiply with explicit rounding mode R
   (unmasked: mask is all-ones). */
#define _mm_mul_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

/* Merge-masked variant: lane 0 falls back to W when mask bit 0 is clear. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

/* Zero-masked variant: lane 0 is zeroed when mask bit 0 is clear. */
#define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
   2320 
/* Scalar double-precision multiply (__A * __B in lane 0).  If bit 0 of __U
   is clear, lane 0 of the result comes from __W.  Uses the current MXCSR
   rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar multiply: lane 0 is zeroed when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   2338 
/* Scalar double-precision multiply with explicit rounding mode R
   (unmasked: mask is all-ones). */
#define _mm_mul_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked variant: lane 0 falls back to W when mask bit 0 is clear. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked variant: lane 0 is zeroed when mask bit 0 is clear. */
#define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
   2356 
/* Masked 512-bit packed-double multiply: compute A * B in every lane, then
   per-lane select between the result and __W under mask __U. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_mul_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked packed-double multiply: masked-off lanes become 0.0. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_mul_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Masked 512-bit packed-float multiply: select between A * B and __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_mul_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked packed-float multiply: masked-off lanes become 0.0f. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_mul_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
   2384 
/* 512-bit packed-double multiply with explicit rounding mode R (unmasked). */
#define _mm512_mul_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

/* Merge-masked variant: masked-off lanes come from W. */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

/* Zero-masked variant: masked-off lanes become 0.0. */
#define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

/* 512-bit packed-float multiply with explicit rounding mode R (unmasked). */
#define _mm512_mul_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })
   2408 
   2409 #define _mm512_mask_mul_round_ps(W, U, A, B, R)  __extension__ ({ \
   2410   (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
   2411                                        (__v16sf)(__m512)(B), \
   2412                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   2413                                        (int)(R)); });
   2414 
   2415 #define _mm512_maskz_mul_round_ps(U, A, B, R)  __extension__ ({ \
   2416   (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
   2417                                        (__v16sf)(__m512)(B), \
   2418                                        (__v16sf)_mm512_setzero_ps(), \
   2419                                        (__mmask16)(U), (int)(R)); });
   2420 
/* Scalar single-precision divide (__A / __B in lane 0).  If bit 0 of __U
   is clear, lane 0 of the result comes from __W.  Uses the current MXCSR
   rounding mode. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar divide: lane 0 is zeroed when bit 0 of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   2438 
/* Scalar single-precision divide with explicit rounding mode R
   (unmasked: mask is all-ones). */
#define _mm_div_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

/* Merge-masked variant: lane 0 falls back to W when mask bit 0 is clear. */
#define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

/* Zero-masked variant: lane 0 is zeroed when mask bit 0 is clear. */
#define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
   2456 
/* Scalar double-precision divide (__A / __B in lane 0).  If bit 0 of __U
   is clear, lane 0 of the result comes from __W.  Uses the current MXCSR
   rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar divide: lane 0 is zeroed when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   2474 
/* Scalar double-precision divide with explicit rounding mode R
   (unmasked: mask is all-ones). */
#define _mm_div_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked variant: lane 0 falls back to W when mask bit 0 is clear. */
#define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked variant: lane 0 is zeroed when mask bit 0 is clear. */
#define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
   2492 
/* 512-bit packed-double divide, expressed with the GNU vector '/' operator
   so the compiler can fold/optimize it like ordinary arithmetic. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_div_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a/(__v8df)__b);
}

/* Masked packed-double divide: per-lane select between A / B and __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_div_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked packed-double divide: masked-off lanes become 0.0. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_div_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* 512-bit packed-float divide via the GNU vector '/' operator. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_div_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a/(__v16sf)__b);
}

/* Masked packed-float divide: per-lane select between A / B and __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_div_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked packed-float divide: masked-off lanes become 0.0f. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_div_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
   2532 
/* 512-bit packed-double divide with explicit rounding mode R (unmasked). */
#define _mm512_div_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

/* Merge-masked variant: masked-off lanes come from W. */
#define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

/* Zero-masked variant: masked-off lanes become 0.0. */
#define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

/* 512-bit packed-float divide with explicit rounding mode R (unmasked). */
#define _mm512_div_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })
   2556 
   2557 #define _mm512_mask_div_round_ps(W, U, A, B, R)  __extension__ ({ \
   2558   (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
   2559                                        (__v16sf)(__m512)(B), \
   2560                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   2561                                        (int)(R)); });
   2562 
   2563 #define _mm512_maskz_div_round_ps(U, A, B, R)  __extension__ ({ \
   2564   (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
   2565                                        (__v16sf)(__m512)(B), \
   2566                                        (__v16sf)_mm512_setzero_ps(), \
   2567                                        (__mmask16)(U), (int)(R)); });
   2568 
/* Round-scale on packed floats; B is the immediate control passed to the
   VRNDSCALEPS builtin.  Unmasked (mask -1) and uses the current rounding
   direction; note A is also passed as the (unused) pass-through operand. */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                         (__v16sf)(__m512)(A), (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked round-scale: src is C, masked-off lanes come from A,
   mask is B.  (Note the argument order: pass-through first.) */
#define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({\
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         _MM_FROUND_CUR_DIRECTION); })

/* Zero-masked round-scale: src is B, mask is A; masked-off lanes are 0.0f. */
#define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({\
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), \
                                         _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked round-scale with explicit rounding/SAE control R. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         (int)(R)); })

/* Zero-masked round-scale with explicit rounding/SAE control R. */
#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), (int)(R)); })

/* Unmasked round-scale with explicit rounding/SAE control R; the
   pass-through operand is left undefined since the mask is all-ones. */
#define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, (int)(R)); })
   2599 
/* Round-scale on packed doubles; B is the immediate control passed to the
   VRNDSCALEPD builtin.  Unmasked and uses the current rounding direction. */
#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                          (__v8df)(__m512d)(A), (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked round-scale: src is C, masked-off lanes come from A,
   mask is B.  (Note the argument order: pass-through first.) */
#define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({\
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          _MM_FROUND_CUR_DIRECTION); })

/* Zero-masked round-scale: src is B, mask is A; masked-off lanes are 0.0. */
#define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({\
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), \
                                          _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked round-scale with explicit rounding/SAE control R. */
#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          (int)(R)); })

/* Zero-masked round-scale with explicit rounding/SAE control R. */
#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), (int)(R)); })

/* Unmasked round-scale with explicit rounding/SAE control R; the
   pass-through operand is left undefined since the mask is all-ones. */
#define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, (int)(R)); })
   2630 
/* FMA family on packed doubles with explicit rounding mode R.
   All forms are built on the same vfmaddpd512 builtins:
     fmsub  negates the C operand   (A*B - C == fmadd(A, B, -C))
     fnmadd negates the A operand   (-(A*B) + C == fmadd(-A, B, C))
     fnmsub negates both A and C    (-(A*B) - C == fmadd(-A, B, -C))
   _mask  variants merge from the first source (A) under mask U;
   _mask3 variants merge from the third source (C) under mask U;
   _maskz variants zero masked-off lanes. */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), (__mmask8)-1, \
                                           (int)(R)); })


#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R)); })


#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R)); })


#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R)); })


#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), (__mmask8)-1, \
                                           (int)(R)); })


#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R)); })


#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })
   2713 
   2714 
/* Packed-double fused multiply-add: (__A * __B) + __C per lane, unmasked,
   current rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked FMA: masked-off lanes come from __A (the first source). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df) __C,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked FMA (mask3 form): masked-off lanes come from __C. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked FMA: masked-off lanes become 0.0. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* Packed-double fused multiply-subtract: (__A * __B) - __C, implemented
   as fmadd with the __C operand negated.  Unmasked. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    -(__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}
   2764 
   2765 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2766 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   2767 {
   2768   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   2769                                                     (__v8df) __B,
   2770                                                     -(__v8df) __C,
   2771                                                     (__mmask8) __U,
   2772                                                     _MM_FROUND_CUR_DIRECTION);
   2773 }
   2774 
   2775 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2776 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   2777 {
   2778   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
   2779                                                      (__v8df) __B,
   2780                                                      -(__v8df) __C,
   2781                                                      (__mmask8) __U,
   2782                                                      _MM_FROUND_CUR_DIRECTION);
   2783 }
   2784 
   2785 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2786 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
   2787 {
   2788   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
   2789                                                     (__v8df) __B,
   2790                                                     (__v8df) __C,
   2791                                                     (__mmask8) -1,
   2792                                                     _MM_FROUND_CUR_DIRECTION);
   2793 }
   2794 
   2795 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2796 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   2797 {
   2798   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
   2799                                                      (__v8df) __B,
   2800                                                      (__v8df) __C,
   2801                                                      (__mmask8) __U,
   2802                                                      _MM_FROUND_CUR_DIRECTION);
   2803 }
   2804 
   2805 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2806 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   2807 {
   2808   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
   2809                                                      (__v8df) __B,
   2810                                                      (__v8df) __C,
   2811                                                      (__mmask8) __U,
   2812                                                      _MM_FROUND_CUR_DIRECTION);
   2813 }
   2814 
   2815 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2816 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
   2817 {
   2818   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
   2819                                                     (__v8df) __B,
   2820                                                     -(__v8df) __C,
   2821                                                     (__mmask8) -1,
   2822                                                     _MM_FROUND_CUR_DIRECTION);
   2823 }
   2824 
   2825 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2826 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   2827 {
   2828   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
   2829                                                      (__v8df) __B,
   2830                                                      -(__v8df) __C,
   2831                                                      (__mmask8) __U,
   2832                                                      _MM_FROUND_CUR_DIRECTION);
   2833 }
   2834 
/* (A * B) + C on all sixteen float lanes, with explicit rounding mode R. */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), (__mmask16)-1, \
                                          (int)(R)); })


/* (A * B) + C with rounding mode R; lanes cleared in U pass A through. */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R)); })


/* (A * B) + C with rounding mode R; lanes cleared in U pass C through. */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


/* (A * B) + C with rounding mode R; lanes cleared in U are zeroed. */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


/* (A * B) - C with rounding mode R; all lanes active. */
#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R)); })


/* (A * B) - C with rounding mode R; lanes cleared in U pass A through. */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R)); })


/* (A * B) - C with rounding mode R; lanes cleared in U are zeroed. */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


/* -(A * B) + C with rounding mode R; all lanes active. */
#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), (__mmask16)-1, \
                                          (int)(R)); })


/* -(A * B) + C with rounding mode R; lanes cleared in U pass C through. */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


/* -(A * B) + C with rounding mode R; lanes cleared in U are zeroed. */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


/* -(A * B) - C with rounding mode R; all lanes active. */
#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R)); })


/* -(A * B) - C with rounding mode R; lanes cleared in U are zeroed. */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })
   2917 
   2918 
   2919 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2920 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
   2921 {
   2922   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2923                                                    (__v16sf) __B,
   2924                                                    (__v16sf) __C,
   2925                                                    (__mmask16) -1,
   2926                                                    _MM_FROUND_CUR_DIRECTION);
   2927 }
   2928 
   2929 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2930 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   2931 {
   2932   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2933                                                    (__v16sf) __B,
   2934                                                    (__v16sf) __C,
   2935                                                    (__mmask16) __U,
   2936                                                    _MM_FROUND_CUR_DIRECTION);
   2937 }
   2938 
   2939 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2940 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   2941 {
   2942   return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
   2943                                                     (__v16sf) __B,
   2944                                                     (__v16sf) __C,
   2945                                                     (__mmask16) __U,
   2946                                                     _MM_FROUND_CUR_DIRECTION);
   2947 }
   2948 
   2949 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2950 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   2951 {
   2952   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
   2953                                                     (__v16sf) __B,
   2954                                                     (__v16sf) __C,
   2955                                                     (__mmask16) __U,
   2956                                                     _MM_FROUND_CUR_DIRECTION);
   2957 }
   2958 
   2959 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2960 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
   2961 {
   2962   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2963                                                    (__v16sf) __B,
   2964                                                    -(__v16sf) __C,
   2965                                                    (__mmask16) -1,
   2966                                                    _MM_FROUND_CUR_DIRECTION);
   2967 }
   2968 
   2969 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2970 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   2971 {
   2972   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2973                                                    (__v16sf) __B,
   2974                                                    -(__v16sf) __C,
   2975                                                    (__mmask16) __U,
   2976                                                    _MM_FROUND_CUR_DIRECTION);
   2977 }
   2978 
   2979 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2980 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   2981 {
   2982   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
   2983                                                     (__v16sf) __B,
   2984                                                     -(__v16sf) __C,
   2985                                                     (__mmask16) __U,
   2986                                                     _MM_FROUND_CUR_DIRECTION);
   2987 }
   2988 
   2989 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2990 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
   2991 {
   2992   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
   2993                                                    (__v16sf) __B,
   2994                                                    (__v16sf) __C,
   2995                                                    (__mmask16) -1,
   2996                                                    _MM_FROUND_CUR_DIRECTION);
   2997 }
   2998 
   2999 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3000 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   3001 {
   3002   return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
   3003                                                     (__v16sf) __B,
   3004                                                     (__v16sf) __C,
   3005                                                     (__mmask16) __U,
   3006                                                     _MM_FROUND_CUR_DIRECTION);
   3007 }
   3008 
   3009 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3010 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   3011 {
   3012   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
   3013                                                     (__v16sf) __B,
   3014                                                     (__v16sf) __C,
   3015                                                     (__mmask16) __U,
   3016                                                     _MM_FROUND_CUR_DIRECTION);
   3017 }
   3018 
   3019 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3020 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
   3021 {
   3022   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
   3023                                                    (__v16sf) __B,
   3024                                                    -(__v16sf) __C,
   3025                                                    (__mmask16) -1,
   3026                                                    _MM_FROUND_CUR_DIRECTION);
   3027 }
   3028 
   3029 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3030 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   3031 {
   3032   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
   3033                                                     (__v16sf) __B,
   3034                                                     -(__v16sf) __C,
   3035                                                     (__mmask16) __U,
   3036                                                     _MM_FROUND_CUR_DIRECTION);
   3037 }
   3038 
/* A*B with C alternately subtracted (even lanes) and added (odd lanes),
   explicit rounding mode R; all lanes active. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8df)(__m512d)(C), \
                                              (__mmask8)-1, (int)(R)); })


/* fmaddsub with rounding mode R; lanes cleared in U pass A through. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R)); })


/* fmaddsub with rounding mode R; lanes cleared in U pass C through. */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)); })


/* fmaddsub with rounding mode R; lanes cleared in U are zeroed. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)); })


/* fmsubadd (even lanes add, odd lanes subtract) via fmaddsub with -C. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              -(__v8df)(__m512d)(C), \
                                              (__mmask8)-1, (int)(R)); })


/* fmsubadd with rounding mode R; lanes cleared in U pass A through. */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              -(__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R)); })


/* fmsubadd with rounding mode R; lanes cleared in U are zeroed. */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)); })
   3086 
   3087 
   3088 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3089 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
   3090 {
   3091   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   3092                                                        (__v8df) __B,
   3093                                                        (__v8df) __C,
   3094                                                        (__mmask8) -1,
   3095                                                        _MM_FROUND_CUR_DIRECTION);
   3096 }
   3097 
   3098 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3099 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   3100 {
   3101   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   3102                                                        (__v8df) __B,
   3103                                                        (__v8df) __C,
   3104                                                        (__mmask8) __U,
   3105                                                        _MM_FROUND_CUR_DIRECTION);
   3106 }
   3107 
   3108 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3109 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   3110 {
   3111   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
   3112                                                         (__v8df) __B,
   3113                                                         (__v8df) __C,
   3114                                                         (__mmask8) __U,
   3115                                                         _MM_FROUND_CUR_DIRECTION);
   3116 }
   3117 
   3118 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3119 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   3120 {
   3121   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
   3122                                                         (__v8df) __B,
   3123                                                         (__v8df) __C,
   3124                                                         (__mmask8) __U,
   3125                                                         _MM_FROUND_CUR_DIRECTION);
   3126 }
   3127 
   3128 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3129 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
   3130 {
   3131   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   3132                                                        (__v8df) __B,
   3133                                                        -(__v8df) __C,
   3134                                                        (__mmask8) -1,
   3135                                                        _MM_FROUND_CUR_DIRECTION);
   3136 }
   3137 
   3138 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3139 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   3140 {
   3141   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   3142                                                        (__v8df) __B,
   3143                                                        -(__v8df) __C,
   3144                                                        (__mmask8) __U,
   3145                                                        _MM_FROUND_CUR_DIRECTION);
   3146 }
   3147 
   3148 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3149 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   3150 {
   3151   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
   3152                                                         (__v8df) __B,
   3153                                                         -(__v8df) __C,
   3154                                                         (__mmask8) __U,
   3155                                                         _MM_FROUND_CUR_DIRECTION);
   3156 }
   3157 
/* A*B with C alternately subtracted (even lanes) and added (odd lanes),
   explicit rounding mode R; all lanes active. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R)); })


/* fmaddsub with rounding mode R; lanes cleared in U pass A through. */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R)); })


/* fmaddsub with rounding mode R; lanes cleared in U pass C through. */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })


/* fmaddsub with rounding mode R; lanes cleared in U are zeroed. */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })


/* fmsubadd (even lanes add, odd lanes subtract) via fmaddsub with -C. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R)); })


/* fmsubadd with rounding mode R; lanes cleared in U pass A through. */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R)); })


/* fmsubadd with rounding mode R; lanes cleared in U are zeroed. */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })
   3205 
   3206 
   3207 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3208 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
   3209 {
   3210   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   3211                                                       (__v16sf) __B,
   3212                                                       (__v16sf) __C,
   3213                                                       (__mmask16) -1,
   3214                                                       _MM_FROUND_CUR_DIRECTION);
   3215 }
   3216 
   3217 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3218 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   3219 {
   3220   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   3221                                                       (__v16sf) __B,
   3222                                                       (__v16sf) __C,
   3223                                                       (__mmask16) __U,
   3224                                                       _MM_FROUND_CUR_DIRECTION);
   3225 }
   3226 
   3227 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3228 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   3229 {
   3230   return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
   3231                                                        (__v16sf) __B,
   3232                                                        (__v16sf) __C,
   3233                                                        (__mmask16) __U,
   3234                                                        _MM_FROUND_CUR_DIRECTION);
   3235 }
   3236 
   3237 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3238 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   3239 {
   3240   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
   3241                                                        (__v16sf) __B,
   3242                                                        (__v16sf) __C,
   3243                                                        (__mmask16) __U,
   3244                                                        _MM_FROUND_CUR_DIRECTION);
   3245 }
   3246 
   3247 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3248 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
   3249 {
   3250   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   3251                                                       (__v16sf) __B,
   3252                                                       -(__v16sf) __C,
   3253                                                       (__mmask16) -1,
   3254                                                       _MM_FROUND_CUR_DIRECTION);
   3255 }
   3256 
   3257 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3258 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   3259 {
   3260   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   3261                                                       (__v16sf) __B,
   3262                                                       -(__v16sf) __C,
   3263                                                       (__mmask16) __U,
   3264                                                       _MM_FROUND_CUR_DIRECTION);
   3265 }
   3266 
   3267 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3268 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   3269 {
   3270   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
   3271                                                        (__v16sf) __B,
   3272                                                        -(__v16sf) __C,
   3273                                                        (__mmask16) __U,
   3274                                                        _MM_FROUND_CUR_DIRECTION);
   3275 }
   3276 
/* (A * B) - C with rounding mode R; lanes cleared in U pass C through.
   Uses the dedicated vfmsub builtin so the original C is merged, not -C. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })
   3282 
   3283 
   3284 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3285 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   3286 {
   3287   return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
   3288                                                      (__v8df) __B,
   3289                                                      (__v8df) __C,
   3290                                                      (__mmask8) __U,
   3291                                                      _MM_FROUND_CUR_DIRECTION);
   3292 }
   3293 
/* (A * B) - C with rounding mode R; lanes cleared in U pass C through.
   Uses the dedicated vfmsub builtin so the original C is merged, not -C. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })
   3299 
   3300 
   3301 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3302 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   3303 {
   3304   return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
   3305                                                     (__v16sf) __B,
   3306                                                     (__v16sf) __C,
   3307                                                     (__mmask16) __U,
   3308                                                     _MM_FROUND_CUR_DIRECTION);
   3309 }
   3310 
/* Fused multiply with alternating subtract/add of C, explicit rounding R;
   mask3 form merges the result with C under U. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)); })
   3316 
   3317 
   3318 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3319 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   3320 {
   3321   return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
   3322                                                         (__v8df) __B,
   3323                                                         (__v8df) __C,
   3324                                                         (__mmask8) __U,
   3325                                                         _MM_FROUND_CUR_DIRECTION);
   3326 }
   3327 
/* Single-precision counterpart of _mm512_mask3_fmsubadd_round_pd. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })
   3333 
   3334 
   3335 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3336 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   3337 {
   3338   return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
   3339                                                        (__v16sf) __B,
   3340                                                        (__v16sf) __C,
   3341                                                        (__mmask16) __U,
   3342                                                        _MM_FROUND_CUR_DIRECTION);
   3343 }
   3344 
/* Fused negated multiply-add -(A * B) + C with explicit rounding R;
   merged form under mask U. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })
   3350 
   3351 
   3352 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3353 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   3354 {
   3355   return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
   3356                                                      (__v8df) __B,
   3357                                                      (__v8df) __C,
   3358                                                      (__mmask8) __U,
   3359                                                      _MM_FROUND_CUR_DIRECTION);
   3360 }
   3361 
/* Single-precision counterpart of _mm512_mask_fnmadd_round_pd. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })
   3367 
   3368 
   3369 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3370 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   3371 {
   3372   return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
   3373                                                     (__v16sf) __B,
   3374                                                     (__v16sf) __C,
   3375                                                     (__mmask16) __U,
   3376                                                     _MM_FROUND_CUR_DIRECTION);
   3377 }
   3378 
/* Fused negated multiply-subtract -(A * B) - C with explicit rounding R;
   merged form under mask U. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


/* Same operation, mask3 form: merged with C under U. */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)); })
   3391 
   3392 
   3393 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3394 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   3395 {
   3396   return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
   3397                                                      (__v8df) __B,
   3398                                                      (__v8df) __C,
   3399                                                      (__mmask8) __U,
   3400                                                      _MM_FROUND_CUR_DIRECTION);
   3401 }
   3402 
   3403 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3404 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   3405 {
   3406   return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
   3407                                                       (__v8df) __B,
   3408                                                       (__v8df) __C,
   3409                                                       (__mmask8) __U,
   3410                                                       _MM_FROUND_CUR_DIRECTION);
   3411 }
   3412 
/* Single-precision counterparts of the fnmsub rounding macros above. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


/* mask3 form: merged with C under U. */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)); })
   3425 
   3426 
   3427 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3428 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   3429 {
   3430   return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
   3431                                                     (__v16sf) __B,
   3432                                                     (__v16sf) __C,
   3433                                                     (__mmask16) __U,
   3434                                                     _MM_FROUND_CUR_DIRECTION);
   3435 }
   3436 
   3437 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3438 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   3439 {
   3440   return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
   3441                                                      (__v16sf) __B,
   3442                                                      (__v16sf) __C,
   3443                                                      (__mmask16) __U,
   3444                                                      _MM_FROUND_CUR_DIRECTION);
   3445 }
   3446 
   3447 
   3448 
   3449 /* Vector permutations */
   3450 
   3451 static __inline __m512i __DEFAULT_FN_ATTRS
   3452 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
   3453 {
   3454   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
   3455                                                        /* idx */ ,
   3456                                                        (__v16si) __A,
   3457                                                        (__v16si) __B,
   3458                                                        (__mmask16) -1);
   3459 }
   3460 
   3461 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3462 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
   3463                                 __m512i __I, __m512i __B)
   3464 {
   3465   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
   3466                                                         /* idx */ ,
   3467                                                         (__v16si) __A,
   3468                                                         (__v16si) __B,
   3469                                                         (__mmask16) __U);
   3470 }
   3471 
   3472 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3473 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
   3474                                  __m512i __I, __m512i __B)
   3475 {
   3476   return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
   3477                                                         /* idx */ ,
   3478                                                         (__v16si) __A,
   3479                                                         (__v16si) __B,
   3480                                                         (__mmask16) __U);
   3481 }
   3482 
   3483 static __inline __m512i __DEFAULT_FN_ATTRS
   3484 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
   3485 {
   3486   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
   3487                                                        /* idx */ ,
   3488                                                        (__v8di) __A,
   3489                                                        (__v8di) __B,
   3490                                                        (__mmask8) -1);
   3491 }
   3492 
   3493 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3494 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
   3495                                 __m512i __B)
   3496 {
   3497   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
   3498                                                        /* idx */ ,
   3499                                                        (__v8di) __A,
   3500                                                        (__v8di) __B,
   3501                                                        (__mmask8) __U);
   3502 }
   3503 
   3504 
   3505 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3506 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
   3507          __m512i __I, __m512i __B)
   3508 {
   3509   return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
   3510                                                         /* idx */ ,
   3511                                                         (__v8di) __A,
   3512                                                         (__v8di) __B,
   3513                                                         (__mmask8) __U);
   3514 }
   3515 
/* Concatenate A:B (A in the upper half) and extract 8 qwords starting at
   offset I (mod 8) — a 64-bit-granularity palignr. */
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \
                                   (__v8di)(__m512i)(A), \
                                   ((int)(I) & 0x7) + 0, \
                                   ((int)(I) & 0x7) + 1, \
                                   ((int)(I) & 0x7) + 2, \
                                   ((int)(I) & 0x7) + 3, \
                                   ((int)(I) & 0x7) + 4, \
                                   ((int)(I) & 0x7) + 5, \
                                   ((int)(I) & 0x7) + 6, \
                                   ((int)(I) & 0x7) + 7); })

/* Merging form: lanes with a clear bit in U take the element of W. */
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)); })

/* Zeroing form: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()); })
   3537 
/* Concatenate A:B (A in the upper half) and extract 16 dwords starting
   at offset I (mod 16) — a 32-bit-granularity palignr. */
#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \
                                   (__v16si)(__m512i)(A), \
                                   ((int)(I) & 0xf) + 0, \
                                   ((int)(I) & 0xf) + 1, \
                                   ((int)(I) & 0xf) + 2, \
                                   ((int)(I) & 0xf) + 3, \
                                   ((int)(I) & 0xf) + 4, \
                                   ((int)(I) & 0xf) + 5, \
                                   ((int)(I) & 0xf) + 6, \
                                   ((int)(I) & 0xf) + 7, \
                                   ((int)(I) & 0xf) + 8, \
                                   ((int)(I) & 0xf) + 9, \
                                   ((int)(I) & 0xf) + 10, \
                                   ((int)(I) & 0xf) + 11, \
                                   ((int)(I) & 0xf) + 12, \
                                   ((int)(I) & 0xf) + 13, \
                                   ((int)(I) & 0xf) + 14, \
                                   ((int)(I) & 0xf) + 15); })

/* Merging form: lanes with a clear bit in U take the element of W. */
#define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)); })

/* Zeroing form: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()); })
/* Vector Extract */
   3567 /* Vector Extract */
   3568 
/* Extract the 256-bit (4-double) half of A selected by bit 0 of I. */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({             \
  (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A),          \
                                   (__v8df)_mm512_undefined_pd(), \
                                   ((I) & 1) ? 4 : 0,             \
                                   ((I) & 1) ? 5 : 1,             \
                                   ((I) & 1) ? 6 : 2,             \
                                   ((I) & 1) ? 7 : 3); })

/* Merging form: lanes with a clear bit in U take the element of W. */
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                   (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
                                   (__v4df)(W)); })

/* Zeroing form: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                   (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
                                   (__v4df)_mm256_setzero_pd()); })
   3586 
/* Extract the 128-bit (4-float) lane of A selected by the low 2 bits
   of I. */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({             \
  (__m128)__builtin_shufflevector((__v16sf)(__m512)(A),           \
                                  (__v16sf)_mm512_undefined_ps(), \
                                  0 + ((I) & 0x3) * 4,            \
                                  1 + ((I) & 0x3) * 4,            \
                                  2 + ((I) & 0x3) * 4,            \
                                  3 + ((I) & 0x3) * 4); })

/* Merging form: lanes with a clear bit in U take the element of W. */
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                   (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
                                   (__v4sf)(W)); })

/* Zeroing form: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                   (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
                                   (__v4sf)_mm_setzero_ps()); })
   3604 
   3605 /* Vector Blend */
   3606 
   3607 static __inline __m512d __DEFAULT_FN_ATTRS
   3608 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
   3609 {
   3610   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
   3611                  (__v8df) __W,
   3612                  (__v8df) __A);
   3613 }
   3614 
   3615 static __inline __m512 __DEFAULT_FN_ATTRS
   3616 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
   3617 {
   3618   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
   3619                 (__v16sf) __W,
   3620                 (__v16sf) __A);
   3621 }
   3622 
   3623 static __inline __m512i __DEFAULT_FN_ATTRS
   3624 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
   3625 {
   3626   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
   3627                 (__v8di) __W,
   3628                 (__v8di) __A);
   3629 }
   3630 
   3631 static __inline __m512i __DEFAULT_FN_ATTRS
   3632 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
   3633 {
   3634   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
   3635                 (__v16si) __W,
   3636                 (__v16si) __A);
   3637 }
   3638 
   3639 /* Compare */
   3640 
/* Compare 16 floats per predicate P (a _CMP_* constant), producing a
   16-bit result mask; R supplies an explicit rounding/SAE mode. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)-1, (int)(R)); })

/* As above, but zeroed through incoming mask U. */
#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)(U), (int)(R)); })

/* Current-rounding-direction convenience wrappers. */
#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Named-predicate aliases over _mm512_cmp_ps_mask. */
#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
   3695 
/* Compare 8 doubles per predicate P (a _CMP_* constant), producing an
   8-bit result mask; R supplies an explicit rounding/SAE mode. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)-1, (int)(R)); })

/* As above, but zeroed through incoming mask U. */
#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)(U), (int)(R)); })

/* Current-rounding-direction convenience wrappers. */
#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Named-predicate aliases over _mm512_cmp_pd_mask. */
#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
   3750 
   3751 /* Conversion */
   3752 
/* Truncating float -> unsigned dword conversion with explicit
   rounding/SAE mode R; unmasked, merging (W), and zeroing forms. */
#define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_undefined_epi32(), \
                                             (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)); })
   3767 
   3768 
   3769 static __inline __m512i __DEFAULT_FN_ATTRS
   3770 _mm512_cvttps_epu32(__m512 __A)
   3771 {
   3772   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   3773                   (__v16si)
   3774                   _mm512_setzero_si512 (),
   3775                   (__mmask16) -1,
   3776                   _MM_FROUND_CUR_DIRECTION);
   3777 }
   3778 
   3779 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3780 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
   3781 {
   3782   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   3783                    (__v16si) __W,
   3784                    (__mmask16) __U,
   3785                    _MM_FROUND_CUR_DIRECTION);
   3786 }
   3787 
   3788 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3789 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
   3790 {
   3791   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   3792                    (__v16si) _mm512_setzero_si512 (),
   3793                    (__mmask16) __U,
   3794                    _MM_FROUND_CUR_DIRECTION);
   3795 }
   3796 
/* Signed dword -> float conversion with explicit rounding mode R;
   unmasked, merging (W), and zeroing forms. */
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })

/* Unsigned dword -> float conversion with explicit rounding mode R. */
#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })
   3826 
   3827 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3828 _mm512_cvtepu32_ps (__m512i __A)
   3829 {
   3830   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
   3831                  (__v16sf) _mm512_undefined_ps (),
   3832                  (__mmask16) -1,
   3833                  _MM_FROUND_CUR_DIRECTION);
   3834 }
   3835 
   3836 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3837 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
   3838 {
   3839   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
   3840                  (__v16sf) __W,
   3841                  (__mmask16) __U,
   3842                  _MM_FROUND_CUR_DIRECTION);
   3843 }
   3844 
   3845 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3846 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
   3847 {
   3848   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
   3849                  (__v16sf) _mm512_setzero_ps (),
   3850                  (__mmask16) __U,
   3851                  _MM_FROUND_CUR_DIRECTION);
   3852 }
   3853 
   3854 static __inline __m512d __DEFAULT_FN_ATTRS
   3855 _mm512_cvtepi32_pd(__m256i __A)
   3856 {
   3857   return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
   3858 }
   3859 
   3860 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3861 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
   3862 {
   3863   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   3864                                               (__v8df)_mm512_cvtepi32_pd(__A),
   3865                                               (__v8df)__W);
   3866 }
   3867 
   3868 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3869 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
   3870 {
   3871   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   3872                                               (__v8df)_mm512_cvtepi32_pd(__A),
   3873                                               (__v8df)_mm512_setzero_pd());
   3874 }
   3875 
   3876 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3877 _mm512_cvtepi32lo_pd(__m512i __A)
   3878 {
   3879   return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
   3880 }
   3881 
   3882 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3883 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
   3884 {
   3885   return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
   3886 }
   3887 
   3888 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3889 _mm512_cvtepi32_ps (__m512i __A)
   3890 {
   3891   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
   3892                 (__v16sf) _mm512_undefined_ps (),
   3893                 (__mmask16) -1,
   3894                 _MM_FROUND_CUR_DIRECTION);
   3895 }
   3896 
   3897 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3898 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
   3899 {
   3900   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
   3901                 (__v16sf) __W,
   3902                 (__mmask16) __U,
   3903                 _MM_FROUND_CUR_DIRECTION);
   3904 }
   3905 
   3906 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3907 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
   3908 {
   3909   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
   3910                 (__v16sf) _mm512_setzero_ps (),
   3911                 (__mmask16) __U,
   3912                 _MM_FROUND_CUR_DIRECTION);
   3913 }
   3914 
   3915 static __inline __m512d __DEFAULT_FN_ATTRS
   3916 _mm512_cvtepu32_pd(__m256i __A)
   3917 {
   3918   return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
   3919 }
   3920 
/* Merge-masked u32 -> double: convert all lanes, then select per-lane
   between the converted result (__U bit set) and __W (bit clear). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepu32_pd(__A),
                                              (__v8df)__W);
}
   3928 
/* Zero-masked u32 -> double: lanes with a clear bit in __U become 0.0. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepu32_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}
   3936 
   3937 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3938 _mm512_cvtepu32lo_pd(__m512i __A)
   3939 {
   3940   return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
   3941 }
   3942 
   3943 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3944 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
   3945 {
   3946   return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
   3947 }
   3948 
/* Convert 8 doubles to 8 floats with an explicit rounding mode R
   (VCVTPD2PS): unmasked, merge-masked (passthrough W), and zero-masked
   variants.  R must be a compile-time rounding constant, hence macros. */
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
   3963 
/* Convert 8 doubles in __A to 8 floats using the current rounding
   direction (VCVTPD2PS, unmasked). */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_cvtpd_ps (__m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                (__v8sf) _mm256_undefined_ps (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}
   3972 
/* Merge-masked double -> float conversion: unselected lanes keep __W. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                (__v8sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   3981 
/* Zero-masked double -> float conversion: unselected lanes are zeroed. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                (__v8sf) _mm256_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   3990 
/* Convert 8 doubles to floats and place them in the low 256 bits of a
   512-bit result; the high 8 float lanes are zero (indices 8..15 select
   from the zero vector). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_cvtpd_pslo (__m512d __A)
{
  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
                (__v8sf) _mm256_setzero_ps (),
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}
   3998 
/* Merge-masked variant of _mm512_cvtpd_pslo: the low half of __W provides
   the passthrough values for unselected lanes; the high 8 lanes of the
   result are zero. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
{
  return (__m512) __builtin_shufflevector (
                (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
                                               __U, __A),
                (__v8sf) _mm256_setzero_ps (),
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}
   4008 
   4009 #define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \
   4010   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   4011                                             (__v16hi)_mm256_undefined_si256(), \
   4012                                             (__mmask16)-1); })
   4013 
   4014 #define _mm512_mask_cvt_roundps_ph(U, W, A, I) __extension__ ({ \
   4015   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   4016                                             (__v16hi)(__m256i)(U), \
   4017                                             (__mmask16)(W)); })
   4018 
   4019 #define _mm512_maskz_cvt_roundps_ph(W, A, I) __extension__ ({ \
   4020   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   4021                                             (__v16hi)_mm256_setzero_si256(), \
   4022                                             (__mmask16)(W)); })
   4023 
   4024 #define _mm512_cvtps_ph(A, I) __extension__ ({ \
   4025   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   4026                                             (__v16hi)_mm256_setzero_si256(), \
   4027                                             (__mmask16)-1); })
   4028 
   4029 #define _mm512_mask_cvtps_ph(U, W, A, I) __extension__ ({ \
   4030   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   4031                                             (__v16hi)(__m256i)(U), \
   4032                                             (__mmask16)(W)); })
   4033 
   4034 #define _mm512_maskz_cvtps_ph(W, A, I) __extension__ ({\
   4035   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   4036                                             (__v16hi)_mm256_setzero_si256(), \
   4037                                             (__mmask16)(W)); })
   4038 
/* Convert 16 half-precision values to floats (VCVTPH2PS) with explicit
   rounding/SAE control R: unmasked, merge-masked, and zero-masked. */
#define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })
   4053 
   4054 
/* Convert 16 half-precision values in __A to floats (VCVTPH2PS, unmasked).
   Note: unlike most unmasked wrappers in this file, the passthrough operand
   is a zero vector rather than _mm512_undefined_ps(); with an all-ones mask
   the passthrough is never observed, so behavior is identical. */
static  __inline __m512 __DEFAULT_FN_ATTRS
_mm512_cvtph_ps(__m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                (__v16sf)
                _mm512_setzero_ps (),
                (__mmask16) -1,
                _MM_FROUND_CUR_DIRECTION);
}
   4064 
/* Merge-masked half -> float conversion: unselected lanes keep __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                 (__v16sf) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
   4073 
/* Zero-masked half -> float conversion: unselected lanes are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                 (__v16sf) _mm512_setzero_ps (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
   4082 
/* Truncating (round-toward-zero) double -> i32 conversion with explicit
   SAE control R (VCVTTPD2DQ): unmasked, merge-masked, zero-masked. */
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)); })
   4097 
/* Truncating conversion of 8 doubles in __a to 8 signed 32-bit integers
   (VCVTTPD2DQ, unmasked). */
static __inline __m256i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epi32(__m512d __a)
{
  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
                                                   (__v8si)_mm256_setzero_si256(),
                                                   (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}
   4106 
/* Merge-masked truncating double -> i32: unselected lanes keep __W. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
                  (__v8si) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
   4115 
/* Zero-masked truncating double -> i32: unselected lanes are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
                  (__v8si) _mm256_setzero_si256 (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
   4124 
/* Truncating float -> i32 conversion with explicit SAE control R
   (VCVTTPS2DQ): unmasked, merge-masked, zero-masked. */
#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)); })
   4139 
   4140 static __inline __m512i __DEFAULT_FN_ATTRS
   4141 _mm512_cvttps_epi32(__m512 __a)
   4142 {
   4143   return (__m512i)
   4144     __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
   4145                                      (__v16si) _mm512_setzero_si512 (),
   4146                                      (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
   4147 }
   4148 
/* Merge-masked truncating float -> i32: unselected lanes keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
                  (__v16si) __W,
                  (__mmask16) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
   4157 
/* Zero-masked truncating float -> i32: unselected lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
                  (__v16si) _mm512_setzero_si512 (),
                  (__mmask16) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
   4166 
/* Float -> i32 conversion with explicit rounding mode R (VCVTPS2DQ):
   unmasked, merge-masked, zero-masked. */
#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)(__m512i)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)(U), (int)(R)); })
   4181 
/* Convert 16 floats in __A to 16 signed 32-bit integers using the current
   rounding direction (VCVTPS2DQ, unmasked). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epi32 (__m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                 (__v16si) _mm512_undefined_epi32 (),
                 (__mmask16) -1,
                 _MM_FROUND_CUR_DIRECTION);
}
   4190 
/* Merge-masked float -> i32: unselected lanes keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                 (__v16si) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
   4199 
/* Zero-masked float -> i32: unselected lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                 (__v16si)
                 _mm512_setzero_si512 (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
   4209 
/* Double -> i32 conversion with explicit rounding mode R (VCVTPD2DQ):
   unmasked, merge-masked, zero-masked. */
#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)(__m256i)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)(U), (int)(R)); })
   4224 
/* Convert 8 doubles in __A to 8 signed 32-bit integers using the current
   rounding direction (VCVTPD2DQ, unmasked). */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epi32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                 (__v8si)
                 _mm256_undefined_si256 (),
                 (__mmask8) -1,
                 _MM_FROUND_CUR_DIRECTION);
}
   4234 
/* Merge-masked double -> i32: unselected lanes keep __W. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                 (__v8si) __W,
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
   4243 
/* Zero-masked double -> i32: unselected lanes are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                 (__v8si)
                 _mm256_setzero_si256 (),
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
   4253 
/* Float -> u32 conversion with explicit rounding mode R (VCVTPS2UDQ):
   unmasked, merge-masked, zero-masked. */
#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)); })
   4268 
   4269 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4270 _mm512_cvtps_epu32 ( __m512 __A)
   4271 {
   4272   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
   4273                   (__v16si)\
   4274                   _mm512_undefined_epi32 (),\
   4275                   (__mmask16) -1,\
   4276                   _MM_FROUND_CUR_DIRECTION);\
   4277 }
   4278 
/* Merge-masked float -> u32: unselected lanes keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
                  (__v16si) __W,
                  (__mmask16) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
   4287 
   4288 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4289 _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
   4290 {
   4291   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
   4292                   (__v16si)
   4293                   _mm512_setzero_si512 (),
   4294                   (__mmask16) __U ,
   4295                   _MM_FROUND_CUR_DIRECTION);
   4296 }
   4297 
   4298 #define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
   4299   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
   4300                                             (__v8si)_mm256_setzero_si256(), \
   4301                                             (__mmask8)-1, (int)(R)); })
   4302 
   4303 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \
   4304   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
   4305                                             (__v8si)(W), \
   4306                                             (__mmask8)(U), (int)(R)); })
   4307 
   4308 #define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \
   4309   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
   4310                                             (__v8si)_mm256_setzero_si256(), \
   4311                                             (__mmask8)(U), (int)(R)); })
   4312 
/* Convert 8 doubles in __A to 8 unsigned 32-bit integers using the
   current rounding direction (VCVTPD2UDQ, unmasked). */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epu32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_undefined_si256 (),
                  (__mmask8) -1,
                  _MM_FROUND_CUR_DIRECTION);
}
   4322 
/* Merge-masked double -> u32: unselected lanes keep __W. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                  (__v8si) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
   4331 
/* Zero-masked double -> u32: unselected lanes are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_setzero_si256 (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
   4341 
/* Extract the lowest double-precision element of __a as a scalar. */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_cvtsd_f64(__m512d __a)
{
  return __a[0];
}
   4347 
/* Extract the lowest single-precision element of __a as a scalar. */
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_cvtss_f32(__m512 __a)
{
  return __a[0];
}
   4353 
   4354 /* Unpack and Interleave */
   4355 
/* Interleave the odd (high) double of each 128-bit lane of __a with the
   corresponding double of __b.  Shuffle indices >= 8 select from __b;
   the "1+2k, 9+2k" pattern walks the four 128-bit lanes. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_unpackhi_pd(__m512d __a, __m512d __b)
{
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}
   4362 
/* Merge-masked unpackhi_pd: unselected lanes keep __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
                                           (__v8df)__W);
}
   4370 
/* Zero-masked unpackhi_pd: unselected lanes are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
                                           (__v8df)_mm512_setzero_pd());
}
   4378 
/* Interleave the even (low) double of each 128-bit lane of __a with the
   corresponding double of __b (indices >= 8 select from __b). */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_unpacklo_pd(__m512d __a, __m512d __b)
{
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}
   4385 
/* Merge-masked unpacklo_pd: unselected lanes keep __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
                                           (__v8df)__W);
}
   4393 
/* Zero-masked unpacklo_pd: unselected lanes are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
                                           (__v8df)_mm512_setzero_pd());
}
   4401 
/* Interleave the upper two floats of each 128-bit lane of __a with those
   of __b.  Indices >= 16 select from __b; the +4/+8/+12 offsets step
   through the four 128-bit lanes. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_unpackhi_ps(__m512 __a, __m512 __b)
{
  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                         2,    18,    3,    19,
                                         2+4,  18+4,  3+4,  19+4,
                                         2+8,  18+8,  3+8,  19+8,
                                         2+12, 18+12, 3+12, 19+12);
}
   4411 
/* Merge-masked unpackhi_ps: unselected lanes keep __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                          (__v16sf)__W);
}
   4419 
/* Zero-masked unpackhi_ps: unselected lanes are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                          (__v16sf)_mm512_setzero_ps());
}
   4427 
/* Interleave the lower two floats of each 128-bit lane of __a with those
   of __b (indices >= 16 select from __b). */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_unpacklo_ps(__m512 __a, __m512 __b)
{
  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                         0,    16,    1,    17,
                                         0+4,  16+4,  1+4,  17+4,
                                         0+8,  16+8,  1+8,  17+8,
                                         0+12, 16+12, 1+12, 17+12);
}
   4437 
/* Merge-masked unpacklo_ps: unselected lanes keep __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                          (__v16sf)__W);
}
   4445 
/* Zero-masked unpacklo_ps: unselected lanes are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                          (__v16sf)_mm512_setzero_ps());
}
   4453 
/* Interleave the upper two 32-bit ints of each 128-bit lane of __A with
   those of __B (indices >= 16 select from __B). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          2,    18,    3,    19,
                                          2+4,  18+4,  3+4,  19+4,
                                          2+8,  18+8,  3+8,  19+8,
                                          2+12, 18+12, 3+12, 19+12);
}
   4463 
/* Merge-masked unpackhi_epi32: unselected lanes keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                       (__v16si)__W);
}
   4471 
/* Zero-masked unpackhi_epi32: unselected lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                       (__v16si)_mm512_setzero_si512());
}
   4479 
/* Interleave the lower two 32-bit ints of each 128-bit lane of __A with
   those of __B (indices >= 16 select from __B). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          0,    16,    1,    17,
                                          0+4,  16+4,  1+4,  17+4,
                                          0+8,  16+8,  1+8,  17+8,
                                          0+12, 16+12, 1+12, 17+12);
}
   4489 
/* Merge-masked unpacklo_epi32: unselected lanes keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                       (__v16si)__W);
}
   4497 
/* Zero-masked unpacklo_epi32: unselected lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                       (__v16si)_mm512_setzero_si512());
}
   4505 
/* Interleave the odd (high) 64-bit int of each 128-bit lane of __A with
   that of __B (indices >= 8 select from __B). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}
   4512 
/* Merge-masked unpackhi_epi64: unselected lanes keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                        (__v8di)__W);
}
   4520 
/* Zero-masked unpackhi_epi64: unselected lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                        (__v8di)_mm512_setzero_si512());
}
   4528 
/* Interleave the even (low) 64-bit int of each 128-bit lane of __A with
   that of __B (indices >= 8 select from __B). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}
   4535 
/* Merge-masked unpacklo_epi64: unselected lanes keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                        (__v8di)__W);
}
   4543 
/* Zero-masked unpacklo_epi64: unselected lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                        (__v8di)_mm512_setzero_si512());
}
   4551 
   4552 /* Bit Test */
   4553 
/* Per-element bit test via the ptestmd builtin (VPTESTMD): per the Intel
   intrinsic, each result mask bit is set when (__A & __B) is nonzero for
   that 32-bit element.  The -1 mask makes all 16 elements active. */
static __inline __mmask16 __DEFAULT_FN_ATTRS
_mm512_test_epi32_mask(__m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
            (__v16si) __B,
            (__mmask16) -1);
}
   4561 
/* Masked form: only elements whose bit is set in __U participate; other
   result bits are zero. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
                 (__v16si) __B, __U);
}
   4568 
/* 64-bit element variant of the bit test (VPTESTMQ): per the Intel
   intrinsic, each mask bit is set when (__A & __B) is nonzero for that
   64-bit element. */
static __inline __mmask8 __DEFAULT_FN_ATTRS
_mm512_test_epi64_mask(__m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
                 (__v8di) __B,
                 (__mmask8) -1);
}
   4576 
/* Masked 64-bit bit test: only elements selected by __U participate. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
}
   4582 
   4583 
   4584 /* SIMD load ops */
   4585 
/* Unaligned 512-bit integer load from __P (VMOVDQU32, unmasked; the
   all-ones mask makes the zero passthrough irrelevant). */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_loadu_si512 (void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
                  (__v16si)
                  _mm512_setzero_si512 (),
                  (__mmask16) -1);
}
   4594 
/* Merge-masked unaligned load of 16 x i32: lanes with a clear bit in __U
   keep __W; masked-off elements of __P are not accessed. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
                  (__v16si) __W,
                  (__mmask16) __U);
}
   4602 
   4603 
   4604 static __inline __m512i __DEFAULT_FN_ATTRS
   4605 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
   4606 {
   4607   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
   4608                                                      (__v16si)
   4609                                                      _mm512_setzero_si512 (),
   4610                                                      (__mmask16) __U);
   4611 }
   4612 
   4613 static __inline __m512i __DEFAULT_FN_ATTRS
   4614 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
   4615 {
   4616   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
   4617                   (__v8di) __W,
   4618                   (__mmask8) __U);
   4619 }
   4620 
   4621 static __inline __m512i __DEFAULT_FN_ATTRS
   4622 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
   4623 {
   4624   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
   4625                                                      (__v8di)
   4626                                                      _mm512_setzero_si512 (),
   4627                                                      (__mmask8) __U);
   4628 }
   4629 
/* Merge-masked unaligned load of 16 x float: elements with a clear bit in
   __U keep the corresponding element of __W. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* Zero-masked unaligned load of 16 x float. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) __U);
}

/* Merge-masked unaligned load of 8 x double. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
                (__v8df) __W,
                (__mmask8) __U);
}

/* Zero-masked unaligned load of 8 x double. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) __U);
}
   4663 
   4664 static __inline __m512d __DEFAULT_FN_ATTRS
   4665 _mm512_loadu_pd(void const *__p)
   4666 {
   4667   struct __loadu_pd {
   4668     __m512d __v;
   4669   } __attribute__((__packed__, __may_alias__));
   4670   return ((struct __loadu_pd*)__p)->__v;
   4671 }
   4672 
   4673 static __inline __m512 __DEFAULT_FN_ATTRS
   4674 _mm512_loadu_ps(void const *__p)
   4675 {
   4676   struct __loadu_ps {
   4677     __m512 __v;
   4678   } __attribute__((__packed__, __may_alias__));
   4679   return ((struct __loadu_ps*)__p)->__v;
   4680 }
   4681 
/* Aligned load of 16 x float; __p must be 64-byte aligned (VMOVAPS
   semantics).  All-ones mask loads every element. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_load_ps(void const *__p)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) -1);
}

/* Merge-masked aligned load: unselected elements keep those of __W. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* Zero-masked aligned load: unselected elements are zeroed. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) __U);
}

/* Aligned load of 8 x double; __p must be 64-byte aligned. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_load_pd(void const *__p)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) -1);
}

/* Merge-masked aligned load of 8 x double. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
                          (__v8df) __W,
                          (__mmask8) __U);
}

/* Zero-masked aligned load of 8 x double. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) __U);
}
   4733 
   4734 static __inline __m512i __DEFAULT_FN_ATTRS
   4735 _mm512_load_si512 (void const *__P)
   4736 {
   4737   return *(__m512i *) __P;
   4738 }
   4739 
   4740 static __inline __m512i __DEFAULT_FN_ATTRS
   4741 _mm512_load_epi32 (void const *__P)
   4742 {
   4743   return *(__m512i *) __P;
   4744 }
   4745 
   4746 static __inline __m512i __DEFAULT_FN_ATTRS
   4747 _mm512_load_epi64 (void const *__P)
   4748 {
   4749   return *(__m512i *) __P;
   4750 }
   4751 
   4752 /* SIMD store ops */
   4753 
/* Masked unaligned store of 8 x i64: only elements whose bit in __U is set
   are written to memory; the rest of the destination is untouched. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
                                     (__mmask8) __U);
}

/* Unaligned 512-bit integer store (all-ones mask writes every element). */
static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_si512 (void *__P, __m512i __A)
{
  __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
            (__mmask16) -1);
}

/* Masked unaligned store of 16 x i32. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
                                     (__mmask16) __U);
}

/* Masked unaligned store of 8 x double. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Unaligned store of 8 x double. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_pd(void *__P, __m512d __A)
{
  __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1);
}

/* Masked unaligned store of 16 x float. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

/* Unaligned store of 16 x float. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_ps(void *__P, __m512 __A)
{
  __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1);
}
   4799 
/* Masked aligned store of 8 x double; __P must be 64-byte aligned.  Only
   elements selected by __U are written. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Aligned store of 8 x double via a plain dereference (64-byte aligned). */
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_pd(void *__P, __m512d __A)
{
  *(__m512d*)__P = __A;
}

/* Masked aligned store of 16 x float. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

/* Aligned store of 16 x float via a plain dereference (64-byte aligned). */
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_ps(void *__P, __m512 __A)
{
  *(__m512*)__P = __A;
}
   4824 
   4825 static __inline void __DEFAULT_FN_ATTRS
   4826 _mm512_store_si512 (void *__P, __m512i __A)
   4827 {
   4828   *(__m512i *) __P = __A;
   4829 }
   4830 
   4831 static __inline void __DEFAULT_FN_ATTRS
   4832 _mm512_store_epi32 (void *__P, __m512i __A)
   4833 {
   4834   *(__m512i *) __P = __A;
   4835 }
   4836 
   4837 static __inline void __DEFAULT_FN_ATTRS
   4838 _mm512_store_epi64 (void *__P, __m512i __A)
   4839 {
   4840   *(__m512i *) __P = __A;
   4841 }
   4842 
   4843 /* Mask ops */
   4844 
/* Bitwise NOT of a 16-bit mask (KNOTW). */
static __inline __mmask16 __DEFAULT_FN_ATTRS
_mm512_knot(__mmask16 __M)
{
  return __builtin_ia32_knothi(__M);
}
   4850 
   4851 /* Integer compare */
   4852 
/* Integer element compares.  The immediate passed to the generic
   (u)cmp[dq]512_mask builtins selects the predicate:
     0 = EQ, 1 = LT, 2 = LE, 4 = NE, 5 = GE, 6 = GT
   cmp* compares as signed, ucmp* as unsigned.  Each intrinsic yields one
   result bit per element; the _mask_ variants additionally AND the result
   with the caller's write-mask __u. */

/* == : signed uses the dedicated pcmpeq builtins, unsigned uses ucmp/0
   (equality is the same either way; the split mirrors the ISA forms). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
                                                   (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
                                                   __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
                                                  __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
                                                  (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
                                                __u);
}

/* >= : predicate 5. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                               (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                               __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                                __u);
}

/* > : signed uses the dedicated pcmpgt builtins, unsigned uses ucmp/6. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
                                                   (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
                                                   __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
                                                  __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
                                                  (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
                                                __u);
}

/* <= : predicate 2. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                               (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                               __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                                __u);
}

/* < : predicate 1. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                               (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                               __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                                __u);
}

/* != : predicate 4. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
                                               (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
                                               __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
                                                __u);
}
   5140 
   5141 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5142 _mm512_cvtepi8_epi32(__m128i __A)
   5143 {
   5144   /* This function always performs a signed extension, but __v16qi is a char
   5145      which may be signed or unsigned, so use __v16qs. */
   5146   return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
   5147 }
   5148 
   5149 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5150 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
   5151 {
   5152   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   5153                                              (__v16si)_mm512_cvtepi8_epi32(__A),
   5154                                              (__v16si)__W);
   5155 }
   5156 
   5157 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5158 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
   5159 {
   5160   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   5161                                              (__v16si)_mm512_cvtepi8_epi32(__A),
   5162                                              (__v16si)_mm512_setzero_si512());
   5163 }
   5164 
   5165 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5166 _mm512_cvtepi8_epi64(__m128i __A)
   5167 {
   5168   /* This function always performs a signed extension, but __v16qi is a char
   5169      which may be signed or unsigned, so use __v16qs. */
   5170   return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
   5171 }
   5172 
   5173 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5174 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
   5175 {
   5176   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5177                                              (__v8di)_mm512_cvtepi8_epi64(__A),
   5178                                              (__v8di)__W);
   5179 }
   5180 
   5181 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5182 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
   5183 {
   5184   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5185                                              (__v8di)_mm512_cvtepi8_epi64(__A),
   5186                                              (__v8di)_mm512_setzero_si512 ());
   5187 }
   5188 
   5189 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5190 _mm512_cvtepi32_epi64(__m256i __X)
   5191 {
   5192   return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
   5193 }
   5194 
   5195 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5196 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
   5197 {
   5198   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5199                                              (__v8di)_mm512_cvtepi32_epi64(__X),
   5200                                              (__v8di)__W);
   5201 }
   5202 
   5203 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5204 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
   5205 {
   5206   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5207                                              (__v8di)_mm512_cvtepi32_epi64(__X),
   5208                                              (__v8di)_mm512_setzero_si512());
   5209 }
   5210 
   5211 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5212 _mm512_cvtepi16_epi32(__m256i __A)
   5213 {
   5214   return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
   5215 }
   5216 
   5217 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5218 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
   5219 {
   5220   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   5221                                             (__v16si)_mm512_cvtepi16_epi32(__A),
   5222                                             (__v16si)__W);
   5223 }
   5224 
   5225 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5226 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
   5227 {
   5228   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   5229                                             (__v16si)_mm512_cvtepi16_epi32(__A),
   5230                                             (__v16si)_mm512_setzero_si512 ());
   5231 }
   5232 
   5233 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5234 _mm512_cvtepi16_epi64(__m128i __A)
   5235 {
   5236   return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
   5237 }
   5238 
   5239 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5240 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
   5241 {
   5242   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5243                                              (__v8di)_mm512_cvtepi16_epi64(__A),
   5244                                              (__v8di)__W);
   5245 }
   5246 
   5247 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5248 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
   5249 {
   5250   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5251                                              (__v8di)_mm512_cvtepi16_epi64(__A),
   5252                                              (__v8di)_mm512_setzero_si512());
   5253 }
   5254 
   5255 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5256 _mm512_cvtepu8_epi32(__m128i __A)
   5257 {
   5258   return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
   5259 }
   5260 
   5261 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5262 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
   5263 {
   5264   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   5265                                              (__v16si)_mm512_cvtepu8_epi32(__A),
   5266                                              (__v16si)__W);
   5267 }
   5268 
   5269 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5270 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
   5271 {
   5272   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   5273                                              (__v16si)_mm512_cvtepu8_epi32(__A),
   5274                                              (__v16si)_mm512_setzero_si512());
   5275 }
   5276 
   5277 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5278 _mm512_cvtepu8_epi64(__m128i __A)
   5279 {
   5280   return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
   5281 }
   5282 
   5283 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5284 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
   5285 {
   5286   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5287                                              (__v8di)_mm512_cvtepu8_epi64(__A),
   5288                                              (__v8di)__W);
   5289 }
   5290 
   5291 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5292 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
   5293 {
   5294   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5295                                              (__v8di)_mm512_cvtepu8_epi64(__A),
   5296                                              (__v8di)_mm512_setzero_si512());
   5297 }
   5298 
   5299 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5300 _mm512_cvtepu32_epi64(__m256i __X)
   5301 {
   5302   return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
   5303 }
   5304 
   5305 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5306 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
   5307 {
   5308   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5309                                              (__v8di)_mm512_cvtepu32_epi64(__X),
   5310                                              (__v8di)__W);
   5311 }
   5312 
   5313 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5314 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
   5315 {
   5316   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5317                                              (__v8di)_mm512_cvtepu32_epi64(__X),
   5318                                              (__v8di)_mm512_setzero_si512());
   5319 }
   5320 
/* Zero-extend the 16 unsigned 16-bit elements of __A to 32-bit elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu16_epi32(__m256i __A)
{
  return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
}

/* Merge-masked form: lane i is the zero-extended value when bit i of __U is
   set, otherwise the corresponding lane of __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_cvtepu16_epi32(__A),
                                            (__v16si)__W);
}

/* Zero-masked form: lanes whose mask bit is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_cvtepu16_epi32(__A),
                                            (__v16si)_mm512_setzero_si512());
}
   5342 
/* Zero-extend the 8 unsigned 16-bit elements of __A to 64-bit elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu16_epi64(__m128i __A)
{
  return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
}

/* Merge-masked form: lane i is the zero-extended value when bit i of __U is
   set, otherwise the corresponding lane of __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu16_epi64(__A),
                                             (__v8di)__W);
}

/* Zero-masked form: lanes whose mask bit is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu16_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}
   5364 
/* Rotate each 32-bit element of __A right by the per-element count in __B
   (variable-count rotate).  The builtin takes a pass-through vector and a
   write-mask directly; -1 enables all 16 lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rorv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) -1);
}

/* Merge-masked form: lanes whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si) __W,
              (__mmask16) __U);
}

/* Zero-masked form: lanes whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) __U);
}
   5393 
/* Rotate each 64-bit element of __A right by the per-element count in __B
   (variable-count rotate).  -1 enables all 8 lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rorv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) -1);
}

/* Merge-masked form: lanes whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di) __W,
              (__mmask8) __U);
}

/* Zero-masked form: lanes whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) __U);
}
   5422 
   5423 
   5424 
/* Packed integer comparisons producing a bit-mask.  'p' is the comparison
   predicate (e.g. _MM_CMPINT_EQ) and must be a compile-time immediate, which
   is why these are macros rather than inline functions.  'cmpd/cmpq' compare
   signed elements; 'ucmpd/ucmpq' compare unsigned.  The _mask_* variants
   perform the comparison under write-mask m: result bits whose corresponding
   bit in m is clear are zeroed. */
#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)-1); })

#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1); })

#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)(m)); })

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)); })

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)); })
   5464 
/* Rotate each element of 'a' left by the immediate count 'b'.  Implemented as
   macros because the rotate count must be a compile-time immediate.  The
   mask/maskz variants merge with W or zero under write-mask U, respectively;
   -1 means all lanes active. */
#define _mm512_rol_epi32(a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })

#define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)(__m512i)(W), \
                                        (__mmask16)(U)); })

#define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })

#define _mm512_rol_epi64(a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })

#define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)(__m512i)(W), (__mmask8)(U)); })

#define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
/* Rotate each 32-bit element of __A left by the per-element count in __B
   (variable-count rotate).  -1 enables all 16 lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) -1);
}

/* Merge-masked form: lanes whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si) __W,
              (__mmask16) __U);
}

/* Zero-masked form: lanes whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) __U);
}
   5521 
/* Rotate each 64-bit element of __A left by the per-element count in __B
   (variable-count rotate).  -1 enables all 8 lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rolv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) -1);
}

/* Merge-masked form: lanes whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di) __W,
              (__mmask8) __U);
}

/* Zero-masked form: lanes whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) __U);
}
   5550 
/* Rotate each element of 'A' right by the immediate count 'B'.  Macros
   because the rotate count must be a compile-time immediate.  The mask/maskz
   variants merge with W or zero under write-mask U; -1 means all lanes. */
#define _mm512_ror_epi32(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })

#define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)(__m512i)(W), \
                                        (__mmask16)(U)); })

#define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })

#define _mm512_ror_epi64(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })

#define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)(__m512i)(W), (__mmask8)(U)); })

#define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
   5579 
/* Shift each 32-bit element of __A left by __B bits (same count for all
   lanes). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_slli_epi32(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
}

/* Merge-masked form: lanes whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_slli_epi32(__A, __B),
                                         (__v16si)__W);
}

/* Zero-masked form: lanes whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_slli_epi32(__A, __B),
                                         (__v16si)_mm512_setzero_si512());
}
   5600 
/* Shift each 64-bit element of __A left by __B bits (same count for all
   lanes). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_slli_epi64(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
}

/* Merge-masked form: lanes whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_slli_epi64(__A, __B),
                                          (__v8di)__W);
}

/* Zero-masked form: lanes whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_slli_epi64(__A, __B),
                                          (__v8di)_mm512_setzero_si512());
}
   5622 
/* Shift each 32-bit element of __A right logically by __B bits (same count
   for all lanes). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srli_epi32(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
}

/* Merge-masked form: lanes whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_srli_epi32(__A, __B),
                                         (__v16si)__W);
}

/* Zero-masked form: lanes whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_srli_epi32(__A, __B),
                                         (__v16si)_mm512_setzero_si512());
}
   5643 
/* Shift each 64-bit element of __A right logically by __B bits (same count
   for all lanes). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srli_epi64(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
}

/* Merge-masked form: lanes whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_srli_epi64(__A, __B),
                                          (__v8di)__W);
}

/* Zero-masked form: lanes whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_srli_epi64(__A, __B),
                                          (__v8di)_mm512_setzero_si512());
}
   5665 
/* Aligned masked load of 16 x 32-bit elements from __P; lanes whose bit in
   __U is clear keep the corresponding element of __W.  NOTE(review): movdqa32
   is the aligned form — __P is expected to be 64-byte aligned. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
              (__v16si) __W,
              (__mmask16) __U);
}

/* Zero-masked aligned load: unselected lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) __U);
}

/* Aligned masked store: only lanes whose bit in __U is set are written to
   __P; other memory locations are left untouched. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
          (__mmask16) __U);
}
   5689 
/* Masked register move: lane i of the result is __A[i] when bit i of __U is
   set, otherwise __W[i]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                 (__v16si) __A,
                 (__v16si) __W);
}

/* Zero-masked move: unselected 32-bit lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                 (__v16si) __A,
                 (__v16si) _mm512_setzero_si512 ());
}

/* Masked register move for 64-bit lanes: select __A where __U set, else __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                 (__v8di) __A,
                 (__v8di) __W);
}

/* Zero-masked move: unselected 64-bit lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                 (__v8di) __A,
                 (__v8di) _mm512_setzero_si512 ());
}
   5721 
/* Aligned masked load of 8 x 64-bit elements from __P; lanes whose bit in
   __U is clear keep the corresponding element of __W.  NOTE(review): movdqa64
   is the aligned form — __P is expected to be 64-byte aligned. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
              (__v8di) __W,
              (__mmask8) __U);
}

/* Zero-masked aligned load: unselected lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) __U);
}

/* Aligned masked store: only lanes whose bit in __U is set are written to
   __P; other memory locations are left untouched. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
          (__mmask8) __U);
}
   5745 
/* Duplicate the even-indexed double elements of __A into the adjacent odd
   lanes: result = { a0,a0, a2,a2, a4,a4, a6,a6 }. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_movedup_pd (__m512d __A)
{
  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
                                          0, 0, 2, 2, 4, 4, 6, 6);
}

/* Merge-masked form: lanes whose bit in __U is clear come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_movedup_pd(__A),
                                              (__v8df)__W);
}

/* Zero-masked form: lanes whose bit in __U is clear are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_movedup_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}
   5768 
/* Fix up special double-precision values (NaN, +/-Inf, +/-0, ...) in A/B
   according to the per-class response table in C and the immediate 'imm'.
   Macros because imm (and R) must be compile-time immediates.  *_round_*
   variants take an explicit rounding/exception-control argument R; the others
   use _MM_FROUND_CUR_DIRECTION.  mask/maskz merge with A or zero under U. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              (int)(R)); })

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION); })
   5808 
/* Single-precision counterparts of the fixupimm_pd family above: fix up
   special float values per the response table in C and the immediate 'imm'.
   *_round_* variants take explicit rounding control R; the others use
   _MM_FROUND_CUR_DIRECTION.  mask/maskz merge with A or zero under U. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1, (int)(R)); })

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U), (int)(R)); })

#define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U), \
                                             (int)(R)); })

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION); })
   5848 
/* Scalar double fixupimm: fix up the low element of B per the response table
   in C and the immediate 'imm'; upper element behavior follows the builtin.
   *_round_* variants take explicit rounding control R; the others use
   _MM_FROUND_CUR_DIRECTION.  mask/maskz operate on the low element under U. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)); })

#define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })
   5887 
/* Scalar float fixupimm: fix up the low element of B per the response table
   in C and the immediate 'imm'; upper elements behavior follows the builtin.
   *_round_* variants take explicit rounding control R; the others use
   _MM_FROUND_CUR_DIRECTION.  mask/maskz operate on the low element under U. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)-1, (int)(R)); })

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)(U), (int)(R)); })

#define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)(U), \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)); })

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })
   5926 
/* Scalar double getexp with explicit rounding/exception control R (macro
   because R must be a compile-time immediate): low result element is the
   exponent of B's low element as a double; all lanes enabled (-1 mask). */
#define _mm_getexp_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(R)); })
   5932 
   5933 
/* Scalar double getexp: compute the exponent of __B's low element as a
   double, using the current rounding direction; all lanes enabled. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_getexp_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
                 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked form: when bit 0 of __U is clear the low result element is
   taken from __W instead. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) __W,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   5950 
   5951 #define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({\
   5952   (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
   5953                                                  (__v2df)(__m128d)(B), \
   5954                                                  (__v2df)(__m128d)(W), \
   5955                                                  (__mmask8)(U), (int)(R)); })
   5956 
   5957 static __inline__ __m128d __DEFAULT_FN_ATTRS
   5958 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
   5959 {
   5960  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
   5961           (__v2df) __B,
   5962           (__v2df) _mm_setzero_pd (),
   5963           (__mmask8) __U,
   5964           _MM_FROUND_CUR_DIRECTION);
   5965 }
   5966 
   5967 #define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({\
   5968   (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
   5969                                                  (__v2df)(__m128d)(B), \
   5970                                                  (__v2df)_mm_setzero_pd(), \
   5971                                                  (__mmask8)(U), (int)(R)); })
   5972 
   5973 #define _mm_getexp_round_ss(A, B, R) __extension__ ({ \
   5974   (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
   5975                                                 (__v4sf)(__m128)(B), \
   5976                                                 (__v4sf)_mm_setzero_ps(), \
   5977                                                 (__mmask8)-1, (int)(R)); })
   5978 
   5979 static __inline__ __m128 __DEFAULT_FN_ATTRS
   5980 _mm_getexp_ss (__m128 __A, __m128 __B)
   5981 {
   5982   return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
   5983                 (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
   5984 }
   5985 
   5986 static __inline__ __m128 __DEFAULT_FN_ATTRS
   5987 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   5988 {
   5989  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
   5990           (__v4sf) __B,
   5991           (__v4sf) __W,
   5992           (__mmask8) __U,
   5993           _MM_FROUND_CUR_DIRECTION);
   5994 }
   5995 
   5996 #define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({\
   5997   (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
   5998                                                 (__v4sf)(__m128)(B), \
   5999                                                 (__v4sf)(__m128)(W), \
   6000                                                 (__mmask8)(U), (int)(R)); })
   6001 
   6002 static __inline__ __m128 __DEFAULT_FN_ATTRS
   6003 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
   6004 {
   6005  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
   6006           (__v4sf) __B,
   6007           (__v4sf) _mm_setzero_pd (),
   6008           (__mmask8) __U,
   6009           _MM_FROUND_CUR_DIRECTION);
   6010 }
   6011 
/* Zero-masked scalar-single getexp, explicit rounding. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(R)); })

/* Scalar getmant family (VGETMANTSD): C and D are the interval/sign-control
   enums, packed into the instruction immediate as (D << 2) | C.  Builtin
   trailing operands are (passthrough, mask, rounding), as for getexp. */
#define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R)); })

/* Unmasked, current rounding direction. */
#define _mm_getmant_sd(A, B, C, D)  __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked: W is the passthrough operand. */
#define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION); })
   6040 
   6041 #define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R)({\
   6042   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   6043                                                (__v2df)(__m128d)(B), \
   6044                                                (int)(((D)<<2) | (C)), \
   6045                                                (__v2df)(__m128d)(W), \
   6046                                                (__mmask8)(U), (int)(R)); })
   6047 
/* Zero-masked scalar-double getmant, current rounding direction. */
#define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION); })

/* Zero-masked scalar-double getmant, explicit rounding. */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)); })

/* Single-precision getmant (VGETMANTSS); same immediate packing
   (D << 2) | C and same operand layout as the SD variants above. */
#define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R)); })

/* Unmasked, current rounding direction. */
#define _mm_getmant_ss(A, B, C, D) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked: W is the passthrough operand. */
#define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION); })
   6085 
   6086 #define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R)({\
   6087   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   6088                                               (__v4sf)(__m128)(B), \
   6089                                               (int)(((D)<<2) | (C)), \
   6090                                               (__v4sf)(__m128)(W), \
   6091                                               (__mmask8)(U), (int)(R)); })
   6092 
   6093 #define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\
   6094   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   6095                                               (__v4sf)(__m128)(B), \
   6096                                               (int)(((D)<<2) | (C)), \
   6097                                               (__v4sf)_mm_setzero_pd(), \
   6098                                               (__mmask8)(U), \
   6099                                               _MM_FROUND_CUR_DIRECTION); })
   6100 
/* Zero-masked scalar-single getmant, explicit rounding. */
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)); })

/* Copy a 16-bit mask value; a plain value copy at the C level (the KMOVW
   instruction selection is left to the backend). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kmov (__mmask16 __A)
{
  return  __A;
}

/* Scalar compare with explicit predicate P and rounding/SAE control R;
   returns the comparison result as an int. */
#define _mm_comi_round_sd(A, B, P, R) __extension__ ({\
  (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                              (int)(P), (int)(R)); })

#define _mm_comi_round_ss(A, B, P, R) __extension__ ({\
  (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                              (int)(P), (int)(R)); })

/* Low double -> signed 64-bit with explicit rounding (64-bit targets only). */
#ifdef __x86_64__
#define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
#endif
   6126 
/* Two-source 32-bit permute (vpermi2var): __I supplies the element indices
   selecting from the concatenation of __A and __B, under write-mask __U.
   NOTE(review): in the "mask2" form the index operand __I is presumably the
   merge source for masked-off elements -- confirm against the VPERMI2D spec. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
         __mmask16 __U, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
                   (__v16si) __I
                   /* idx */ ,
                   (__v16si) __B,
                   (__mmask16) __U);
}
   6137 
/* Logical shift-left family.  sll_* shift all elements by the scalar count
   in the low 64 bits of __B; sllv_* shift each element by the matching
   element of __Y.  Mask variants route the unmasked result through
   __builtin_ia32_select{d,q}_512: merge forms take non-selected elements
   from __W, maskz forms from a zero vector. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sll_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
}

/* Merge-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sll_epi32(__A, __B),
                                          (__v16si)__W);
}

/* Zero-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sll_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sll_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
}

/* Merge-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sll_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_sll_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
}

/* Per-element variable shift counts. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sllv_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
}

/* Merge-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
                                           (__v16si)__W);
}

/* Zero-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
                                           (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sllv_epi64(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
}

/* Merge-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
                                            (__v8di)__W);
}

/* Zero-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
                                            (__v8di)_mm512_setzero_si512());
}
   6225 
/* Arithmetic shift-right family: sra_* use the scalar count in __B,
   srav_* use per-element counts from __Y.  Masking follows the same
   select-builtin pattern as the sll family above (merge from __W,
   or zero for maskz). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sra_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
}

/* Merge-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sra_epi32(__A, __B),
                                          (__v16si)__W);
}

/* Zero-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sra_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sra_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
}

/* Merge-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_sra_epi64(__A, __B),
                                           (__v8di)__W);
}

/* Zero-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_sra_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
}

/* Per-element variable shift counts. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srav_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
}

/* Merge-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srav_epi32(__X, __Y),
                                           (__v16si)__W);
}

/* Zero-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srav_epi32(__X, __Y),
                                           (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srav_epi64(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
}

/* Merge-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srav_epi64(__X, __Y),
                                            (__v8di)__W);
}

/* Zero-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srav_epi64(__X, __Y),
                                            (__v8di)_mm512_setzero_si512());
}
   6313 
/* Logical shift-right family: srl_* use the scalar count in __B,
   srlv_* use per-element counts from __Y.  Masking follows the same
   select-builtin pattern as the sll/sra families above. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srl_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
}

/* Merge-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srl_epi32(__A, __B),
                                          (__v16si)__W);
}

/* Zero-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srl_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srl_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
}

/* Merge-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srl_epi64(__A, __B),
                                           (__v8di)__W);
}

/* Zero-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srl_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
}

/* Per-element variable shift counts. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srlv_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
}

/* Merge-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
                                           (__v16si)__W);
}

/* Zero-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
                                           (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
}

/* Merge-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
                                            (__v8di)__W);
}

/* Zero-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
                                            (__v8di)_mm512_setzero_si512());
}
   6401 
/* Ternary logic (VPTERNLOG): imm is the 8-bit truth table applied bitwise to
   A, B, C.  Unmasked forms pass an all-ones mask; _mask forms use the _mask
   builtin (A doubles as the merge source), _maskz forms use the _maskz
   builtin (zeroing). */
#define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                            (__v16si)(__m512i)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1); })

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                            (__v16si)(__m512i)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U)); })

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U)); })

/* 64-bit element counterparts. */
#define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                            (__v8di)(__m512i)(B), \
                                            (__v8di)(__m512i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                            (__v8di)(__m512i)(B), \
                                            (__v8di)(__m512i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U)); })
   6437 
/* Scalar FP -> integer conversions with an explicit rounding control R;
   the inline-function forms use _MM_FROUND_CUR_DIRECTION.  64-bit-result
   variants are only available on x86-64.  _i* and _si* spellings are
   aliases wrapping the same builtin. */
#ifdef __x86_64__
#define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
#endif

#define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })

#define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })

/* Low double -> unsigned 32-bit. */
#define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)); })

static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvtsd_u32 (__m128d __A)
{
  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
             _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/* Low double -> unsigned 64-bit. */
#define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
                                                  (int)(R)); })

static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvtsd_u64 (__m128d __A)
{
  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
                 __A,
                 _MM_FROUND_CUR_DIRECTION);
}
#endif

/* Low float -> signed 32/64-bit. */
#define _mm_cvt_roundss_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })

#define _mm_cvt_roundss_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })

#ifdef __x86_64__
#define _mm_cvt_roundss_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })

#define _mm_cvt_roundss_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
#endif

/* Low float -> unsigned 32-bit. */
#define _mm_cvt_roundss_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); })

static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvtss_u32 (__m128 __A)
{
  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
             _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/* Low float -> unsigned 64-bit. */
#define _mm_cvt_roundss_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
                                                  (int)(R)); })

static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvtss_u64 (__m128 __A)
{
  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
                 __A,
                 _MM_FROUND_CUR_DIRECTION);
}
#endif
   6510 
/* Truncating (cvtt) scalar FP -> integer conversions.  Same layout as the
   rounding family above: macro forms take R (for SAE control), inline forms
   pass _MM_FROUND_CUR_DIRECTION; 64-bit results are x86-64 only. */
#define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })

#define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })

static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvttsd_i32 (__m128d __A)
{
  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })

#define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })

static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvttsd_i64 (__m128d __A)
{
  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
              _MM_FROUND_CUR_DIRECTION);
}
#endif

/* Truncating low double -> unsigned. */
#define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); })

static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvttsd_u32 (__m128d __A)
{
  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
                                                   (int)(R)); })

static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvttsd_u64 (__m128d __A)
{
  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
                  __A,
                  _MM_FROUND_CUR_DIRECTION);
}
#endif

/* Truncating low float -> signed. */
#define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })

#define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })

static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvttss_i32 (__m128 __A)
{
  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })

#define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })

static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvttss_i64 (__m128 __A)
{
  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
              _MM_FROUND_CUR_DIRECTION);
}
#endif

/* Truncating low float -> unsigned. */
#define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); })

static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvttss_u32 (__m128 __A)
{
  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
                                                   (int)(R)); })

static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvttss_u64 (__m128 __A)
{
  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
                  __A,
                  _MM_FROUND_CUR_DIRECTION);
}
#endif
   6614 
/* Two-source double-precision permute (VPERMI2PD): each result element is
 * selected from __A or __B by the index vector __I.  In the "mask2" form,
 * elements whose bit in __U is clear are taken from __I (the index operand
 * is also the pass-through), which is why the builtin receives __I in the
 * merge position. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
            __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
              (__v8di) __I
              /* idx */ ,
              (__v8df) __B,
              (__mmask8) __U);
}
   6625 
/* Two-source single-precision permute (VPERMI2PS); see the _pd variant above
 * for the mask2 merge semantics (pass-through comes from __I). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
            __m512 __B)
{
  return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
                   (__v16si) __I
                   /* idx */ ,
                   (__v16sf) __B,
                   (__mmask16) __U);
}
   6636 
/* Two-source 64-bit-integer permute (VPERMI2Q); mask2 form, pass-through
 * elements come from the index operand __I. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
         __mmask8 __U, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
                   (__v8di) __I
                   /* idx */ ,
                   (__v8di) __B,
                   (__mmask8) __U);
}
   6647 
/* In-lane double-precision permute (VPERMILPD with immediate): each 128-bit
 * lane picks its two elements via one control bit per element from C.  The
 * second shufflevector operand is undefined because only indices 0..7 into
 * the first operand are ever produced. */
#define _mm512_permute_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   0 + (((C) >> 0) & 0x1), \
                                   0 + (((C) >> 1) & 0x1), \
                                   2 + (((C) >> 2) & 0x1), \
                                   2 + (((C) >> 3) & 0x1), \
                                   4 + (((C) >> 4) & 0x1), \
                                   4 + (((C) >> 5) & 0x1), \
                                   6 + (((C) >> 6) & 0x1), \
                                   6 + (((C) >> 7) & 0x1)); })

/* Merge-masking wrapper: result elements with a clear bit in U come from W. */
#define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permute_pd((X), (C)), \
                                       (__v8df)(__m512d)(W)); })

/* Zero-masking wrapper: result elements with a clear bit in U are zeroed. */
#define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permute_pd((X), (C)), \
                                       (__v8df)_mm512_setzero_pd()); })
   6669 
/* In-lane single-precision permute (VPERMILPS with immediate): the same
 * 8-bit control C (two bits per element) is applied to each of the four
 * 128-bit lanes independently. */
#define _mm512_permute_ps(X, C) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \
                                  (__v16sf)_mm512_undefined_ps(), \
                                   0  + (((C) >> 0) & 0x3), \
                                   0  + (((C) >> 2) & 0x3), \
                                   0  + (((C) >> 4) & 0x3), \
                                   0  + (((C) >> 6) & 0x3), \
                                   4  + (((C) >> 0) & 0x3), \
                                   4  + (((C) >> 2) & 0x3), \
                                   4  + (((C) >> 4) & 0x3), \
                                   4  + (((C) >> 6) & 0x3), \
                                   8  + (((C) >> 0) & 0x3), \
                                   8  + (((C) >> 2) & 0x3), \
                                   8  + (((C) >> 4) & 0x3), \
                                   8  + (((C) >> 6) & 0x3), \
                                   12 + (((C) >> 0) & 0x3), \
                                   12 + (((C) >> 2) & 0x3), \
                                   12 + (((C) >> 4) & 0x3), \
                                   12 + (((C) >> 6) & 0x3)); })

/* Merge-masking wrapper: result elements with a clear bit in U come from W. */
#define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_permute_ps((X), (C)), \
                                      (__v16sf)(__m512)(W)); })

/* Zero-masking wrapper: result elements with a clear bit in U are zeroed. */
#define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_permute_ps((X), (C)), \
                                      (__v16sf)_mm512_setzero_ps()); })
   6699 
/* In-lane double-precision permute with variable control (VPERMILPD):
 * each element of __A is selected within its 128-bit lane by __C. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_permutevar_pd(__m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
}

/* Merge-masking form: unselected elements (clear bits in __U) come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                         (__v8df)_mm512_permutevar_pd(__A, __C),
                                         (__v8df)__W);
}

/* Zero-masking form: unselected elements (clear bits in __U) are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                         (__v8df)_mm512_permutevar_pd(__A, __C),
                                         (__v8df)_mm512_setzero_pd());
}
   6721 
/* In-lane single-precision permute with variable control (VPERMILPS). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_permutevar_ps(__m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
}

/* Merge-masking form: unselected elements (clear bits in __U) come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
                                        (__v16sf)__W);
}

/* Zero-masking form: unselected elements (clear bits in __U) are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
                                        (__v16sf)_mm512_setzero_ps());
}
   6743 
/* Two-source double-precision permute (VPERMT2PD): each result element is
 * chosen from __A or __B by the index vector __I.  With an all-ones mask
 * no merging occurs. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
                    /* idx */ ,
                    (__v8df) __A,
                    (__v8df) __B,
                    (__mmask8) -1);
}

/* Merge-masking form: elements with a clear bit in __U keep __A's value
 * (in the "t2var" form, __A is both source and pass-through). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
                    /* idx */ ,
                    (__v8df) __A,
                    (__v8df) __B,
                    (__mmask8) __U);
}

/* Zero-masking form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
            __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
                                                         /* idx */ ,
                                                         (__v8df) __A,
                                                         (__v8df) __B,
                                                         (__mmask8) __U);
}
   6774 
/* Two-source single-precision permute (VPERMT2PS); see the _pd variants
 * above for the unmasked / merge / zero masking semantics. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
{
  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
                                                         /* idx */ ,
                                                         (__v16sf) __A,
                                                         (__v16sf) __B,
                                                         (__mmask16) -1);
}

/* Merge-masking form: elements with a clear bit in __U keep __A's value. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
{
  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
                                                         /* idx */ ,
                                                         (__v16sf) __A,
                                                         (__v16sf) __B,
                                                         (__mmask16) __U);
}

/* Zero-masking form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
            __m512 __B)
{
  return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
                                                        /* idx */ ,
                                                        (__v16sf) __A,
                                                        (__v16sf) __B,
                                                        (__mmask16) __U);
}
   6805 
/* VPTESTNMD: per 32-bit element, set the result mask bit when
 * (__A & __B) == 0.  Unmasked form (all-ones write mask). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
             (__v16si) __B,
             (__mmask16) -1);
}

/* Masked VPTESTNMD: result bits are additionally ANDed with __U. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
             (__v16si) __B, __U);
}

/* VPTESTNMQ: per 64-bit element, set the result mask bit when
 * (__A & __B) == 0.  Unmasked form. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
            (__v8di) __B,
            (__mmask8) -1);
}

/* Masked VPTESTNMQ: result bits are additionally ANDed with __U. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
            (__v8di) __B, __U);
}
   6835 
/* VCVTTPD2UDQ: convert 8 doubles to 8 unsigned 32-bit integers with
 * truncation; R is the rounding/SAE control.  Unmasked form. */
#define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_undefined_si256(), \
                                             (__mmask8)-1, (int)(R)); })

/* Merge-masking form: unconverted elements (clear bits in U) come from W. */
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)); })

/* Zero-masking form: unconverted elements (clear bits in U) are zeroed. */
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)); })
   6850 
/* VCVTTPD2UDQ with the current rounding direction; unmasked form. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epu32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_undefined_si256 (),
                  (__mmask8) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking form: unconverted elements come from __W. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
                  (__v8si) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking form: unconverted elements are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_setzero_si256 (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
   6879 
/* VRNDSCALESD on the lower double of B (upper element copied from A).
 * imm encodes the rounding-scale control; R is the rounding/SAE operand.
 * Unmasked form with explicit rounding. */
#define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)); })

/* Unmasked form using the current rounding direction. */
#define _mm_roundscale_sd(A, B, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION); })

/* Merge-masking: lower element comes from W when bit 0 of U is clear. */
#define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION); })

/* Merge-masking with explicit rounding control R. */
#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)); })

/* Zero-masking: lower element is zeroed when bit 0 of U is clear. */
#define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION); })

/* Zero-masking with explicit rounding control R. */
#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)); })
   6921 
/* VRNDSCALESS on the lower float of B (upper elements copied from A).
 * imm encodes the rounding-scale control; R is the rounding/SAE operand.
 * Unmasked form with explicit rounding. */
#define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(imm), \
                                               (int)(R)); })

/* Unmasked form using the current rounding direction. */
#define _mm_roundscale_ss(A, B, imm) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(imm), \
                                               _MM_FROUND_CUR_DIRECTION); })

/* Merge-masking: lower element comes from W when bit 0 of U is clear. */
#define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(I), \
                                               _MM_FROUND_CUR_DIRECTION); })

/* Merge-masking with explicit rounding control R. */
#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(I), \
                                               (int)(R)); })

/* Zero-masking: lower element is zeroed when bit 0 of U is clear. */
#define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(I), \
                                               _MM_FROUND_CUR_DIRECTION); })

/* Zero-masking with explicit rounding control R. */
#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(I), \
                                               (int)(R)); })
   6963 
/* VSCALEFPD with explicit rounding R: elementwise A * 2^floor(B).
 * Unmasked form. */
#define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masking form: elements with a clear bit in U come from W. */
#define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masking form: elements with a clear bit in U are zeroed. */
#define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
   6981 
/* VSCALEFPD with the current rounding direction; unmasked form. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_scalef_pd (__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
                (__v8df) __B,
                (__v8df)
                _mm512_undefined_pd (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking form: elements with a clear bit in __U come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
                (__v8df) __B,
                (__v8df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
                (__v8df) __B,
                (__v8df)
                _mm512_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   7013 
/* VSCALEFPS with explicit rounding R: elementwise A * 2^floor(B).
 * Unmasked form. */
#define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)); })

/* Merge-masking form: elements with a clear bit in U come from W. */
#define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

/* Zero-masking form: elements with a clear bit in U are zeroed. */
#define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })
   7031 
/* VSCALEFPS with the current rounding direction; unmasked form. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_scalef_ps (__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
               (__v16sf) __B,
               (__v16sf)
               _mm512_undefined_ps (),
               (__mmask16) -1,
               _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking form: elements with a clear bit in __U come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
               (__v16sf) __B,
               (__v16sf) __W,
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
               (__v16sf) __B,
               (__v16sf)
               _mm512_setzero_ps (),
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}
   7063 
/* VSCALEFSD on the lower double (A[0] * 2^floor(B[0]), upper element from A)
 * with explicit rounding R; unmasked form. */
#define _mm_scalef_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)-1, (int)(R)); })

/* Unmasked VSCALEFSD using the current rounding direction. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_scalef_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
              (__v2df)( __B), (__v2df) _mm_setzero_pd(),
              (__mmask8) -1,
              _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking: lower element comes from __W when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
                 (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking macro with explicit rounding control R. */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), (int)(R)); })

/* Zero-masking: lower element is zeroed when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
                 (__v2df) __B,
                (__v2df) _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking macro with explicit rounding control R. */
#define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(U), (int)(R)); })
   7110 
/* VSCALEFSS on the lower float (A[0] * 2^floor(B[0]), upper elements from A)
 * with explicit rounding R; unmasked form. */
#define _mm_scalef_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)-1, (int)(R)); })

/* Unmasked VSCALEFSS using the current rounding direction. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_scalef_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
             (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
             (__mmask8) -1,
             _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking: lower element comes from __W when bit 0 of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking macro with explicit rounding control R. */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), (int)(R)); })

/* Zero-masking: lower element is zeroed when bit 0 of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
                 (__v4sf) __B,
                (__v4sf) _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   7151 
   7152 #define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \
   7153   (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
   7154                                              (__v4sf)(__m128)(B), \
   7155                                              (__v4sf)_mm_setzero_ps(), \
   7156                                              (__mmask8)(U), \
   7157                                              _MM_FROUND_CUR_DIRECTION); })
   7158 
/* VPSRAD: arithmetic right shift of each 32-bit element by immediate __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srai_epi32(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
}

/* Merge-masking form: elements with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
                                         (__v16si)_mm512_srai_epi32(__A, __B), \
                                         (__v16si)__W);
}

/* Zero-masking form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
                                         (__v16si)_mm512_srai_epi32(__A, __B), \
                                         (__v16si)_mm512_setzero_si512());
}

/* VPSRAQ: arithmetic right shift of each 64-bit element by immediate __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srai_epi64(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
}

/* Merge-masking form: elements with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
                                          (__v8di)_mm512_srai_epi64(__A, __B), \
                                          (__v8di)__W);
}

/* Zero-masking form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
                                          (__v8di)_mm512_srai_epi64(__A, __B), \
                                          (__v8di)_mm512_setzero_si512());
}
   7201 
/* VSHUFF32X4: shuffle 128-bit lanes of A and B selected by imm.
 * Unmasked form. */
#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1); })

/* Merge-masking form: elements with a clear bit in U come from W. */
#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)(__m512)(W), \
                                         (__mmask16)(U)); })

/* Zero-masking form: elements with a clear bit in U are zeroed. */
#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U)); })

/* VSHUFF64X2: shuffle 128-bit lanes of A and B selected by imm.
 * Unmasked form. */
#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1); })

/* Merge-masking form: elements with a clear bit in U come from W. */
#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)(__m512d)(W), \
                                          (__mmask8)(U)); })

/* Zero-masking form: elements with a clear bit in U are zeroed. */
#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U)); })
   7237 
/* Shuffle 128-bit integer lanes (32-bit granularity mask) from A and B
   per the immediate.  Unmasked form passes a zero passthru with an
   all-ones mask, so the passthru value is never selected. */
#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
                                          (__v16si)(__m512i)(B), (int)(imm), \
                                          (__v16si)_mm512_setzero_si512(), \
                                          (__mmask16)-1); })

/* Merge-masking form: elements with a zero bit in U come from W. */
#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
                                          (__v16si)(__m512i)(B), (int)(imm), \
                                          (__v16si)(__m512i)(W), \
                                          (__mmask16)(U)); })

/* Zero-masking form: elements with a zero bit in U are zeroed. */
#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
                                          (__v16si)(__m512i)(B), (int)(imm), \
                                          (__v16si)_mm512_setzero_si512(), \
                                          (__mmask16)(U)); })
   7255 
/* Shuffle 128-bit integer lanes (64-bit granularity mask) from A and B
   per the immediate.  Unmasked form passes a zero passthru with an
   all-ones mask, so the passthru value is never selected. */
#define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
                                          (__v8di)(__m512i)(B), (int)(imm), \
                                          (__v8di)_mm512_setzero_si512(), \
                                          (__mmask8)-1); })

/* Merge-masking form: elements with a zero bit in U come from W. */
#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
                                          (__v8di)(__m512i)(B), (int)(imm), \
                                          (__v8di)(__m512i)(W), \
                                          (__mmask8)(U)); })

/* Zero-masking form: elements with a zero bit in U are zeroed. */
#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
                                          (__v8di)(__m512i)(B), (int)(imm), \
                                          (__v8di)_mm512_setzero_si512(), \
                                          (__mmask8)(U)); })
   7273 
/* Shuffle doubles: per 128-bit lane, pick one element of A (even result
   slot) and one of B (odd result slot) from successive bits of M.
   Indices 0-7 address A, 8-15 address B in the shufflevector. */
#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), \
                                   0  + (((M) >> 0) & 0x1), \
                                   8  + (((M) >> 1) & 0x1), \
                                   2  + (((M) >> 2) & 0x1), \
                                   10 + (((M) >> 3) & 0x1), \
                                   4  + (((M) >> 4) & 0x1), \
                                   12 + (((M) >> 5) & 0x1), \
                                   6  + (((M) >> 6) & 0x1), \
                                   14 + (((M) >> 7) & 0x1)); })
   7285 
/* Merge-masking _mm512_shuffle_pd: result elements with a zero bit in U
   come from W. */
#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)(__m512d)(W)); })

/* Zero-masking _mm512_shuffle_pd: result elements with a zero bit in U
   are zeroed. */
#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)_mm512_setzero_pd()); })
   7295 
   7296 #define _mm512_shuffle_ps(A, B, M) __extension__ ({ \
   7297   (__m512d)__builtin_shufflevector((__v16sf)(__m512)(A), \
   7298                                    (__v16sf)(__m512)(B), \
   7299                                    0  + (((M) >> 0) & 0x3), \
   7300                                    0  + (((M) >> 2) & 0x3), \
   7301                                    16 + (((M) >> 4) & 0x3), \
   7302                                    16 + (((M) >> 6) & 0x3), \
   7303                                    4  + (((M) >> 0) & 0x3), \
   7304                                    4  + (((M) >> 2) & 0x3), \
   7305                                    20 + (((M) >> 4) & 0x3), \
   7306                                    20 + (((M) >> 6) & 0x3), \
   7307                                    8  + (((M) >> 0) & 0x3), \
   7308                                    8  + (((M) >> 2) & 0x3), \
   7309                                    24 + (((M) >> 4) & 0x3), \
   7310                                    24 + (((M) >> 6) & 0x3), \
   7311                                    12 + (((M) >> 0) & 0x3), \
   7312                                    12 + (((M) >> 2) & 0x3), \
   7313                                    28 + (((M) >> 4) & 0x3), \
   7314                                    28 + (((M) >> 6) & 0x3)); })
   7315 
/* Merge-masking _mm512_shuffle_ps: result elements with a zero bit in U
   come from W. */
#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)(__m512)(W)); })

/* Zero-masking _mm512_shuffle_ps: result elements with a zero bit in U
   are zeroed. */
#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)_mm512_setzero_ps()); })
   7325 
/* Square root of the low double of B with rounding control R; upper
   element taken from A.  Unmasked: zero passthru with all-ones mask
   (passthru never selected). */
#define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)); })
   7331 
   7332 static __inline__ __m128d __DEFAULT_FN_ATTRS
   7333 _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   7334 {
   7335  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
   7336                  (__v2df) __B,
   7337                 (__v2df) __W,
   7338                 (__mmask8) __U,
   7339                 _MM_FROUND_CUR_DIRECTION);
   7340 }
   7341 
/* Merge-masking sqrt of the low double of B with rounding control R;
   W supplies the low element when the mask bit is clear. */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)); })
   7347 
   7348 static __inline__ __m128d __DEFAULT_FN_ATTRS
   7349 _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
   7350 {
   7351  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
   7352                  (__v2df) __B,
   7353                 (__v2df) _mm_setzero_pd (),
   7354                 (__mmask8) __U,
   7355                 _MM_FROUND_CUR_DIRECTION);
   7356 }
   7357 
/* Zero-masking sqrt of the low double of B with rounding control R. */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })
   7363 
/* Square root of the low float of B with rounding control R; upper three
   elements taken from A.  Unmasked: zero passthru with all-ones mask. */
#define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)); })
   7369 
   7370 static __inline__ __m128 __DEFAULT_FN_ATTRS
   7371 _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   7372 {
   7373  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
   7374                  (__v4sf) __B,
   7375                 (__v4sf) __W,
   7376                 (__mmask8) __U,
   7377                 _MM_FROUND_CUR_DIRECTION);
   7378 }
   7379 
/* Merge-masking sqrt of the low float of B with rounding control R;
   W supplies the low element when the mask bit is clear. */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)); })
   7385 
   7386 static __inline__ __m128 __DEFAULT_FN_ATTRS
   7387 _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
   7388 {
   7389  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
   7390                  (__v4sf) __B,
   7391                 (__v4sf) _mm_setzero_ps (),
   7392                 (__mmask8) __U,
   7393                 _MM_FROUND_CUR_DIRECTION);
   7394 }
   7395 
/* Zero-masking sqrt of the low float of B with rounding control R. */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)); })
   7401 
/* Replicate the four floats of __A into all four 128-bit lanes. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcast_f32x4(__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                         0, 1, 2, 3, 0, 1, 2, 3,
                                         0, 1, 2, 3, 0, 1, 2, 3);
}

/* Merge-masking form: elements with a zero bit in __M come from __O. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                           (__v16sf)_mm512_broadcast_f32x4(__A),
                                           (__v16sf)__O);
}

/* Zero-masking form: elements with a zero bit in __M are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                           (__v16sf)_mm512_broadcast_f32x4(__A),
                                           (__v16sf)_mm512_setzero_ps());
}
   7425 
/* Replicate the four doubles of __A into both 256-bit halves. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_broadcast_f64x4(__m256d __A)
{
  return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3);
}

/* Merge-masking form: elements with a zero bit in __M come from __O. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                            (__v8df)_mm512_broadcast_f64x4(__A),
                                            (__v8df)__O);
}

/* Zero-masking form: elements with a zero bit in __M are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                            (__v8df)_mm512_broadcast_f64x4(__A),
                                            (__v8df)_mm512_setzero_pd());
}
   7448 
/* Replicate the four 32-bit ints of __A into all four 128-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i32x4(__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3,
                                          0, 1, 2, 3, 0, 1, 2, 3);
}

/* Merge-masking form: elements with a zero bit in __M come from __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                           (__v16si)_mm512_broadcast_i32x4(__A),
                                           (__v16si)__O);
}

/* Zero-masking form: elements with a zero bit in __M are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                           (__v16si)_mm512_broadcast_i32x4(__A),
                                           (__v16si)_mm512_setzero_si512());
}
   7472 
/* Replicate the four 64-bit ints of __A into both 256-bit halves. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i64x4(__m256i __A)
{
  return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3);
}

/* Merge-masking form: elements with a zero bit in __M come from __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                            (__v8di)_mm512_broadcast_i64x4(__A),
                                            (__v8di)__O);
}

/* Zero-masking form: elements with a zero bit in __M are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                            (__v8di)_mm512_broadcast_i64x4(__A),
                                            (__v8di)_mm512_setzero_si512());
}
   7495 
/* Broadcast the low double of __A to all 8 elements, merged with __O
   under mask __M. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512(__M,
                                              (__v8df) _mm512_broadcastsd_pd(__A),
                                              (__v8df) __O);
}

/* Broadcast the low double of __A to all 8 elements; elements with a
   zero bit in __M are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512(__M,
                                              (__v8df) _mm512_broadcastsd_pd(__A),
                                              (__v8df) _mm512_setzero_pd());
}
   7511 
/* Broadcast the low float of __A to all 16 elements, merged with __O
   under mask __M. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512(__M,
                                             (__v16sf) _mm512_broadcastss_ps(__A),
                                             (__v16sf) __O);
}

/* Broadcast the low float of __A to all 16 elements; elements with a
   zero bit in __M are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512(__M,
                                             (__v16sf) _mm512_broadcastss_ps(__A),
                                             (__v16sf) _mm512_setzero_ps());
}
   7527 
/* Down-convert 16 x i32 to 16 x i8 with signed saturation (VPMOVSDB).
   Unmasked: undefined passthru, all-ones mask. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtsepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) _mm_undefined_si128 (),
               (__mmask16) -1);
}

/* Merge-masking form: unwritten bytes come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) __O, __M);
}

/* Zero-masking form: unwritten bytes are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) _mm_setzero_si128 (),
               __M);
}

/* Masked store of the converted bytes to unaligned memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}
   7556 
/* Down-convert 16 x i32 to 16 x i16 with signed saturation (VPMOVSDW).
   Unmasked: undefined passthru, all-ones mask. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtsepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) _mm256_undefined_si256 (),
               (__mmask16) -1);
}

/* Merge-masking form: unwritten elements come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) __O, __M);
}

/* Zero-masking form: unwritten elements are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) _mm256_setzero_si256 (),
               __M);
}

/* Masked store of the converted words to unaligned memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
}
   7585 
/* Down-convert 8 x i64 to 8 x i8 with signed saturation (VPMOVSQB);
   result occupies the low 8 bytes of the __m128i. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtsepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) _mm_undefined_si128 (),
               (__mmask8) -1);
}

/* Merge-masking form: unwritten bytes come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) __O, __M);
}

/* Zero-masking form: unwritten bytes are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) _mm_setzero_si128 (),
               __M);
}

/* Masked store of the converted bytes to unaligned memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}
   7614 
/* Down-convert 8 x i64 to 8 x i32 with signed saturation (VPMOVSQD). */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtsepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) _mm256_undefined_si256 (),
               (__mmask8) -1);
}

/* Merge-masking form: unwritten elements come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) __O, __M);
}

/* Zero-masking form: unwritten elements are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) _mm256_setzero_si256 (),
               __M);
}

/* Masked store of the converted dwords to unaligned memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
}
   7643 
/* Down-convert 8 x i64 to 8 x i16 with signed saturation (VPMOVSQW). */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtsepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) _mm_undefined_si128 (),
               (__mmask8) -1);
}

/* Merge-masking form: unwritten elements come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) __O, __M);
}

/* Zero-masking form: unwritten elements are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) _mm_setzero_si128 (),
               __M);
}

/* Masked store of the converted words to unaligned memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}
   7672 
/* Down-convert 16 x u32 to 16 x u8 with unsigned saturation (VPMOVUSDB). */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtusepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) _mm_undefined_si128 (),
                (__mmask16) -1);
}

/* Merge-masking form: unwritten bytes come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) __O,
                __M);
}

/* Zero-masking form: unwritten bytes are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) _mm_setzero_si128 (),
                __M);
}

/* Masked store of the converted bytes to unaligned memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}
   7702 
/* Down-convert 16 x u32 to 16 x u16 with unsigned saturation (VPMOVUSDW). */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtusepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) _mm256_undefined_si256 (),
                (__mmask16) -1);
}

/* Merge-masking form: unwritten elements come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) __O,
                __M);
}

/* Zero-masking form: unwritten elements are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) _mm256_setzero_si256 (),
                __M);
}

/* Masked store of the converted words to unaligned memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
}
   7732 
/* Down-convert 8 x u64 to 8 x u8 with unsigned saturation (VPMOVUSQB). */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtusepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) _mm_undefined_si128 (),
                (__mmask8) -1);
}

/* Merge-masking form: unwritten bytes come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) __O,
                __M);
}

/* Zero-masking form: unwritten bytes are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) _mm_setzero_si128 (),
                __M);
}

/* Masked store of the converted bytes to unaligned memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}
   7762 
/* Down-convert 8 x u64 to 8 x u32 with unsigned saturation (VPMOVUSQD). */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtusepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) _mm256_undefined_si256 (),
                (__mmask8) -1);
}

/* Merge-masking form: unwritten elements come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) __O, __M);
}

/* Zero-masking form: unwritten elements are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) _mm256_setzero_si256 (),
                __M);
}

/* Masked store of the converted dwords to unaligned memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
}
   7791 
/* Down-convert 8 x u64 to 8 x u16 with unsigned saturation (VPMOVUSQW). */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtusepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) _mm_undefined_si128 (),
                (__mmask8) -1);
}

/* Merge-masking form: unwritten elements come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) __O, __M);
}

/* Zero-masking form: unwritten elements are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) _mm_setzero_si128 (),
                __M);
}

/* Masked store of the converted words to unaligned memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
}
   7820 
/* Down-convert 16 x i32 to 16 x i8 by truncation (VPMOVDB). */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
              (__v16qi) _mm_undefined_si128 (),
              (__mmask16) -1);
}

/* Merge-masking form: unwritten bytes come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
              (__v16qi) __O, __M);
}

/* Zero-masking form: unwritten bytes are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
              (__v16qi) _mm_setzero_si128 (),
              __M);
}

/* Masked store of the truncated bytes to unaligned memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}
   7849 
/* Down-convert 16 x i32 to 16 x i16 by truncation (VPMOVDW). */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
              (__v16hi) _mm256_undefined_si256 (),
              (__mmask16) -1);
}

/* Merge-masking form: unwritten elements come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
              (__v16hi) __O, __M);
}

/* Zero-masking form: unwritten elements are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
              (__v16hi) _mm256_setzero_si256 (),
              __M);
}

/* Masked store of the truncated words to unaligned memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
}
   7878 
/* Down-convert 8 x i64 to 8 x i8 by truncation (VPMOVQB). */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
              (__v16qi) _mm_undefined_si128 (),
              (__mmask8) -1);
}

/* Merge-masking form: unwritten bytes come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
              (__v16qi) __O, __M);
}

/* Zero-masking form: unwritten bytes are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
              (__v16qi) _mm_setzero_si128 (),
              __M);
}

/* Masked store of the truncated bytes to unaligned memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}
   7907 
/* Down-convert 8 x i64 to 8 x i32 by truncation (VPMOVQD). */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) _mm256_undefined_si256 (),
              (__mmask8) -1);
}

/* Merge-masking form: unwritten elements come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) __O, __M);
}

/* Zero-masking form: unwritten elements are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) _mm256_setzero_si256 (),
              __M);
}

/* Masked store of the truncated dwords to unaligned memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
}
   7936 
   7937 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7938 _mm512_cvtepi64_epi16 (__m512i __A)
   7939 {
   7940   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
   7941               (__v8hi) _mm_undefined_si128 (),
   7942               (__mmask8) -1);
   7943 }
   7944 
   7945 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7946 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
   7947 {
   7948   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
   7949               (__v8hi) __O, __M);
   7950 }
   7951 
   7952 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7953 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
   7954 {
   7955   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
   7956               (__v8hi) _mm_setzero_si128 (),
   7957               __M);
   7958 }
   7959 
   7960 static __inline__ void __DEFAULT_FN_ATTRS
   7961 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
   7962 {
   7963   __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
   7964 }
   7965 
/* Extract the 128-bit lane selected by the low two bits of imm from a
   512-bit integer vector.  Implemented as a shufflevector so the compiler
   can fold it; the second shuffle operand is only a placeholder. */
#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({            \
  (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A),             \
                                   (__v16si)_mm512_undefined_epi32(), \
                                   0 + ((imm) & 0x3) * 4,             \
                                   1 + ((imm) & 0x3) * 4,             \
                                   2 + ((imm) & 0x3) * 4,             \
                                   3 + ((imm) & 0x3) * 4); })

/* Masked extract: result lanes with a clear bit in U come from W. */
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
                                (__v4si)(W)); })

/* Zero-masked extract: result lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
                                (__v4si)_mm_setzero_si128()); })

/* Extract the 256-bit half selected by bit 0 of imm (four i64 elements). */
#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({           \
  (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A),             \
                                   (__v8di)_mm512_undefined_epi32(), \
                                   ((imm) & 1) ? 4 : 0,              \
                                   ((imm) & 1) ? 5 : 1,              \
                                   ((imm) & 1) ? 6 : 2,              \
                                   ((imm) & 1) ? 7 : 3); })

/* Masked 256-bit extract: inactive lanes come from W. */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
                                (__v4di)(W)); })

/* Zero-masked 256-bit extract: inactive lanes are zeroed. */
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
                                (__v4di)_mm256_setzero_si256()); })
   8001 
/* Insert a 256-bit vector B into the half of A selected by bit 0 of imm.
   B is widened to 512 bits (its indices are 8..11 in the shuffle); the
   ternaries pick B's elements for the selected half and A's elsewhere. */
#define _mm512_insertf64x4(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                 (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \
                                 ((imm) & 0x1) ?  0 :  8, \
                                 ((imm) & 0x1) ?  1 :  9, \
                                 ((imm) & 0x1) ?  2 : 10, \
                                 ((imm) & 0x1) ?  3 : 11, \
                                 ((imm) & 0x1) ?  8 :  4, \
                                 ((imm) & 0x1) ?  9 :  5, \
                                 ((imm) & 0x1) ? 10 :  6, \
                                 ((imm) & 0x1) ? 11 :  7); })

/* Masked insert: result lanes with a clear bit in U come from W. */
#define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)(W)); })

/* Zero-masked insert: result lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)_mm512_setzero_pd()); })

/* Integer counterpart of _mm512_insertf64x4 (same index scheme). */
#define _mm512_inserti64x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                 (__v8di)_mm512_castsi256_si512((__m256i)(B)), \
                                 ((imm) & 0x1) ?  0 :  8, \
                                 ((imm) & 0x1) ?  1 :  9, \
                                 ((imm) & 0x1) ?  2 : 10, \
                                 ((imm) & 0x1) ?  3 : 11, \
                                 ((imm) & 0x1) ?  8 :  4, \
                                 ((imm) & 0x1) ?  9 :  5, \
                                 ((imm) & 0x1) ? 10 :  6, \
                                 ((imm) & 0x1) ? 11 :  7); })

/* Masked integer insert; inactive lanes come from W. */
#define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)(W)); })

/* Zero-masked integer insert; inactive lanes are zeroed. */
#define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512()); })

/* Insert a 128-bit float vector B into the 128-bit lane of A selected by
   the low two bits of imm.  B's elements appear as indices 16..19 after
   widening; each group of four picks B when its lane is selected. */
#define _mm512_insertf32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)_mm512_castps128_ps512((__m128)(B)),\
                                  (((imm) & 0x3) == 0) ? 16 :  0, \
                                  (((imm) & 0x3) == 0) ? 17 :  1, \
                                  (((imm) & 0x3) == 0) ? 18 :  2, \
                                  (((imm) & 0x3) == 0) ? 19 :  3, \
                                  (((imm) & 0x3) == 1) ? 16 :  4, \
                                  (((imm) & 0x3) == 1) ? 17 :  5, \
                                  (((imm) & 0x3) == 1) ? 18 :  6, \
                                  (((imm) & 0x3) == 1) ? 19 :  7, \
                                  (((imm) & 0x3) == 2) ? 16 :  8, \
                                  (((imm) & 0x3) == 2) ? 17 :  9, \
                                  (((imm) & 0x3) == 2) ? 18 : 10, \
                                  (((imm) & 0x3) == 2) ? 19 : 11, \
                                  (((imm) & 0x3) == 3) ? 16 : 12, \
                                  (((imm) & 0x3) == 3) ? 17 : 13, \
                                  (((imm) & 0x3) == 3) ? 18 : 14, \
                                  (((imm) & 0x3) == 3) ? 19 : 15); })

/* Masked 128-bit float insert; inactive lanes come from W. */
#define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)(W)); })

/* Zero-masked 128-bit float insert; inactive lanes are zeroed. */
#define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps()); })

/* Integer counterpart of _mm512_insertf32x4 (same index scheme). */
#define _mm512_inserti32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                 (__v16si)_mm512_castsi128_si512((__m128i)(B)),\
                                 (((imm) & 0x3) == 0) ? 16 :  0, \
                                 (((imm) & 0x3) == 0) ? 17 :  1, \
                                 (((imm) & 0x3) == 0) ? 18 :  2, \
                                 (((imm) & 0x3) == 0) ? 19 :  3, \
                                 (((imm) & 0x3) == 1) ? 16 :  4, \
                                 (((imm) & 0x3) == 1) ? 17 :  5, \
                                 (((imm) & 0x3) == 1) ? 18 :  6, \
                                 (((imm) & 0x3) == 1) ? 19 :  7, \
                                 (((imm) & 0x3) == 2) ? 16 :  8, \
                                 (((imm) & 0x3) == 2) ? 17 :  9, \
                                 (((imm) & 0x3) == 2) ? 18 : 10, \
                                 (((imm) & 0x3) == 2) ? 19 : 11, \
                                 (((imm) & 0x3) == 3) ? 16 : 12, \
                                 (((imm) & 0x3) == 3) ? 17 : 13, \
                                 (((imm) & 0x3) == 3) ? 18 : 14, \
                                 (((imm) & 0x3) == 3) ? 19 : 15); })

/* Masked 128-bit integer insert; inactive lanes come from W. */
#define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)(W)); })

/* Zero-masked 128-bit integer insert; inactive lanes are zeroed. */
#define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512()); })
   8105 
/* VGETMANTPD with explicit rounding/SAE control R: extract the normalized
   mantissa of each double in A.  B selects the normalization interval and
   C the sign control; they are packed into the immediate as (C<<2)|B. */
#define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)); })

/* Masked form: lanes with a clear bit in U keep the value from W. */
#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)); })

/* Zero-masked form: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })
   8123 
   8124 #define _mm512_getmant_pd(A, B, C) __extension__ ({ \
   8125   (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
   8126                                             (int)(((C)<<2) | (B)), \
   8127                                             (__v8df)_mm512_setzero_pd(), \
   8128                                             (__mmask8)-1, \
   8129                                             _MM_FROUND_CUR_DIRECTION); })
   8130 
/* Masked getmant, current rounding; inactive lanes come from W. */
#define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })

/* Zero-masked getmant, current rounding; inactive lanes are zeroed. */
#define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })
   8144 
/* VGETMANTPS variants: single-precision counterparts of the _pd getmant
   macros above.  B = interval selector, C = sign control, packed as
   (C<<2)|B; R is the rounding/SAE control where present. */
#define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)); })

/* Masked form with explicit rounding; inactive lanes come from W. */
#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

/* Zero-masked form with explicit rounding; inactive lanes are zeroed. */
#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })

/* Unmasked, current rounding direction. */
#define _mm512_getmant_ps(A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, \
                                           _MM_FROUND_CUR_DIRECTION); })

/* Masked, current rounding direction; inactive lanes come from W. */
#define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

/* Zero-masked, current rounding direction; inactive lanes are zeroed. */
#define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })
   8183 
/* VGETEXPPD with explicit rounding/SAE control R: compute the unbiased
   exponent of each double in A as a double. */
#define _mm512_getexp_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Masked form; inactive lanes come from W. */
#define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked form; inactive lanes are zeroed. */
#define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
   8198 
/* VGETEXPPD using the current rounding direction (unmasked). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_getexp_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) _mm512_undefined_pd (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* Masked getexp; lanes with a clear bit in __U keep the value from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked getexp; lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) _mm512_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   8225 
/* VGETEXPPS with explicit rounding/SAE control R (single precision). */
#define _mm512_getexp_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)); })

/* Masked form; inactive lanes come from W. */
#define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

/* Zero-masked form; inactive lanes are zeroed. */
#define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })
   8240 
/* VGETEXPPS using the current rounding direction (unmasked). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_getexp_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) _mm512_undefined_ps (),
               (__mmask16) -1,
               _MM_FROUND_CUR_DIRECTION);
}

/* Masked getexp; lanes with a clear bit in __U keep the value from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) __W,
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked getexp; lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) _mm512_setzero_ps (),
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}
   8267 
/* Gather 8 floats from addr + index[i]*scale using 64-bit indices.
   Unmasked form: all lanes active, passthrough undefined. */
#define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                       (float const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

/* Masked gather: lanes with a clear bit in mask keep the value from
   v1_old and their memory is not accessed. */
#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__({\
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                       (float const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
   8279 
   8280 #define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\
   8281   (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_ps(), \
   8282                                         (int const *)(addr), \
   8283                                         (__v8di)(__m512i)(index), \
   8284                                         (__mmask8)-1, (int)(scale)); })
   8285 
/* Masked 32-bit gather at 64-bit indices; inactive lanes keep v1_old. */
#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

/* Gather 8 doubles at 64-bit indices; unmasked, passthrough undefined. */
#define _mm512_i64gather_pd(index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                       (double const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

/* Masked double gather; inactive lanes keep v1_old. */
#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                       (double const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
   8303 
   8304 #define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\
   8305   (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_pd(), \
   8306                                        (long long const *)(addr), \
   8307                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
   8308                                        (int)(scale)); })
   8309 
/* Masked 64-bit gather at 64-bit indices; inactive lanes keep v1_old. */
#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                       (long long const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
   8315 
/* Gather 16 floats at 32-bit indices.
   NOTE(review): the index operand is cast through (__v16sf)(__m512) even
   though gather indices are integers -- this matches the prototype of
   __builtin_ia32_gathersiv16sf in this header revision; confirm against
   the builtin's declaration before changing the cast. */
#define _mm512_i32gather_ps(index, addr, scale) __extension__ ({\
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                       (float const *)(addr), \
                                       (__v16sf)(__m512)(index), \
                                       (__mmask16)-1, (int)(scale)); })

/* Masked float gather; inactive lanes keep v1_old. */
#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                       (float const *)(addr), \
                                       (__v16sf)(__m512)(index), \
                                       (__mmask16)(mask), (int)(scale)); })

/* Gather 16 32-bit integers at 32-bit indices; unmasked. */
#define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                        (int const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)-1, (int)(scale)); })

/* Masked 32-bit integer gather; inactive lanes keep v1_old. */
#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)(mask), (int)(scale)); })

/* Gather 8 doubles at 32-bit indices (index vector is 256-bit); unmasked. */
#define _mm512_i32gather_pd(index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                       (double const *)(addr), \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

/* Masked double gather at 32-bit indices; inactive lanes keep v1_old. */
#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                       (double const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })

/* Gather 8 64-bit integers at 32-bit indices; unmasked. */
#define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (long long const *)(addr), \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

/* Masked 64-bit integer gather at 32-bit indices; inactive lanes keep
   v1_old. */
#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                       (long long const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
   8363 
/* Scatter operations: store element i of v1 to addr + index[i]*scale.
   The unmasked forms pass an all-ones mask; the mask forms skip lanes
   whose mask bit is clear. */

/* Scatter 8 floats at 64-bit indices. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale)); })

/* Masked float scatter at 64-bit indices. */
#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale)); })

/* Scatter 8 32-bit integers at 64-bit indices. */
#define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale)); })

/* Masked 32-bit integer scatter at 64-bit indices. */
#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale)); })

/* Scatter 8 doubles at 64-bit indices. */
#define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

/* Masked double scatter at 64-bit indices. */
#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

/* Scatter 8 64-bit integers at 64-bit indices. */
#define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })

/* Masked 64-bit integer scatter at 64-bit indices. */
#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })

/* Scatter 16 floats at 32-bit indices. */
#define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale)); })

/* Masked float scatter at 32-bit indices. */
#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale)); })

/* Scatter 16 32-bit integers at 32-bit indices. */
#define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale)); })

/* Masked 32-bit integer scatter at 32-bit indices. */
#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale)); })

/* Scatter 8 doubles at 32-bit indices. */
#define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

/* Masked double scatter at 32-bit indices. */
#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

/* Scatter 8 64-bit integers at 32-bit indices. */
#define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })

/* Masked 64-bit integer scatter at 32-bit indices. */
#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })
   8443 
   8444 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8445 _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   8446 {
   8447  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
   8448           (__v4sf) __A,
   8449           (__v4sf) __B,
   8450           (__mmask8) __U,
   8451           _MM_FROUND_CUR_DIRECTION);
   8452 }
   8453 
/* Same as _mm_mask_fmadd_ss but with explicit rounding control R. */
#define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        (__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R)); })
   8459 
/* Zero-masked scalar single-precision FMA (__A*__B + __C), current
   rounding mode. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8469 
   8470 #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\
   8471   (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
   8472                                          (__v4sf)(__m128)(B), \
   8473                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
   8474                                          _MM_FROUND_CUR_DIRECTION); })
   8475 
/* Mask3 scalar single-precision FMA (__W*__X + __Y): the third operand
   carries the merge source, per the vfmaddss3_mask3 builtin. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
          (__v4sf) __X,
          (__v4sf) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8485 
/* Same as _mm_mask3_fmadd_ss but with explicit rounding control R. */
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R)); })
   8491 
/* Merge-masked scalar fmsub (__W*__A - __B): implemented as an fmadd
   with __B negated. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
          (__v4sf) __A,
          -(__v4sf) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8501 
   8502 #define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\
   8503   (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
   8504                                         (__v4sf)(__m128)(A), \
   8505                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
   8506                                         (int)(R)); })
   8507 
/* Zero-masked scalar fmsub (__A*__B - __C), current rounding mode. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
          (__v4sf) __B,
          -(__v4sf) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8517 
/* Zero-masked scalar fmsub with explicit rounding control R. */
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })
   8523 
/* Mask3 scalar fmsub; uses the dedicated vfmsubss3_mask3 builtin so the
   merge operand __Y is not negated here. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
          (__v4sf) __X,
          (__v4sf) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8533 
/* Same as _mm_mask3_fmsub_ss but with explicit rounding control R. */
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R)); })
   8539 
/* Merge-masked scalar fnmadd (-(__W*__A) + __B): one multiplicand is
   negated before the fmadd builtin. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
          -(__v4sf) __A,
          (__v4sf) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8549 
/* Same as _mm_mask_fnmadd_ss but with explicit rounding control R. */
#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        -(__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R)); })
   8555 
/* Zero-masked scalar fnmadd (-(__A*__B) + __C), current rounding mode. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8565 
/* Zero-masked scalar fnmadd with explicit rounding control R. */
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })
   8571 
/* Mask3 scalar fnmadd: the multiplicand __W is negated; __Y is the
   addend/merge operand of the mask3 builtin. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
          (__v4sf) __X,
          (__v4sf) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8581 
/* Same as _mm_mask3_fnmadd_ss but with explicit rounding control R. */
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\
  (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R)); })
   8587 
/* Merge-masked scalar fnmsub (-(__W*__A) - __B): both __A and __B are
   negated before the fmadd builtin. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
          -(__v4sf) __A,
          -(__v4sf) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8597 
/* Same as _mm_mask_fnmsub_ss but with explicit rounding control R. */
#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        -(__v4sf)(__m128)(A), \
                                        -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R)); })
   8603 
/* Zero-masked scalar fnmsub (-(__A*__B) - __C), current rounding mode. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
          (__v4sf) __B,
          -(__v4sf) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8613 
   8614 #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\
   8615   (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
   8616                                          (__v4sf)(__m128)(B), \
   8617                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
   8618                                          _MM_FROUND_CUR_DIRECTION); })
   8619 
/* Mask3 scalar fnmsub; uses the dedicated vfnmsubss3_mask3 builtin, so
   no operand negation is needed here. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
 return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W,
          (__v4sf) __X,
          (__v4sf) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8629 
/* Same as _mm_mask3_fnmsub_ss but with explicit rounding control R. */
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\
  (__m128)__builtin_ia32_vfnmsubss3_mask3((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R)); })
   8635 
/* Merge-masked scalar double-precision FMA (__W*__A + __B), current
   rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
          (__v2df) __A,
          (__v2df) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8645 
/* Same as _mm_mask_fmadd_sd but with explicit rounding control R. */
#define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         (__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R)); })
   8651 
/* Zero-masked scalar double-precision FMA (__A*__B + __C), current
   rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8661 
   8662 #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\
   8663   (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
   8664                                           (__v2df)(__m128d)(B), \
   8665                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
   8666                                           _MM_FROUND_CUR_DIRECTION); })
   8667 
/* Mask3 scalar double-precision FMA (__W*__X + __Y); __Y carries the
   merge source, per the vfmaddsd3_mask3 builtin. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
          (__v2df) __X,
          (__v2df) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8677 
/* Same as _mm_mask3_fmadd_sd but with explicit rounding control R. */
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                          (int)(R)); })
   8683 
/* Merge-masked scalar double fmsub (__W*__A - __B): __B negated before
   the fmadd builtin. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
          (__v2df) __A,
          -(__v2df) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8693 
/* Same as _mm_mask_fmsub_sd but with explicit rounding control R. */
#define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         (__v2df)(__m128d)(A), \
                                         -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R)); })
   8699 
/* Zero-masked scalar double fmsub (__A*__B - __C), current rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
          (__v2df) __B,
          -(__v2df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8709 
/* Zero-masked scalar double fmsub with explicit rounding control R. */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), \
                                          (__mmask8)(U), (int)(R)); })
   8715 
/* Mask3 scalar double fmsub; dedicated vfmsubsd3_mask3 builtin, so the
   merge operand __Y is not negated. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
          (__v2df) __X,
          (__v2df) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8725 
/* Same as _mm_mask3_fmsub_sd but with explicit rounding control R. */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), \
                                          (__mmask8)(U), (int)(R)); })
   8731 
/* Merge-masked scalar double fnmadd (-(__W*__A) + __B). */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
          -(__v2df) __A,
          (__v2df) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8741 
/* Same as _mm_mask_fnmadd_sd but with explicit rounding control R. */
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         -(__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R)); })
   8747 
/* Zero-masked scalar double fnmadd (-(__A*__B) + __C). */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
          (__v2df) __B,
          (__v2df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8757 
/* Zero-masked scalar double fnmadd with explicit rounding control R. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)(U), \
                                          (int)(R)); })
   8763 
/* Mask3 scalar double fnmadd: multiplicand __W is negated; __Y is the
   addend/merge operand. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
          (__v2df) __X,
          (__v2df) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8773 
/* Same as _mm_mask3_fnmadd_sd but with explicit rounding control R. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                          (int)(R)); })
   8779 
/* Merge-masked scalar double fnmsub (-(__W*__A) - __B). */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
          -(__v2df) __A,
          -(__v2df) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8789 
/* Same as _mm_mask_fnmsub_sd but with explicit rounding control R. */
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         -(__v2df)(__m128d)(A), \
                                         -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R)); })
   8795 
/* Zero-masked scalar double fnmsub (-(__A*__B) - __C). */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
          (__v2df) __B,
          -(__v2df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8805 
   8806 #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\
   8807   (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
   8808                                           (__v2df)(__m128d)(B), \
   8809                                           -(__v2df)(__m128d)(C), \
   8810                                           (__mmask8)(U), \
   8811                                           _MM_FROUND_CUR_DIRECTION); })
   8812 
/* Mask3 scalar double fnmsub; dedicated vfnmsubsd3_mask3 builtin, no
   operand negation needed. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
 return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W),
          (__v2df) __X,
          (__v2df) (__Y),
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8822 
/* Same as _mm_mask3_fnmsub_sd but with explicit rounding control R. */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\
  (__m128d)__builtin_ia32_vfnmsubsd3_mask3((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), \
                                          (__mmask8)(U), (int)(R)); })
   8828 
/* Permute doubles within each 256-bit half of X: each 2-bit field of
   immediate C selects a source lane (0-3) within the half. */
#define _mm512_permutex_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   0 + (((C) >> 0) & 0x3), \
                                   0 + (((C) >> 2) & 0x3), \
                                   0 + (((C) >> 4) & 0x3), \
                                   0 + (((C) >> 6) & 0x3), \
                                   4 + (((C) >> 0) & 0x3), \
                                   4 + (((C) >> 2) & 0x3), \
                                   4 + (((C) >> 4) & 0x3), \
                                   4 + (((C) >> 6) & 0x3)); })
   8840 
/* Merge-masked _mm512_permutex_pd: inactive lanes taken from W. */
#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)(__m512d)(W)); })
   8845 
/* Zero-masked _mm512_permutex_pd: inactive lanes are zeroed. */
#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)_mm512_setzero_pd()); })
   8850 
/* Permute 64-bit integers within each 256-bit half of X: each 2-bit
   field of immediate C selects a source lane (0-3) within the half. */
#define _mm512_permutex_epi64(X, C) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \
                                   (__v8di)_mm512_undefined_epi32(), \
                                   0 + (((C) >> 0) & 0x3), \
                                   0 + (((C) >> 2) & 0x3), \
                                   0 + (((C) >> 4) & 0x3), \
                                   0 + (((C) >> 6) & 0x3), \
                                   4 + (((C) >> 0) & 0x3), \
                                   4 + (((C) >> 2) & 0x3), \
                                   4 + (((C) >> 4) & 0x3), \
                                   4 + (((C) >> 6) & 0x3)); })
   8862 
/* Merge-masked _mm512_permutex_epi64: inactive lanes taken from W. */
#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)(__m512i)(W)); })
   8867 
/* Zero-masked _mm512_permutex_epi64: inactive lanes are zeroed. */
#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)_mm512_setzero_si512()); })
   8872 
/* Variable permute of doubles in __Y using 64-bit indices in __X; note
   the builtin takes (data, indices) in that order. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
                 (__v8di) __X,
                 (__v8df) _mm512_undefined_pd (),
                 (__mmask8) -1);
}
   8881 
/* Merge-masked variable permute of doubles: inactive lanes from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
                 (__v8di) __X,
                 (__v8df) __W,
                 (__mmask8) __U);
}
   8890 
/* Zero-masked variable permute of doubles: inactive lanes zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
                 (__v8di) __X,
                 (__v8df) _mm512_setzero_pd (),
                 (__mmask8) __U);
}
   8899 
/* Zero-masked variable permute of 64-bit integers. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
                 (__v8di) __X,
                 (__v8di) _mm512_setzero_si512 (),
                 __M);
}
   8908 
/* Variable permute of 64-bit integers in __Y using indices in __X. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
                 (__v8di) __X,
                 (__v8di) _mm512_undefined_epi32 (),
                 (__mmask8) -1);
}
   8917 
/* Merge-masked variable permute of 64-bit integers: inactive lanes
   taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
             __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
                 (__v8di) __X,
                 (__v8di) __W,
                 __M);
}
   8927 
/* Variable permute of floats in __Y using 32-bit indices in __X. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
                (__v16si) __X,
                (__v16sf) _mm512_undefined_ps (),
                (__mmask16) -1);
}
   8936 
/* Merge-masked variable permute of floats: inactive lanes from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
                (__v16si) __X,
                (__v16sf) __W,
                (__mmask16) __U);
}
   8945 
/* Zero-masked variable permute of floats: inactive lanes zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
                (__v16si) __X,
                (__v16sf) _mm512_setzero_ps (),
                (__mmask16) __U);
}
   8954 
/* Zero-masked variable permute of 32-bit integers. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
                 (__v16si) __X,
                 (__v16si) _mm512_setzero_si512 (),
                 __M);
}
   8963 
/* Variable permute of 32-bit integers in __Y using indices in __X. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
                 (__v16si) __X,
                 (__v16si) _mm512_undefined_epi32 (),
                 (__mmask16) -1);
}
   8972 
/* Legacy alias kept for source compatibility. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
   8974 
/* Merge-masked variable permute of 32-bit integers: inactive lanes
   taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
             __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
                 (__v16si) __X,
                 (__v16si) __W,
                 __M);
}
   8984 
/* Legacy alias kept for source compatibility. */
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
   8986 
/* Bitwise AND of two 16-bit opmasks (KANDW). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kand (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
}
   8992 
/* Bitwise AND-NOT of two 16-bit opmasks (KANDNW): ~__A & __B. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kandn (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
}
   8998 
/* Bitwise OR of two 16-bit opmasks (KORW). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
}
   9004 
/* KORTESTW carry-flag test: nonzero when (__A | __B) is all ones. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestc (__mmask16 __A, __mmask16 __B)
{
  return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
}
   9010 
/* KORTESTW zero-flag test: nonzero when (__A | __B) is all zeros. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestz (__mmask16 __A, __mmask16 __B)
{
  return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
}
   9016 
/* Unpack the low bytes of two 16-bit opmasks into one mask (KUNPCKBW). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}
   9022 
/* Bitwise XNOR of two 16-bit opmasks (KXNORW). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxnor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
}
   9028 
/* Bitwise XOR of two 16-bit opmasks (KXORW). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
}
   9034 
/* Non-temporal 64-byte store of __A to *__P (bypasses cache hints);
   __P must be 64-byte aligned per the vector type. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_si512 (__m512i * __P, __m512i __A)
{
  __builtin_nontemporal_store((__v8di)__A, (__v8di*)__P);
}
   9040 
/* Non-temporal 64-byte load from __P; pointer is not const-qualified
   here but is only read (cast to const internally). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_stream_load_si512 (void *__P)
{
  return (__m512i) __builtin_nontemporal_load((const __v8di *)__P);
}
   9046 
/* Non-temporal store of 8 doubles to __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_pd (double *__P, __m512d __A)
{
  __builtin_nontemporal_store((__v8df)__A, (__v8df*)__P);
}
   9052 
/* Non-temporal store of 16 floats to __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_ps (float *__P, __m512 __A)
{
  __builtin_nontemporal_store((__v16sf)__A, (__v16sf*)__P);
}
   9058 
/* Compress active doubles of __A (per __U) toward the low end;
   remaining elements are merged from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
                  (__v8df) __W,
                  (__mmask8) __U);
}
   9066 
/* Zero-masked compress of doubles: remaining elements are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
                  (__v8df)
                  _mm512_setzero_pd (),
                  (__mmask8) __U);
}
   9075 
/* Compress active 64-bit elements of __A (per __U) toward the low end;
   remaining elements merged from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
                  (__v8di) __W,
                  (__mmask8) __U);
}
   9083 
/* Zero-masked compress of 64-bit elements: remaining elements zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
                  (__v8di)
                  _mm512_setzero_si512 (),
                  (__mmask8) __U);
}
   9092 
/* Compress active floats of __A (per __U) toward the low end;
   remaining elements merged from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
                 (__v16sf) __W,
                 (__mmask16) __U);
}
   9100 
/* Zero-masked compress of floats: remaining elements zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
                 (__v16sf)
                 _mm512_setzero_ps (),
                 (__mmask16) __U);
}
   9109 
/* Compress active 32-bit elements of __A (per __U) toward the low end;
   remaining elements merged from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
                  (__v16si) __W,
                  (__mmask16) __U);
}
   9117 
/* Zero-masked compress of 32-bit elements: remaining elements zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
                  (__v16si)
                  _mm512_setzero_si512 (),
                  (__mmask16) __U);
}
   9126 
/* Compare the lowest floats of X and Y with predicate P and explicit
   rounding R; result is a 1-bit mask (all lanes enabled). */
#define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)-1, (int)(R)); })
   9131 
/* Masked variant: compare result is ANDed with mask M. */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)(M), (int)(R)); })
   9136 
/* Compare lowest floats with predicate P, current rounding mode. */
#define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION); })
   9142 
/* Masked compare of lowest floats, current rounding mode. */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)(M), \
                                      _MM_FROUND_CUR_DIRECTION); })
   9148 
/* Compare the lowest doubles of X and Y with predicate P and explicit
   rounding R; result is a 1-bit mask. */
#define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)-1, (int)(R)); })
   9153 
/* Masked variant: compare result is ANDed with mask M. */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)(M), (int)(R)); })
   9158 
   9159 #define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \
   9160   (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
   9161                                       (__v2df)(__m128d)(Y), (int)(P), \
   9162                                       (__mmask8)-1, \
   9163                                       _MM_FROUND_CUR_DIRECTION); })
   9164 
   9165 #define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \
   9166   (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
   9167                                       (__v2df)(__m128d)(Y), (int)(P), \
   9168                                       (__mmask8)(M), \
   9169                                       _MM_FROUND_CUR_DIRECTION); })
   9170 
/* VMOVSHDUP: duplicate the odd-indexed single-precision elements of __A
   into each even/odd pair (result[2i] = result[2i+1] = __A[2i+1]).  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_movehdup_ps (__m512 __A)
{
  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
                         1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
}

/* Merge-masked VMOVSHDUP: lanes with a clear bit in __U come from __W.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_movehdup_ps(__A),
                                             (__v16sf)__W);
}

/* Zero-masked VMOVSHDUP: lanes with a clear bit in __U are zeroed.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_movehdup_ps(__A),
                                             (__v16sf)_mm512_setzero_ps());
}

/* VMOVSLDUP: duplicate the even-indexed single-precision elements of __A
   (result[2i] = result[2i+1] = __A[2i]).  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_moveldup_ps (__m512 __A)
{
  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
                         0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
}

/* Merge-masked VMOVSLDUP.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_moveldup_ps(__A),
                                             (__v16sf)__W);
}

/* Zero-masked VMOVSLDUP.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_moveldup_ps(__A),
                                             (__v16sf)_mm512_setzero_ps());
}
   9216 
   9217 static __inline__ __m128 __DEFAULT_FN_ATTRS
   9218 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   9219 {
   9220   __m128 res = __A;
   9221   res[0] = (__U & 1) ? __B[0] : __W[0];
   9222   return res;
   9223 }
   9224 
   9225 static __inline__ __m128 __DEFAULT_FN_ATTRS
   9226 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
   9227 {
   9228   __m128 res = __A;
   9229   res[0] = (__U & 1) ? __B[0] : 0;
   9230   return res;
   9231 }
   9232 
   9233 static __inline__ __m128d __DEFAULT_FN_ATTRS
   9234 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   9235 {
   9236   __m128d res = __A;
   9237   res[0] = (__U & 1) ? __B[0] : __W[0];
   9238   return res;
   9239 }
   9240 
   9241 static __inline__ __m128d __DEFAULT_FN_ATTRS
   9242 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
   9243 {
   9244   __m128d res = __A;
   9245   res[0] = (__U & 1) ? __B[0] : 0;
   9246   return res;
   9247 }
   9248 
/* Masked scalar store: write element 0 of __A to *__W only when bit 0 of
   __U is set.  Routed through the 512-bit masked-store builtin with the
   mask clamped to bit 0.  NOTE(review): the float*/double* destination is
   cast straight to a 512-bit vector pointer; the builtin only touches the
   low element, but confirm this matches the builtin's contract.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
{
  __builtin_ia32_storess128_mask ((__v16sf *)__W,
                (__v16sf) _mm512_castps128_ps512(__A),
                (__mmask16) __U & (__mmask16)1);
}

/* Double-precision counterpart of _mm_mask_store_ss.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
{
  __builtin_ia32_storesd128_mask ((__v8df *)__W,
                (__v8df) _mm512_castpd128_pd512(__A),
                (__mmask8) __U & 1);
}
   9264 
   9265 static __inline__ __m128 __DEFAULT_FN_ATTRS
   9266 _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
   9267 {
   9268   __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
   9269                                                 (__v4sf) {0.0, 0.0, 0.0, 0.0},
   9270                                                 0, 4, 4, 4);
   9271 
   9272   return (__m128) __builtin_shufflevector(
   9273                            __builtin_ia32_loadss128_mask ((__v16sf *) __A,
   9274                                       (__v16sf) _mm512_castps128_ps512(src),
   9275                                       (__mmask16) __U & 1),
   9276                            _mm512_undefined_ps(), 0, 1, 2, 3);
   9277 }
   9278 
   9279 static __inline__ __m128 __DEFAULT_FN_ATTRS
   9280 _mm_maskz_load_ss (__mmask8 __U, const float* __A)
   9281 {
   9282   return (__m128) __builtin_shufflevector(
   9283                            __builtin_ia32_loadss128_mask ((__v16sf *) __A,
   9284                                       (__v16sf) _mm512_setzero_ps(),
   9285                                       (__mmask16) __U & 1),
   9286                            _mm512_undefined_ps(), 0, 1, 2, 3);
   9287 }
   9288 
   9289 static __inline__ __m128d __DEFAULT_FN_ATTRS
   9290 _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
   9291 {
   9292   __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
   9293                                                  (__v2df) {0.0, 0.0}, 0, 2);
   9294 
   9295   return (__m128d) __builtin_shufflevector(
   9296                             __builtin_ia32_loadsd128_mask ((__v8df *) __A,
   9297                                       (__v8df) _mm512_castpd128_pd512(src),
   9298                                       (__mmask8) __U & 1),
   9299                             _mm512_undefined_pd(), 0, 1);
   9300 }
   9301 
   9302 static __inline__ __m128d __DEFAULT_FN_ATTRS
   9303 _mm_maskz_load_sd (__mmask8 __U, const double* __A)
   9304 {
   9305   return (__m128d) __builtin_shufflevector(
   9306                             __builtin_ia32_loadsd128_mask ((__v8df *) __A,
   9307                                       (__v8df) _mm512_setzero_pd(),
   9308                                       (__mmask8) __U & 1),
   9309                             _mm512_undefined_pd(), 0, 1);
   9310 }
   9311 
/* VPSHUFD: permute the 32-bit elements within each 128-bit lane of A
   according to the 8-bit immediate I (2 bits select each of the 4 slots,
   repeated for all four lanes).  Macro because I must be an immediate.  */
#define _mm512_shuffle_epi32(A, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                   (__v16si)_mm512_undefined_epi32(), \
                                   0  + (((I) >> 0) & 0x3), \
                                   0  + (((I) >> 2) & 0x3), \
                                   0  + (((I) >> 4) & 0x3), \
                                   0  + (((I) >> 6) & 0x3), \
                                   4  + (((I) >> 0) & 0x3), \
                                   4  + (((I) >> 2) & 0x3), \
                                   4  + (((I) >> 4) & 0x3), \
                                   4  + (((I) >> 6) & 0x3), \
                                   8  + (((I) >> 0) & 0x3), \
                                   8  + (((I) >> 2) & 0x3), \
                                   8  + (((I) >> 4) & 0x3), \
                                   8  + (((I) >> 6) & 0x3), \
                                   12 + (((I) >> 0) & 0x3), \
                                   12 + (((I) >> 2) & 0x3), \
                                   12 + (((I) >> 4) & 0x3), \
                                   12 + (((I) >> 6) & 0x3)); })

/* Merge-masked VPSHUFD: inactive lanes come from W.  */
#define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)(__m512i)(W)); })

/* Zero-masked VPSHUFD: inactive lanes are zeroed.  */
#define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)_mm512_setzero_si512()); })
   9341 
/* VEXPANDPD: scatter the contiguous low elements of __A into the lanes
   selected by the set bits of __U; lanes with a clear bit come from __W.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
                (__v8df) __W,
                (__mmask8) __U);
}

/* As above, but lanes with a clear mask bit are zeroed.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
                (__v8df) _mm512_setzero_pd (),
                (__mmask8) __U);
}

/* VPEXPANDQ: expand for 64-bit integer elements; inactive lanes from __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
                (__v8di) __W,
                (__mmask8) __U);
}
   9365 
   9366 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9367 _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
   9368 {
   9369   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
   9370                 (__v8di) _mm512_setzero_pd (),
   9371                 (__mmask8) __U);
   9372 }
   9373 
/* VEXPANDPD from memory: load popcount(__U) contiguous doubles from __P
   and scatter them to the lanes whose mask bit is set; other lanes come
   from __W.  The load is unaligned.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
              (__v8df) __W,
              (__mmask8) __U);
}

/* As above, but inactive lanes are zeroed.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
              (__v8df) _mm512_setzero_pd(),
              (__mmask8) __U);
}

/* VPEXPANDQ from memory; inactive lanes come from __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
              (__v8di) __W,
              (__mmask8) __U);
}
   9397 
   9398 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9399 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
   9400 {
   9401   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
   9402               (__v8di) _mm512_setzero_pd(),
   9403               (__mmask8) __U);
   9404 }
   9405 
/* VEXPANDPS from memory: load popcount(__U) contiguous floats from __P
   into the lanes whose mask bit is set; other lanes come from __W.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* As above, but inactive lanes are zeroed.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
                   (__v16sf) _mm512_setzero_ps(),
                   (__mmask16) __U);
}

/* VPEXPANDD from memory; inactive lanes come from __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
              (__v16si) __W,
              (__mmask16) __U);
}
   9429 
   9430 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9431 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
   9432 {
   9433   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
   9434               (__v16si) _mm512_setzero_ps(),
   9435               (__mmask16) __U);
   9436 }
   9437 
/* VEXPANDPS: scatter the contiguous low elements of __A into the lanes
   selected by the set bits of __U; other lanes come from __W.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
               (__v16sf) __W,
               (__mmask16) __U);
}

/* As above, but inactive lanes are zeroed.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
               (__v16sf) _mm512_setzero_ps(),
               (__mmask16) __U);
}

/* VPEXPANDD for 32-bit integer elements; inactive lanes from __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
                (__v16si) __W,
                (__mmask16) __U);
}
   9461 
   9462 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9463 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
   9464 {
   9465   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
   9466                 (__v16si) _mm512_setzero_ps(),
   9467                 (__mmask16) __U);
   9468 }
   9469 
/* VCVTPS2PD with explicit SAE control R: widen 8 floats to 8 doubles.
   Macros because R must be an immediate.  */
#define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
   9484 
/* VCVTPS2PD: convert 8 single-precision elements to double precision.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtps_pd (__m256 __A)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
                (__v8df)
                _mm512_undefined_pd (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked conversion: inactive lanes come from __W.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
                (__v8df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked conversion: inactive lanes are zeroed.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
                (__v8df)
                _mm512_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   9513 
   9514 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9515 _mm512_cvtpslo_pd (__m512 __A)
   9516 {
   9517   return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
   9518 }
   9519 
   9520 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9521 _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
   9522 {
   9523   return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
   9524 }
   9525 
/* Masked element move (blend): lanes with a set bit in __U come from __A,
   the rest from __W.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
              (__v8df) __A,
              (__v8df) __W);
}

/* As above, but inactive lanes are zeroed.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
              (__v8df) __A,
              (__v8df) _mm512_setzero_pd ());
}

/* Single-precision masked move.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
             (__v16sf) __A,
             (__v16sf) __W);
}

/* Single-precision zeroing masked move.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
             (__v16sf) __A,
             (__v16sf) _mm512_setzero_ps ());
}
   9557 
/* VCOMPRESSPD to memory: store the elements of __A whose mask bit is set
   contiguously (unaligned) at __P; only popcount(__U) elements are
   written.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
            (__mmask8) __U);
}

/* VPCOMPRESSQ to memory: qword-element variant.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
            (__mmask8) __U);
}

/* VCOMPRESSPS to memory: single-precision variant.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
            (__mmask16) __U);
}

/* VPCOMPRESSD to memory: dword-element variant.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
            (__mmask16) __U);
}
   9585 
/* VCVTSD2SS with rounding control R: convert the low double of B to a
   float in element 0; upper elements come from A.  Macros because R must
   be an immediate.  */
#define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_undefined_ps(), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), (int)(R)); })
   9603 
/* Masked VCVTSD2SS: element 0 is the converted low double of __B when bit
   0 of __U is set, otherwise element 0 of __W; upper elements from __A.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
{
  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
                                             (__v2df)(__B),
                                             (__v4sf)(__W),
                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}

/* Zeroing form: inactive element 0 becomes 0.0f.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
{
  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
                                             (__v2df)(__B),
                                             (__v4sf)_mm_setzero_ps(),
                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}
   9621 
/* Intel-documented aliases: the _i32/_i64 spellings are alternate names
   for the _si32/_si64 conversion intrinsics.  */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif

/* Signed integer -> scalar FP conversions with rounding control R.  The
   _roundi*/_roundsi* pairs are aliases; 64-bit forms need __x86_64__.  */
#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R)); })

#define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R)); })
#endif

#define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })

#define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })

#ifdef __x86_64__
#define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R)); })

#define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R)); })
#endif
   9658 
/* VCVTSS2SD with SAE control R: convert the low float of B to a double in
   element 0; element 1 comes from A.  */
#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_undefined_pd(), \
                                              (__mmask8)-1, (int)(R)); })

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), (int)(R)); })

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(U), (int)(R)); })
   9676 
/* Masked VCVTSS2SD: element 0 is the converted low float of __B when bit
   0 of __U is set, otherwise element 0 of __W; element 1 from __A.  */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
{
  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
                                              (__v4sf)(__B),
                                              (__v2df)(__W),
                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}

/* Zeroing form: inactive element 0 becomes 0.0.  */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
{
  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
                                              (__v4sf)(__B),
                                              (__v2df)_mm_setzero_pd(),
                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}
   9694 
/* VCVTUSI2SD: convert unsigned 32-bit __B to a double in element 0;
   element 1 comes from __A.  (No rounding argument: a 32-bit unsigned is
   always exactly representable in double.)  */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtu32_sd (__m128d __A, unsigned __B)
{
  return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
}

#ifdef __x86_64__
/* 64-bit unsigned -> double, with explicit rounding control R.  */
#define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                      (unsigned long long)(B), (int)(R)); })

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
{
  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
                 _MM_FROUND_CUR_DIRECTION);
}
#endif

/* Unsigned 32-bit -> float, with explicit rounding control R.  */
#define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                     (int)(R)); })

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtu32_ss (__m128 __A, unsigned __B)
{
  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
                _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/* Unsigned 64-bit -> float, with explicit rounding control R.  */
#define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                     (unsigned long long)(B), (int)(R)); })

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
{
  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
                _MM_FROUND_CUR_DIRECTION);
}
#endif
   9737 
/* Masked broadcast of a GPR: lanes with a set bit in __M get __A,
   the rest come from __O.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
{
  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
                 __M);
}

#ifdef __x86_64__
/* 64-bit GPR broadcast; only available on x86-64.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
{
  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
                 __M);
}
#endif
   9753 
/* Build a 512-bit integer vector from 64 bytes.  Arguments are given
   high-to-low (__e63 first); the initializer lists them low-to-high to
   match the vector's element order.  */
static  __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
    char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
    char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
    char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
    char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
    char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
    char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
    char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
    char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
    char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
    char __e4, char __e3, char __e2, char __e1, char __e0) {

  return __extension__ (__m512i)(__v64qi)
    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
     __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
     __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
     __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
     __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
}

/* Build a 512-bit integer vector from 32 shorts, high-to-low.  */
static  __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
    short __e27, short __e26, short __e25, short __e24, short __e23,
    short __e22, short __e21, short __e20, short __e19, short __e18,
    short __e17, short __e16, short __e15, short __e14, short __e13,
    short __e12, short __e11, short __e10, short __e9, short __e8,
    short __e7, short __e6, short __e5, short __e4, short __e3,
    short __e2, short __e1, short __e0) {
  return __extension__ (__m512i)(__v32hi)
    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
}
   9792 
/* Build a 512-bit integer vector from 16 ints, high-to-low (__A is the
   highest element, __P the lowest).  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi32 (int __A, int __B, int __C, int __D,
     int __E, int __F, int __G, int __H,
     int __I, int __J, int __K, int __L,
     int __M, int __N, int __O, int __P)
{
  return __extension__ (__m512i)(__v16si)
  { __P, __O, __N, __M, __L, __K, __J, __I,
    __H, __G, __F, __E, __D, __C, __B, __A };
}

/* Low-to-high argument order; forwards to _mm512_set_epi32 reversed.  */
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,           \
       e8,e9,e10,e11,e12,e13,e14,e15)          \
  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
                   (e5),(e4),(e3),(e2),(e1),(e0))

/* Build a 512-bit integer vector from 8 long longs, high-to-low.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi64 (long long __A, long long __B, long long __C,
     long long __D, long long __E, long long __F,
     long long __G, long long __H)
{
  return __extension__ (__m512i) (__v8di)
  { __H, __G, __F, __E, __D, __C, __B, __A };
}

/* Low-to-high argument order; forwards to _mm512_set_epi64 reversed.  */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)           \
  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
   9820 
/* Build a 512-bit double vector from 8 doubles, high-to-low.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_set_pd (double __A, double __B, double __C, double __D,
        double __E, double __F, double __G, double __H)
{
  return __extension__ (__m512d)
  { __H, __G, __F, __E, __D, __C, __B, __A };
}

/* Low-to-high argument order; forwards to _mm512_set_pd reversed.  */
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)              \
  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))

/* Build a 512-bit float vector from 16 floats, high-to-low.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_set_ps (float __A, float __B, float __C, float __D,
        float __E, float __F, float __G, float __H,
        float __I, float __J, float __K, float __L,
        float __M, float __N, float __O, float __P)
{
  return __extension__ (__m512)
  { __P, __O, __N, __M, __L, __K, __J, __I,
    __H, __G, __F, __E, __D, __C, __B, __A };
}

/* Low-to-high argument order; forwards to _mm512_set_ps reversed.  */
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
                (e4),(e3),(e2),(e1),(e0))
   9846 
/* |x| for 16 floats: clear the sign bit of each element by ANDing with
   0x7FFFFFFF (bitwise, so NaN payloads are preserved).  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_abs_ps(__m512 __A)
{
  return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}

/* Merge-masked |x|: inactive lanes come from __W.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
{
  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}

/* |x| for 8 doubles: clear the sign bit with a 64-bit mask.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_abs_pd(__m512d __A)
{
  return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
}

/* Merge-masked double-precision |x|.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
{
  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
}
   9870 
// Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
// outputs. This class of vector operation forms the basis of many scientific
// computations. In vector-reduction arithmetic, the evaluation is independent
// of the order of the input elements of V.
   9875 
// Used bisection method: at each step the vector from the previous step is
// split in half and Operator is applied to the two halves.
// This takes log2(n) steps where n is the number of elements in the vector.
//
// NOTE: the statement expression ends in a 'return', so this macro must be
// expanded as the last statement of an intrinsic wrapper function; it
// returns the reduced scalar from that function.
//
// Vec512 - Vector with size 512 (value being reduced).
// Operator - Can be one of following: +,*,&,|
// T2  - Token 'i' for integer vectors and 'f' for float vectors
//       (forms the __v{8,4,2}d{i,f} cast types).
// T1 - Token 'i' for __m{256,128}i and 'd' for __m{256,128}d.

#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1)         \
  __extension__({                                                      \
    __m256##T1 Vec256 = __builtin_shufflevector(                       \
                            (__v8d##T2)Vec512,                         \
                            (__v8d##T2)Vec512,                         \
                            0, 1, 2, 3)                                \
                        Operator                                       \
                        __builtin_shufflevector(                       \
                            (__v8d##T2)Vec512,                         \
                            (__v8d##T2)Vec512,                         \
                            4, 5, 6, 7);                               \
    __m128##T1 Vec128 = __builtin_shufflevector(                       \
                            (__v4d##T2)Vec256,                         \
                            (__v4d##T2)Vec256,                         \
                            0, 1)                                      \
                        Operator                                       \
                        __builtin_shufflevector(                       \
                            (__v4d##T2)Vec256,                         \
                            (__v4d##T2)Vec256,                         \
                            2, 3);                                     \
    Vec128 = __builtin_shufflevector((__v2d##T2)Vec128,                \
                                     (__v2d##T2)Vec128, 0, -1)         \
             Operator                                                  \
             __builtin_shufflevector((__v2d##T2)Vec128,                \
                                     (__v2d##T2)Vec128, 1, -1);        \
    return Vec128[0];                                                  \
  })
   9912 
/* Sum of the eight 64-bit integer elements of __W.  (The reduction macro
   expands to the function's 'return' statement.) */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, +, i, i);
}

/* Product of the eight 64-bit integer elements of __W. */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, *, i, i);
}

/* Bitwise AND of the eight 64-bit integer elements of __W. */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, &, i, i);
}

/* Bitwise OR of the eight 64-bit integer elements of __W. */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, |, i, i);
}

/* Sum of the eight double elements of __W.  Note: bisection (tree) order,
   not strict left-to-right FP addition. */
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) {
  _mm512_reduce_operator_64bit(__W, +, f, d);
}

/* Product of the eight double elements of __W (tree order). */
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) {
  _mm512_reduce_operator_64bit(__W, *, f, d);
}
   9936 
// Masked 64-bit reduction: elements whose Mask bit is 0 are first replaced by
// the identity element, then the unmasked reduction is applied.
//
// Vec512 - Vector with size 512.
// Vec512Neutral - All vector elements set to the identity element.
// Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0}
// Operator - Can be one of following: +,*,&,|
// Mask - Intrinsic Mask
// T2  - Can get 'i' for int and 'f' for float.
// T1 - Can get 'i' for int and 'd' for packed double-precision.
// T3 - Token 'pd' for packed double or 'q' for q-word
//      (forms __builtin_ia32_select{q|pd}_512).

#define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator,     \
                                          Mask, T2, T1, T3)                    \
  __extension__({                                                              \
    Vec512 = __builtin_ia32_select##T3##_512(                                  \
                 (__mmask8)Mask,                                               \
                 (__v8d##T2)Vec512,                                            \
                 (__v8d##T2)Vec512Neutral);                                    \
    _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1);                    \
  })
   9955 
/* Masked sum of 64-bit elements; lanes with a 0 mask bit contribute the
   additive identity 0. */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), +, __M, i, i, q);
}

/* Masked product of 64-bit elements; masked-off lanes contribute 1. */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(1), *, __M, i, i, q);
}

/* Masked AND of 64-bit elements; masked-off lanes contribute all-ones. */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
                                    &, __M,  i, i, q);
}

/* Masked OR of 64-bit elements; masked-off lanes contribute 0. */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M,
                                    i, i, q);
}

/* Masked sum of double elements; masked-off lanes contribute +0.0. */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M,
                                    f, d, pd);
}

/* Masked product of double elements; masked-off lanes contribute 1.0. */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M,
                                    f, d, pd);
}
   9989 
// 32-bit reduction by bisection; the statement expression ends in 'return',
// so this macro must be the last statement of the wrapper function.
//
// Vec512 - Vector with size 512.
// Operator - Can be one of following: +,*,&,|
// T2 - Token 'i' for integer vectors and 'f' for float vectors
//      (forms the __v{16,8,4}s{i,f} cast types).
// T1 - Token 'i' for __m{256,128}i and empty for __m{256,128} (packed single).

#define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \
    __m256##T1 Vec256 =                                                        \
            (__m256##T1)(__builtin_shufflevector(                              \
                                    (__v16s##T2)Vec512,                        \
                                    (__v16s##T2)Vec512,                        \
                                    0, 1, 2, 3, 4, 5, 6, 7)                    \
                                Operator                                       \
                         __builtin_shufflevector(                              \
                                    (__v16s##T2)Vec512,                        \
                                    (__v16s##T2)Vec512,                        \
                                    8, 9, 10, 11, 12, 13, 14, 15));            \
    __m128##T1 Vec128 =                                                        \
             (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v8s##T2)Vec256,                         \
                                    (__v8s##T2)Vec256,                         \
                                    0, 1, 2, 3)                                \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v8s##T2)Vec256,                         \
                                    (__v8s##T2)Vec256,                         \
                                    4, 5, 6, 7));                              \
    Vec128 = (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    0, 1, -1, -1)                              \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    2, 3, -1, -1));                            \
    Vec128 = (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    0, -1, -1, -1)                             \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    1, -1, -1, -1));                           \
    return Vec128[0];                                                          \
  })
   10036 
/* Sum of the sixteen 32-bit integer elements of __W. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_add_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, +, i, i);
}

/* Product of the sixteen 32-bit integer elements of __W. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_mul_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, *, i, i);
}

/* Bitwise AND of the sixteen 32-bit integer elements of __W. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_and_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, &, i, i);
}

/* Bitwise OR of the sixteen 32-bit integer elements of __W. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_or_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, |, i, i);
}

/* Sum of the sixteen float elements of __W (tree order). */
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_reduce_add_ps(__m512 __W) {
  _mm512_reduce_operator_32bit(__W, +, f, );
}

/* Product of the sixteen float elements of __W (tree order). */
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_reduce_mul_ps(__m512 __W) {
  _mm512_reduce_operator_32bit(__W, *, f, );
}
   10066 
// Masked 32-bit reduction: elements whose Mask bit is 0 are first replaced by
// the identity element, then the unmasked reduction is applied.
//
// Vec512 - Vector with size 512.
// Vec512Neutral - All vector elements set to the identity element.
// Identity element: {+,0},{*,1},{&,0xFFFFFFFF},{|,0}
// Operator - Can be one of following: +,*,&,|
// Mask - Intrinsic Mask
// T2  - Token 'i' for integer vectors and 'f' for float vectors.
// T1 - Token 'i' for __m512i and empty for __m512 (packed single).
// T3 - Token 'ps' for packed single or 'd' for d-word
//      (forms __builtin_ia32_select{d|ps}_512).

#define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator,     \
                                          Mask, T2, T1, T3)                    \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                             (__mmask16)Mask,                                  \
                             (__v16s##T2)Vec512,                               \
                             (__v16s##T2)Vec512Neutral);                       \
    _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1);                    \
  })
   10085 
/* Masked sum of 32-bit elements; masked-off lanes contribute 0. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), +, __M, i, i, d);
}

/* Masked product of 32-bit elements; masked-off lanes contribute 1. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(1), *, __M, i, i, d);
}

/* Masked AND of 32-bit elements; masked-off lanes contribute all-ones. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M,
                                    i, i, d);
}

/* Masked OR of 32-bit elements; masked-off lanes contribute 0. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), |, __M, i, i, d);
}

/* Masked sum of float elements; masked-off lanes contribute +0.0f. */
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(0), +, __M, f, , ps);
}

/* Masked product of float elements; masked-off lanes contribute 1.0f. */
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps);
}
   10116 
   10117 // Used bisection method. At each step, we partition the vector with previous
   10118 // step in half, and the operation is performed on its two halves.
   10119 // This takes log2(n) steps where n is the number of elements in the vector.
   10120 // This macro uses only intrinsics from the AVX512F feature.
   10121 
// Vec512 - Vector with size of 512.
// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
//              _mm512_max_epi64
// T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
// T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
// NOTE: the statement expression ends in 'return'; expand this macro only as
// the last statement of the wrapper intrinsic.

#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \
        Vec512 = _mm512_##IntrinName(                                          \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 0, 1, 2, 3, -1, -1, -1, -1),  \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 4, 5, 6, 7, -1, -1, -1, -1)); \
        Vec512 = _mm512_##IntrinName(                                          \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 0, 1, -1, -1, -1, -1, -1, -1),\
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 2, 3, -1, -1, -1, -1, -1,     \
                                                 -1));                         \
        Vec512 = _mm512_##IntrinName(                                          \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                0, -1, -1, -1, -1, -1, -1, -1),\
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                1, -1, -1, -1, -1, -1, -1, -1))\
                                                ;                              \
    return Vec512[0];                                                          \
  })
   10160 
/* Maximum of the eight signed 64-bit elements of __V. */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_reduce_max_epi64(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i);
}

/* Maximum of the eight unsigned 64-bit elements of __V. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_reduce_max_epu64(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i);
}

/* Maximum of the eight double elements of __V. */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_reduce_max_pd(__m512d __V) {
  _mm512_reduce_maxMin_64bit(__V, max_pd, d, f);
}

/* Minimum of the eight signed 64-bit elements of __V. */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epi64
(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, min_epi64, i, i);
}

/* Minimum of the eight unsigned 64-bit elements of __V. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_reduce_min_epu64(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i);
}

/* Minimum of the eight double elements of __V. */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_reduce_min_pd(__m512d __V) {
  _mm512_reduce_maxMin_64bit(__V, min_pd, d, f);
}
   10190 
// Masked 64-bit max/min reduction: elements whose Mask bit is 0 are first
// replaced by the identity element, then the unmasked reduction is applied.
//
// Vec512 - Vector with size 512.
// Vec512Neutral - A 512 length vector with elements set to the identity element
// Identity element: {max_epi,0x8000000000000000}
//                   {max_epu,0x0000000000000000}
//                   {max_pd, 0xFFF0000000000000}
//                   {min_epi,0x7FFFFFFFFFFFFFFF}
//                   {min_epu,0xFFFFFFFFFFFFFFFF}
//                   {min_pd, 0x7FF0000000000000}
//
// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
//              _mm512_max_epi64
// T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
// T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
// T3 - Can get 'q' q word and 'pd' for packed double.
//      [__builtin_ia32_select{q|pd}_512]
// Mask - Intrinsic Mask

#define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask)                          \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                             (__mmask8)Mask,                                   \
                             (__v8d##T2)Vec512,                                \
                             (__v8d##T2)Vec512Neutral);                        \
    _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2);                    \
  })
   10217 
/* Masked signed 64-bit max; masked-off lanes contribute INT64_MIN. */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x8000000000000000),
                                  max_epi64, i, i, q, __M);
}

/* Masked unsigned 64-bit max; masked-off lanes contribute 0. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x0000000000000000),
                                  max_epu64, i, i, q, __M);
}

/* Masked double max; masked-off lanes contribute -infinity. */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, -_mm512_set1_pd(__builtin_inf()),
                                  max_pd, d, f, pd, __M);
}

/* Masked signed 64-bit min; masked-off lanes contribute INT64_MAX. */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),
                                  min_epi64, i, i, q, __M);
}

/* Masked unsigned 64-bit min; masked-off lanes contribute UINT64_MAX. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
                                  min_epu64, i, i, q, __M);
}

/* Masked double min; masked-off lanes contribute +infinity. */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()),
                                  min_pd, d, f, pd, __M);
}
   10253 
// Vec512 - Vector with size 512.
// IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example:
//              _mm512_max_epi32
// T1 - Can get 'i' for int and ' ' .[__m512{i|}]
// T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}]
// NOTE: the statement expression ends in 'return'; expand this macro only as
// the last statement of the wrapper intrinsic.

#define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2) __extension__({ \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, 1, 2, 3, 4, 5, 6, 7,                      \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  8, 9, 10, 11, 12, 13, 14, 15,                \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, 1, 2, 3, -1, -1, -1, -1,                  \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  4, 5, 6, 7, -1, -1, -1, -1,                  \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, 1, -1, -1, -1, -1, -1, -1,                \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  2, 3, -1, -1, -1, -1, -1, -1,                \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0,  -1, -1, -1, -1, -1, -1, -1,              \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  1, -1, -1, -1, -1, -1, -1, -1,               \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    return Vec512[0];                                                          \
  })
   10307 
/* Maximum of the sixteen signed 32-bit elements of a. */
static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i a) {
  _mm512_reduce_maxMin_32bit(a, max_epi32, i, i);
}

/* Maximum of the sixteen unsigned 32-bit elements of a. */
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm512_reduce_max_epu32(__m512i a) {
  _mm512_reduce_maxMin_32bit(a, max_epu32, i, i);
}

/* Maximum of the sixteen float elements of a. */
static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 a) {
  _mm512_reduce_maxMin_32bit(a, max_ps, , f);
}

/* Minimum of the sixteen signed 32-bit elements of a. */
static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i a) {
  _mm512_reduce_maxMin_32bit(a, min_epi32, i, i);
}

/* Minimum of the sixteen unsigned 32-bit elements of a. */
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm512_reduce_min_epu32(__m512i a) {
  _mm512_reduce_maxMin_32bit(a, min_epu32, i, i);
}

/* Minimum of the sixteen float elements of a. */
static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 a) {
  _mm512_reduce_maxMin_32bit(a, min_ps, , f);
}
   10333 
// Masked 32-bit max/min reduction: elements whose Mask bit is 0 are first
// replaced by the identity element, then the unmasked reduction is applied.
//
// Vec512 - Vector with size 512.
// Vec512Neutral - A 512 length vector with elements set to the identity element
// Identity element: {max_epi,0x80000000}
//                   {max_epu,0x00000000}
//                   {max_ps, 0xFF800000}
//                   {min_epi,0x7FFFFFFF}
//                   {min_epu,0xFFFFFFFF}
//                   {min_ps, 0x7F800000}
//
// IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example:
//              _mm512_max_epi32
// T1 - Can get 'i' for int and ' ' .[__m512{i|}]
// T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}]
// T3 - Can get 'd' d word and 'ps' for packed single.
//      [__builtin_ia32_select{d|ps}_512]
// Mask - Intrinsic Mask

#define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask)                          \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                                        (__mmask16)Mask,                       \
                                        (__v16s##T2)Vec512,                    \
                                        (__v16s##T2)Vec512Neutral);            \
   _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2);                     \
   })
   10360 
/* Masked signed 32-bit max; masked-off lanes contribute INT32_MIN. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x80000000), max_epi32,
                                  i, i, d, __M);
}

/* Masked unsigned 32-bit max; masked-off lanes contribute 0. */
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x00000000), max_epu32,
                                  i, i, d, __M);
}

/* Masked float max; masked-off lanes contribute -infinity. */
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
  _mm512_mask_reduce_maxMin_32bit(__V,-_mm512_set1_ps(__builtin_inff()), max_ps, , f,
                                  ps, __M);
}

/* Masked signed 32-bit min; masked-off lanes contribute INT32_MAX. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x7FFFFFFF), min_epi32,
                                  i, i, d, __M);
}

/* Masked unsigned 32-bit min; masked-off lanes contribute UINT32_MAX. */
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0xFFFFFFFF), min_epu32,
                                  i, i, d, __M);
}

/* Masked float min; masked-off lanes contribute +infinity. */
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()), min_ps, , f,
                                  ps, __M);
}
   10396 
   10397 #undef __DEFAULT_FN_ATTRS
   10398 
   10399 #endif // __AVX512FINTRIN_H
   10400