      1 /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 #ifndef __IMMINTRIN_H
     24 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
     25 #endif
     26 
     27 #ifndef __AVX512FINTRIN_H
     28 #define __AVX512FINTRIN_H
     29 
/* 512-bit internal vector types used to implement the intrinsics.
   All are 64 bytes wide; the element type determines the lane count. */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* Public 512-bit vector types exposed by the intrinsics API. */
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Write-mask types: one bit per lane (8 lanes of 64-bit, 16 lanes of 32-bit). */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
     49 
/* Rounding mode macros.  */
#define _MM_FROUND_TO_NEAREST_INT   0x00  /* round to nearest, ties to even */
#define _MM_FROUND_TO_NEG_INF       0x01  /* round toward negative infinity */
#define _MM_FROUND_TO_POS_INF       0x02  /* round toward positive infinity */
#define _MM_FROUND_TO_ZERO          0x03  /* round toward zero (truncate) */
#define _MM_FROUND_CUR_DIRECTION    0x04  /* use the current MXCSR rounding mode */
     56 
/* Constants for integer comparison predicates.
   The enumerator values are the immediate predicate encodings consumed by
   the masked integer-compare intrinsics. */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,  /* (encoding 3 is reserved/unused) */
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;
     69 
/* Shuffle-control constants for 4-element permutes (e.g. _mm512_shuffle_epi32).
   Each letter selects a source element (A=0, B=1, C=2, D=3); the first letter
   is the most-significant 2-bit field of the immediate, so
   _MM_PERM_XYZW == (X<<6)|(Y<<4)|(Z<<2)|W. */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
    159 
/* Normalization interval selector for the getmant (extract mantissa)
   intrinsics: chooses the range into which the mantissa is normalized. */
typedef enum
{
  _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
    167 
/* Sign-handling selector for the getmant (extract mantissa) intrinsics. */
typedef enum
{
  _MM_MANT_SIGN_src,    /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero,   /* sign = 0             */
  _MM_MANT_SIGN_nan   /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
    174 
/* Define the default attributes for the functions in this file.
   Every intrinsic is force-inlined and compiled as if -mavx512f were in
   effect for its body, regardless of the translation unit's target. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
    177 
/* Create vectors with repeated elements */

/* Return a 512-bit integer vector with all bits cleared. */
static  __inline __m512i __DEFAULT_FN_ATTRS
_mm512_setzero_si512(void)
{
  return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}

/* Legacy alias kept for source compatibility. */
#define _mm512_setzero_epi32 _mm512_setzero_si512
    187 
/* Return a 512-bit vector of [8 x double] with unspecified contents.
   Cheaper than a zeroing load when the value will be fully overwritten. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_undefined_pd(void)
{
  return (__m512d)__builtin_ia32_undef512();
}

/* Return a 512-bit vector of [16 x float] with unspecified contents. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_undefined(void)
{
  return (__m512)__builtin_ia32_undef512();
}

/* Same as _mm512_undefined; the _ps suffix matches the other type-suffixed
   variants. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_undefined_ps(void)
{
  return (__m512)__builtin_ia32_undef512();
}

/* Return a 512-bit integer vector with unspecified contents. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_undefined_epi32(void)
{
  return (__m512i)__builtin_ia32_undef512();
}
    211 
/* Broadcast the low 32-bit element of __A to all 16 lanes (VPBROADCASTD). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcastd_epi32 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si) __A,
                                          (__v4si)_mm_undefined_si128(),
                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}

/* Merge-masking variant: lanes with a 0 mask bit keep the value from __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si) _mm512_broadcastd_epi32(__A),
                                             (__v16si) __O);
}

/* Zero-masking variant: lanes with a 0 mask bit are set to zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si) _mm512_broadcastd_epi32(__A),
                                             (__v16si) _mm512_setzero_si512());
}
    235 
/* Broadcast the low 64-bit element of __A to all 8 lanes (VPBROADCASTQ). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcastq_epi64 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v2di) __A,
                                          (__v2di) _mm_undefined_si128(),
                                          0, 0, 0, 0, 0, 0, 0, 0);
}

/* Merge-masking variant: lanes with a 0 mask bit keep the value from __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512(__M,
                                             (__v8di) _mm512_broadcastq_epi64(__A),
                                             (__v8di) __O);

}

/* Zero-masking variant: lanes with a 0 mask bit are set to zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512(__M,
                                             (__v8di) _mm512_broadcastq_epi64(__A),
                                             (__v8di) _mm512_setzero_si512());
}
    260 
    261 
/* Return a 512-bit vector of [16 x float] with all elements zero. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_setzero_ps(void)
{
  return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

/* Legacy alias kept for source compatibility. */
#define _mm512_setzero _mm512_setzero_ps

/* Return a 512-bit vector of [8 x double] with all elements zero. */
static  __inline __m512d __DEFAULT_FN_ATTRS
_mm512_setzero_pd(void)
{
  return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}
    276 
/* Replicate the scalar __w into all 16 float lanes. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_set1_ps(float __w)
{
  return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
                   __w, __w, __w, __w, __w, __w, __w, __w  };
}

/* Replicate the scalar __w into all 8 double lanes. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_set1_pd(double __w)
{
  return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
}

/* Replicate the scalar __w into all 64 byte lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi8(char __w)
{
  return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w  };
}

/* Replicate the scalar __w into all 32 16-bit lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi16(short __w)
{
  return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w };
}

/* Replicate the scalar __s into all 16 32-bit lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi32(int __s)
{
  return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
                             __s, __s, __s, __s, __s, __s, __s, __s };
}

/* Zero-masked broadcast: lanes with a 0 mask bit are set to zero. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si)_mm512_set1_epi32(__A),
                                             (__v16si)_mm512_setzero_si512());
}

/* Replicate the scalar __d into all 8 64-bit lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi64(long long __d)
{
  return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
}
    332 
    333 #ifdef __x86_64__
    334 static __inline __m512i __DEFAULT_FN_ATTRS
    335 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
    336 {
    337   return (__m512i)__builtin_ia32_selectq_512(__M,
    338                                              (__v8di)_mm512_set1_epi64(__A),
    339                                              (__v8di)_mm512_setzero_si512());
    340 }
    341 #endif
    342 
/* Broadcast the low float element of __A to all 16 lanes (VBROADCASTSS). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcastss_ps(__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf) __A,
                                         (__v4sf)_mm_undefined_ps(),
                                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
    350 
/* Repeat the 4-element pattern {__D,__C,__B,__A} across all 16 32-bit lanes.
   Note the arguments are stored highest-to-lowest, so __D lands in lane 0. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
{
  return  (__m512i)(__v16si)
   { __D, __C, __B, __A, __D, __C, __B, __A,
     __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Repeat the 4-element pattern {__D,__C,__B,__A} across all 8 64-bit lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set4_epi64 (long long __A, long long __B, long long __C,
       long long __D)
{
  return  (__m512i) (__v8di)
   { __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Repeat the 4-element pattern {__D,__C,__B,__A} across all 8 double lanes. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_set4_pd (double __A, double __B, double __C, double __D)
{
  return  (__m512d)
   { __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Repeat the 4-element pattern {__D,__C,__B,__A} across all 16 float lanes. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_set4_ps (float __A, float __B, float __C, float __D)
{
  return  (__m512)
   { __D, __C, __B, __A, __D, __C, __B, __A,
     __D, __C, __B, __A, __D, __C, __B, __A };
}
    381 
/* "Reversed" set4 variants: arguments are given lowest-lane-first and are
   forwarded to the corresponding set4 intrinsic in reverse order. */
#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3)                \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3)                \
  _mm512_set4_ps((e3),(e2),(e1),(e0))
    393 
/* Broadcast the low double element of __A to all 8 lanes (VBROADCASTSD). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_broadcastsd_pd(__m128d __A)
{
  return (__m512d)__builtin_shufflevector((__v2df) __A,
                                          (__v2df) _mm_undefined_pd(),
                                          0, 0, 0, 0, 0, 0, 0, 0);
}
    401 
/* Cast between vector types */
/* Widening casts use shufflevector with -1 ("don't care") indices, so the
   upper part of the 512-bit result is UNDEFINED — use the _mm512_zext*
   intrinsics below when zeroed upper bits are required.  Same-width casts
   are pure bit reinterpretations and generate no instructions. */

/* Widen [4 x double] to [8 x double]; upper 256 bits undefined. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castpd256_pd512(__m256d __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
}

/* Widen [8 x float] to [16 x float]; upper 256 bits undefined. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castps256_ps512(__m256 __a)
{
  return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
                                          -1, -1, -1, -1, -1, -1, -1, -1);
}

/* Truncate [8 x double] to its low [2 x double]. */
static __inline __m128d __DEFAULT_FN_ATTRS
_mm512_castpd512_pd128(__m512d __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1);
}

/* Truncate [8 x double] to its low [4 x double]. */
static __inline __m256d __DEFAULT_FN_ATTRS
_mm512_castpd512_pd256 (__m512d __A)
{
  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
}

/* Truncate [16 x float] to its low [4 x float]. */
static __inline __m128 __DEFAULT_FN_ATTRS
_mm512_castps512_ps128(__m512 __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
}

/* Truncate [16 x float] to its low [8 x float]. */
static __inline __m256 __DEFAULT_FN_ATTRS
_mm512_castps512_ps256 (__m512 __A)
{
  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
}

/* Reinterpret the 512 bits of __A as [16 x float]; no conversion. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castpd_ps (__m512d __A)
{
  return (__m512) (__A);
}

/* Reinterpret the 512 bits of __A as an integer vector; no conversion. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_castpd_si512 (__m512d __A)
{
  return (__m512i) (__A);
}

/* Widen [2 x double] to [8 x double]; upper 384 bits undefined. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_castpd128_pd512 (__m128d __A)
{
  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
}

/* Reinterpret the 512 bits of __A as [8 x double]; no conversion. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castps_pd (__m512 __A)
{
  return (__m512d) (__A);
}

/* Reinterpret the 512 bits of __A as an integer vector; no conversion. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_castps_si512 (__m512 __A)
{
  return (__m512i) (__A);
}

/* Widen [4 x float] to [16 x float]; upper 384 bits undefined. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_castps128_ps512 (__m128 __A)
{
    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
}

/* Widen a 128-bit integer vector to 512 bits; upper 384 bits undefined. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_castsi128_si512 (__m128i __A)
{
   return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
}

/* Widen a 256-bit integer vector to 512 bits; upper 256 bits undefined. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_castsi256_si512 (__m256i __A)
{
   return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
}

/* Reinterpret the 512 bits of __A as [16 x float]; no conversion. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castsi512_ps (__m512i __A)
{
  return (__m512) (__A);
}

/* Reinterpret the 512 bits of __A as [8 x double]; no conversion. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castsi512_pd (__m512i __A)
{
  return (__m512d) (__A);
}

/* Truncate a 512-bit integer vector to its low 128 bits. */
static __inline __m128i __DEFAULT_FN_ATTRS
_mm512_castsi512_si128 (__m512i __A)
{
  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
}

/* Truncate a 512-bit integer vector to its low 256 bits. */
static __inline __m256i __DEFAULT_FN_ATTRS
_mm512_castsi512_si256 (__m512i __A)
{
  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
}
    512 
/* Convert the low 16 bits of an int into a 16-bit write mask. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_int2mask(int __a)
{
  return (__mmask16)__a;
}

/* Zero-extend a 16-bit write mask into an int. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask2int(__mmask16 __a)
{
  return (int)__a;
}
    524 
/// \brief Constructs a 512-bit floating-point vector of [8 x double] from a
///    128-bit floating-point vector of [2 x double]. The lower 128 bits
///    contain the value of the source vector. The upper 384 bits are set
///    to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
///    contain the value of the parameter. The upper 384 bits are set to zero.
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_zextpd128_pd512(__m128d __a)
{
  /* Indices >= 2 select elements of the second (all-zero) operand,
     zeroing the upper lanes. */
  return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
}

/// \brief Constructs a 512-bit floating-point vector of [8 x double] from a
///    256-bit floating-point vector of [4 x double]. The lower 256 bits
///    contain the value of the source vector. The upper 256 bits are set
///    to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 256-bit vector of [4 x double].
/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
///    contain the value of the parameter. The upper 256 bits are set to zero.
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_zextpd256_pd512(__m256d __a)
{
  /* Indices 4-7 select elements of the second (all-zero) operand. */
  return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
}

/// \brief Constructs a 512-bit floating-point vector of [16 x float] from a
///    128-bit floating-point vector of [4 x float]. The lower 128 bits contain
///    the value of the source vector. The upper 384 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 128-bit vector of [4 x float].
/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
///    contain the value of the parameter. The upper 384 bits are set to zero.
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_zextps128_ps512(__m128 __a)
{
  /* Indices >= 4 select elements of the second (all-zero) operand. */
  return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
}

/// \brief Constructs a 512-bit floating-point vector of [16 x float] from a
///    256-bit floating-point vector of [8 x float]. The lower 256 bits contain
///    the value of the source vector. The upper 256 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 256-bit vector of [8 x float].
/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
///    contain the value of the parameter. The upper 256 bits are set to zero.
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_zextps256_ps512(__m256 __a)
{
  /* Indices 8-15 select elements of the second (all-zero) operand. */
  return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

/// \brief Constructs a 512-bit integer vector from a 128-bit integer vector.
///    The lower 128 bits contain the value of the source vector. The upper
///    384 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 128-bit integer vector.
/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
///    the parameter. The upper 384 bits are set to zero.
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_zextsi128_si512(__m128i __a)
{
  /* Indices >= 2 select elements of the second (all-zero) operand. */
  return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
}

/// \brief Constructs a 512-bit integer vector from a 256-bit integer vector.
///    The lower 256 bits contain the value of the source vector. The upper
///    256 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 256-bit integer vector.
/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
///    the parameter. The upper 256 bits are set to zero.
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_zextsi256_si512(__m256i __a)
{
  /* Indices 4-7 select elements of the second (all-zero) operand. */
  return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
}
    634 
/* Bitwise operators */
/* Bitwise AND of two 512-bit vectors, viewed as 16 x 32-bit lanes.
   (The lane view only matters for the masked variants.) */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a & (__v16su)__b);
}

/* Merge-masked AND: lanes with a 0 mask bit keep the value from __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                (__v16si) _mm512_and_epi32(__a, __b),
                (__v16si) __src);
}

/* Zero-masked AND: lanes with a 0 mask bit are set to zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
                                         __k, __a, __b);
}

/* Bitwise AND of two 512-bit vectors, viewed as 8 x 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}

/* Merge-masked AND: lanes with a 0 mask bit keep the value from __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
    return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
                (__v8di) _mm512_and_epi64(__a, __b),
                (__v8di) __src);
}

/* Zero-masked AND: lanes with a 0 mask bit are set to zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
                                         __k, __a, __b);
}
    677 
/* Bitwise (~__A) & __B across all 512 bits (VPANDNQ semantics). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_si512 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
}

/* (~__A) & __B, viewed as 16 x 32-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
}

/* Merge-masked ANDNOT: lanes with a 0 mask bit keep the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_andnot_epi32(__A, __B),
                                         (__v16si)__W);
}

/* Zero-masked ANDNOT: lanes with a 0 mask bit are set to zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
                                           __U, __A, __B);
}

/* (~__A) & __B, viewed as 8 x 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
}

/* Merge-masked ANDNOT: lanes with a 0 mask bit keep the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_andnot_epi64(__A, __B),
                                          (__v8di)__W);
}

/* Zero-masked ANDNOT: lanes with a 0 mask bit are set to zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
                                           __U, __A, __B);
}
    725 
/* Bitwise OR of 32-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a | (__v16su)__b);
}

/* Merge-masked epi32 OR: clear-mask lanes come from __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si)_mm512_or_epi32(__a, __b),
                                             (__v16si)__src);
}

/* Zeroing-masked epi32 OR. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
}

/* Bitwise OR of 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}

/* Merge-masked epi64 OR: clear-mask lanes come from __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_or_epi64(__a, __b),
                                             (__v8di)__src);
}

/* Zeroing-masked epi64 OR. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
}
    765 
/* Bitwise XOR of 32-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a ^ (__v16su)__b);
}

/* Merge-masked epi32 XOR: clear-mask lanes come from __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                            (__v16si)_mm512_xor_epi32(__a, __b),
                                            (__v16si)__src);
}

/* Zeroing-masked epi32 XOR. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
}

/* Bitwise XOR of 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}

/* Merge-masked epi64 XOR: clear-mask lanes come from __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_xor_epi64(__a, __b),
                                             (__v8di)__src);
}

/* Zeroing-masked epi64 XOR. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
}
    805 
/* Unmasked whole-register AND (lane width is irrelevant for pure bit ops). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}

/* Unmasked whole-register OR. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}

/* Unmasked whole-register XOR. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}
    823 
    824 /* Arithmetic */
    825 
/* Lane-wise add of 8 doubles; plain vector '+' lets the compiler emit the
   packed add directly. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_add_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a + (__v8df)__b);
}

/* Lane-wise add of 16 floats. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_add_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a + (__v16sf)__b);
}

/* Lane-wise multiply of 8 doubles. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mul_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a * (__v8df)__b);
}

/* Lane-wise multiply of 16 floats. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mul_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a * (__v16sf)__b);
}

/* Lane-wise subtract of 8 doubles (__a - __b). */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_sub_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a - (__v8df)__b);
}

/* Lane-wise subtract of 16 floats (__a - __b). */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_sub_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a - (__v16sf)__b);
}
    861 
/* 64-bit lane add; unsigned lanes so wraparound is well defined in C. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A + (__v8du) __B);
}

/* Merge-masked 64-bit add: clear-mask lanes come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zeroing-masked 64-bit add: clear-mask lanes are 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* 64-bit lane subtract (__A - __B), wraparound well defined via unsigned. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A - (__v8du) __B);
}

/* Merge-masked 64-bit subtract. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zeroing-masked 64-bit subtract. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
    905 
/* 32-bit lane add; unsigned lanes so wraparound is well defined in C. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A + (__v16su) __B);
}

/* Merge-masked 32-bit add: clear-mask lanes come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zeroing-masked 32-bit add. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* 32-bit lane subtract (__A - __B). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A - (__v16su) __B);
}

/* Merge-masked 32-bit subtract. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zeroing-masked 32-bit subtract. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
    949 
/* Packed-double max with an explicit rounding-control immediate R.  Macros
   (not functions) because R must be a compile-time constant for the builtin.
   Each argument is expanded exactly once, so there is no double-evaluation
   hazard. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

/* Zeroing-masked variant: passthrough source is all zeros. */
#define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

/* Unmasked variant: all-ones mask; passthrough is undefined (never read). */
#define _mm512_max_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_undefined_pd(), \
                                        (__mmask8)-1, (int)(R)); })
    967 
/* Packed-double max using the current rounding mode; all-ones mask means
   every lane is written, so the zero passthrough is never observed. */
static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_max_pd(__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
             (__v8df) __B,
             (__v8df)
             _mm512_setzero_pd (),
             (__mmask8) -1,
             _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked packed-double max: clear-mask lanes come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
                  (__v8df) __B,
                  (__v8df) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Zeroing-masked packed-double max. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
                  (__v8df) __B,
                  (__v8df)
                  _mm512_setzero_pd (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
    998 }
    999 
/* Packed-float max with explicit rounding immediate R; see the _pd
   counterparts above for the mask/maskz/unmasked pattern. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

/* Zeroing-masked variant. */
#define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })

/* Unmasked variant: all-ones mask, undefined (never-read) passthrough. */
#define _mm512_max_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_undefined_ps(), \
                                       (__mmask16)-1, (int)(R)); })
   1017 
/* Packed-float max using the current rounding mode; all lanes written. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_max_ps(__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
            (__v16sf) __B,
            (__v16sf)
            _mm512_setzero_ps (),
            (__mmask16) -1,
            _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked packed-float max: clear-mask lanes come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
                 (__v16sf) __B,
                 (__v16sf) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zeroing-masked packed-float max. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
                 (__v16sf) __B,
                 (__v16sf)
                 _mm512_setzero_ps (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
   1049 
/* Scalar-float max (low lane only); the mask's bit 0 selects between the
   computed result and __W's low lane, upper lanes follow the builtin's
   scalar semantics. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zeroing-masked scalar-float max: zero passthrough for the low lane. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   1067 
/* Scalar-float max with explicit rounding immediate R (unmasked form:
   all-ones mask, zero passthrough). */
#define _mm_max_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

/* Merge-masked variant: passthrough W. */
#define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

/* Zeroing-masked variant. */
#define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
   1085 
/* Scalar-double max (low lane), merge-masked against __W's low lane. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zeroing-masked scalar-double max. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   1103 
/* Scalar-double max with explicit rounding immediate R (unmasked form). */
#define _mm_max_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked variant: passthrough W. */
#define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zeroing-masked variant. */
#define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
   1121 
/* Signed 32-bit lane max; the pmaxsd builtin carries its own mask, so the
   unmasked form passes an all-ones mask (zero passthrough never read). */
static __inline __m512i
__DEFAULT_FN_ATTRS
_mm512_max_epi32(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) -1);
}

/* Merge-masked signed 32-bit max: clear-mask lanes come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
                   (__v16si) __B,
                   (__v16si) __W, __M);
}

/* Zeroing-masked signed 32-bit max. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
                   (__v16si) __B,
                   (__v16si)
                   _mm512_setzero_si512 (),
                   __M);
}

/* Unsigned 32-bit lane max (pmaxud builtin); lanes are passed as __v16si
   but the builtin interprets them as unsigned. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epu32(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) -1);
}

/* Merge-masked unsigned 32-bit max. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
                   (__v16si) __B,
                   (__v16si) __W, __M);
}

/* Zeroing-masked unsigned 32-bit max. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
                   (__v16si) __B,
                   (__v16si)
                   _mm512_setzero_si512 (),
                   __M);
}

/* Signed 64-bit lane max (pmaxsq builtin, 8-bit mask for 8 lanes). */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epi64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) -1);
}

/* Merge-masked signed 64-bit max. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di) __W, __M);
}

/* Zeroing-masked signed 64-bit max. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di)
                   _mm512_setzero_si512 (),
                   __M);
}

/* Unsigned 64-bit lane max (pmaxuq builtin). */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epu64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) -1);
}

/* Merge-masked unsigned 64-bit max. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di) __W, __M);
}

/* Zeroing-masked unsigned 64-bit max. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di)
                   _mm512_setzero_si512 (),
                   __M);
}
   1234 
/* Packed-double min with explicit rounding immediate R; same mask pattern
   as the max_round_pd macros above. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

/* Zeroing-masked variant. */
#define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

/* Unmasked variant: all-ones mask, undefined (never-read) passthrough. */
#define _mm512_min_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_undefined_pd(), \
                                        (__mmask8)-1, (int)(R)); })
   1252 
/* Packed-double min using the current rounding mode; all lanes written. */
static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_min_pd(__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
             (__v8df) __B,
             (__v8df)
             _mm512_setzero_pd (),
             (__mmask8) -1,
             _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked packed-double min: clear-mask lanes come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
                  (__v8df) __B,
                  (__v8df) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
   1273 
/* Packed-float min with explicit rounding immediate R. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

/* Zeroing-masked variant. */
#define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })

/* Unmasked variant: all-ones mask, undefined (never-read) passthrough. */
#define _mm512_min_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_undefined_ps(), \
                                       (__mmask16)-1, (int)(R)); })
   1291 
/* Zeroing-masked packed-double min (placed after the _ps round macros in
   the original source order, which is preserved here). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
                  (__v8df) __B,
                  (__v8df)
                  _mm512_setzero_pd (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Packed-float min using the current rounding mode; all lanes written. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_min_ps(__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
            (__v16sf) __B,
            (__v16sf)
            _mm512_setzero_ps (),
            (__mmask16) -1,
            _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked packed-float min: clear-mask lanes come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
                 (__v16sf) __B,
                 (__v16sf) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zeroing-masked packed-float min. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
                 (__v16sf) __B,
                 (__v16sf)
                 _mm512_setzero_ps (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
   1334 
/* Scalar-float min (low lane), merge-masked against __W's low lane. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zeroing-masked scalar-float min. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   1352 
/* Scalar-float min with explicit rounding immediate R (unmasked form). */
#define _mm_min_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

/* Merge-masked variant: passthrough W. */
#define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

/* Zeroing-masked variant. */
#define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
   1370 
/* Scalar-double min (low lane), merge-masked against __W's low lane. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zeroing-masked scalar-double min. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   1388 
/* Scalar-double min with explicit rounding immediate R (unmasked form). */
#define _mm_min_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked variant: passthrough W. */
#define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zeroing-masked variant. */
#define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
   1406 
   1407 static __inline __m512i
   1408 __DEFAULT_FN_ATTRS
   1409 _mm512_min_epi32(__m512i __A, __m512i __B)
   1410 {
   1411   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
   1412               (__v16si) __B,
   1413               (__v16si)
   1414               _mm512_setzero_si512 (),
   1415               (__mmask16) -1);
   1416 }
   1417 
   1418 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1419 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   1420 {
   1421   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
   1422                    (__v16si) __B,
   1423                    (__v16si) __W, __M);
   1424 }
   1425 
   1426 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1427 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
   1428 {
   1429   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
   1430                    (__v16si) __B,
   1431                    (__v16si)
   1432                    _mm512_setzero_si512 (),
   1433                    __M);
   1434 }
   1435 
/* Elementwise unsigned 32-bit minimum of __A and __B (VPMINUD).  All-ones
   mask makes the zero passthrough vector unused.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epu32(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) -1);
}
   1445 
   1446 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1447 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   1448 {
   1449   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
   1450                    (__v16si) __B,
   1451                    (__v16si) __W, __M);
   1452 }
   1453 
   1454 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1455 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
   1456 {
   1457   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
   1458                    (__v16si) __B,
   1459                    (__v16si)
   1460                    _mm512_setzero_si512 (),
   1461                    __M);
   1462 }
   1463 
/* Elementwise signed 64-bit minimum of __A and __B (VPMINSQ).  All-ones
   mask makes the zero passthrough vector unused.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epi64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) -1);
}
   1473 
   1474 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1475 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
   1476 {
   1477   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
   1478                    (__v8di) __B,
   1479                    (__v8di) __W, __M);
   1480 }
   1481 
   1482 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1483 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
   1484 {
   1485   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
   1486                    (__v8di) __B,
   1487                    (__v8di)
   1488                    _mm512_setzero_si512 (),
   1489                    __M);
   1490 }
   1491 
/* Elementwise unsigned 64-bit minimum of __A and __B (VPMINUQ).  All-ones
   mask makes the zero passthrough vector unused.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epu64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) -1);
}
   1501 
   1502 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1503 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
   1504 {
   1505   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
   1506                    (__v8di) __B,
   1507                    (__v8di) __W, __M);
   1508 }
   1509 
   1510 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1511 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
   1512 {
   1513   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
   1514                    (__v8di) __B,
   1515                    (__v8di)
   1516                    _mm512_setzero_si512 (),
   1517                    __M);
   1518 }
   1519 
/* Signed multiply of the low 32-bit element in each 64-bit lane of __X and
   __Y, producing eight sign-extended 64-bit products (VPMULDQ).  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
}

/* Merge-masked form: 64-bit lanes whose bit in __M is clear keep the
   corresponding element of __W.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epi32(__X, __Y),
                                             (__v8di)__W);
}

/* Zero-masked form: 64-bit lanes whose bit in __M is clear are zeroed.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epi32(__X, __Y),
                                             (__v8di)_mm512_setzero_si512 ());
}
   1541 
/* Unsigned multiply of the low 32-bit element in each 64-bit lane of __X
   and __Y, producing eight zero-extended 64-bit products (VPMULUDQ).  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epu32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
}

/* Merge-masked form: 64-bit lanes whose bit in __M is clear keep the
   corresponding element of __W.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epu32(__X, __Y),
                                             (__v8di)__W);
}

/* Zero-masked form: 64-bit lanes whose bit in __M is clear are zeroed.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epu32(__X, __Y),
                                             (__v8di)_mm512_setzero_si512 ());
}
   1563 
/* Elementwise 32-bit multiply keeping the low 32 bits of each product.
   The __v16su (unsigned) cast gives well-defined wraparound instead of
   signed-overflow undefined behavior; the bit pattern is identical.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A * (__v16su) __B);
}

/* Zero-masked form: 32-bit lanes whose bit in __M is clear are zeroed.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_mullo_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Merge-masked form: 32-bit lanes whose bit in __M is clear keep the
   corresponding element of __W.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_mullo_epi32(__A, __B),
                                             (__v16si)__W);
}
   1585 
/* Packed double-precision square root with explicit rounding control R.
   Merge-masked: lanes with a clear bit in U keep the element from W.  */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
                                         (int)(R)); })

/* Zero-masked: lanes with a clear bit in U become 0.0.  */
#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(U), (int)(R)); })

/* Unmasked: all-ones mask, undefined passthrough (never selected).  */
#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_undefined_pd(), \
                                         (__mmask8)-1, (int)(R)); })
   1600 
/* Packed double-precision square root using the current rounding
   direction.  All-ones mask makes the zero passthrough unused.  */
static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_sqrt_pd(__m512d __a)
{
  return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
                                                (__v8df) _mm512_setzero_pd (),
                                                (__mmask8) -1,
                                                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked sqrt: lanes with a clear bit in __U keep __W's element.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
                   (__v8df) __W,
                   (__mmask8) __U,
                   _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked sqrt: lanes with a clear bit in __U become 0.0.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
                   (__v8df)
                   _mm512_setzero_pd (),
                   (__mmask8) __U,
                   _MM_FROUND_CUR_DIRECTION);
}
   1628 
/* Packed single-precision square root with explicit rounding control R.
   Merge-masked: lanes with a clear bit in U keep the element from W.  */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
                                        (int)(R)); })

/* Zero-masked: lanes with a clear bit in U become 0.0f.  */
#define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(U), (int)(R)); })

/* Unmasked: all-ones mask, undefined passthrough (never selected).  */
#define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_undefined_ps(), \
                                        (__mmask16)-1, (int)(R)); })
   1643 
/* Packed single-precision square root using the current rounding
   direction.  All-ones mask makes the zero passthrough unused.  */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_sqrt_ps(__m512 __a)
{
  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
                                               (__v16sf) _mm512_setzero_ps (),
                                               (__mmask16) -1,
                                               _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked sqrt: lanes with a clear bit in __U keep __W's element.  */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
                                               (__v16sf) __W,
                                               (__mmask16) __U,
                                               _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked sqrt: lanes with a clear bit in __U become 0.0f.  */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
                                               (__v16sf) _mm512_setzero_ps (),
                                               (__mmask16) __U,
                                               _MM_FROUND_CUR_DIRECTION);
}
   1670 
/* Approximate reciprocal square root (14-bit precision, VRSQRT14PD) of
   packed doubles.  All-ones mask makes the zero passthrough unused.  */
static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rsqrt14_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                 (__v8df)
                 _mm512_setzero_pd (),
                 (__mmask8) -1);}

/* Merge-masked form: lanes with a clear bit in __U keep __W's element.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                  (__v8df) __W,
                  (__mmask8) __U);
}

/* Zero-masked form: lanes with a clear bit in __U become 0.0.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                  (__v8df)
                  _mm512_setzero_pd (),
                  (__mmask8) __U);
}
   1695 
/* Approximate reciprocal square root (14-bit precision, VRSQRT14PS) of
   packed floats.  All-ones mask makes the zero passthrough unused.  */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_rsqrt14_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                (__v16sf)
                _mm512_setzero_ps (),
                (__mmask16) -1);
}

/* Merge-masked form: lanes with a clear bit in __U keep __W's element.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                 (__v16sf) __W,
                 (__mmask16) __U);
}

/* Zero-masked form: lanes with a clear bit in __U become 0.0f.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                 (__v16sf)
                 _mm512_setzero_ps (),
                 (__mmask16) __U);
}
   1721 
/* Scalar approximate reciprocal square root (14-bit, VRSQRT14SS) of the
   low float of __B; upper lanes of the result come from __A.  All-ones
   mask makes the zero passthrough unused.  */
static  __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rsqrt14_ss(__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
             (__v4sf) __B,
             (__v4sf)
             _mm_setzero_ps (),
             (__mmask8) -1);
}

/* Merge-masked form: lane 0 falls back to __W when bit 0 of __U is clear.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __W,
          (__mmask8) __U);
}

/* Zero-masked form: lane 0 becomes 0.0f when bit 0 of __U is clear.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) _mm_setzero_ps (),
          (__mmask8) __U);
}
   1749 
/* Scalar approximate reciprocal square root (14-bit, VRSQRT14SD) of the
   low double of __B; the upper lane of the result comes from __A.
   All-ones mask makes the zero passthrough unused.  */
static  __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rsqrt14_sd(__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
              (__v2df) __B,
              (__v2df)
              _mm_setzero_pd (),
              (__mmask8) -1);
}

/* Merge-masked form: lane 0 falls back to __W when bit 0 of __U is clear.  */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) __W,
          (__mmask8) __U);
}

/* Zero-masked form: lane 0 becomes 0.0 when bit 0 of __U is clear.  */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) _mm_setzero_pd (),
          (__mmask8) __U);
}
   1777 
/* Approximate reciprocal (14-bit precision, VRCP14PD) of packed doubles.
   All-ones mask makes the zero passthrough unused.  */
static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rcp14_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
               (__v8df)
               _mm512_setzero_pd (),
               (__mmask8) -1);
}

/* Merge-masked form: lanes with a clear bit in __U keep __W's element.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
                (__v8df) __W,
                (__mmask8) __U);
}

/* Zero-masked form: lanes with a clear bit in __U become 0.0.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
                (__v8df)
                _mm512_setzero_pd (),
                (__mmask8) __U);
}
   1803 
/* Approximate reciprocal (14-bit precision, VRCP14PS) of packed floats.
   All-ones mask makes the zero passthrough unused.  */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_rcp14_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
              (__v16sf)
              _mm512_setzero_ps (),
              (__mmask16) -1);
}

/* Merge-masked form: lanes with a clear bit in __U keep __W's element.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* Zero-masked form: lanes with a clear bit in __U become 0.0f.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
                   (__v16sf)
                   _mm512_setzero_ps (),
                   (__mmask16) __U);
}
   1829 
/* Scalar approximate reciprocal (14-bit, VRCP14SS) of the low float of
   __B; upper lanes of the result come from __A.  All-ones mask makes the
   zero passthrough unused.  */
static  __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rcp14_ss(__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
                 (__v4sf) __B,
                 (__v4sf)
                 _mm_setzero_ps (),
                 (__mmask8) -1);
}

/* Merge-masked form: lane 0 falls back to __W when bit 0 of __U is clear.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __W,
          (__mmask8) __U);
}

/* Zero-masked form: lane 0 becomes 0.0f when bit 0 of __U is clear.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) _mm_setzero_ps (),
          (__mmask8) __U);
}
   1857 
/* Scalar approximate reciprocal (14-bit, VRCP14SD) of the low double of
   __B; the upper lane of the result comes from __A.  All-ones mask makes
   the zero passthrough unused.  */
static  __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rcp14_sd(__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
            (__v2df) __B,
            (__v2df)
            _mm_setzero_pd (),
            (__mmask8) -1);
}

/* Merge-masked form: lane 0 falls back to __W when bit 0 of __U is clear.  */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) __W,
          (__mmask8) __U);
}

/* Zero-masked form: lane 0 becomes 0.0 when bit 0 of __U is clear.  */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) _mm_setzero_pd (),
          (__mmask8) __U);
}
   1885 
/* Round packed floats toward negative infinity via VRNDSCALE with the
   _MM_FROUND_FLOOR immediate.  Unmasked form passes __A as passthrough
   with an all-ones mask, so the passthrough is never selected.  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_floor_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
                                                  _MM_FROUND_FLOOR,
                                                  (__v16sf) __A, -1,
                                                  _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked floor: lanes with a clear bit in __U keep __W's element.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
                   _MM_FROUND_FLOOR,
                   (__v16sf) __W, __U,
                   _MM_FROUND_CUR_DIRECTION);
}

/* Round packed doubles toward negative infinity.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_floor_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
                                                   _MM_FROUND_FLOOR,
                                                   (__v8df) __A, -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked floor of packed doubles.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
                _MM_FROUND_FLOOR,
                (__v8df) __W, __U,
                _MM_FROUND_CUR_DIRECTION);
}
   1921 
/* Merge-masked ceiling of packed floats (VRNDSCALE with the
   _MM_FROUND_CEIL immediate): lanes with a clear bit in __U keep __W.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
                   _MM_FROUND_CEIL,
                   (__v16sf) __W, __U,
                   _MM_FROUND_CUR_DIRECTION);
}

/* Round packed floats toward positive infinity.  Unmasked form passes
   __A as passthrough with an all-ones mask (passthrough unused).  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_ceil_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
                                                  _MM_FROUND_CEIL,
                                                  (__v16sf) __A, -1,
                                                  _MM_FROUND_CUR_DIRECTION);
}

/* Round packed doubles toward positive infinity.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_ceil_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
                                                   _MM_FROUND_CEIL,
                                                   (__v8df) __A, -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked ceiling of packed doubles.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
                _MM_FROUND_CEIL,
                (__v8df) __W, __U,
                _MM_FROUND_CUR_DIRECTION);
}
   1957 
/* Elementwise absolute value of packed signed 64-bit integers (VPABSQ).
   All-ones mask makes the zero passthrough unused.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_abs_epi64(__m512i __A)
{
  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
             (__v8di)
             _mm512_setzero_si512 (),
             (__mmask8) -1);
}

/* Merge-masked form: lanes with a clear bit in __U keep __W's element.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
                  (__v8di) __W,
                  (__mmask8) __U);
}

/* Zero-masked form: lanes with a clear bit in __U are zeroed.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
                  (__v8di)
                  _mm512_setzero_si512 (),
                  (__mmask8) __U);
}
   1983 
/* Elementwise absolute value of packed signed 32-bit integers (VPABSD).
   All-ones mask makes the zero passthrough unused.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_abs_epi32(__m512i __A)
{
  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
             (__v16si)
             _mm512_setzero_si512 (),
             (__mmask16) -1);
}

/* Merge-masked form: lanes with a clear bit in __U keep __W's element.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
                  (__v16si) __W,
                  (__mmask16) __U);
}

/* Zero-masked form: lanes with a clear bit in __U are zeroed.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
                  (__v16si)
                  _mm512_setzero_si512 (),
                  (__mmask16) __U);
}
   2009 
/* Scalar single-precision add of the low elements, current rounding
   direction.  Merge-masked: lane 0 falls back to __W when bit 0 of __U
   is clear.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked form: lane 0 becomes 0.0f when bit 0 of __U is clear.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   2027 
/* Scalar single-precision add of the low elements with explicit rounding
   control R.  Unmasked: all-ones mask, zero passthrough unused.  */
#define _mm_add_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

/* Merge-masked: lane 0 falls back to W when bit 0 of U is clear.  */
#define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

/* Zero-masked: lane 0 becomes 0.0f when bit 0 of U is clear.  */
#define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
   2045 
/* Scalar double-precision add of the low elements, current rounding
   direction.  Merge-masked: lane 0 falls back to __W when bit 0 of __U
   is clear.  */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked form: lane 0 becomes 0.0 when bit 0 of __U is clear.  */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
/* Scalar double-precision add of the low elements with explicit rounding
   control R.  Unmasked: all-ones mask, zero passthrough unused.  */
#define _mm_add_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked: lane 0 falls back to W when bit 0 of U is clear.  */
#define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked: lane 0 becomes 0.0 when bit 0 of U is clear.  */
#define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
   2080 
/* Merge-masked packed double add: lanes with a clear bit in __U keep
   __W's element.  Built as unmasked add + mask-select.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_add_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked packed double add: lanes with a clear bit in __U are 0.0.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_add_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Merge-masked packed float add: lanes with a clear bit in __U keep
   __W's element.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_add_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked packed float add: lanes with a clear bit in __U are 0.0f.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_add_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
   2108 
/* Packed double add with explicit rounding control R.  Unmasked:
   all-ones mask, zero passthrough unused.  */
#define _mm512_add_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

/* Merge-masked: lanes with a clear bit in U keep W's element.  */
#define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

/* Zero-masked: lanes with a clear bit in U become 0.0.  */
#define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

/* Packed float add with explicit rounding control R.  Unmasked.  */
#define _mm512_add_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })

/* Merge-masked: lanes with a clear bit in U keep W's element.  */
#define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

/* Zero-masked: lanes with a clear bit in U become 0.0f.  */
#define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
   2144 
/* Scalar single-precision subtract (low elements), current rounding
   direction.  Merge-masked: lane 0 falls back to __W when bit 0 of __U
   is clear.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked form: lane 0 becomes 0.0f when bit 0 of __U is clear.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
/* Scalar single-precision subtract (low elements) with explicit rounding
   control R.  Unmasked: all-ones mask, zero passthrough unused.  */
#define _mm_sub_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

/* Merge-masked: lane 0 falls back to W when bit 0 of U is clear.  */
#define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

/* Zero-masked: lane 0 becomes 0.0f when bit 0 of U is clear.  */
#define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
   2179 
   2180 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2181 _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   2182   return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
   2183                 (__v2df) __B,
   2184                 (__v2df) __W,
   2185                 (__mmask8) __U,
   2186                 _MM_FROUND_CUR_DIRECTION);
   2187 }
   2188 
   2189 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2190 _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   2191   return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
   2192                 (__v2df) __B,
   2193                 (__v2df)  _mm_setzero_pd (),
   2194                 (__mmask8) __U,
   2195                 _MM_FROUND_CUR_DIRECTION);
   2196 }
   2197 
   2198 #define _mm_sub_round_sd(A, B, R) __extension__ ({ \
   2199   (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
   2200                                            (__v2df)(__m128d)(B), \
   2201                                            (__v2df)_mm_setzero_pd(), \
   2202                                            (__mmask8)-1, (int)(R)); })
   2203 
   2204 #define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \
   2205   (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
   2206                                            (__v2df)(__m128d)(B), \
   2207                                            (__v2df)(__m128d)(W), \
   2208                                            (__mmask8)(U), (int)(R)); })
   2209 
   2210 #define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \
   2211   (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
   2212                                            (__v2df)(__m128d)(B), \
   2213                                            (__v2df)_mm_setzero_pd(), \
   2214                                            (__mmask8)(U), (int)(R)); })
   2215 
/* 512-bit masked subtract: the select builtin keeps the _mm512_sub_* result
   in lanes whose bit in __U is set, and __W's value in the rest. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_sub_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masking variant: unselected lanes become 0.0. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_sub_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Single-precision counterparts (16 lanes, __mmask16). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_sub_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_sub_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

/* Subtract with an explicit rounding mode R (_MM_FROUND_*); mask -1 in the
   unmasked form selects every lane, so the zero pass-through is unused. */
#define _mm512_sub_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

#define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

#define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

#define _mm512_sub_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })
   2267 
   2268 #define _mm512_mask_sub_round_ps(W, U, A, B, R)  __extension__ ({ \
   2269   (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
   2270                                        (__v16sf)(__m512)(B), \
   2271                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   2272                                        (int)(R)); });
   2273 
   2274 #define _mm512_maskz_sub_round_ps(U, A, B, R)  __extension__ ({ \
   2275   (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
   2276                                        (__v16sf)(__m512)(B), \
   2277                                        (__v16sf)_mm512_setzero_ps(), \
   2278                                        (__mmask16)(U), (int)(R)); });
   2279 
/* Scalar single-precision multiply of the low elements, merge-masked by bit
   0 of __U (clear bit -> low lane taken from __W); current rounding mode. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking variant: a clear bit 0 in __U yields 0.0f in the low lane. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
/* Macro forms taking an explicit rounding mode R (_MM_FROUND_*). */
#define _mm_mul_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })

/* Double-precision (_sd) counterparts of the _ss forms above. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mul_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
   2350 
/* 512-bit masked multiply: keeps _mm512_mul_* results in lanes whose bit in
   __U is set, and __W's value (merge) or 0.0 (maskz) elsewhere. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_mul_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_mul_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Single-precision counterparts (16 lanes, __mmask16). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_mul_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_mul_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

/* Multiply with an explicit rounding mode R (_MM_FROUND_*). */
#define _mm512_mul_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

#define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

#define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

#define _mm512_mul_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })
   2402 
   2403 #define _mm512_mask_mul_round_ps(W, U, A, B, R)  __extension__ ({ \
   2404   (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
   2405                                        (__v16sf)(__m512)(B), \
   2406                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   2407                                        (int)(R)); });
   2408 
   2409 #define _mm512_maskz_mul_round_ps(U, A, B, R)  __extension__ ({ \
   2410   (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
   2411                                        (__v16sf)(__m512)(B), \
   2412                                        (__v16sf)_mm512_setzero_ps(), \
   2413                                        (__mmask16)(U), (int)(R)); });
   2414 
/* Scalar single-precision divide of the low elements, merge-masked by bit 0
   of __U (clear bit -> low lane taken from __W); current rounding mode. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking variant: a clear bit 0 in __U yields 0.0f in the low lane. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Macro forms taking an explicit rounding mode R (_MM_FROUND_*). */
#define _mm_div_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })

/* Double-precision (_sd) counterparts of the _ss forms above. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm_div_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
   2486 
/* Lane-wise 512-bit divide expressed as a plain vector '/' so the compiler
   can pick the best lowering. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_div_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a/(__v8df)__b);
}

/* Masked divide: keeps the quotient in lanes whose bit in __U is set, and
   __W's value (merge) or 0.0 (maskz) elsewhere. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_div_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_div_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Single-precision counterparts (16 lanes, __mmask16). */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_div_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a/(__v16sf)__b);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_div_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_div_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

/* Divide with an explicit rounding mode R (_MM_FROUND_*). */
#define _mm512_div_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

#define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

#define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

#define _mm512_div_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })
   2550 
   2551 #define _mm512_mask_div_round_ps(W, U, A, B, R)  __extension__ ({ \
   2552   (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
   2553                                        (__v16sf)(__m512)(B), \
   2554                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   2555                                        (int)(R)); });
   2556 
   2557 #define _mm512_maskz_div_round_ps(U, A, B, R)  __extension__ ({ \
   2558   (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
   2559                                        (__v16sf)(__m512)(B), \
   2560                                        (__v16sf)_mm512_setzero_ps(), \
   2561                                        (__mmask16)(U), (int)(R)); });
   2562 
/* VRNDSCALEPS: round each float to a scaled precision controlled by the
   immediate (second argument).  NOTE(review): the immediate's bit layout
   (rounding mode in the low bits, fraction-bit count above) comes from the
   ISA reference, not from this code -- confirm against the Intel intrinsics
   guide.  With mask -1 the pass-through operand (here A itself) is unused. */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                         (__v16sf)(__m512)(A), (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked: lanes with a clear bit in B keep A's value. */
#define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({\
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         _MM_FROUND_CUR_DIRECTION); })

/* Zero-masked: lanes with a clear bit in A are zeroed. */
#define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({\
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), \
                                         _MM_FROUND_CUR_DIRECTION); })

/* _round_ variants additionally take an explicit SAE/rounding control R. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         (int)(R)); })

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), (int)(R)); })

/* Unmasked form: the pass-through is undefined since mask -1 writes all. */
#define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, (int)(R)); })

/* VRNDSCALEPD: double-precision counterparts of the macros above. */
#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                          (__v8df)(__m512d)(A), (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({\
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({\
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          (int)(R)); })

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), (int)(R)); })

#define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, (int)(R)); })
   2624 
/* Fused multiply-add family with explicit rounding mode R, all built on the
   vfmaddpd512 builtins: fmsub negates C, fnmadd negates A, fnmsub negates
   both.  Per the builtin naming convention, the _mask form merges masked-off
   lanes from the first operand, _mask3 from the third (accumulator), and
   _maskz zeroes them -- NOTE(review): confirm against the Intel intrinsics
   guide; the merge source is not visible in this code. */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), (__mmask8)-1, \
                                           (int)(R)); })


#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R)); })


#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


/* fmsub = fmadd with the addend C negated: A*B - C. */
#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R)); })


#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R)); })


#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


/* fnmadd = fmadd with the first multiplicand negated: -(A*B) + C. */
#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), (__mmask8)-1, \
                                           (int)(R)); })


#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


/* fnmsub negates both A and C: -(A*B) - C. */
#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R)); })


#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })
   2707 
   2708 
/* _mm512_fmadd_pd: fused (A*B) + C on 8 doubles, all lanes enabled
   ((__mmask8)-1), current rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_mask_fmadd_pd: (A*B) + C; lanes whose bit is clear in __U keep
   the value of __A (merge semantics of the _mask builtin). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df) __C,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_mask3_fmadd_pd: (A*B) + C; lanes whose bit is clear in __U keep
   the value of __C (merge semantics of the _mask3 builtin). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_maskz_fmadd_pd: (A*B) + C; lanes whose bit is clear in __U are
   zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}
   2748 
/* _mm512_fmsub_pd: fused (A*B) - C on 8 doubles (implemented by negating
   __C into the vfmadd builtin); all lanes, current rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    -(__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_mask_fmsub_pd: (A*B) - C; lanes clear in __U keep __A. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    -(__v8df) __C,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_maskz_fmsub_pd: (A*B) - C; lanes clear in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
                                                     (__v8df) __B,
                                                     -(__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}
   2778 
/* _mm512_fnmadd_pd: fused -(A*B) + C on 8 doubles (implemented by
   negating __A); all lanes, current rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_mask3_fnmadd_pd: -(A*B) + C; lanes clear in __U keep __C
   (_mask3 merge semantics). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_maskz_fnmadd_pd: -(A*B) + C; lanes clear in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}
   2808 
/* _mm512_fnmsub_pd: fused -(A*B) - C on 8 doubles (both __A and __C are
   negated into the vfmadd builtin); all lanes, current rounding. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
                                                    (__v8df) __B,
                                                    -(__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_maskz_fnmsub_pd: -(A*B) - C; lanes clear in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
                                                     (__v8df) __B,
                                                     -(__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}
   2828 
/* _mm512_fmadd_round_ps: fused (A*B) + C on 16 floats with explicit
   rounding mode R; all lanes ((__mmask16)-1). */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), (__mmask16)-1, \
                                          (int)(R)); })


/* _mm512_mask_fmadd_round_ps: (A*B) + C; lanes clear in U keep A. */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R)); })


/* _mm512_mask3_fmadd_round_ps: (A*B) + C; lanes clear in U keep C. */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


/* _mm512_maskz_fmadd_round_ps: (A*B) + C; lanes clear in U are zeroed. */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })
   2855 
   2856 
/* _mm512_fmsub_round_ps: fused (A*B) - C on 16 floats (C negated into
   the vfmadd builtin), rounding mode R; all lanes. */
#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R)); })


/* _mm512_mask_fmsub_round_ps: (A*B) - C; lanes clear in U keep A. */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R)); })


/* _mm512_maskz_fmsub_round_ps: (A*B) - C; lanes clear in U are zeroed. */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })
   2876 
   2877 
/* _mm512_fnmadd_round_ps: fused -(A*B) + C on 16 floats (A negated),
   rounding mode R; all lanes. */
#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), (__mmask16)-1, \
                                          (int)(R)); })


/* _mm512_mask3_fnmadd_round_ps: -(A*B) + C; lanes clear in U keep C. */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


/* _mm512_maskz_fnmadd_round_ps: -(A*B) + C; lanes clear in U are zeroed. */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })
   2897 
   2898 
/* _mm512_fnmsub_round_ps: fused -(A*B) - C on 16 floats (A and C both
   negated), rounding mode R; all lanes. */
#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R)); })


/* _mm512_maskz_fnmsub_round_ps: -(A*B) - C; lanes clear in U are zeroed. */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })
   2911 
   2912 
/* _mm512_fmadd_ps: fused (A*B) + C on 16 floats, all lanes, current
   rounding direction. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   (__v16sf) __C,
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_mask_fmadd_ps: (A*B) + C; lanes clear in __U keep __A. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   (__v16sf) __C,
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_mask3_fmadd_ps: (A*B) + C; lanes clear in __U keep __C. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_maskz_fmadd_ps: (A*B) + C; lanes clear in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
   2952 
/* _mm512_fmsub_ps: fused (A*B) - C on 16 floats (C negated into the
   vfmadd builtin); all lanes, current rounding direction. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   -(__v16sf) __C,
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_mask_fmsub_ps: (A*B) - C; lanes clear in __U keep __A. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   -(__v16sf) __C,
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_maskz_fmsub_ps: (A*B) - C; lanes clear in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    -(__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
   2982 
/* _mm512_fnmadd_ps: fused -(A*B) + C on 16 floats (A negated); all
   lanes, current rounding direction. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
                                                   (__v16sf) __B,
                                                   (__v16sf) __C,
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_mask3_fnmadd_ps: -(A*B) + C; lanes clear in __U keep __C. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_maskz_fnmadd_ps: -(A*B) + C; lanes clear in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
   3012 
/* _mm512_fnmsub_ps: fused -(A*B) - C on 16 floats (A and C negated);
   all lanes, current rounding direction. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
                                                   (__v16sf) __B,
                                                   -(__v16sf) __C,
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_maskz_fnmsub_ps: -(A*B) - C; lanes clear in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
                                                    (__v16sf) __B,
                                                    -(__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
   3032 
/* _mm512_fmaddsub_round_pd: fused A*B with C alternately subtracted and
   added across lanes (Intel FMADDSUB: subtract in even-indexed lanes,
   add in odd-indexed lanes), rounding mode R; all lanes. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8df)(__m512d)(C), \
                                              (__mmask8)-1, (int)(R)); })


/* _mm512_mask_fmaddsub_round_pd: as above; lanes clear in U keep A. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R)); })


/* _mm512_mask3_fmaddsub_round_pd: as above; lanes clear in U keep C. */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)); })


/* _mm512_maskz_fmaddsub_round_pd: as above; lanes clear in U are zeroed. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)); })
   3059 
   3060 
/* _mm512_fmsubadd_round_pd: fused A*B with C alternately added and
   subtracted (the complement of fmaddsub — implemented by negating C
   into the vfmaddsub builtin), rounding mode R; all lanes. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              -(__v8df)(__m512d)(C), \
                                              (__mmask8)-1, (int)(R)); })


/* _mm512_mask_fmsubadd_round_pd: as above; lanes clear in U keep A. */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              -(__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R)); })


/* _mm512_maskz_fmsubadd_round_pd: as above; lanes clear in U are zeroed. */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)); })
   3080 
   3081 
/* _mm512_fmaddsub_pd: fused A*B with C alternately subtracted/added per
   lane (FMADDSUB); all lanes, current rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
                                                       (__v8df) __B,
                                                       (__v8df) __C,
                                                       (__mmask8) -1,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_mask_fmaddsub_pd: as above; lanes clear in __U keep __A. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
                                                       (__v8df) __B,
                                                       (__v8df) __C,
                                                       (__mmask8) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_mask3_fmaddsub_pd: as above; lanes clear in __U keep __C. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
                                                        (__v8df) __B,
                                                        (__v8df) __C,
                                                        (__mmask8) __U,
                                                        _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_maskz_fmaddsub_pd: as above; lanes clear in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
                                                        (__v8df) __B,
                                                        (__v8df) __C,
                                                        (__mmask8) __U,
                                                        _MM_FROUND_CUR_DIRECTION);
}
   3121 
/* _mm512_fmsubadd_pd: fused A*B with C alternately added/subtracted per
   lane (negated __C into the vfmaddsub builtin); all lanes, current
   rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
                                                       (__v8df) __B,
                                                       -(__v8df) __C,
                                                       (__mmask8) -1,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_mask_fmsubadd_pd: as above; lanes clear in __U keep __A. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
                                                       (__v8df) __B,
                                                       -(__v8df) __C,
                                                       (__mmask8) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_maskz_fmsubadd_pd: as above; lanes clear in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
                                                        (__v8df) __B,
                                                        -(__v8df) __C,
                                                        (__mmask8) __U,
                                                        _MM_FROUND_CUR_DIRECTION);
}
   3151 
/* _mm512_fmaddsub_round_ps: single-precision FMADDSUB (C alternately
   subtracted/added per lane) with rounding mode R; all lanes. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R)); })


/* _mm512_mask_fmaddsub_round_ps: as above; lanes clear in U keep A. */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R)); })


/* _mm512_mask3_fmaddsub_round_ps: as above; lanes clear in U keep C. */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })


/* _mm512_maskz_fmaddsub_round_ps: as above; lanes clear in U are zeroed. */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })
   3178 
   3179 
/* _mm512_fmsubadd_round_ps: single-precision FMSUBADD (C negated into
   the vfmaddsub builtin) with rounding mode R; all lanes. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R)); })


/* _mm512_mask_fmsubadd_round_ps: as above; lanes clear in U keep A. */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R)); })


/* _mm512_maskz_fmsubadd_round_ps: as above; lanes clear in U are zeroed. */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })
   3199 
   3200 
/* _mm512_fmaddsub_ps: single-precision FMADDSUB; all lanes, current
   rounding direction. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      (__v16sf) __C,
                                                      (__mmask16) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_mask_fmaddsub_ps: as above; lanes clear in __U keep __A. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      (__v16sf) __C,
                                                      (__mmask16) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_mask3_fmaddsub_ps: as above; lanes clear in __U keep __C. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
                                                       (__v16sf) __B,
                                                       (__v16sf) __C,
                                                       (__mmask16) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_maskz_fmaddsub_ps: as above; lanes clear in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
                                                       (__v16sf) __B,
                                                       (__v16sf) __C,
                                                       (__mmask16) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}
   3240 
/* _mm512_fmsubadd_ps: single-precision FMSUBADD (negated __C into the
   vfmaddsub builtin); all lanes, current rounding direction. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      -(__v16sf) __C,
                                                      (__mmask16) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_mask_fmsubadd_ps: as above; lanes clear in __U keep __A. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      -(__v16sf) __C,
                                                      (__mmask16) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/* _mm512_maskz_fmsubadd_ps: as above; lanes clear in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
                                                       (__v16sf) __B,
                                                       -(__v16sf) __C,
                                                       (__mmask16) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}
   3270 
/* _mm512_mask3_fmsub_round_pd: (A*B) - C with rounding mode R; lanes
   clear in U keep C. Uses the dedicated vfmsub _mask3 builtin instead of
   negating C into vfmadd, so masked-off lanes keep the original
   (un-negated) C value. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


/* _mm512_mask3_fmsub_pd: same as the macro above, with the current
   rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}
   3287 
/* _mm512_mask3_fmsub_round_ps: single-precision (A*B) - C with rounding
   mode R; lanes clear in U keep C (dedicated vfmsub _mask3 builtin so
   masked-off lanes preserve the un-negated C). */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


/* _mm512_mask3_fmsub_ps: same as the macro above, with the current
   rounding direction. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
   3304 
/* Fused multiply with alternating subtract/add of C across even/odd element
   positions (VFMSUBADD), explicit rounding mode R.  "mask3" form: result
   elements whose bit in U is clear are copied from C. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)); })


/* Same operation with the current rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
                                                        (__v8df) __B,
                                                        (__v8df) __C,
                                                        (__mmask8) __U,
                                                        _MM_FROUND_CUR_DIRECTION);
}

/* Single-precision counterpart of _mm512_mask3_fmsubadd_round_pd. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })


/* Single-precision counterpart of _mm512_mask3_fmsubadd_pd. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
                                                       (__v16sf) __B,
                                                       (__v16sf) __C,
                                                       (__mmask16) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}
   3338 
/* Fused negated multiply-add, -(A * B) + C, with explicit rounding mode R.
   "mask" form: result elements whose bit in U is clear are copied from A. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


/* -(A * B) + C, merge-masked from A, using the current rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* Single-precision counterpart of _mm512_mask_fnmadd_round_pd. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


/* Single-precision counterpart of _mm512_mask_fnmadd_pd. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
   3372 
/* Fused negated multiply-subtract, -(A * B) - C, with explicit rounding
   mode R.  "mask" form: inactive result elements are copied from A. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


/* As above, but "mask3" form: inactive result elements are copied from C. */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)); })


/* -(A * B) - C, merge-masked from A, current rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* -(A * B) - C, merge-masked from C, current rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
                                                      (__v8df) __B,
                                                      (__v8df) __C,
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/* Single-precision counterparts of the two macros above. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)); })


/* Single-precision counterpart of _mm512_mask_fnmsub_pd. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Single-precision counterpart of _mm512_mask3_fnmsub_pd. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
                                                     (__v16sf) __B,
                                                     (__v16sf) __C,
                                                     (__mmask16) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}
   3440 
   3441 
   3442 
   3443 /* Vector permutations */
   3444 
/* Two-source permute (VPERMT2D): each 32-bit result element is selected
   from the concatenation of __A and __B according to the index vector __I.
   Note the builtin takes the index operand FIRST, hence the argument order
   below differs from the intrinsic's. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
                                                       /* idx */ ,
                                                       (__v16si) __A,
                                                       (__v16si) __B,
                                                       (__mmask16) -1);
}

/* Merge-masked variant: result elements whose bit in __U is clear are
   copied from __A. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
                                __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
                                                        /* idx */ ,
                                                        (__v16si) __A,
                                                        (__v16si) __B,
                                                        (__mmask16) __U);
}

/* Zero-masked variant: result elements whose bit in __U is clear are
   zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
                                 __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
                                                        /* idx */ ,
                                                        (__v16si) __A,
                                                        (__v16si) __B,
                                                        (__mmask16) __U);
}
   3476 
/* 64-bit-element counterpart of _mm512_permutex2var_epi32 (VPERMT2Q).
   The builtin again takes the index vector __I as its first operand. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
                                                       /* idx */ ,
                                                       (__v8di) __A,
                                                       (__v8di) __B,
                                                       (__mmask8) -1);
}

/* Merge-masked variant: inactive result elements are copied from __A. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
                                __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
                                                       /* idx */ ,
                                                       (__v8di) __A,
                                                       (__v8di) __B,
                                                       (__mmask8) __U);
}


/* Zero-masked variant: inactive result elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
         __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
                                                        /* idx */ ,
                                                        (__v8di) __A,
                                                        (__v8di) __B,
                                                        (__mmask8) __U);
}
   3509 
/* Concatenate A (high half) and B (low half) into a 1024-bit value, shift
   right by I 64-bit elements, and return the low 512 bits.  Implemented as a
   compile-time shuffle: B supplies indices 0-7, A supplies indices 8-15, and
   I is reduced modulo 8.  I must be a compile-time constant. */
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \
                                   (__v8di)(__m512i)(A), \
                                   ((int)(I) & 0x7) + 0, \
                                   ((int)(I) & 0x7) + 1, \
                                   ((int)(I) & 0x7) + 2, \
                                   ((int)(I) & 0x7) + 3, \
                                   ((int)(I) & 0x7) + 4, \
                                   ((int)(I) & 0x7) + 5, \
                                   ((int)(I) & 0x7) + 6, \
                                   ((int)(I) & 0x7) + 7); })

/* Merge-masked alignr: inactive result elements are copied from W. */
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)); })

/* Zero-masked alignr: inactive result elements are zeroed. */
#define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()); })
   3531 
/* 32-bit-element counterpart of _mm512_alignr_epi64: shift the 1024-bit
   concatenation of A (high) and B (low) right by I doubleword elements
   (I reduced modulo 16) and keep the low 512 bits. */
#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \
                                   (__v16si)(__m512i)(A), \
                                   ((int)(I) & 0xf) + 0, \
                                   ((int)(I) & 0xf) + 1, \
                                   ((int)(I) & 0xf) + 2, \
                                   ((int)(I) & 0xf) + 3, \
                                   ((int)(I) & 0xf) + 4, \
                                   ((int)(I) & 0xf) + 5, \
                                   ((int)(I) & 0xf) + 6, \
                                   ((int)(I) & 0xf) + 7, \
                                   ((int)(I) & 0xf) + 8, \
                                   ((int)(I) & 0xf) + 9, \
                                   ((int)(I) & 0xf) + 10, \
                                   ((int)(I) & 0xf) + 11, \
                                   ((int)(I) & 0xf) + 12, \
                                   ((int)(I) & 0xf) + 13, \
                                   ((int)(I) & 0xf) + 14, \
                                   ((int)(I) & 0xf) + 15); })

/* Merge-masked alignr: inactive result elements are copied from W. */
#define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)); })

/* Zero-masked alignr: inactive result elements are zeroed. */
#define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()); })
   3561 /* Vector Extract */
   3562 
/* Extract the 256-bit (4 x double) half of A selected by bit 0 of I:
   I & 1 == 0 picks elements 0-3, otherwise elements 4-7.  Implemented as a
   compile-time shuffle; the second shuffle operand is only a placeholder. */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({             \
  (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A),          \
                                   (__v8df)_mm512_undefined_pd(), \
                                   ((I) & 1) ? 4 : 0,             \
                                   ((I) & 1) ? 5 : 1,             \
                                   ((I) & 1) ? 6 : 2,             \
                                   ((I) & 1) ? 7 : 3); })

/* Merge-masked extract: inactive result elements are copied from W. */
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                   (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
                                   (__v4df)(W)); })

/* Zero-masked extract: inactive result elements are zeroed. */
#define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                   (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
                                   (__v4df)_mm256_setzero_pd()); })
   3580 
/* Extract the 128-bit (4 x float) lane of A selected by I & 3 as a
   compile-time shuffle; the second shuffle operand is only a placeholder. */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({             \
  (__m128)__builtin_shufflevector((__v16sf)(__m512)(A),           \
                                  (__v16sf)_mm512_undefined_ps(), \
                                  0 + ((I) & 0x3) * 4,            \
                                  1 + ((I) & 0x3) * 4,            \
                                  2 + ((I) & 0x3) * 4,            \
                                  3 + ((I) & 0x3) * 4); })

/* Merge-masked extract: inactive result elements are copied from W. */
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                   (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
                                   (__v4sf)(W)); })

/* Zero-masked extract: inactive result elements are zeroed. */
#define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                   (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
                                   (__v4sf)_mm_setzero_ps()); })
   3598 
   3599 /* Vector Blend */
   3600 
/* Per-element select: the result takes elements of __W where the
   corresponding bit of __U is set, and elements of __A where it is clear. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
                 (__v8df) __W,
                 (__v8df) __A);
}

/* Single-precision blend; same select semantics as _mm512_mask_blend_pd. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
                (__v16sf) __W,
                (__v16sf) __A);
}

/* 64-bit-integer blend; same select semantics. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                (__v8di) __W,
                (__v8di) __A);
}

/* 32-bit-integer blend; same select semantics. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                (__v16si) __W,
                (__v16si) __A);
}
   3632 
   3633 /* Compare */
   3634 
/* Packed single-precision compare with predicate P (a _CMP_* constant),
   producing a 16-bit element mask.  The _round_ variants take an explicit
   rounding/SAE argument R; the masked variants AND the result with U.
   The named wrappers below simply fix the predicate. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
   3689 
/* Packed double-precision compare with predicate P (a _CMP_* constant),
   producing an 8-bit element mask; structure mirrors the _ps family above. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)(U), (int)(R)); })

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
   3744 
   3745 /* Conversion */
   3746 
/* Truncating conversion of packed floats to unsigned 32-bit integers
   (VCVTTPS2UDQ).  The _roundps_ macros take an explicit rounding/SAE
   argument R; the unmasked form leaves inactive-lane semantics undefined
   by passing an undefined pass-through vector and an all-ones mask. */
#define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_undefined_epi32(), \
                                             (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)); })


/* Truncating float -> unsigned int conversion, current rounding direction. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epu32(__m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
                  (__v16si)
                  _mm512_setzero_si512 (),
                  (__mmask16) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked variant: inactive result elements are copied from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
                   (__v16si) __W,
                   (__mmask16) __U,
                   _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: inactive result elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
                   (__v16si) _mm512_setzero_si512 (),
                   (__mmask16) __U,
                   _MM_FROUND_CUR_DIRECTION);
}
   3790 
/* Convert packed signed 32-bit integers to floats with an explicit
   rounding mode R; unmasked, merge-masked (W), and zero-masked forms. */
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })

/* Unsigned counterparts: packed unsigned 32-bit integers to floats. */
#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })
   3820 
/* Convert packed unsigned 32-bit integers to floats using the current
   rounding direction. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_cvtepu32_ps (__m512i __A)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
                 (__v16sf) _mm512_undefined_ps (),
                 (__mmask16) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked variant: inactive result elements are copied from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
                 (__v16sf) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: inactive result elements are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
                 (__v16sf) _mm512_setzero_ps (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
   3847 
/* Widening element-wise conversion of 8 signed 32-bit integers to 8 doubles,
   expressed as a generic vector conversion the backend lowers to
   VCVTDQ2PD. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepi32_pd(__m256i __A)
{
  return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
}

/* Merge-masked variant: inactive result elements are copied from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepi32_pd(__A),
                                              (__v8df)__W);
}

/* Zero-masked variant: inactive result elements are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepi32_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

/* Convert the low 8 signed 32-bit elements of a 512-bit vector to doubles. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepi32lo_pd(__m512i __A)
{
  return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
}

/* Merge-masked form of _mm512_cvtepi32lo_pd. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
{
  return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
}
   3881 
/* Convert packed signed 32-bit integers to floats using the current
   rounding direction. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_cvtepi32_ps (__m512i __A)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
                (__v16sf) _mm512_undefined_ps (),
                (__mmask16) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked variant: inactive result elements are copied from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
                (__v16sf) __W,
                (__mmask16) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: inactive result elements are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
                (__v16sf) _mm512_setzero_ps (),
                (__mmask16) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   3908 
/* Widening element-wise conversion of 8 unsigned 32-bit integers to
   8 doubles (note the __v8su source type makes the conversion unsigned). */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepu32_pd(__m256i __A)
{
  return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
}

/* Merge-masked variant: inactive result elements are copied from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepu32_pd(__A),
                                              (__v8df)__W);
}

/* Zero-masked variant: inactive result elements are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepu32_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

/* Convert the low 8 unsigned 32-bit elements of a 512-bit vector to
   doubles. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepu32lo_pd(__m512i __A)
{
  return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
}

/* Merge-masked form of _mm512_cvtepu32lo_pd. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
{
  return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
}
   3942 
/* Convert 8 packed doubles in A to packed floats with explicit rounding
   mode R (an _MM_FROUND_* value); unmasked, merge-masked and zero-masked
   forms. */
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
   3957 
/* Convert 8 packed doubles in __A to packed floats, rounding per the
   current MXCSR direction. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_cvtpd_ps (__m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                (__v8sf) _mm256_undefined_ps (), /* passthru unused: mask is all-ones */
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}
   3966 
/* Merge-masked double->float conversion: lanes with a clear bit in __U
   are copied from __W. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                (__v8sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   3975 
/* Zero-masked double->float conversion: lanes with a clear bit in __U
   are zeroed. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                (__v8sf) _mm256_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   3984 
/* Convert 8 packed doubles in __A to floats in the low half of a 512-bit
   result; the upper 8 float lanes are zeroed by the shuffle with a zero
   vector. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_cvtpd_pslo (__m512d __A)
{
  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
                (__v8sf) _mm256_setzero_ps (),
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}
   3992 
/* Merge-masked form of _mm512_cvtpd_pslo: the low 8 float lanes merge
   with the low half of __W under __U, the upper 8 lanes are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
{
  return (__m512) __builtin_shufflevector (
                (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
                                               __U, __A),
                (__v8sf) _mm256_setzero_ps (),
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}
   4002 
   4003 #define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \
   4004   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   4005                                             (__v16hi)_mm256_undefined_si256(), \
   4006                                             (__mmask16)-1); })
   4007 
   4008 #define _mm512_mask_cvt_roundps_ph(U, W, A, I) __extension__ ({ \
   4009   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   4010                                             (__v16hi)(__m256i)(U), \
   4011                                             (__mmask16)(W)); })
   4012 
   4013 #define _mm512_maskz_cvt_roundps_ph(W, A, I) __extension__ ({ \
   4014   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   4015                                             (__v16hi)_mm256_setzero_si256(), \
   4016                                             (__mmask16)(W)); })
   4017 
   4018 #define _mm512_cvtps_ph(A, I) __extension__ ({ \
   4019   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   4020                                             (__v16hi)_mm256_setzero_si256(), \
   4021                                             (__mmask16)-1); })
   4022 
   4023 #define _mm512_mask_cvtps_ph(U, W, A, I) __extension__ ({ \
   4024   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   4025                                             (__v16hi)(__m256i)(U), \
   4026                                             (__mmask16)(W)); })
   4027 
   4028 #define _mm512_maskz_cvtps_ph(W, A, I) __extension__ ({\
   4029   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   4030                                             (__v16hi)_mm256_setzero_si256(), \
   4031                                             (__mmask16)(W)); })
   4032 
/* Convert 16 packed half-precision values in A to packed floats with
   explicit rounding/exception control R; unmasked, merge-masked and
   zero-masked forms. */
#define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })
   4047 
   4048 
/* Convert 16 packed half-precision values in __A to packed floats
   (conversion is exact; no rounding occurs). */
static  __inline __m512 __DEFAULT_FN_ATTRS
_mm512_cvtph_ps(__m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                (__v16sf)
                _mm512_setzero_ps (), /* passthru unused: mask is all-ones */
                (__mmask16) -1,
                _MM_FROUND_CUR_DIRECTION);
}
   4058 
/* Merge-masked half->float conversion: lanes with a clear bit in __U are
   copied from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                 (__v16sf) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
   4067 
/* Zero-masked half->float conversion: lanes with a clear bit in __U are
   zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                 (__v16sf) _mm512_setzero_ps (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
   4076 
/* Convert-with-truncation 8 packed doubles in A to packed 32-bit
   integers; R controls exception suppression (_MM_FROUND_*). Unmasked,
   merge-masked and zero-masked forms. */
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)); })
   4091 
/* Convert-with-truncation 8 packed doubles in __a to packed 32-bit
   integers. */
static __inline __m256i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epi32(__m512d __a)
{
  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
                                                   (__v8si)_mm256_setzero_si256(),
                                                   (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}
   4100 
/* Merge-masked truncating double->int32 conversion: unselected lanes
   come from __W. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
                  (__v8si) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
   4109 
/* Zero-masked truncating double->int32 conversion: unselected lanes are
   zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
                  (__v8si) _mm256_setzero_si256 (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
   4118 
/* Convert-with-truncation 16 packed floats in A to packed 32-bit
   integers; R controls exception suppression. Unmasked, merge-masked and
   zero-masked forms. */
#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)); })
   4133 
/* Convert-with-truncation 16 packed floats in __a to packed 32-bit
   integers. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epi32(__m512 __a)
{
  return (__m512i)
    __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
                                     (__v16si) _mm512_setzero_si512 (),
                                     (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}
   4142 
/* Merge-masked truncating float->int32 conversion: unselected lanes come
   from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
                  (__v16si) __W,
                  (__mmask16) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
   4151 
/* Zero-masked truncating float->int32 conversion: unselected lanes are
   zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
                  (__v16si) _mm512_setzero_si512 (),
                  (__mmask16) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
   4160 
/* Convert 16 packed floats in A to packed 32-bit integers with explicit
   rounding mode R; unmasked, merge-masked and zero-masked forms. */
#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)(__m512i)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)(U), (int)(R)); })
   4175 
/* Convert 16 packed floats in __A to packed 32-bit integers, rounding
   per the current MXCSR direction. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epi32 (__m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                 (__v16si) _mm512_undefined_epi32 (), /* passthru unused: mask is all-ones */
                 (__mmask16) -1,
                 _MM_FROUND_CUR_DIRECTION);
}
   4184 
/* Merge-masked float->int32 conversion: unselected lanes come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                 (__v16si) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
   4193 
/* Zero-masked float->int32 conversion: unselected lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                 (__v16si)
                 _mm512_setzero_si512 (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
   4203 
/* Convert 8 packed doubles in A to packed 32-bit integers with explicit
   rounding mode R; unmasked, merge-masked and zero-masked forms. */
#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)(__m256i)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)(U), (int)(R)); })
   4218 
/* Convert 8 packed doubles in __A to packed 32-bit integers, rounding
   per the current MXCSR direction. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epi32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                 (__v8si)
                 _mm256_undefined_si256 (), /* passthru unused: mask is all-ones */
                 (__mmask8) -1,
                 _MM_FROUND_CUR_DIRECTION);
}
   4228 
/* Merge-masked double->int32 conversion: unselected lanes come from __W. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                 (__v8si) __W,
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
   4237 
/* Zero-masked double->int32 conversion: unselected lanes are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                 (__v8si)
                 _mm256_setzero_si256 (),
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
   4247 
/* Convert 16 packed floats in A to packed unsigned 32-bit integers with
   explicit rounding mode R; unmasked, merge-masked and zero-masked
   forms. */
#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)); })
   4262 
   4263 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4264 _mm512_cvtps_epu32 ( __m512 __A)
   4265 {
   4266   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
   4267                   (__v16si)\
   4268                   _mm512_undefined_epi32 (),\
   4269                   (__mmask16) -1,\
   4270                   _MM_FROUND_CUR_DIRECTION);\
   4271 }
   4272 
/* Merge-masked float->uint32 conversion: unselected lanes come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
                  (__v16si) __W,
                  (__mmask16) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
   4281 
/* Zero-masked float->uint32 conversion: unselected lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
                  (__v16si)
                  _mm512_setzero_si512 (),
                  (__mmask16) __U ,
                  _MM_FROUND_CUR_DIRECTION);
}
   4291 
   4292 #define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
   4293   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
   4294                                             (__v8si)_mm256_setzero_si256(), \
   4295                                             (__mmask8)-1, (int)(R)); })
   4296 
   4297 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \
   4298   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
   4299                                             (__v8si)(W), \
   4300                                             (__mmask8)(U), (int)(R)); })
   4301 
   4302 #define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \
   4303   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
   4304                                             (__v8si)_mm256_setzero_si256(), \
   4305                                             (__mmask8)(U), (int)(R)); })
   4306 
/* Convert 8 packed doubles in __A to packed unsigned 32-bit integers,
   rounding per the current MXCSR direction. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epu32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_undefined_si256 (), /* passthru unused: mask is all-ones */
                  (__mmask8) -1,
                  _MM_FROUND_CUR_DIRECTION);
}
   4316 
/* Merge-masked double->uint32 conversion: unselected lanes come from __W. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                  (__v8si) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
   4325 
/* Zero-masked double->uint32 conversion: unselected lanes are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_setzero_si256 (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
   4335 
/* Extract the lowest double-precision element of __a as a scalar. */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_cvtsd_f64(__m512d __a)
{
  return __a[0];
}
   4341 
/* Extract the lowest single-precision element of __a as a scalar. */
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_cvtss_f32(__m512 __a)
{
  return __a[0];
}
   4347 
   4348 /* Unpack and Interleave */
   4349 
   4350 static __inline __m512d __DEFAULT_FN_ATTRS
   4351 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
   4352 {
   4353   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
   4354                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
   4355 }
   4356 
   4357 static __inline__ __m512d __DEFAULT_FN_ATTRS
   4358 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   4359 {
   4360   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   4361                                            (__v8df)_mm512_unpackhi_pd(__A, __B),
   4362                                            (__v8df)__W);
   4363 }
   4364 
   4365 static __inline__ __m512d __DEFAULT_FN_ATTRS
   4366 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
   4367 {
   4368   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   4369                                            (__v8df)_mm512_unpackhi_pd(__A, __B),
   4370                                            (__v8df)_mm512_setzero_pd());
   4371 }
   4372 
   4373 static __inline __m512d __DEFAULT_FN_ATTRS
   4374 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
   4375 {
   4376   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
   4377                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
   4378 }
   4379 
   4380 static __inline__ __m512d __DEFAULT_FN_ATTRS
   4381 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   4382 {
   4383   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   4384                                            (__v8df)_mm512_unpacklo_pd(__A, __B),
   4385                                            (__v8df)__W);
   4386 }
   4387 
   4388 static __inline__ __m512d __DEFAULT_FN_ATTRS
   4389 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
   4390 {
   4391   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   4392                                            (__v8df)_mm512_unpacklo_pd(__A, __B),
   4393                                            (__v8df)_mm512_setzero_pd());
   4394 }
   4395 
   4396 static __inline __m512 __DEFAULT_FN_ATTRS
   4397 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
   4398 {
   4399   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
   4400                                          2,    18,    3,    19,
   4401                                          2+4,  18+4,  3+4,  19+4,
   4402                                          2+8,  18+8,  3+8,  19+8,
   4403                                          2+12, 18+12, 3+12, 19+12);
   4404 }
   4405 
   4406 static __inline__ __m512 __DEFAULT_FN_ATTRS
   4407 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   4408 {
   4409   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
   4410                                           (__v16sf)_mm512_unpackhi_ps(__A, __B),
   4411                                           (__v16sf)__W);
   4412 }
   4413 
   4414 static __inline__ __m512 __DEFAULT_FN_ATTRS
   4415 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
   4416 {
   4417   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
   4418                                           (__v16sf)_mm512_unpackhi_ps(__A, __B),
   4419                                           (__v16sf)_mm512_setzero_ps());
   4420 }
   4421 
   4422 static __inline __m512 __DEFAULT_FN_ATTRS
   4423 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
   4424 {
   4425   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
   4426                                          0,    16,    1,    17,
   4427                                          0+4,  16+4,  1+4,  17+4,
   4428                                          0+8,  16+8,  1+8,  17+8,
   4429                                          0+12, 16+12, 1+12, 17+12);
   4430 }
   4431 
   4432 static __inline__ __m512 __DEFAULT_FN_ATTRS
   4433 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   4434 {
   4435   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
   4436                                           (__v16sf)_mm512_unpacklo_ps(__A, __B),
   4437                                           (__v16sf)__W);
   4438 }
   4439 
   4440 static __inline__ __m512 __DEFAULT_FN_ATTRS
   4441 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
   4442 {
   4443   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
   4444                                           (__v16sf)_mm512_unpacklo_ps(__A, __B),
   4445                                           (__v16sf)_mm512_setzero_ps());
   4446 }
   4447 
   4448 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4449 _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
   4450 {
   4451   return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
   4452                                           2,    18,    3,    19,
   4453                                           2+4,  18+4,  3+4,  19+4,
   4454                                           2+8,  18+8,  3+8,  19+8,
   4455                                           2+12, 18+12, 3+12, 19+12);
   4456 }
   4457 
   4458 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4459 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   4460 {
   4461   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
   4462                                        (__v16si)_mm512_unpackhi_epi32(__A, __B),
   4463                                        (__v16si)__W);
   4464 }
   4465 
   4466 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4467 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
   4468 {
   4469   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
   4470                                        (__v16si)_mm512_unpackhi_epi32(__A, __B),
   4471                                        (__v16si)_mm512_setzero_si512());
   4472 }
   4473 
   4474 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4475 _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
   4476 {
   4477   return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
   4478                                           0,    16,    1,    17,
   4479                                           0+4,  16+4,  1+4,  17+4,
   4480                                           0+8,  16+8,  1+8,  17+8,
   4481                                           0+12, 16+12, 1+12, 17+12);
   4482 }
   4483 
   4484 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4485 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   4486 {
   4487   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
   4488                                        (__v16si)_mm512_unpacklo_epi32(__A, __B),
   4489                                        (__v16si)__W);
   4490 }
   4491 
   4492 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4493 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
   4494 {
   4495   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
   4496                                        (__v16si)_mm512_unpacklo_epi32(__A, __B),
   4497                                        (__v16si)_mm512_setzero_si512());
   4498 }
   4499 
   4500 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4501 _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
   4502 {
   4503   return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
   4504                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
   4505 }
   4506 
   4507 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4508 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   4509 {
   4510   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
   4511                                         (__v8di)_mm512_unpackhi_epi64(__A, __B),
   4512                                         (__v8di)__W);
   4513 }
   4514 
   4515 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4516 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
   4517 {
   4518   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
   4519                                         (__v8di)_mm512_unpackhi_epi64(__A, __B),
   4520                                         (__v8di)_mm512_setzero_si512());
   4521 }
   4522 
   4523 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4524 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
   4525 {
   4526   return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
   4527                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
   4528 }
   4529 
   4530 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4531 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   4532 {
   4533   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
   4534                                         (__v8di)_mm512_unpacklo_epi64(__A, __B),
   4535                                         (__v8di)__W);
   4536 }
   4537 
   4538 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4539 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
   4540 {
   4541   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
   4542                                         (__v8di)_mm512_unpacklo_epi64(__A, __B),
   4543                                         (__v8di)_mm512_setzero_si512());
   4544 }
   4545 
   4546 /* Bit Test */
   4547 
/* Bitwise-test 16 packed 32-bit lanes: mask bit i is set when
   (__A[i] & __B[i]) is nonzero (VPTESTMD). */
static __inline __mmask16 __DEFAULT_FN_ATTRS
_mm512_test_epi32_mask(__m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
            (__v16si) __B,
            (__mmask16) -1);
}
   4555 
/* As _mm512_test_epi32_mask, additionally ANDed with write-mask __U. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
                 (__v16si) __B, __U);
}
   4562 
/* Bitwise-test 8 packed 64-bit lanes: mask bit i is set when
   (__A[i] & __B[i]) is nonzero (VPTESTMQ). */
static __inline __mmask8 __DEFAULT_FN_ATTRS
_mm512_test_epi64_mask(__m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
                 (__v8di) __B,
                 (__mmask8) -1);
}
   4570 
/* As _mm512_test_epi64_mask, additionally ANDed with write-mask __U. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
}
   4576 
   4577 
   4578 /* SIMD load ops */
   4579 
/* Load 512 bits of integer data from unaligned memory at __P. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_loadu_si512 (void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
                  (__v16si)
                  _mm512_setzero_si512 (), /* passthru unused: mask is all-ones */
                  (__mmask16) -1);
}
   4588 
/* Merge-masked unaligned load of 16 x int32: lanes with a clear bit in
   __U come from __W; masked-out memory lanes are not accessed. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
                  (__v16si) __W,
                  (__mmask16) __U);
}
   4596 
   4597 
/* Zero-masked unaligned load of 16 x int32: lanes with a clear bit in
   __U are zeroed; masked-out memory lanes are not accessed. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
                                                     (__v16si)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask16) __U);
}
   4606 
/* Masked unaligned 64-bit-element load: unselected lanes keep the
   corresponding element of __W.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
                  (__v8di) __W,
                  (__mmask8) __U);
}

/* Zero-masking variant: unselected lanes become 0.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
                                                     (__v8di)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask8) __U);
}
   4623 
/* Masked unaligned single-precision load: unselected lanes keep the
   corresponding element of __W.  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* Zero-masking variant: unselected lanes become 0.0f.  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) __U);
}

/* Masked unaligned double-precision load: unselected lanes keep the
   corresponding element of __W.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
                (__v8df) __W,
                (__mmask8) __U);
}

/* Zero-masking variant: unselected lanes become 0.0.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) __U);
}
   4657 
/* Unaligned double-precision load expressed as a read through a local
   packed, may_alias struct: `packed` drops the alignment requirement and
   `may_alias` exempts the access from strict-aliasing, so the compiler
   emits an unaligned vector load without undefined behavior.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_loadu_pd(void const *__p)
{
  struct __loadu_pd {
    __m512d __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_pd*)__p)->__v;
}

/* Unaligned single-precision load; same packed/may_alias technique.  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_loadu_ps(void const *__p)
{
  struct __loadu_ps {
    __m512 __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_ps*)__p)->__v;
}
   4675 
/* Aligned single-precision load; __p is expected to be 64-byte aligned.
   The all-ones mask makes the load unconditional.  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_load_ps(void const *__p)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) -1);
}

/* Masked aligned single-precision load: unselected lanes keep __W.  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* Zero-masking variant: unselected lanes become 0.0f.  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) __U);
}

/* Aligned double-precision load; __p is expected to be 64-byte aligned.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_load_pd(void const *__p)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) -1);
}

/* Masked aligned double-precision load: unselected lanes keep __W.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
                          (__v8df) __W,
                          (__mmask8) __U);
}

/* Zero-masking variant: unselected lanes become 0.0.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) __U);
}
   4727 
/* Aligned 512-bit integer loads via a plain dereference.  The cast to
   __m512i* asserts the pointer satisfies that type's (64-byte) alignment;
   passing a less-aligned pointer is the caller's bug.  All three names
   are aliases for the same operation.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_load_si512 (void const *__P)
{
  return *(__m512i *) __P;
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_load_epi32 (void const *__P)
{
  return *(__m512i *) __P;
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_load_epi64 (void const *__P)
{
  return *(__m512i *) __P;
}
   4745 
   4746 /* SIMD store ops */
   4747 
/* Masked unaligned 64-bit-element store: only lanes whose bit is set in
   __U are written; other memory locations are untouched.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
                                     (__mmask8) __U);
}

/* Unconditional unaligned 512-bit integer store (all-ones mask).  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_si512 (void *__P, __m512i __A)
{
  __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
            (__mmask16) -1);
}

/* Masked unaligned 32-bit-element store.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
                                     (__mmask16) __U);
}

/* Masked unaligned double-precision store.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Unconditional unaligned double-precision store.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_pd(void *__P, __m512d __A)
{
  __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1);
}

/* Masked unaligned single-precision store.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

/* Unconditional unaligned single-precision store.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_ps(void *__P, __m512 __A)
{
  __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1);
}
   4793 
/* Masked aligned double-precision store; __P is expected to be 64-byte
   aligned.  Only lanes selected by __U are written.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Aligned double-precision store via plain assignment; the __m512d* cast
   asserts 64-byte alignment of __P.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_pd(void *__P, __m512d __A)
{
  *(__m512d*)__P = __A;
}

/* Masked aligned single-precision store.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

/* Aligned single-precision store via plain assignment.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_ps(void *__P, __m512 __A)
{
  *(__m512*)__P = __A;
}

/* Aligned 512-bit integer stores via plain assignment; all three names
   are aliases for the same operation.  __P must be 64-byte aligned.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_si512 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

static __inline void __DEFAULT_FN_ATTRS
_mm512_store_epi32 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

static __inline void __DEFAULT_FN_ATTRS
_mm512_store_epi64 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}
   4836 
   4837 /* Mask ops */
   4838 
   4839 static __inline __mmask16 __DEFAULT_FN_ATTRS
   4840 _mm512_knot(__mmask16 __M)
   4841 {
   4842   return __builtin_ia32_knothi(__M);
   4843 }
   4844 
   4845 /* Integer compare */
   4846 
/* Equality compares.  The pcmpeq* builtins compare lane-for-lane for
   equality; the ucmp* builtins take an immediate predicate, here 0 (EQ).
   "mask" variants AND the comparison result with __u, so lanes with a
   clear bit in __u always report 0.  */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
                                                   (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
                                                   __u);
}

/* Unsigned 32-bit equality (immediate predicate 0 = EQ).  */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
                                                 __u);
}

/* 64-bit equality.  */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
                                                  __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
                                                  (__mmask8)-1);
}

/* Unsigned 64-bit equality (immediate predicate 0 = EQ).  */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
                                                __u);
}
   4894 
/* Greater-or-equal compares.  cmp*/ucmp* immediate predicate 5 is
   "not less than", i.e. >= (signed via cmp*, unsigned via ucmp*).
   "mask" variants AND the result with __u.  */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                               (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                               __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                                __u);
}
   4942 
/* Greater-than compares.  Signed forms use the dedicated pcmpgt*
   builtins; unsigned forms use ucmp* with immediate predicate 6,
   "not less or equal", i.e. >.  "mask" variants AND the result with
   __u.  */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
                                                   (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
                                                   __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
                                                  __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
                                                  (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
                                                __u);
}
   4990 
/* Less-or-equal compares: cmp*/ucmp* immediate predicate 2 is <=
   (signed via cmp*, unsigned via ucmp*).  "mask" variants AND the
   result with __u.  */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                               (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                               __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                                __u);
}
   5038 
/* Less-than compares: cmp*/ucmp* immediate predicate 1 is <
   (signed via cmp*, unsigned via ucmp*).  "mask" variants AND the
   result with __u.  */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                               (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                               __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                                __u);
}
   5086 
/* Not-equal compares: cmp*/ucmp* immediate predicate 4 is !=
   (signed and unsigned forms behave identically for NE, but both are
   provided for API symmetry).  "mask" variants AND the result with
   __u.  */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
                                               (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
                                               __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
                                                __u);
}
   5134 
/* Sign-extend the 16 bytes of __A to 16 x i32 (VPMOVSXBD).  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi8_epi32(__m128i __A)
{
  /* This function always performs a signed extension, but __v16qi is a char
     which may be signed or unsigned, so use __v16qs. */
  return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
}

/* Merge-masked variant: lanes with a clear bit in __U keep __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepi8_epi32(__A),
                                             (__v16si)__W);
}

/* Zero-masked variant: unselected lanes become 0.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepi8_epi32(__A),
                                             (__v16si)_mm512_setzero_si512());
}

/* Sign-extend the low 8 bytes of __A to 8 x i64 (VPMOVSXBQ); the
   shufflevector selects bytes 0-7 before the widening conversion.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi8_epi64(__m128i __A)
{
  /* This function always performs a signed extension, but __v16qi is a char
     which may be signed or unsigned, so use __v16qs. */
  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
}

/* Merge-masked variant.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi8_epi64(__A),
                                             (__v8di)__W);
}

/* Zero-masked variant.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi8_epi64(__A),
                                             (__v8di)_mm512_setzero_si512 ());
}
   5182 
/* Sign-extend 8 x i32 to 8 x i64 (VPMOVSXDQ).  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi32_epi64(__m256i __X)
{
  return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
}

/* Merge-masked variant: lanes with a clear bit in __U keep __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi32_epi64(__X),
                                             (__v8di)__W);
}

/* Zero-masked variant: unselected lanes become 0.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi32_epi64(__X),
                                             (__v8di)_mm512_setzero_si512());
}

/* Sign-extend 16 x i16 to 16 x i32 (VPMOVSXWD).  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi16_epi32(__m256i __A)
{
  return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
}

/* Merge-masked variant.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_cvtepi16_epi32(__A),
                                            (__v16si)__W);
}

/* Zero-masked variant.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_cvtepi16_epi32(__A),
                                            (__v16si)_mm512_setzero_si512 ());
}

/* Sign-extend 8 x i16 to 8 x i64 (VPMOVSXWQ).  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi16_epi64(__m128i __A)
{
  return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
}

/* Merge-masked variant.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi16_epi64(__A),
                                             (__v8di)__W);
}

/* Zero-masked variant.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi16_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}
   5248 
/* Zero-extend the 16 bytes of __A to 16 x i32 (VPMOVZXBD); the unsigned
   element type __v16qu forces a zero extension.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu8_epi32(__m128i __A)
{
  return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
}

/* Merge-masked variant: lanes with a clear bit in __U keep __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepu8_epi32(__A),
                                             (__v16si)__W);
}

/* Zero-masked variant: unselected lanes become 0.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepu8_epi32(__A),
                                             (__v16si)_mm512_setzero_si512());
}

/* Zero-extend the low 8 bytes of __A to 8 x i64 (VPMOVZXBQ).  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu8_epi64(__m128i __A)
{
  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
}

/* Merge-masked variant.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu8_epi64(__A),
                                             (__v8di)__W);
}

/* Zero-masked variant.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu8_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}

/* Zero-extend 8 x u32 to 8 x i64 (VPMOVZXDQ).  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu32_epi64(__m256i __X)
{
  return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
}

/* Merge-masked variant.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu32_epi64(__X),
                                             (__v8di)__W);
}

/* Zero-masked variant.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu32_epi64(__X),
                                             (__v8di)_mm512_setzero_si512());
}
   5314 
/* Zero-extend sixteen unsigned 16-bit elements of __A to 32-bit elements
   (VPMOVZXWD). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu16_epi32(__m256i __A)
{
  return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
}

/* Masked form: elements whose mask bit in __U is clear keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_cvtepu16_epi32(__A),
                                            (__v16si)__W);
}

/* Zero-masked form: elements whose mask bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_cvtepu16_epi32(__A),
                                            (__v16si)_mm512_setzero_si512());
}
   5336 
/* Zero-extend eight unsigned 16-bit elements of __A to 64-bit elements
   (VPMOVZXWQ). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu16_epi64(__m128i __A)
{
  return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
}

/* Masked form: elements whose mask bit in __U is clear keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu16_epi64(__A),
                                             (__v8di)__W);
}

/* Zero-masked form: elements whose mask bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu16_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}
   5358 
/* Rotate each 32-bit element of __A right by the per-element count in __B
   (VPRORVD).  The unmasked form passes an all-ones mask. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rorv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) -1);
}

/* Masked form: elements whose mask bit in __U is clear keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si) __W,
              (__mmask16) __U);
}

/* Zero-masked form: elements whose mask bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) __U);
}
   5387 
/* Rotate each 64-bit element of __A right by the per-element count in __B
   (VPRORVQ).  The unmasked form passes an all-ones mask. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rorv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) -1);
}

/* Masked form: elements whose mask bit in __U is clear keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di) __W,
              (__mmask8) __U);
}

/* Zero-masked form: elements whose mask bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) __U);
}
   5416 
   5417 
   5418 
/* Packed integer comparisons producing a bitmask result.  p is the
   comparison predicate and must be a compile-time constant (hence macros,
   not functions); the epu* variants compare as unsigned.  The _mask_
   variants AND the result with the incoming mask m. */
#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)-1); })

#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1); })

#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)(m)); })

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)); })

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)); })
   5458 
/* Rotate packed 32/64-bit elements left by the immediate count b
   (VPROLD/VPROLQ).  Macros because b must be a compile-time constant.
   _mask_ forms merge with W under mask U; _maskz_ forms zero masked-off
   elements. */
#define _mm512_rol_epi32(a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })

#define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)(__m512i)(W), \
                                        (__mmask16)(U)); })

#define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })

#define _mm512_rol_epi64(a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })

#define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)(__m512i)(W), (__mmask8)(U)); })

#define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
/* Rotate each 32-bit element of __A left by the per-element count in __B
   (VPROLVD).  The unmasked form passes an all-ones mask. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) -1);
}

/* Masked form: elements whose mask bit in __U is clear keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si) __W,
              (__mmask16) __U);
}

/* Zero-masked form: elements whose mask bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) __U);
}
   5515 
/* Rotate each 64-bit element of __A left by the per-element count in __B
   (VPROLVQ).  The unmasked form passes an all-ones mask. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rolv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) -1);
}

/* Masked form: elements whose mask bit in __U is clear keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di) __W,
              (__mmask8) __U);
}

/* Zero-masked form: elements whose mask bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) __U);
}
   5544 
/* Rotate packed 32/64-bit elements right by the immediate count B
   (VPRORD/VPRORQ).  Macros because B must be a compile-time constant.
   _mask_ forms merge with W under mask U; _maskz_ forms zero masked-off
   elements. */
#define _mm512_ror_epi32(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })

#define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)(__m512i)(W), \
                                        (__mmask16)(U)); })

#define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })

#define _mm512_ror_epi64(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })

#define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)(__m512i)(W), (__mmask8)(U)); })

#define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
   5573 
/* Shift each 32-bit element of __A left by the scalar count __B (VPSLLD). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_slli_epi32(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
}

/* Masked form: elements whose mask bit in __U is clear keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_slli_epi32(__A, __B),
                                         (__v16si)__W);
}

/* Zero-masked form: elements whose mask bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_slli_epi32(__A, __B),
                                         (__v16si)_mm512_setzero_si512());
}
   5594 
/* Shift each 64-bit element of __A left by the scalar count __B (VPSLLQ). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_slli_epi64(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
}

/* Masked form: elements whose mask bit in __U is clear keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_slli_epi64(__A, __B),
                                          (__v8di)__W);
}

/* Zero-masked form: elements whose mask bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_slli_epi64(__A, __B),
                                          (__v8di)_mm512_setzero_si512());
}
   5616 
/* Shift each 32-bit element of __A right logically by the scalar count __B
   (VPSRLD). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srli_epi32(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
}

/* Masked form: elements whose mask bit in __U is clear keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_srli_epi32(__A, __B),
                                         (__v16si)__W);
}

/* Zero-masked form: elements whose mask bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_srli_epi32(__A, __B),
                                         (__v16si)_mm512_setzero_si512());
}
   5637 
/* Shift each 64-bit element of __A right logically by the scalar count __B
   (VPSRLQ). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srli_epi64(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
}

/* Masked form: elements whose mask bit in __U is clear keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_srli_epi64(__A, __B),
                                          (__v8di)__W);
}

/* Zero-masked form: elements whose mask bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_srli_epi64(__A, __B),
                                          (__v8di)_mm512_setzero_si512());
}
   5659 
/* Aligned masked load of 16 x 32-bit elements from __P (VMOVDQA32);
   elements whose mask bit in __U is clear keep __W.  __P is presumably
   required to be 64-byte aligned, per the aligned-load builtin used. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
              (__v16si) __W,
              (__mmask16) __U);
}

/* Aligned masked load; elements whose mask bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) __U);
}

/* Aligned masked store of 16 x 32-bit elements of __A to __P (VMOVDQA32);
   only elements whose mask bit in __U is set are written. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
          (__mmask16) __U);
}
   5683 
/* Per-element blend: result takes __A where the mask bit in __U is set and
   __W where it is clear (VMOVDQA32 with merge-masking). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                 (__v16si) __A,
                 (__v16si) __W);
}

/* Per-element select of __A under __U; masked-off elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                 (__v16si) __A,
                 (__v16si) _mm512_setzero_si512 ());
}

/* 64-bit element variant of the merge blend above. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                 (__v8di) __A,
                 (__v8di) __W);
}

/* 64-bit element variant of the zeroing select above. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                 (__v8di) __A,
                 (__v8di) _mm512_setzero_si512 ());
}
   5715 
/* Aligned masked load of 8 x 64-bit elements from __P (VMOVDQA64);
   elements whose mask bit in __U is clear keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
              (__v8di) __W,
              (__mmask8) __U);
}

/* Aligned masked load; elements whose mask bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) __U);
}

/* Aligned masked store of 8 x 64-bit elements of __A to __P (VMOVDQA64);
   only elements whose mask bit in __U is set are written. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
          (__mmask8) __U);
}
   5739 
/* Duplicate the even-indexed double elements of __A into the adjacent odd
   lanes: result = {a0,a0,a2,a2,a4,a4,a6,a6} (VMOVDDUP). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_movedup_pd (__m512d __A)
{
  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
                                          0, 0, 2, 2, 4, 4, 6, 6);
}

/* Masked form: elements whose mask bit in __U is clear keep __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_movedup_pd(__A),
                                              (__v8df)__W);
}

/* Zero-masked form: elements whose mask bit in __U is clear are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_movedup_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}
   5762 
/* VFIXUPIMM family: fix up special floating-point values (per the lookup
   table encoded in integer vector C and the immediate imm) in packed
   pd/ps and scalar sd/ss operands.  The *_round_* forms take an explicit
   rounding/SAE control R; the rest pass _MM_FROUND_CUR_DIRECTION.  These
   are macros because imm and R must be compile-time constants.  _mask_
   forms merge under mask U into A; _maskz_ forms use the separate *_maskz
   builtins which zero masked-off elements. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              (int)(R)); })

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION); })

/* Packed single-precision variants (VFIXUPIMMPS). */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1, (int)(R)); })

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U), (int)(R)); })

#define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U), \
                                             (int)(R)); })

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION); })

/* Scalar double-precision variants (VFIXUPIMMSD), operating on the low
   element only. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)); })

#define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

/* Scalar single-precision variants (VFIXUPIMMSS), operating on the low
   element only. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)-1, (int)(R)); })

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)(U), (int)(R)); })

#define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)(U), \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)); })

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })
   5920 
/* VGETEXPSD: extract the exponent of the low double element of __B as a
   floating-point value into the low element of the result.  The _round_
   forms are macros because the rounding/SAE control R must be a
   compile-time constant.  NOTE(review): upper element handling follows
   the builtin's scalar semantics — presumably copied from the first
   operand; confirm against the ISA reference if it matters. */
#define _mm_getexp_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(R)); })


static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_getexp_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
                 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

/* Masked form: the low result element keeps __W's low element when the
   low bit of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) __W,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(R)); })
   5950 
   5951 static __inline__ __m128d __DEFAULT_FN_ATTRS
   5952 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
   5953 {
   5954  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
   5955           (__v2df) __B,
   5956           (__v2df) _mm_setzero_pd (),
   5957           (__mmask8) __U,
   5958           _MM_FROUND_CUR_DIRECTION);
   5959 }
   5960 
   5961 #define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({\
   5962   (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
   5963                                                  (__v2df)(__m128d)(B), \
   5964                                                  (__v2df)_mm_setzero_pd(), \
   5965                                                  (__mmask8)(U), (int)(R)); })
   5966 
/* Extract the exponent of the low float of B as a float (upper elements
   taken from A), unmasked, with explicit rounding/SAE control R. */
#define _mm_getexp_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(R)); })

/* Extract the exponent of the low float of __B (upper elements from __A),
   unmasked, current rounding mode. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_getexp_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
                (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

/* Masked form: the result element is taken from __W when bit 0 of __U is
   clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __W,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Masked form with explicit rounding/SAE control R. */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(R)); })
   5995 
   5996 static __inline__ __m128 __DEFAULT_FN_ATTRS
   5997 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
   5998 {
   5999  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
   6000           (__v4sf) __B,
   6001           (__v4sf) _mm_setzero_pd (),
   6002           (__mmask8) __U,
   6003           _MM_FROUND_CUR_DIRECTION);
   6004 }
   6005 
/* Zero-masked getexp on the low float, with explicit rounding/SAE control
   R. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(R)); })

/* getmant: extract the normalized mantissa of the low double of B (upper
   element from A).  C selects the normalization interval and D the sign
   control; they are packed into one immediate as (D<<2)|C.  Unmasked,
   explicit rounding/SAE control R. */
#define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, \
                                               (int)(R)); })

/* Same as above using the current rounding mode. */
#define _mm_getmant_sd(A, B, C, D)  __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION); })

/* Masked form: the result element is taken from W when bit 0 of U is
   clear. */
#define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION); })
   6034 
   6035 #define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R)({\
   6036   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   6037                                                (__v2df)(__m128d)(B), \
   6038                                                (int)(((D)<<2) | (C)), \
   6039                                                (__v2df)(__m128d)(W), \
   6040                                                (__mmask8)(U), (int)(R)); })
   6041 
/* Zero-masked getmant on the low double; the result element is zeroed
   when bit 0 of U is clear.  Current rounding mode. */
#define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION); })

/* Zero-masked getmant with explicit rounding/SAE control R. */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)); })

/* getmant on the low float of B (upper elements from A); C/D packed as
   (D<<2)|C select interval and sign control.  Unmasked, explicit
   rounding/SAE control R. */
#define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R)); })

/* Same as above using the current rounding mode. */
#define _mm_getmant_ss(A, B, C, D) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION); })

/* Masked form: the result element is taken from W when bit 0 of U is
   clear. */
#define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION); })
   6079 
   6080 #define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R)({\
   6081   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   6082                                               (__v4sf)(__m128)(B), \
   6083                                               (int)(((D)<<2) | (C)), \
   6084                                               (__v4sf)(__m128)(W), \
   6085                                               (__mmask8)(U), (int)(R)); })
   6086 
   6087 #define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\
   6088   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   6089                                               (__v4sf)(__m128)(B), \
   6090                                               (int)(((D)<<2) | (C)), \
   6091                                               (__v4sf)_mm_setzero_pd(), \
   6092                                               (__mmask8)(U), \
   6093                                               _MM_FROUND_CUR_DIRECTION); })
   6094 
/* Zero-masked getmant on the low float with explicit rounding/SAE control
   R. */
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)); })
   6101 
/* Copy 16-bit mask __A to the result (a plain identity at the C level). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kmov (__mmask16 __A)
{
  return  __A;
}
   6107 
/* Compare the low doubles of A and B with predicate P under SAE control R;
   yields an int comparison result. */
#define _mm_comi_round_sd(A, B, P, R) __extension__ ({\
  (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                              (int)(P), (int)(R)); })

/* Compare the low floats of A and B with predicate P under SAE control R;
   yields an int comparison result. */
#define _mm_comi_round_ss(A, B, P, R) __extension__ ({\
  (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                              (int)(P), (int)(R)); })

#ifdef __x86_64__
/* Convert the low double of A to a signed 64-bit integer with explicit
   rounding control R (64-bit targets only). */
#define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
#endif
   6120 
/* Two-source permute of 32-bit elements from __A and __B selected by the
   indices in __I.  _mask2 variant: where a bit of __U is clear the
   corresponding element is taken from the index operand __I (vpermi2
   semantics — NOTE(review): per Intel's documented behavior; verify). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
         __mmask16 __U, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
                   (__v16si) __I
                   /* idx */ ,
                   (__v16si) __B,
                   (__mmask16) __U);
}
   6131 
/* Shift each 32-bit element of __A left by the scalar count held in the
   low 64 bits of __B (VPSLLD). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sll_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
}

/* Masked form: elements with a clear bit in __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sll_epi32(__A, __B),
                                          (__v16si)__W);
}

/* Zero-masked form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sll_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
}

/* Shift each 64-bit element of __A left by the scalar count held in the
   low 64 bits of __B (VPSLLQ). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sll_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
}

/* Masked form: elements with a clear bit in __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sll_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_sll_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
}
   6175 
/* Variable shift: shift each 32-bit element of __X left by the count in
   the corresponding element of __Y (VPSLLVD). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sllv_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
}

/* Masked form: elements with a clear bit in __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
                                           (__v16si)__W);
}

/* Zero-masked form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
                                           (__v16si)_mm512_setzero_si512());
}

/* Variable shift: shift each 64-bit element of __X left by the count in
   the corresponding element of __Y (VPSLLVQ). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sllv_epi64(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
}

/* Masked form: elements with a clear bit in __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
                                            (__v8di)__W);
}

/* Zero-masked form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
                                            (__v8di)_mm512_setzero_si512());
}
   6219 
/* Arithmetic right shift of each 32-bit element of __A by the scalar
   count held in the low 64 bits of __B (VPSRAD). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sra_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
}

/* Masked form: elements with a clear bit in __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sra_epi32(__A, __B),
                                          (__v16si)__W);
}

/* Zero-masked form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sra_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
}

/* Arithmetic right shift of each 64-bit element of __A by the scalar
   count held in the low 64 bits of __B (VPSRAQ). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sra_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
}

/* Masked form: elements with a clear bit in __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_sra_epi64(__A, __B),
                                           (__v8di)__W);
}

/* Zero-masked form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_sra_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
}
   6263 
/* Variable arithmetic right shift of each 32-bit element of __X by the
   count in the corresponding element of __Y (VPSRAVD). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srav_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
}

/* Masked form: elements with a clear bit in __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srav_epi32(__X, __Y),
                                           (__v16si)__W);
}

/* Zero-masked form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srav_epi32(__X, __Y),
                                           (__v16si)_mm512_setzero_si512());
}

/* Variable arithmetic right shift of each 64-bit element of __X by the
   count in the corresponding element of __Y (VPSRAVQ). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srav_epi64(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
}

/* Masked form: elements with a clear bit in __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srav_epi64(__X, __Y),
                                            (__v8di)__W);
}

/* Zero-masked form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srav_epi64(__X, __Y),
                                            (__v8di)_mm512_setzero_si512());
}
   6307 
/* Logical right shift of each 32-bit element of __A by the scalar count
   held in the low 64 bits of __B (VPSRLD). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srl_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
}

/* Masked form: elements with a clear bit in __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srl_epi32(__A, __B),
                                          (__v16si)__W);
}

/* Zero-masked form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srl_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
}

/* Logical right shift of each 64-bit element of __A by the scalar count
   held in the low 64 bits of __B (VPSRLQ). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srl_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
}

/* Masked form: elements with a clear bit in __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srl_epi64(__A, __B),
                                           (__v8di)__W);
}

/* Zero-masked form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srl_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
}
   6351 
/* Variable logical right shift of each 32-bit element of __X by the count
   in the corresponding element of __Y (VPSRLVD). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srlv_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
}

/* Masked form: elements with a clear bit in __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
                                           (__v16si)__W);
}

/* Zero-masked form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
                                           (__v16si)_mm512_setzero_si512());
}

/* Variable logical right shift of each 64-bit element of __X by the count
   in the corresponding element of __Y (VPSRLVQ). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
}

/* Masked form: elements with a clear bit in __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
                                            (__v8di)__W);
}

/* Zero-masked form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
                                            (__v8di)_mm512_setzero_si512());
}
   6395 
/* Bitwise ternary logic (VPTERNLOGD): for each bit position, the 8-bit
   immediate imm acts as a truth table over the corresponding bits of A, B
   and C.  Unmasked. */
#define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                            (__v16si)(__m512i)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1); })

/* Masked form: 32-bit elements with a clear bit in U keep A's value. */
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                            (__v16si)(__m512i)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U)); })

/* Zero-masked form: 32-bit elements with a clear bit in U are zeroed. */
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U)); })

/* Bitwise ternary logic on 64-bit elements (VPTERNLOGQ), unmasked. */
#define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                            (__v8di)(__m512i)(B), \
                                            (__v8di)(__m512i)(C), (int)(imm), \
                                            (__mmask8)-1); })

/* Masked form: 64-bit elements with a clear bit in U keep A's value. */
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                            (__v8di)(__m512i)(B), \
                                            (__v8di)(__m512i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

/* Zero-masked form: 64-bit elements with a clear bit in U are zeroed. */
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U)); })
   6431 
#ifdef __x86_64__
/* Convert the low double of A to a signed 64-bit integer with explicit
   rounding control R (alias spelling of _mm_cvt_roundsd_si64). */
#define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
#endif

/* Convert the low double of A to a signed 32-bit integer with explicit
   rounding control R. */
#define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })

/* Alias spelling of _mm_cvt_roundsd_si32. */
#define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })

/* Convert the low double of A to an unsigned 32-bit integer with explicit
   rounding control R. */
#define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)); })

/* Convert the low double of __A to an unsigned 32-bit integer using the
   current rounding mode. */
static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvtsd_u32 (__m128d __A)
{
  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
             _MM_FROUND_CUR_DIRECTION);
}
   6452 
#ifdef __x86_64__
/* Convert the low double of A to an unsigned 64-bit integer with explicit
   rounding control R (64-bit targets only). */
#define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
                                                  (int)(R)); })

/* Convert the low double of __A to an unsigned 64-bit integer using the
   current rounding mode. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvtsd_u64 (__m128d __A)
{
  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
                 __A,
                 _MM_FROUND_CUR_DIRECTION);
}
#endif
   6466 
/* Convert the low float of A to a signed 32-bit integer with explicit
   rounding control R. */
#define _mm_cvt_roundss_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })

/* Alias spelling of _mm_cvt_roundss_si32. */
#define _mm_cvt_roundss_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })

#ifdef __x86_64__
/* Convert the low float of A to a signed 64-bit integer with explicit
   rounding control R (64-bit targets only). */
#define _mm_cvt_roundss_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })

/* Alias spelling of _mm_cvt_roundss_si64. */
#define _mm_cvt_roundss_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
#endif

/* Convert the low float of A to an unsigned 32-bit integer with explicit
   rounding control R. */
#define _mm_cvt_roundss_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); })

/* Convert the low float of __A to an unsigned 32-bit integer using the
   current rounding mode. */
static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvtss_u32 (__m128 __A)
{
  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
             _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/* Convert the low float of A to an unsigned 64-bit integer with explicit
   rounding control R (64-bit targets only). */
#define _mm_cvt_roundss_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
                                                  (int)(R)); })

/* Convert the low float of __A to an unsigned 64-bit integer using the
   current rounding mode. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvtss_u64 (__m128 __A)
{
  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
                 __A,
                 _MM_FROUND_CUR_DIRECTION);
}
#endif
   6504 
/* Convert the low double of A to a signed 32-bit integer with truncation
   (vcvttsd2si); R provides SAE control. */
#define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })

/* Alias spelling of _mm_cvtt_roundsd_i32. */
#define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })

/* Truncating convert of the low double of __A to a signed 32-bit
   integer. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvttsd_i32 (__m128d __A)
{
  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/* Truncating convert of the low double of A to a signed 64-bit integer;
   R provides SAE control (64-bit targets only). */
#define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })

/* Alias spelling of _mm_cvtt_roundsd_si64. */
#define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })

/* Truncating convert of the low double of __A to a signed 64-bit
   integer. */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvttsd_i64 (__m128d __A)
{
  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
              _MM_FROUND_CUR_DIRECTION);
}
#endif
   6532 
/* Truncating convert of the low double of A to an unsigned 32-bit
   integer; R provides SAE control. */
#define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); })

/* Truncating convert of the low double of __A to an unsigned 32-bit
   integer. */
static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvttsd_u32 (__m128d __A)
{
  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/* Truncating convert of the low double of A to an unsigned 64-bit
   integer; R provides SAE control (64-bit targets only). */
#define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
                                                   (int)(R)); })

/* Truncating convert of the low double of __A to an unsigned 64-bit
   integer. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvttsd_u64 (__m128d __A)
{
  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
                  __A,
                  _MM_FROUND_CUR_DIRECTION);
}
#endif
   6556 
/* Truncating convert of the low float of A to a signed 32-bit integer;
   R provides SAE control. */
#define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })

/* Alias spelling of _mm_cvtt_roundss_i32. */
#define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })

/* Truncating convert of the low float of __A to a signed 32-bit
   integer. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvttss_i32 (__m128 __A)
{
  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/* Truncating convert of the low float of A to a signed 64-bit integer;
   R provides SAE control (64-bit targets only). */
#define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })

/* Alias spelling of _mm_cvtt_roundss_i64. */
#define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })

/* Truncating convert of the low float of __A to a signed 64-bit
   integer. */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvttss_i64 (__m128 __A)
{
  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
              _MM_FROUND_CUR_DIRECTION);
}
#endif
   6584 
/* Truncating convert of the low float of A to an unsigned 32-bit
   integer; R provides SAE control. */
#define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); })

/* Truncating convert of the low float of __A to an unsigned 32-bit
   integer. */
static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvttss_u32 (__m128 __A)
{
  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/* Truncating convert of the low float of A to an unsigned 64-bit
   integer; R provides SAE control (64-bit targets only). */
#define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
                                                   (int)(R)); })

/* Truncating convert of the low float of __A to an unsigned 64-bit
   integer. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvttss_u64 (__m128 __A)
{
  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
                  __A,
                  _MM_FROUND_CUR_DIRECTION);
}
#endif
   6608 
   6609 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6610 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
   6611             __m512d __B)
   6612 {
   6613   return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
   6614               (__v8di) __I
   6615               /* idx */ ,
   6616               (__v8df) __B,
   6617               (__mmask8) __U);
   6618 }
   6619 
   6620 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6621 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
   6622             __m512 __B)
   6623 {
   6624   return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
   6625                    (__v16si) __I
   6626                    /* idx */ ,
   6627                    (__v16sf) __B,
   6628                    (__mmask16) __U);
   6629 }
   6630 
   6631 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6632 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
   6633          __mmask8 __U, __m512i __B)
   6634 {
   6635   return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
   6636                    (__v8di) __I
   6637                    /* idx */ ,
   6638                    (__v8di) __B,
   6639                    (__mmask8) __U);
   6640 }
   6641 
/* Shuffle doubles within each 128-bit lane of X: one control bit of C per
   destination element — bit k selects which of the two elements of the
   owning lane supplies result element k. */
#define _mm512_permute_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   0 + (((C) >> 0) & 0x1), \
                                   0 + (((C) >> 1) & 0x1), \
                                   2 + (((C) >> 2) & 0x1), \
                                   2 + (((C) >> 3) & 0x1), \
                                   4 + (((C) >> 4) & 0x1), \
                                   4 + (((C) >> 5) & 0x1), \
                                   6 + (((C) >> 6) & 0x1), \
                                   6 + (((C) >> 7) & 0x1)); })

/* Merge-masking variant: lanes with a clear bit in U keep W. */
#define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permute_pd((X), (C)), \
                                       (__v8df)(__m512d)(W)); })

/* Zero-masking variant: lanes with a clear bit in U become zero. */
#define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permute_pd((X), (C)), \
                                       (__v8df)_mm512_setzero_pd()); })
   6663 
/* Shuffle floats within each 128-bit lane of X: two control bits of C per
   destination element (bits 2k+1:2k select the source element of the owning
   lane); the same 8-bit pattern is applied to all four lanes. */
#define _mm512_permute_ps(X, C) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \
                                  (__v16sf)_mm512_undefined_ps(), \
                                   0  + (((C) >> 0) & 0x3), \
                                   0  + (((C) >> 2) & 0x3), \
                                   0  + (((C) >> 4) & 0x3), \
                                   0  + (((C) >> 6) & 0x3), \
                                   4  + (((C) >> 0) & 0x3), \
                                   4  + (((C) >> 2) & 0x3), \
                                   4  + (((C) >> 4) & 0x3), \
                                   4  + (((C) >> 6) & 0x3), \
                                   8  + (((C) >> 0) & 0x3), \
                                   8  + (((C) >> 2) & 0x3), \
                                   8  + (((C) >> 4) & 0x3), \
                                   8  + (((C) >> 6) & 0x3), \
                                   12 + (((C) >> 0) & 0x3), \
                                   12 + (((C) >> 2) & 0x3), \
                                   12 + (((C) >> 4) & 0x3), \
                                   12 + (((C) >> 6) & 0x3)); })

/* Merge-masking variant: lanes with a clear bit in U keep W. */
#define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_permute_ps((X), (C)), \
                                      (__v16sf)(__m512)(W)); })

/* Zero-masking variant: lanes with a clear bit in U become zero. */
#define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_permute_ps((X), (C)), \
                                      (__v16sf)_mm512_setzero_ps()); })
   6693 
/* Variable in-lane shuffle of doubles: each element of __C selects, per
   128-bit lane, which element of __A's lane to take (vpermilvar form). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_permutevar_pd(__m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
}

/* Merge-masking variant: lanes with a clear bit in __U keep __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                         (__v8df)_mm512_permutevar_pd(__A, __C),
                                         (__v8df)__W);
}

/* Zero-masking variant: lanes with a clear bit in __U become zero. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                         (__v8df)_mm512_permutevar_pd(__A, __C),
                                         (__v8df)_mm512_setzero_pd());
}
   6715 
/* Variable in-lane shuffle of floats: each element of __C selects, per
   128-bit lane, which element of __A's lane to take (vpermilvar form). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_permutevar_ps(__m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
}

/* Merge-masking variant: lanes with a clear bit in __U keep __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
                                        (__v16sf)__W);
}

/* Zero-masking variant: lanes with a clear bit in __U become zero. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
                                        (__v16sf)_mm512_setzero_ps());
}
   6737 
/* Two-source permute of doubles via the vpermt2 builtin (index operand
   first): each lane of __I indexes into the concatenation of __A and __B.
   Unmasked: all-ones mask. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
                    /* idx */ ,
                    (__v8df) __A,
                    (__v8df) __B,
                    (__mmask8) -1);
}

/* Merge-masking variant; NOTE(review): masked-off lanes presumably keep
   __A (VPERMT2PD merge source) — confirm against Intel docs. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
                    /* idx */ ,
                    (__v8df) __A,
                    (__v8df) __B,
                    (__mmask8) __U);
}

/* Zero-masking variant: lanes with a clear bit in __U become zero. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
            __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
                                                         /* idx */ ,
                                                         (__v8df) __A,
                                                         (__v8df) __B,
                                                         (__mmask8) __U);
}
   6768 
/* Two-source permute of floats via the vpermt2 builtin (index operand
   first): each lane of __I indexes into the concatenation of __A and __B.
   Unmasked: all-ones mask. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
{
  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
                                                         /* idx */ ,
                                                         (__v16sf) __A,
                                                         (__v16sf) __B,
                                                         (__mmask16) -1);
}

/* Merge-masking variant; NOTE(review): masked-off lanes presumably keep
   __A (VPERMT2PS merge source) — confirm against Intel docs. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
{
  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
                                                         /* idx */ ,
                                                         (__v16sf) __A,
                                                         (__v16sf) __B,
                                                         (__mmask16) __U);
}

/* Zero-masking variant: lanes with a clear bit in __U become zero. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
            __m512 __B)
{
  return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
                                                        /* idx */ ,
                                                        (__v16sf) __A,
                                                        (__v16sf) __B,
                                                        (__mmask16) __U);
}
   6799 
   6800 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   6801 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
   6802 {
   6803   return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
   6804              (__v16si) __B,
   6805              (__mmask16) -1);
   6806 }
   6807 
   6808 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   6809 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
   6810 {
   6811   return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
   6812              (__v16si) __B, __U);
   6813 }
   6814 
   6815 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6816 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
   6817 {
   6818   return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
   6819             (__v8di) __B,
   6820             (__mmask8) -1);
   6821 }
   6822 
   6823 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6824 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
   6825 {
   6826   return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
   6827             (__v8di) __B, __U);
   6828 }
   6829 
/* Truncating convert of 8 doubles in A to 8 unsigned 32-bit integers;
   R selects the exception behavior.  Unmasked result lanes are undefined
   pass-through (all-ones mask). */
#define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_undefined_si256(), \
                                             (__mmask8)-1, (int)(R)); })

/* Merge-masking variant: lanes with a clear bit in U keep W. */
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)); })

/* Zero-masking variant: lanes with a clear bit in U become zero. */
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)); })
   6844 
   6845 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6846 _mm512_cvttpd_epu32 (__m512d __A)
   6847 {
   6848   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
   6849                   (__v8si)
   6850                   _mm256_undefined_si256 (),
   6851                   (__mmask8) -1,
   6852                   _MM_FROUND_CUR_DIRECTION);
   6853 }
   6854 
   6855 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6856 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
   6857 {
   6858   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
   6859                   (__v8si) __W,
   6860                   (__mmask8) __U,
   6861                   _MM_FROUND_CUR_DIRECTION);
   6862 }
   6863 
   6864 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6865 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
   6866 {
   6867   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
   6868                   (__v8si)
   6869                   _mm256_setzero_si256 (),
   6870                   (__mmask8) __U,
   6871                   _MM_FROUND_CUR_DIRECTION);
   6872 }
   6873 
   6874 #define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \
   6875   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
   6876                                                 (__v2df)(__m128d)(B), \
   6877                                                 (__v2df)_mm_setzero_pd(), \
   6878                                                 (__mmask8)-1, (int)(imm), \
   6879                                                 (int)(R)); })
   6880 
   6881 #define _mm_roundscale_sd(A, B, imm) __extension__ ({ \
   6882   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
   6883                                                 (__v2df)(__m128d)(B), \
   6884                                                 (__v2df)_mm_setzero_pd(), \
   6885                                                 (__mmask8)-1, (int)(imm), \
   6886                                                 _MM_FROUND_CUR_DIRECTION); })
   6887 
   6888 #define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \
   6889   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
   6890                                                 (__v2df)(__m128d)(B), \
   6891                                                 (__v2df)(__m128d)(W), \
   6892                                                 (__mmask8)(U), (int)(imm), \
   6893                                                 _MM_FROUND_CUR_DIRECTION); })
   6894 
   6895 #define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \
   6896   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
   6897                                                 (__v2df)(__m128d)(B), \
   6898                                                 (__v2df)(__m128d)(W), \
   6899                                                 (__mmask8)(U), (int)(I), \
   6900                                                 (int)(R)); })
   6901 
   6902 #define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \
   6903   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
   6904                                                 (__v2df)(__m128d)(B), \
   6905                                                 (__v2df)_mm_setzero_pd(), \
   6906                                                 (__mmask8)(U), (int)(I), \
   6907                                                 _MM_FROUND_CUR_DIRECTION); })
   6908 
   6909 #define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \
   6910   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
   6911                                                 (__v2df)(__m128d)(B), \
   6912                                                 (__v2df)_mm_setzero_pd(), \
   6913                                                 (__mmask8)(U), (int)(I), \
   6914                                                 (int)(R)); })
   6915 
   6916 #define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \
   6917   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
   6918                                                (__v4sf)(__m128)(B), \
   6919                                                (__v4sf)_mm_setzero_ps(), \
   6920                                                (__mmask8)-1, (int)(imm), \
   6921                                                (int)(R)); })
   6922 
   6923 #define _mm_roundscale_ss(A, B, imm) __extension__ ({ \
   6924   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
   6925                                                (__v4sf)(__m128)(B), \
   6926                                                (__v4sf)_mm_setzero_ps(), \
   6927                                                (__mmask8)-1, (int)(imm), \
   6928                                                _MM_FROUND_CUR_DIRECTION); })
   6929 
   6930 #define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \
   6931   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
   6932                                                (__v4sf)(__m128)(B), \
   6933                                                (__v4sf)(__m128)(W), \
   6934                                                (__mmask8)(U), (int)(I), \
   6935                                                _MM_FROUND_CUR_DIRECTION); })
   6936 
   6937 #define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \
   6938   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
   6939                                                (__v4sf)(__m128)(B), \
   6940                                                (__v4sf)(__m128)(W), \
   6941                                                (__mmask8)(U), (int)(I), \
   6942                                                (int)(R)); })
   6943 
   6944 #define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \
   6945   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
   6946                                                (__v4sf)(__m128)(B), \
   6947                                                (__v4sf)_mm_setzero_ps(), \
   6948                                                (__mmask8)(U), (int)(I), \
   6949                                                _MM_FROUND_CUR_DIRECTION); })
   6950 
   6951 #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \
   6952   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
   6953                                                (__v4sf)(__m128)(B), \
   6954                                                (__v4sf)_mm_setzero_ps(), \
   6955                                                (__mmask8)(U), (int)(I), \
   6956                                                (int)(R)); })
   6957 
/* VSCALEFPD: per-element scale of A by powers of two derived from B
   (see Intel docs), with explicit rounding mode R.  Unmasked form. */
#define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masking variant: lanes with a clear bit in U keep W. */
#define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masking variant: lanes with a clear bit in U become zero. */
#define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
   6975 
   6976 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6977 _mm512_scalef_pd (__m512d __A, __m512d __B)
   6978 {
   6979   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
   6980                 (__v8df) __B,
   6981                 (__v8df)
   6982                 _mm512_undefined_pd (),
   6983                 (__mmask8) -1,
   6984                 _MM_FROUND_CUR_DIRECTION);
   6985 }
   6986 
   6987 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6988 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   6989 {
   6990   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
   6991                 (__v8df) __B,
   6992                 (__v8df) __W,
   6993                 (__mmask8) __U,
   6994                 _MM_FROUND_CUR_DIRECTION);
   6995 }
   6996 
   6997 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6998 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
   6999 {
   7000   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
   7001                 (__v8df) __B,
   7002                 (__v8df)
   7003                 _mm512_setzero_pd (),
   7004                 (__mmask8) __U,
   7005                 _MM_FROUND_CUR_DIRECTION);
   7006 }
   7007 
/* VSCALEFPS: per-element scale of A by powers of two derived from B
   (see Intel docs), with explicit rounding mode R.  Unmasked form. */
#define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)); })

/* Merge-masking variant: lanes with a clear bit in U keep W. */
#define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

/* Zero-masking variant: lanes with a clear bit in U become zero. */
#define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })
   7025 
   7026 static __inline__ __m512 __DEFAULT_FN_ATTRS
   7027 _mm512_scalef_ps (__m512 __A, __m512 __B)
   7028 {
   7029   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
   7030                (__v16sf) __B,
   7031                (__v16sf)
   7032                _mm512_undefined_ps (),
   7033                (__mmask16) -1,
   7034                _MM_FROUND_CUR_DIRECTION);
   7035 }
   7036 
   7037 static __inline__ __m512 __DEFAULT_FN_ATTRS
   7038 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   7039 {
   7040   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
   7041                (__v16sf) __B,
   7042                (__v16sf) __W,
   7043                (__mmask16) __U,
   7044                _MM_FROUND_CUR_DIRECTION);
   7045 }
   7046 
   7047 static __inline__ __m512 __DEFAULT_FN_ATTRS
   7048 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
   7049 {
   7050   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
   7051                (__v16sf) __B,
   7052                (__v16sf)
   7053                _mm512_setzero_ps (),
   7054                (__mmask16) __U,
   7055                _MM_FROUND_CUR_DIRECTION);
   7056 }
   7057 
/* VSCALEFSD: scale the low double of A by a power of two derived from B's
   low element (see Intel docs), with explicit rounding mode R.  Unmasked. */
#define _mm_scalef_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)-1, (int)(R)); })

/* Same, with the current rounding environment. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_scalef_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
              (__v2df)( __B), (__v2df) _mm_setzero_pd(),
              (__mmask8) -1,
              _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking: low lane keeps __W when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
                 (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking with explicit rounding mode R. */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), (int)(R)); })

/* Zero-masking: low lane becomes zero when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
                 (__v2df) __B,
                (__v2df) _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking with explicit rounding mode R. */
#define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(U), (int)(R)); })
   7104 
/* VSCALEFSS: scale the low float of A by a power of two derived from B's
   low element (see Intel docs), with explicit rounding mode R.  Unmasked. */
#define _mm_scalef_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)-1, (int)(R)); })

/* Same, with the current rounding environment. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_scalef_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
             (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
             (__mmask8) -1,
             _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking: low lane keeps __W when bit 0 of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking with explicit rounding mode R. */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), (int)(R)); })

/* Zero-masking: low lane becomes zero when bit 0 of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
                 (__v4sf) __B,
                (__v4sf) _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   7145 
   7146 #define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \
   7147   (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
   7148                                              (__v4sf)(__m128)(B), \
   7149                                              (__v4sf)_mm_setzero_ps(), \
   7150                                              (__mmask8)(U), \
   7151                                              _MM_FROUND_CUR_DIRECTION); })
   7152 
   7153 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7154 _mm512_srai_epi32(__m512i __A, int __B)
   7155 {
   7156   return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
   7157 }
   7158 
   7159 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7160 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
   7161 {
   7162   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
   7163                                          (__v16si)_mm512_srai_epi32(__A, __B), \
   7164                                          (__v16si)__W);
   7165 }
   7166 
   7167 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7168 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) {
   7169   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
   7170                                          (__v16si)_mm512_srai_epi32(__A, __B), \
   7171                                          (__v16si)_mm512_setzero_si512());
   7172 }
   7173 
   7174 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7175 _mm512_srai_epi64(__m512i __A, int __B)
   7176 {
   7177   return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
   7178 }
   7179 
   7180 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7181 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
   7182 {
   7183   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
   7184                                           (__v8di)_mm512_srai_epi64(__A, __B), \
   7185                                           (__v8di)__W);
   7186 }
   7187 
   7188 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7189 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
   7190 {
   7191   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
   7192                                           (__v8di)_mm512_srai_epi64(__A, __B), \
   7193                                           (__v8di)_mm512_setzero_si512());
   7194 }
   7195 
/* Shuffle 128-bit blocks: select four 128-bit float blocks from A and B
   according to imm (two control bits per destination block).  Unmasked. */
#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1); })

/* Merge-masking variant: lanes with a clear bit in U keep W. */
#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)(__m512)(W), \
                                         (__mmask16)(U)); })

/* Zero-masking variant: lanes with a clear bit in U become zero. */
#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U)); })

/* Shuffle 128-bit blocks of doubles from A and B according to imm. */
#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1); })

/* Merge-masking variant: lanes with a clear bit in U keep W. */
#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)(__m512d)(W), \
                                          (__mmask8)(U)); })

/* Zero-masking variant: lanes with a clear bit in U become zero. */
#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U)); })
   7231 
   7232 #define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
   7233   (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
   7234                                           (__v16si)(__m512i)(B), (int)(imm), \
   7235                                           (__v16si)_mm512_setzero_si512(), \
   7236                                           (__mmask16)-1); })
   7237 
   7238 #define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
   7239   (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
   7240                                           (__v16si)(__m512i)(B), (int)(imm), \
   7241                                           (__v16si)(__m512i)(W), \
   7242                                           (__mmask16)(U)); })
   7243 
   7244 #define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
   7245   (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
   7246                                           (__v16si)(__m512i)(B), (int)(imm), \
   7247                                           (__v16si)_mm512_setzero_si512(), \
   7248                                           (__mmask16)(U)); })
   7249 
   7250 #define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
   7251   (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
   7252                                           (__v8di)(__m512i)(B), (int)(imm), \
   7253                                           (__v8di)_mm512_setzero_si512(), \
   7254                                           (__mmask8)-1); })
   7255 
   7256 #define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
   7257   (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
   7258                                           (__v8di)(__m512i)(B), (int)(imm), \
   7259                                           (__v8di)(__m512i)(W), \
   7260                                           (__mmask8)(U)); })
   7261 
   7262 #define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
   7263   (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
   7264                                           (__v8di)(__m512i)(B), (int)(imm), \
   7265                                           (__v8di)_mm512_setzero_si512(), \
   7266                                           (__mmask8)(U)); })
   7267 
/* VSHUFPD: per 128-bit lane, the low result element is picked from A and
   the high one from B, each by one bit of M (bit 2k selects within lane k
   of A, bit 2k+1 within lane k of B).  Indices 0..7 address A, 8..15
   address B in the concatenated shufflevector operand. */
#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), \
                                   0  + (((M) >> 0) & 0x1), \
                                   8  + (((M) >> 1) & 0x1), \
                                   2  + (((M) >> 2) & 0x1), \
                                   10 + (((M) >> 3) & 0x1), \
                                   4  + (((M) >> 4) & 0x1), \
                                   12 + (((M) >> 5) & 0x1), \
                                   6  + (((M) >> 6) & 0x1), \
                                   14 + (((M) >> 7) & 0x1)); })

/* Merge-masking form: elements whose bit in U is clear come from W. */
#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)(__m512d)(W)); })

/* Zero-masking form: elements whose bit in U is clear are zeroed. */
#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)_mm512_setzero_pd()); })
   7289 
   7290 #define _mm512_shuffle_ps(A, B, M) __extension__ ({ \
   7291   (__m512d)__builtin_shufflevector((__v16sf)(__m512)(A), \
   7292                                    (__v16sf)(__m512)(B), \
   7293                                    0  + (((M) >> 0) & 0x3), \
   7294                                    0  + (((M) >> 2) & 0x3), \
   7295                                    16 + (((M) >> 4) & 0x3), \
   7296                                    16 + (((M) >> 6) & 0x3), \
   7297                                    4  + (((M) >> 0) & 0x3), \
   7298                                    4  + (((M) >> 2) & 0x3), \
   7299                                    20 + (((M) >> 4) & 0x3), \
   7300                                    20 + (((M) >> 6) & 0x3), \
   7301                                    8  + (((M) >> 0) & 0x3), \
   7302                                    8  + (((M) >> 2) & 0x3), \
   7303                                    24 + (((M) >> 4) & 0x3), \
   7304                                    24 + (((M) >> 6) & 0x3), \
   7305                                    12 + (((M) >> 0) & 0x3), \
   7306                                    12 + (((M) >> 2) & 0x3), \
   7307                                    28 + (((M) >> 4) & 0x3), \
   7308                                    28 + (((M) >> 6) & 0x3)); })
   7309 
/* Merge-masking VSHUFPS: elements whose bit in U is clear come from W. */
#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)(__m512)(W)); })

/* Zero-masking VSHUFPS: elements whose bit in U is clear are zeroed. */
#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)_mm512_setzero_ps()); })
   7319 
/* Scalar double-precision square root (VSQRTSD) with explicit rounding.
   NOTE(review): per the builtin's operand order, the low result element
   is sqrt of __B's low element and the upper element is copied from __A;
   confirm against the VSQRTSD definition.  R must be a compile-time
   rounding-mode immediate, hence the macro forms. */
#define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)); })

/* Merge-masking form, current (MXCSR) rounding: the low result comes
   from __W when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
                 (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking form with explicit rounding immediate R. */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)); })

/* Zero-masking form, current rounding: the low result is zeroed when
   bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
                 (__v2df) __B,
                (__v2df) _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking form with explicit rounding immediate R. */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })
   7357 
/* Scalar single-precision square root (VSQRTSS); same structure as the
   _sd group above.  NOTE(review): low result = sqrt of __B's low
   element, upper three elements copied from __A — confirm against the
   VSQRTSS definition. */
#define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masking form, current (MXCSR) rounding. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
                 (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking form with explicit rounding immediate R. */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)); })

/* Zero-masking form, current rounding. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
                 (__v4sf) __B,
                (__v4sf) _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking form with explicit rounding immediate R. */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)); })
   7395 
/* Broadcast a 128-bit (x4 elements) or 256-bit (x4 elements) block to
   all lanes of a 512-bit vector.  Implemented as a shufflevector that
   repeats indices 0..3; the mask/maskz forms wrap the broadcast in a
   per-element select (merge from __O, or zero, where __M's bit is clear). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcast_f32x4(__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                         0, 1, 2, 3, 0, 1, 2, 3,
                                         0, 1, 2, 3, 0, 1, 2, 3);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                           (__v16sf)_mm512_broadcast_f32x4(__A),
                                           (__v16sf)__O);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                           (__v16sf)_mm512_broadcast_f32x4(__A),
                                           (__v16sf)_mm512_setzero_ps());
}

/* Broadcast 4 doubles (256 bits) to both halves of the result. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_broadcast_f64x4(__m256d __A)
{
  return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                            (__v8df)_mm512_broadcast_f64x4(__A),
                                            (__v8df)__O);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                            (__v8df)_mm512_broadcast_f64x4(__A),
                                            (__v8df)_mm512_setzero_pd());
}

/* Broadcast 4 x 32-bit integers to all four 128-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i32x4(__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3,
                                          0, 1, 2, 3, 0, 1, 2, 3);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                           (__v16si)_mm512_broadcast_i32x4(__A),
                                           (__v16si)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                           (__v16si)_mm512_broadcast_i32x4(__A),
                                           (__v16si)_mm512_setzero_si512());
}

/* Broadcast 4 x 64-bit integers to both halves of the result. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i64x4(__m256i __A)
{
  return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                            (__v8di)_mm512_broadcast_i64x4(__A),
                                            (__v8di)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                            (__v8di)_mm512_broadcast_i64x4(__A),
                                            (__v8di)_mm512_setzero_si512());
}
   7489 
/* Masked forms of the scalar broadcasts: replicate the low element of
   __A to all lanes (via the unmasked helper defined elsewhere in this
   file), then select per element — from __O where __M's bit is clear
   (mask form) or zero (maskz form). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512(__M,
                                              (__v8df) _mm512_broadcastsd_pd(__A),
                                              (__v8df) __O);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512(__M,
                                              (__v8df) _mm512_broadcastsd_pd(__A),
                                              (__v8df) _mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512(__M,
                                             (__v16sf) _mm512_broadcastss_ps(__A),
                                             (__v16sf) __O);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512(__M,
                                             (__v16sf) _mm512_broadcastss_ps(__A),
                                             (__v16sf) _mm512_setzero_ps());
}
   7521 
/* Signed-saturating down-conversions (VPMOVS{D,Q}{B,W,D}).
   Each quadruple follows the same pattern:
     - unmasked: passthrough is undefined, mask is all-ones;
     - _mask_:  elements with a clear bit in __M come from __O;
     - _maskz_: those elements are zeroed;
     - _storeu_: write converted elements to unaligned memory at __P,
       only where the mask bit is set.
   NOTE(review): q->b results occupy only the low 64 bits of the __m128i
   (8 elements); layout of the unused upper half depends on the builtin. */

/* 16 x i32 -> 16 x i8 with signed saturation. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtsepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) _mm_undefined_si128 (),
               (__mmask16) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) __O, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) _mm_setzero_si128 (),
               __M);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}

/* 16 x i32 -> 16 x i16 with signed saturation. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtsepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) _mm256_undefined_si256 (),
               (__mmask16) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) __O, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) _mm256_setzero_si256 (),
               __M);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
}

/* 8 x i64 -> 8 x i8 with signed saturation. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtsepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) _mm_undefined_si128 (),
               (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) __O, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) _mm_setzero_si128 (),
               __M);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}

/* 8 x i64 -> 8 x i32 with signed saturation. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtsepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) _mm256_undefined_si256 (),
               (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) __O, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) _mm256_setzero_si256 (),
               __M);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
}

/* 8 x i64 -> 8 x i16 with signed saturation. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtsepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) _mm_undefined_si128 (),
               (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) __O, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) _mm_setzero_si128 (),
               __M);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}
   7666 
/* Unsigned-saturating down-conversions (VPMOVUS{D,Q}{B,W,D}).
   Same unmasked / _mask_ / _maskz_ / _storeu_ pattern as the signed
   group above, but values are clamped to the unsigned range of the
   destination element type. */

/* 16 x u32 -> 16 x u8 with unsigned saturation. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtusepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) _mm_undefined_si128 (),
                (__mmask16) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) __O,
                __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) _mm_setzero_si128 (),
                __M);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}

/* 16 x u32 -> 16 x u16 with unsigned saturation. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtusepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) _mm256_undefined_si256 (),
                (__mmask16) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) __O,
                __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) _mm256_setzero_si256 (),
                __M);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
}

/* 8 x u64 -> 8 x u8 with unsigned saturation. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtusepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) _mm_undefined_si128 (),
                (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) __O,
                __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) _mm_setzero_si128 (),
                __M);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}

/* 8 x u64 -> 8 x u32 with unsigned saturation. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtusepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) _mm256_undefined_si256 (),
                (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) __O, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) _mm256_setzero_si256 (),
                __M);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
}

/* 8 x u64 -> 8 x u16 with unsigned saturation. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtusepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) _mm_undefined_si128 (),
                (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) __O, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) _mm_setzero_si128 (),
                __M);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
}
   7814 
   7815 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7816 _mm512_cvtepi32_epi8 (__m512i __A)
   7817 {
   7818   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
   7819               (__v16qi) _mm_undefined_si128 (),
   7820               (__mmask16) -1);
   7821 }
   7822 
   7823 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7824 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
   7825 {
   7826   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
   7827               (__v16qi) __O, __M);
   7828 }
   7829 
   7830 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7831 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
   7832 {
   7833   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
   7834               (__v16qi) _mm_setzero_si128 (),
   7835               __M);
   7836 }
   7837 
   7838 static __inline__ void __DEFAULT_FN_ATTRS
   7839 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
   7840 {
   7841   __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
   7842 }
   7843 
   7844 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7845 _mm512_cvtepi32_epi16 (__m512i __A)
   7846 {
   7847   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
   7848               (__v16hi) _mm256_undefined_si256 (),
   7849               (__mmask16) -1);
   7850 }
   7851 
   7852 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7853 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
   7854 {
   7855   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
   7856               (__v16hi) __O, __M);
   7857 }
   7858 
   7859 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7860 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
   7861 {
   7862   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
   7863               (__v16hi) _mm256_setzero_si256 (),
   7864               __M);
   7865 }
   7866 
   7867 static __inline__ void __DEFAULT_FN_ATTRS
   7868 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
   7869 {
   7870   __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
   7871 }
   7872 
   7873 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7874 _mm512_cvtepi64_epi8 (__m512i __A)
   7875 {
   7876   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
   7877               (__v16qi) _mm_undefined_si128 (),
   7878               (__mmask8) -1);
   7879 }
   7880 
   7881 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7882 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
   7883 {
   7884   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
   7885               (__v16qi) __O, __M);
   7886 }
   7887 
   7888 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7889 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
   7890 {
   7891   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
   7892               (__v16qi) _mm_setzero_si128 (),
   7893               __M);
   7894 }
   7895 
   7896 static __inline__ void __DEFAULT_FN_ATTRS
   7897 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
   7898 {
   7899   __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
   7900 }
   7901 
/* Truncate the eight 64-bit elements of __A to 32 bits each.  The all-ones
   mask makes the (undefined) passthrough operand irrelevant.  */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) _mm256_undefined_si256 (),
              (__mmask8) -1);
}

/* Truncating 64->32-bit conversion with merge masking: unselected lanes
   keep the corresponding element of __O.  */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) __O, __M);
}

/* Truncating 64->32-bit conversion with zero masking: unselected lanes
   are zeroed.  */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) _mm256_setzero_si256 (),
              __M);
}

/* Truncate the 64-bit elements of __A to 32 bits and store the elements
   selected by __M to memory at __P.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
}
   7930 
/* Truncate the eight 64-bit elements of __A to 16 bits each.  The all-ones
   mask makes the (undefined) passthrough operand irrelevant.  */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
              (__v8hi) _mm_undefined_si128 (),
              (__mmask8) -1);
}

/* Truncating 64->16-bit conversion with merge masking: unselected lanes
   keep the corresponding element of __O.  */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
              (__v8hi) __O, __M);
}

/* Truncating 64->16-bit conversion with zero masking: unselected lanes
   are zeroed.  */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
              (__v8hi) _mm_setzero_si128 (),
              __M);
}

/* Truncate the 64-bit elements of __A to 16 bits and store the elements
   selected by __M to memory at __P.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}
   7959 
/* Extract one of the four 128-bit (4 x 32-bit) lanes of A, selected by the
   low two bits of imm; implemented as a shuffle so the immediate must be a
   compile-time constant.  */
#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({            \
  (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A),             \
                                   (__v16si)_mm512_undefined_epi32(), \
                                   0 + ((imm) & 0x3) * 4,             \
                                   1 + ((imm) & 0x3) * 4,             \
                                   2 + ((imm) & 0x3) * 4,             \
                                   3 + ((imm) & 0x3) * 4); })

/* Merge-masked extract: unselected 32-bit lanes come from W.  */
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
                                (__v4si)(W)); })

/* Zero-masked extract: unselected 32-bit lanes are zeroed.  */
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
                                (__v4si)_mm_setzero_si128()); })

/* Extract the lower (imm bit 0 clear) or upper (bit 0 set) 256-bit half of
   A as four 64-bit elements.  */
#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({           \
  (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A),             \
                                   (__v8di)_mm512_undefined_epi32(), \
                                   ((imm) & 1) ? 4 : 0,              \
                                   ((imm) & 1) ? 5 : 1,              \
                                   ((imm) & 1) ? 6 : 2,              \
                                   ((imm) & 1) ? 7 : 3); })

/* Merge-masked 256-bit extract: unselected 64-bit lanes come from W.  */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
                                (__v4di)(W)); })

/* Zero-masked 256-bit extract: unselected 64-bit lanes are zeroed.  */
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
                                (__v4di)_mm256_setzero_si256()); })
   7995 
/* Insert the 256-bit vector B into the lower (imm bit 0 clear) or upper
   (bit 0 set) half of A.  B is widened to 512 bits so the shuffle can index
   its elements as 8..11.  */
#define _mm512_insertf64x4(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                 (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \
                                 ((imm) & 0x1) ?  0 :  8, \
                                 ((imm) & 0x1) ?  1 :  9, \
                                 ((imm) & 0x1) ?  2 : 10, \
                                 ((imm) & 0x1) ?  3 : 11, \
                                 ((imm) & 0x1) ?  8 :  4, \
                                 ((imm) & 0x1) ?  9 :  5, \
                                 ((imm) & 0x1) ? 10 :  6, \
                                 ((imm) & 0x1) ? 11 :  7); })

/* Merge-masked insert: unselected 64-bit lanes come from W.  */
#define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)(W)); })

/* Zero-masked insert: unselected 64-bit lanes are zeroed.  */
#define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)_mm512_setzero_pd()); })

/* Integer counterpart of _mm512_insertf64x4: insert 256-bit B into the half
   of A selected by imm bit 0.  */
#define _mm512_inserti64x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                 (__v8di)_mm512_castsi256_si512((__m256i)(B)), \
                                 ((imm) & 0x1) ?  0 :  8, \
                                 ((imm) & 0x1) ?  1 :  9, \
                                 ((imm) & 0x1) ?  2 : 10, \
                                 ((imm) & 0x1) ?  3 : 11, \
                                 ((imm) & 0x1) ?  8 :  4, \
                                 ((imm) & 0x1) ?  9 :  5, \
                                 ((imm) & 0x1) ? 10 :  6, \
                                 ((imm) & 0x1) ? 11 :  7); })

/* Merge-masked integer insert: unselected 64-bit lanes come from W.  */
#define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)(W)); })

/* Zero-masked integer insert: unselected 64-bit lanes are zeroed.  */
#define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512()); })
   8039 
/* Insert the 128-bit vector B into the 128-bit lane of A selected by the low
   two bits of imm.  B is widened to 512 bits so its four elements can be
   indexed as 16..19 in the shuffle; each lane of the result takes B's
   elements only when its lane number equals (imm & 0x3).  */
#define _mm512_insertf32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)_mm512_castps128_ps512((__m128)(B)),\
                                  (((imm) & 0x3) == 0) ? 16 :  0, \
                                  (((imm) & 0x3) == 0) ? 17 :  1, \
                                  (((imm) & 0x3) == 0) ? 18 :  2, \
                                  (((imm) & 0x3) == 0) ? 19 :  3, \
                                  (((imm) & 0x3) == 1) ? 16 :  4, \
                                  (((imm) & 0x3) == 1) ? 17 :  5, \
                                  (((imm) & 0x3) == 1) ? 18 :  6, \
                                  (((imm) & 0x3) == 1) ? 19 :  7, \
                                  (((imm) & 0x3) == 2) ? 16 :  8, \
                                  (((imm) & 0x3) == 2) ? 17 :  9, \
                                  (((imm) & 0x3) == 2) ? 18 : 10, \
                                  (((imm) & 0x3) == 2) ? 19 : 11, \
                                  (((imm) & 0x3) == 3) ? 16 : 12, \
                                  (((imm) & 0x3) == 3) ? 17 : 13, \
                                  (((imm) & 0x3) == 3) ? 18 : 14, \
                                  (((imm) & 0x3) == 3) ? 19 : 15); })

/* Merge-masked insert: unselected 32-bit lanes come from W.  */
#define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)(W)); })

/* Zero-masked insert: unselected 32-bit lanes are zeroed.  */
#define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps()); })

/* Integer counterpart of _mm512_insertf32x4: insert 128-bit B into the
   lane of A selected by (imm & 0x3).  */
#define _mm512_inserti32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                 (__v16si)_mm512_castsi128_si512((__m128i)(B)),\
                                 (((imm) & 0x3) == 0) ? 16 :  0, \
                                 (((imm) & 0x3) == 0) ? 17 :  1, \
                                 (((imm) & 0x3) == 0) ? 18 :  2, \
                                 (((imm) & 0x3) == 0) ? 19 :  3, \
                                 (((imm) & 0x3) == 1) ? 16 :  4, \
                                 (((imm) & 0x3) == 1) ? 17 :  5, \
                                 (((imm) & 0x3) == 1) ? 18 :  6, \
                                 (((imm) & 0x3) == 1) ? 19 :  7, \
                                 (((imm) & 0x3) == 2) ? 16 :  8, \
                                 (((imm) & 0x3) == 2) ? 17 :  9, \
                                 (((imm) & 0x3) == 2) ? 18 : 10, \
                                 (((imm) & 0x3) == 2) ? 19 : 11, \
                                 (((imm) & 0x3) == 3) ? 16 : 12, \
                                 (((imm) & 0x3) == 3) ? 17 : 13, \
                                 (((imm) & 0x3) == 3) ? 18 : 14, \
                                 (((imm) & 0x3) == 3) ? 19 : 15); })

/* Merge-masked integer insert: unselected 32-bit lanes come from W.  */
#define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)(W)); })

/* Zero-masked integer insert: unselected 32-bit lanes are zeroed.  */
#define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512()); })
   8099 
/* Extract the normalized mantissa of each double element of A with explicit
   rounding R.  The interval selector B and sign control C are packed into
   one immediate as (C << 2) | B.  Unmasked form: all-ones mask, undefined
   passthrough.  */
#define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)); })

/* Merge-masked form: unselected lanes come from W.  */
#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)); })

/* Zero-masked form: unselected lanes are zeroed.  */
#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })
   8117 
   8118 #define _mm512_getmant_pd(A, B, C) __extension__ ({ \
   8119   (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
   8120                                             (int)(((C)<<2) | (B)), \
   8121                                             (__v8df)_mm512_setzero_pd(), \
   8122                                             (__mmask8)-1, \
   8123                                             _MM_FROUND_CUR_DIRECTION); })
   8124 
/* getmant with current rounding mode, merge masking: unselected lanes come
   from W.  Immediate packs sign control C and interval B as (C << 2) | B.  */
#define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })

/* getmant with current rounding mode, zero masking: unselected lanes are
   zeroed.  */
#define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })
   8138 
/* Single-precision counterparts of the getmant_pd macros: extract the
   normalized mantissa of each float element of A; B selects the interval,
   C the sign control, packed as (C << 2) | B.  Explicit rounding R,
   unmasked.  */
#define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)); })

/* Explicit rounding, merge masking: unselected lanes come from W.  */
#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

/* Explicit rounding, zero masking: unselected lanes are zeroed.  */
#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })

/* Current rounding mode, unmasked.  */
#define _mm512_getmant_ps(A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, \
                                           _MM_FROUND_CUR_DIRECTION); })

/* Current rounding mode, merge masking.  */
#define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

/* Current rounding mode, zero masking.  */
#define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })
   8177 
/* Extract the exponent of each double element of A with explicit rounding
   R; unmasked form uses an all-ones mask and undefined passthrough.  */
#define _mm512_getexp_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked form: unselected lanes come from W.  */
#define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked form: unselected lanes are zeroed.  */
#define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
   8192 
/* Extract the exponent of each double element of __A using the current
   rounding mode; unmasked.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_getexp_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) _mm512_undefined_pd (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* getexp with merge masking: unselected lanes keep the value from __W.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* getexp with zero masking: unselected lanes are zeroed.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) _mm512_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   8219 
/* Single-precision getexp with explicit rounding R; unmasked form.  */
#define _mm512_getexp_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)); })

/* Merge-masked form: unselected lanes come from W.  */
#define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

/* Zero-masked form: unselected lanes are zeroed.  */
#define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })
   8234 
/* Extract the exponent of each float element of __A using the current
   rounding mode; unmasked.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_getexp_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) _mm512_undefined_ps (),
               (__mmask16) -1,
               _MM_FROUND_CUR_DIRECTION);
}

/* getexp with merge masking: unselected lanes keep the value from __W.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) __W,
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}

/* getexp with zero masking: unselected lanes are zeroed.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) _mm512_setzero_ps (),
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}
   8261 
/* Gather eight floats from addr using the eight 64-bit indices in index,
   each scaled by scale bytes; unmasked (all-ones mask, undefined
   passthrough).  */
#define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                       (float const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

/* Masked gather: lanes with a clear mask bit keep the value from v1_old and
   their memory is not accessed.  */
#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__({\
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                       (float const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
   8273 
   8274 #define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\
   8275   (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_ps(), \
   8276                                         (int const *)(addr), \
   8277                                         (__v8di)(__m512i)(index), \
   8278                                         (__mmask8)-1, (int)(scale)); })
   8279 
/* Masked 64-bit-indexed gather of 32-bit integers: lanes with a clear mask
   bit keep the value from v1_old and their memory is not accessed.  */
#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })
   8285 
/* Gather eight doubles from addr using the eight 64-bit indices in index,
   each scaled by scale bytes; unmasked.  */
#define _mm512_i64gather_pd(index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                       (double const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

/* Masked gather: lanes with a clear mask bit keep the value from v1_old and
   their memory is not accessed.  */
#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                       (double const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
   8297 
   8298 #define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\
   8299   (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_pd(), \
   8300                                        (long long const *)(addr), \
   8301                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
   8302                                        (int)(scale)); })
   8303 
/* Masked 64-bit-indexed gather of 64-bit integers: lanes with a clear mask
   bit keep the value from v1_old and their memory is not accessed.  */
#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                       (long long const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
   8309 
/* Gather sixteen floats from addr using sixteen 32-bit indices, each scaled
   by scale bytes; unmasked.
   NOTE(review): the index operand is bit-cast through (__v16sf)(__m512) —
   presumably to match this builtin's declared prototype in this compiler
   version; the bits are reinterpreted, not value-converted.  Confirm against
   the __builtin_ia32_gathersiv16sf prototype before changing.  */
#define _mm512_i32gather_ps(index, addr, scale) __extension__ ({\
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                       (float const *)(addr), \
                                       (__v16sf)(__m512)(index), \
                                       (__mmask16)-1, (int)(scale)); })

/* Masked 32-bit-indexed gather of floats: lanes with a clear mask bit keep
   the value from v1_old and their memory is not accessed.  */
#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                       (float const *)(addr), \
                                       (__v16sf)(__m512)(index), \
                                       (__mmask16)(mask), (int)(scale)); })

/* Gather sixteen 32-bit integers from addr using sixteen 32-bit indices,
   each scaled by scale bytes; unmasked.  */
#define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                        (int const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)-1, (int)(scale)); })

/* Masked 32-bit-indexed gather of 32-bit integers.  */
#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)(mask), (int)(scale)); })
   8333 
/* Gather eight doubles from addr using eight 32-bit indices (a 256-bit
   index vector), each scaled by scale bytes; unmasked.  */
#define _mm512_i32gather_pd(index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                       (double const *)(addr), \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

/* Masked 32-bit-indexed gather of doubles: lanes with a clear mask bit keep
   the value from v1_old and their memory is not accessed.  */
#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                       (double const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })

/* Gather eight 64-bit integers from addr using eight 32-bit indices, each
   scaled by scale bytes; unmasked.  */
#define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (long long const *)(addr), \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

/* Masked 32-bit-indexed gather of 64-bit integers.  */
#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                       (long long const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
   8357 
/* Scatter the eight floats of v1 to addr at the eight 64-bit indices in
   index, each scaled by scale bytes; unmasked (all lanes stored).  */
#define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale)); })

/* Masked variant: only lanes with a set mask bit are stored.  */
#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale)); })

/* Scatter eight 32-bit integers via 64-bit indices; unmasked.  */
#define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale)); })

/* Masked variant: only lanes with a set mask bit are stored.  */
#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale)); })

/* Scatter eight doubles via 64-bit indices; unmasked.  */
#define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

/* Masked variant: only lanes with a set mask bit are stored.  */
#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

/* Scatter eight 64-bit integers via 64-bit indices; unmasked.  */
#define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })

/* Masked variant: only lanes with a set mask bit are stored.  */
#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })
   8397 
/* Scatter the sixteen floats of v1 to addr at the sixteen 32-bit indices in
   index, each scaled by scale bytes; unmasked (all lanes stored).  */
#define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale)); })

/* Masked variant: only lanes with a set mask bit are stored.  */
#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale)); })

/* Scatter sixteen 32-bit integers via 32-bit indices; unmasked.  */
#define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale)); })

/* Masked variant: only lanes with a set mask bit are stored.  */
#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale)); })

/* Scatter eight doubles via 32-bit indices (256-bit index vector);
   unmasked.  */
#define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

/* Masked variant: only lanes with a set mask bit are stored.  */
#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

/* Scatter eight 64-bit integers via 32-bit indices; unmasked.  */
#define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })

/* Masked variant: only lanes with a set mask bit are stored.  */
#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })
   8437 
/* Scalar fused multiply-add on lane 0 with merge masking, using the current
   rounding mode; when the mask bit is clear, lane 0 keeps the value from
   __W.  Upper lanes are handled by the builtin.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
          (__v4sf) __A,
          (__v4sf) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same operation with an explicit rounding mode R.  */
#define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        (__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R)); })
   8453 
/* Scalar fused multiply-add on lane 0 with zero masking, using the current
   rounding mode; when the mask bit is clear, lane 0 is zeroed.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8463 
   8464 #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\
   8465   (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
   8466                                          (__v4sf)(__m128)(B), \
   8467                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
   8468                                          _MM_FROUND_CUR_DIRECTION); })
   8469 
/* Scalar FMA, merge-masked into the addend: lane 0 = __U[0] ? __W*__X + __Y
   : __Y; upper lanes from __Y. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
          (__v4sf) __X,
          (__v4sf) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control form of _mm_mask3_fmadd_ss. */
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R)); })

/* Scalar FMSUB, merge-masked: lane 0 = __U[0] ? __W*__A - __B : __W.
   Implemented as fmadd with the addend negated. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
          (__v4sf) __A,
          -(__v4sf) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8495 
   8496 #define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\
   8497   (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
   8498                                         (__v4sf)(__m128)(A), \
   8499                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
   8500                                         (int)(R)); })
   8501 
/* Scalar FMSUB, zero-masked: lane 0 = __U[0] ? __A*__B - __C : 0.0f. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
          (__v4sf) __B,
          -(__v4sf) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control form of _mm_maskz_fmsub_ss. */
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })

/* Scalar FMSUB, merge-masked into the subtrahend: lane 0 = __U[0] ?
   __W*__X - __Y : __Y (dedicated vfmsub mask3 builtin). */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
          (__v4sf) __X,
          (__v4sf) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control form of _mm_mask3_fmsub_ss. */
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R)); })

/* Scalar FNMADD, merge-masked: lane 0 = __U[0] ? -(__W*__A) + __B : __W
   (multiplicand negated). */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
          -(__v4sf) __A,
          (__v4sf) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control form of _mm_mask_fnmadd_ss. */
#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        -(__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R)); })

/* Scalar FNMADD, zero-masked: lane 0 = __U[0] ? -(__A*__B) + __C : 0.0f. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control form of _mm_maskz_fnmadd_ss. */
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })

/* Scalar FNMADD, merge-masked into the addend: lane 0 = __U[0] ?
   -(__W*__X) + __Y : __Y. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
          (__v4sf) __X,
          (__v4sf) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control form of _mm_mask3_fnmadd_ss. */
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\
  (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R)); })

/* Scalar FNMSUB, merge-masked: lane 0 = __U[0] ? -(__W*__A) - __B : __W
   (both multiplicand and addend negated). */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
          -(__v4sf) __A,
          -(__v4sf) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control form of _mm_mask_fnmsub_ss. */
#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        -(__v4sf)(__m128)(A), \
                                        -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R)); })

/* Scalar FNMSUB, zero-masked: lane 0 = __U[0] ? -(__A*__B) - __C : 0.0f. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
          (__v4sf) __B,
          -(__v4sf) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8607 
   8608 #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\
   8609   (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
   8610                                          (__v4sf)(__m128)(B), \
   8611                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
   8612                                          _MM_FROUND_CUR_DIRECTION); })
   8613 
/* Scalar FNMSUB, merge-masked into the subtrahend: lane 0 = __U[0] ?
   -(__W*__X) - __Y : __Y (dedicated vfnmsub mask3 builtin). */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
 return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W,
          (__v4sf) __X,
          (__v4sf) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control form of _mm_mask3_fnmsub_ss. */
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\
  (__m128)__builtin_ia32_vfnmsubss3_mask3((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R)); })

/* Double-precision scalar FMA, merge-masked: lane 0 = __U[0] ?
   __W*__A + __B : __W; upper lane from __W. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
          (__v2df) __A,
          (__v2df) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control form of _mm_mask_fmadd_sd. */
#define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         (__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R)); })

/* Double-precision scalar FMA, zero-masked: lane 0 = __U[0] ?
   __A*__B + __C : 0.0. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8655 
   8656 #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\
   8657   (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
   8658                                           (__v2df)(__m128d)(B), \
   8659                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
   8660                                           _MM_FROUND_CUR_DIRECTION); })
   8661 
/* Double-precision scalar FMA, merge-masked into the addend: lane 0 =
   __U[0] ? __W*__X + __Y : __Y. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
          (__v2df) __X,
          (__v2df) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control form of _mm_mask3_fmadd_sd. */
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                          (int)(R)); })

/* Scalar FMSUB, merge-masked: lane 0 = __U[0] ? __W*__A - __B : __W
   (addend negated before the fmadd builtin). */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
          (__v2df) __A,
          -(__v2df) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control form of _mm_mask_fmsub_sd. */
#define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         (__v2df)(__m128d)(A), \
                                         -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R)); })

/* Scalar FMSUB, zero-masked: lane 0 = __U[0] ? __A*__B - __C : 0.0. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
          (__v2df) __B,
          -(__v2df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control form of _mm_maskz_fmsub_sd. */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), \
                                          (__mmask8)(U), (int)(R)); })

/* Scalar FMSUB, merge-masked into the subtrahend: lane 0 = __U[0] ?
   __W*__X - __Y : __Y (dedicated vfmsub mask3 builtin). */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
          (__v2df) __X,
          (__v2df) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control form of _mm_mask3_fmsub_sd. */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), \
                                          (__mmask8)(U), (int)(R)); })

/* Scalar FNMADD, merge-masked: lane 0 = __U[0] ? -(__W*__A) + __B : __W. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
          -(__v2df) __A,
          (__v2df) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control form of _mm_mask_fnmadd_sd. */
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         -(__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R)); })

/* Scalar FNMADD, zero-masked: lane 0 = __U[0] ? -(__A*__B) + __C : 0.0. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
          (__v2df) __B,
          (__v2df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control form of _mm_maskz_fnmadd_sd. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)(U), \
                                          (int)(R)); })

/* Scalar FNMADD, merge-masked into the addend: lane 0 = __U[0] ?
   -(__W*__X) + __Y : __Y. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
          (__v2df) __X,
          (__v2df) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control form of _mm_mask3_fnmadd_sd. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                          (int)(R)); })

/* Scalar FNMSUB, merge-masked: lane 0 = __U[0] ? -(__W*__A) - __B : __W. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
          -(__v2df) __A,
          -(__v2df) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control form of _mm_mask_fnmsub_sd. */
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         -(__v2df)(__m128d)(A), \
                                         -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R)); })

/* Scalar FNMSUB, zero-masked: lane 0 = __U[0] ? -(__A*__B) - __C : 0.0. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
          (__v2df) __B,
          -(__v2df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
   8799 
   8800 #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\
   8801   (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
   8802                                           (__v2df)(__m128d)(B), \
   8803                                           -(__v2df)(__m128d)(C), \
   8804                                           (__mmask8)(U), \
   8805                                           _MM_FROUND_CUR_DIRECTION); })
   8806 
/* Scalar FNMSUB, merge-masked into the subtrahend: lane 0 = __U[0] ?
   -(__W*__X) - __Y : __Y (dedicated vfnmsub mask3 builtin). */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
 return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W),
          (__v2df) __X,
          (__v2df) (__Y),
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control form of _mm_mask3_fnmsub_sd. */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\
  (__m128d)__builtin_ia32_vfnmsubsd3_mask3((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), \
                                          (__mmask8)(U), (int)(R)); })
   8822 
/* Permute doubles within each 256-bit lane of X by immediate C: result lane
   i (of each half) takes element ((C >> 2*i) & 3) of that half. Indices
   0..3 address the low half, 4..7 the high half of the shuffle sources. */
#define _mm512_permutex_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   0 + (((C) >> 0) & 0x3), \
                                   0 + (((C) >> 2) & 0x3), \
                                   0 + (((C) >> 4) & 0x3), \
                                   0 + (((C) >> 6) & 0x3), \
                                   4 + (((C) >> 0) & 0x3), \
                                   4 + (((C) >> 2) & 0x3), \
                                   4 + (((C) >> 4) & 0x3), \
                                   4 + (((C) >> 6) & 0x3)); })

/* Merge-masked _mm512_permutex_pd: unselected lanes come from W. */
#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)(__m512d)(W)); })

/* Zero-masked _mm512_permutex_pd: unselected lanes are zeroed. */
#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)_mm512_setzero_pd()); })

/* Permute 64-bit integers within each 256-bit lane of X by immediate C;
   same index scheme as _mm512_permutex_pd. */
#define _mm512_permutex_epi64(X, C) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \
                                   (__v8di)_mm512_undefined_epi32(), \
                                   0 + (((C) >> 0) & 0x3), \
                                   0 + (((C) >> 2) & 0x3), \
                                   0 + (((C) >> 4) & 0x3), \
                                   0 + (((C) >> 6) & 0x3), \
                                   4 + (((C) >> 0) & 0x3), \
                                   4 + (((C) >> 2) & 0x3), \
                                   4 + (((C) >> 4) & 0x3), \
                                   4 + (((C) >> 6) & 0x3)); })

/* Merge-masked _mm512_permutex_epi64. */
#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)(__m512i)(W)); })

/* Zero-masked _mm512_permutex_epi64. */
#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)_mm512_setzero_si512()); })
   8866 
/* Variable permute: result[i] = __Y[__X[i]] for 8 doubles; note the builtin
   takes the data vector first and the index vector second. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
                 (__v8di) __X,
                 (__v8df) _mm512_undefined_pd (),
                 (__mmask8) -1);
}

/* Merge-masked _mm512_permutexvar_pd: unselected lanes come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
                 (__v8di) __X,
                 (__v8df) __W,
                 (__mmask8) __U);
}

/* Zero-masked _mm512_permutexvar_pd. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
                 (__v8di) __X,
                 (__v8df) _mm512_setzero_pd (),
                 (__mmask8) __U);
}

/* Zero-masked variable permute of 8 64-bit integers. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
                 (__v8di) __X,
                 (__v8di) _mm512_setzero_si512 (),
                 __M);
}

/* Variable permute of 8 64-bit integers: result[i] = __Y[__X[i]]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
                 (__v8di) __X,
                 (__v8di) _mm512_undefined_epi32 (),
                 (__mmask8) -1);
}

/* Merge-masked _mm512_permutexvar_epi64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
             __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
                 (__v8di) __X,
                 (__v8di) __W,
                 __M);
}

/* Variable permute of 16 floats: result[i] = __Y[__X[i]]. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
                (__v16si) __X,
                (__v16sf) _mm512_undefined_ps (),
                (__mmask16) -1);
}

/* Merge-masked _mm512_permutexvar_ps. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
                (__v16si) __X,
                (__v16sf) __W,
                (__mmask16) __U);
}

/* Zero-masked _mm512_permutexvar_ps. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
                (__v16si) __X,
                (__v16sf) _mm512_setzero_ps (),
                (__mmask16) __U);
}

/* Zero-masked variable permute of 16 32-bit integers. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
                 (__v16si) __X,
                 (__v16si) _mm512_setzero_si512 (),
                 __M);
}

/* Variable permute of 16 32-bit integers: result[i] = __Y[__X[i]]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
                 (__v16si) __X,
                 (__v16si) _mm512_undefined_epi32 (),
                 (__mmask16) -1);
}

/* Legacy alias kept for source compatibility. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32

/* Merge-masked _mm512_permutexvar_epi32. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
             __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
                 (__v16si) __X,
                 (__v16si) __W,
                 __M);
}

/* Legacy alias kept for source compatibility. */
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
   8980 
/* Bitwise AND of two 16-bit opmask registers. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kand (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
}

/* Bitwise AND-NOT of two opmasks: (~__A) & __B. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kandn (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
}

/* Bitwise OR of two opmasks. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
}

/* KORTEST carry flag: 1 iff (__A | __B) is all ones. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestc (__mmask16 __A, __mmask16 __B)
{
  return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
}

/* KORTEST zero flag: 1 iff (__A | __B) is all zeros. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestz (__mmask16 __A, __mmask16 __B)
{
  return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
}

/* Unpack and interleave the low bytes of two opmasks (KUNPCKBW). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}

/* Bitwise XNOR of two opmasks. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxnor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
}

/* Bitwise XOR of two opmasks. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
}
   9028 
/* Non-temporal 64-byte store of __A to *__P; __P must be 64-byte aligned
   (the local typedef carries the alignment for the builtin). */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_si512 (__m512i * __P, __m512i __A)
{
  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
  __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
}

/* Non-temporal 64-byte load from *__P; __P must be 64-byte aligned. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_stream_load_si512 (void const *__P)
{
  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
  return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
}

/* Non-temporal store of 8 doubles; __P must be 64-byte aligned. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_pd (double *__P, __m512d __A)
{
  typedef __v8df __v8df_aligned __attribute__((aligned(64)));
  __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
}

/* Non-temporal store of 16 floats; __P must be 64-byte aligned. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_ps (float *__P, __m512 __A)
{
  typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
  __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
}
   9056 
/* Compress the doubles of __A selected by __U contiguously into the low
   lanes; remaining lanes are taken from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
                  (__v8df) __W,
                  (__mmask8) __U);
}

/* Zero-masked compress: remaining lanes are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
                  (__v8df)
                  _mm512_setzero_pd (),
                  (__mmask8) __U);
}

/* Compress selected 64-bit integers of __A; remaining lanes from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
                  (__v8di) __W,
                  (__mmask8) __U);
}

/* Zero-masked 64-bit integer compress. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
                  (__v8di)
                  _mm512_setzero_si512 (),
                  (__mmask8) __U);
}

/* Compress selected floats of __A; remaining lanes from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
                 (__v16sf) __W,
                 (__mmask16) __U);
}

/* Zero-masked float compress. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
                 (__v16sf)
                 _mm512_setzero_ps (),
                 (__mmask16) __U);
}

/* Compress selected 32-bit integers of __A; remaining lanes from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
                  (__v16si) __W,
                  (__mmask16) __U);
}

/* Zero-masked 32-bit integer compress. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
                  (__v16si)
                  _mm512_setzero_si512 (),
                  (__mmask16) __U);
}
   9124 
/* Compare lane 0 of X and Y by predicate P with rounding control R;
   result is a 1-bit mask in bit 0. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)-1, (int)(R)); })

/* As above, but the comparison is zero-masked by M. */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)(M), (int)(R)); })

/* Scalar float compare using the current rounding mode. */
#define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION); })

/* Masked scalar float compare using the current rounding mode. */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)(M), \
                                      _MM_FROUND_CUR_DIRECTION); })

/* Scalar double compare with explicit rounding control R. */
#define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)-1, (int)(R)); })

/* Masked scalar double compare with explicit rounding control R. */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)(M), (int)(R)); })

/* Scalar double compare using the current rounding mode. */
#define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION); })

/* Masked scalar double compare using the current rounding mode. */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)(M), \
                                      _MM_FROUND_CUR_DIRECTION); })
   9168 
/* VMOVSHDUP: duplicate the odd-indexed (high) float of each pair into both
   lanes of the pair, e.g. result[0]=result[1]=__A[1]. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_movehdup_ps (__m512 __A)
{
  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
                         1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
}
   9175 
/* Merge-masked VMOVSHDUP: elements with a clear bit in __U come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_movehdup_ps(__A),
                                             (__v16sf)__W);
}
   9183 
/* Zero-masked VMOVSHDUP: elements with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_movehdup_ps(__A),
                                             (__v16sf)_mm512_setzero_ps());
}
   9191 
/* VMOVSLDUP: duplicate the even-indexed (low) float of each pair into both
   lanes of the pair, e.g. result[0]=result[1]=__A[0]. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_moveldup_ps (__m512 __A)
{
  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
                         0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
}
   9198 
/* Merge-masked VMOVSLDUP: elements with a clear bit in __U come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_moveldup_ps(__A),
                                             (__v16sf)__W);
}
   9206 
/* Zero-masked VMOVSLDUP: elements with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_moveldup_ps(__A),
                                             (__v16sf)_mm512_setzero_ps());
}
   9214 
/* Masked scalar move: result element 0 is __B[0] if bit 0 of __U is set,
   else __W[0]; upper elements are copied from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  __m128 res = __A;
  res[0] = (__U & 1) ? __B[0] : __W[0];
  return res;
}
   9222 
/* Zero-masked scalar move: result element 0 is __B[0] if bit 0 of __U is
   set, else 0; upper elements are copied from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  __m128 res = __A;
  res[0] = (__U & 1) ? __B[0] : 0;
  return res;
}
   9230 
/* Masked scalar move (double): result element 0 is __B[0] if bit 0 of __U
   is set, else __W[0]; element 1 is copied from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  __m128d res = __A;
  res[0] = (__U & 1) ? __B[0] : __W[0];
  return res;
}
   9238 
/* Zero-masked scalar move (double): result element 0 is __B[0] if bit 0 of
   __U is set, else 0; element 1 is copied from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  __m128d res = __A;
  res[0] = (__U & 1) ? __B[0] : 0;
  return res;
}
   9246 
/* Masked store of one float: stores __A[0] to *__W only if bit 0 of __U is
   set.  Implemented via the 512-bit masked-store builtin with the mask
   clamped to bit 0; the (__v16sf *) cast of the float* destination is how
   the builtin's pointer operand is spelled — only one element is enabled,
   so only 4 bytes are written. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
{
  __builtin_ia32_storess128_mask ((__v16sf *)__W,
                (__v16sf) _mm512_castps128_ps512(__A),
                (__mmask16) __U & (__mmask16)1);
}
   9254 
/* Masked store of one double: stores __A[0] to *__W only if bit 0 of __U is
   set (mask clamped to bit 0, so at most 8 bytes are written). */
static __inline__ void __DEFAULT_FN_ATTRS
_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
{
  __builtin_ia32_storesd128_mask ((__v8df *)__W,
                (__v8df) _mm512_castpd128_pd512(__A),
                (__mmask8) __U & 1);
}
   9262 
/* Masked load of one float: element 0 comes from *__A if bit 0 of __U is
   set, else from __W[0]; elements 1..3 of the result are zero.  The first
   shuffle builds {__W[0], 0, 0, 0} as the pass-through source, the load is
   done through the 512-bit builtin with the mask clamped to bit 0, and the
   final shuffle extracts the low 128 bits. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
{
  __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
                                                (__v4sf) {0.0, 0.0, 0.0, 0.0},
                                                0, 4, 4, 4);

  return (__m128) __builtin_shufflevector(
                           __builtin_ia32_loadss128_mask ((__v16sf *) __A,
                                      (__v16sf) _mm512_castps128_ps512(src),
                                      (__mmask16) __U & 1),
                           _mm512_undefined_ps(), 0, 1, 2, 3);
}
   9276 
/* Zero-masked load of one float: element 0 comes from *__A if bit 0 of __U
   is set, else 0; elements 1..3 are zero.  Low 128 bits of the 512-bit
   masked load are extracted by the trailing shuffle. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_load_ss (__mmask8 __U, const float* __A)
{
  return (__m128) __builtin_shufflevector(
                           __builtin_ia32_loadss128_mask ((__v16sf *) __A,
                                      (__v16sf) _mm512_setzero_ps(),
                                      (__mmask16) __U & 1),
                           _mm512_undefined_ps(), 0, 1, 2, 3);
}
   9286 
/* Masked load of one double: element 0 comes from *__A if bit 0 of __U is
   set, else from __W[0]; element 1 of the result is zero.  Mirrors
   _mm_mask_load_ss, using a {__W[0], 0} pass-through vector. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
{
  __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
                                                 (__v2df) {0.0, 0.0}, 0, 2);

  return (__m128d) __builtin_shufflevector(
                            __builtin_ia32_loadsd128_mask ((__v8df *) __A,
                                      (__v8df) _mm512_castpd128_pd512(src),
                                      (__mmask8) __U & 1),
                            _mm512_undefined_pd(), 0, 1);
}
   9299 
/* Zero-masked load of one double: element 0 comes from *__A if bit 0 of __U
   is set, else 0; element 1 is zero. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_load_sd (__mmask8 __U, const double* __A)
{
  return (__m128d) __builtin_shufflevector(
                            __builtin_ia32_loadsd128_mask ((__v8df *) __A,
                                      (__v8df) _mm512_setzero_pd(),
                                      (__mmask8) __U & 1),
                            _mm512_undefined_pd(), 0, 1);
}
   9309 
/* VPSHUFD: shuffle 32-bit elements independently within each 128-bit lane
   of A according to the four 2-bit fields of immediate I (the same field
   selects the same position in every lane).  The mask/maskz variants apply
   merge- or zero-masking via a vector select. */
#define _mm512_shuffle_epi32(A, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                   (__v16si)_mm512_undefined_epi32(), \
                                   0  + (((I) >> 0) & 0x3), \
                                   0  + (((I) >> 2) & 0x3), \
                                   0  + (((I) >> 4) & 0x3), \
                                   0  + (((I) >> 6) & 0x3), \
                                   4  + (((I) >> 0) & 0x3), \
                                   4  + (((I) >> 2) & 0x3), \
                                   4  + (((I) >> 4) & 0x3), \
                                   4  + (((I) >> 6) & 0x3), \
                                   8  + (((I) >> 0) & 0x3), \
                                   8  + (((I) >> 2) & 0x3), \
                                   8  + (((I) >> 4) & 0x3), \
                                   8  + (((I) >> 6) & 0x3), \
                                   12 + (((I) >> 0) & 0x3), \
                                   12 + (((I) >> 2) & 0x3), \
                                   12 + (((I) >> 4) & 0x3), \
                                   12 + (((I) >> 6) & 0x3)); })

#define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)(__m512i)(W)); })

#define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)_mm512_setzero_si512()); })
   9339 
/* Merge-masked VEXPANDPD: contiguous low elements of __A are scattered to
   the positions whose mask bits are set; other elements come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
                (__v8df) __W,
                (__mmask8) __U);
}
   9347 
/* Zero-masked VEXPANDPD: unselected elements are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
                (__v8df) _mm512_setzero_pd (),
                (__mmask8) __U);
}
   9355 
/* Merge-masked VPEXPANDQ: contiguous low 64-bit elements of __A are
   scattered to positions whose mask bits are set; others come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
                (__v8di) __W,
                (__mmask8) __U);
}
   9363 
   9364 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9365 _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
   9366 {
   9367   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
   9368                 (__v8di) _mm512_setzero_pd (),
   9369                 (__mmask8) __U);
   9370 }
   9371 
/* Merge-masked VEXPANDPD from unaligned memory: loads as many contiguous
   doubles from __P as there are set mask bits and scatters them to the
   selected positions; other elements come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
              (__v8df) __W,
              (__mmask8) __U);
}
   9379 
/* Zero-masked VEXPANDPD from unaligned memory: unselected elements zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
              (__v8df) _mm512_setzero_pd(),
              (__mmask8) __U);
}
   9387 
/* Merge-masked VPEXPANDQ from unaligned memory: loads contiguous 64-bit
   values from __P into the positions selected by __U; others from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
              (__v8di) __W,
              (__mmask8) __U);
}
   9395 
   9396 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9397 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
   9398 {
   9399   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
   9400               (__v8di) _mm512_setzero_pd(),
   9401               (__mmask8) __U);
   9402 }
   9403 
/* Merge-masked VEXPANDPS from unaligned memory: loads contiguous floats
   from __P into the positions selected by __U; others from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
                   (__v16sf) __W,
                   (__mmask16) __U);
}
   9411 
/* Zero-masked VEXPANDPS from unaligned memory: unselected elements zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
                   (__v16sf) _mm512_setzero_ps(),
                   (__mmask16) __U);
}
   9419 
/* Merge-masked VPEXPANDD from unaligned memory: loads contiguous 32-bit
   values from __P into the positions selected by __U; others from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
              (__v16si) __W,
              (__mmask16) __U);
}
   9427 
   9428 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9429 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
   9430 {
   9431   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
   9432               (__v16si) _mm512_setzero_ps(),
   9433               (__mmask16) __U);
   9434 }
   9435 
/* Merge-masked VEXPANDPS: contiguous low floats of __A are scattered to
   the positions whose mask bits are set; other elements come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
               (__v16sf) __W,
               (__mmask16) __U);
}
   9443 
/* Zero-masked VEXPANDPS: unselected elements are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
               (__v16sf) _mm512_setzero_ps(),
               (__mmask16) __U);
}
   9451 
/* Merge-masked VPEXPANDD: contiguous low 32-bit elements of __A are
   scattered to positions whose mask bits are set; others come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
                (__v16si) __W,
                (__mmask16) __U);
}
   9459 
   9460 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9461 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
   9462 {
   9463   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
   9464                 (__v16si) _mm512_setzero_ps(),
   9465                 (__mmask16) __U);
   9466 }
   9467 
/* VCVTPS2PD with explicit SAE control R: widen 8 floats to 8 doubles, with
   unmasked, merge-masked and zero-masked variants. */
#define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
   9482 
/* VCVTPS2PD: widen 8 floats to 8 doubles (current rounding direction). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtps_pd (__m256 __A)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
                (__v8df)
                _mm512_undefined_pd (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}
   9492 
/* Merge-masked VCVTPS2PD: unselected result elements come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
                (__v8df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   9501 
/* Zero-masked VCVTPS2PD: unselected result elements are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
                (__v8df)
                _mm512_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
   9511 
   9512 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9513 _mm512_cvtpslo_pd (__m512 __A)
   9514 {
   9515   return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
   9516 }
   9517 
   9518 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9519 _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
   9520 {
   9521   return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
   9522 }
   9523 
/* Masked move: per-element select of __A (mask bit set) or __W (clear). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
              (__v8df) __A,
              (__v8df) __W);
}
   9531 
/* Zero-masked move: per-element select of __A (mask bit set) or zero. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
              (__v8df) __A,
              (__v8df) _mm512_setzero_pd ());
}
   9539 
/* Masked move: per-element select of __A (mask bit set) or __W (clear). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
             (__v16sf) __A,
             (__v16sf) __W);
}
   9547 
/* Zero-masked move: per-element select of __A (mask bit set) or zero. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
             (__v16sf) __A,
             (__v16sf) _mm512_setzero_ps ());
}
   9555 
/* VCOMPRESSPD to unaligned memory: store the elements of __A selected by
   __U contiguously at __P (only that many doubles are written). */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
            (__mmask8) __U);
}
   9562 
/* VPCOMPRESSQ to unaligned memory: store the 64-bit elements of __A
   selected by __U contiguously at __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
            (__mmask8) __U);
}
   9569 
/* VCOMPRESSPS to unaligned memory: store the floats of __A selected by __U
   contiguously at __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
            (__mmask16) __U);
}
   9576 
/* VPCOMPRESSD to unaligned memory: store the 32-bit elements of __A
   selected by __U contiguously at __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
            (__mmask16) __U);
}
   9583 
/* VCVTSD2SS with explicit rounding R: convert the low double of B to a
   float in element 0; upper elements come from A.  Masked variants select
   element 0 from W (merge) or zero (maskz) when bit 0 of U is clear. */
#define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_undefined_ps(), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), (int)(R)); })
   9601 
/* Merge-masked VCVTSD2SS (current rounding): element 0 is the converted
   low double of __B if bit 0 of __U is set, else __W[0]; upper from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
{
  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
                                             (__v2df)(__B),
                                             (__v4sf)(__W),
                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}
   9610 
/* Zero-masked VCVTSD2SS (current rounding): element 0 is the converted low
   double of __B if bit 0 of __U is set, else 0; upper from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
{
  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
                                             (__v2df)(__B),
                                             (__v4sf)_mm_setzero_ps(),
                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}
   9619 
/* Intel-naming aliases (_i32/_i64 spellings) for the SSE scalar
   int<->float conversion intrinsics; 64-bit forms only exist on x86-64. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif
   9630 
/* VCVTSI2SD/VCVTSI2SS with explicit rounding R: convert signed integer B
   into element 0 of A's vector; _roundi* and _roundsi* spellings are
   synonyms.  64-bit integer forms require x86-64. */
#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R)); })

#define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R)); })
#endif

#define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })

#define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })

#ifdef __x86_64__
#define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R)); })

#define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R)); })
#endif
   9656 
/* VCVTSS2SD with explicit SAE control R: convert the low float of B to a
   double in element 0; element 1 comes from A.  Masked variants select
   element 0 from W (merge) or zero (maskz) when bit 0 of U is clear. */
#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_undefined_pd(), \
                                              (__mmask8)-1, (int)(R)); })

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), (int)(R)); })

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(U), (int)(R)); })
   9674 
/* Merge-masked VCVTSS2SD (current direction): element 0 is the converted
   low float of __B if bit 0 of __U is set, else __W[0]; element 1 from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
{
  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
                                              (__v4sf)(__B),
                                              (__v2df)(__W),
                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}
   9683 
/* Zero-masked VCVTSS2SD (current direction): element 0 is the converted
   low float of __B if bit 0 of __U is set, else 0; element 1 from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
{
  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
                                              (__v4sf)(__B),
                                              (__v2df)_mm_setzero_pd(),
                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}
   9692 
/* VCVTUSI2SD: convert unsigned 32-bit __B to a double in element 0;
   element 1 comes from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtu32_sd (__m128d __A, unsigned __B)
{
  return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
}
   9698 
/* VCVTUSI2SD (64-bit source, x86-64 only): convert unsigned 64-bit B to a
   double in element 0; the macro takes an explicit rounding control R, the
   function uses the current direction. */
#ifdef __x86_64__
#define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                      (unsigned long long)(B), (int)(R)); })

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
{
  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
                 _MM_FROUND_CUR_DIRECTION);
}
#endif
   9711 
/* VCVTUSI2SS (32-bit source): convert unsigned __B to a float in element 0;
   the macro takes an explicit rounding control R, the function uses the
   current direction. */
#define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                     (int)(R)); })

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtu32_ss (__m128 __A, unsigned __B)
{
  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
                _MM_FROUND_CUR_DIRECTION);
}
   9722 
/* VCVTUSI2SS (64-bit source, x86-64 only): convert unsigned 64-bit B to a
   float in element 0; macro form takes rounding control R. */
#ifdef __x86_64__
#define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                     (unsigned long long)(B), (int)(R)); })

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
{
  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
                _MM_FROUND_CUR_DIRECTION);
}
#endif
   9735 
/* Merge-masked broadcast of __A into each selected 32-bit element;
   unselected elements come from __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
{
  return (__m512i) __builtin_ia32_selectd_512(__M,
                                              (__v16si) _mm512_set1_epi32(__A),
                                              (__v16si) __O);
}
   9743 
/* Merge-masked broadcast of __A into each selected 64-bit element;
   unselected elements come from __O.  Guarded by __x86_64__ in this
   version of the header. */
#ifdef __x86_64__
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
{
  return (__m512i) __builtin_ia32_selectq_512(__M,
                                              (__v8di) _mm512_set1_epi64(__A),
                                              (__v8di) __O);
}
#endif
   9753 
/* Build a 512-bit vector from 64 bytes.  Arguments are listed high-to-low
   (__e63 first), while the initializer stores low-to-high, so __e0 becomes
   element 0 (the least-significant byte). */
static  __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
    char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
    char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
    char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
    char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
    char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
    char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
    char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
    char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
    char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
    char __e4, char __e3, char __e2, char __e1, char __e0) {

  return __extension__ (__m512i)(__v64qi)
    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
     __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
     __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
     __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
     __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
}
   9777 
/* Build a 512-bit vector from 32 shorts.  Arguments are high-to-low
   (__e31 first); __e0 becomes element 0 (least significant). */
static  __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
    short __e27, short __e26, short __e25, short __e24, short __e23,
    short __e22, short __e21, short __e20, short __e19, short __e18,
    short __e17, short __e16, short __e15, short __e14, short __e13,
    short __e12, short __e11, short __e10, short __e9, short __e8,
    short __e7, short __e6, short __e5, short __e4, short __e3,
    short __e2, short __e1, short __e0) {
  return __extension__ (__m512i)(__v32hi)
    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
}
   9792 
/* Build a 512-bit vector from 16 ints.  Arguments are high-to-low (__A is
   the most-significant element); __P becomes element 0. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi32 (int __A, int __B, int __C, int __D,
     int __E, int __F, int __G, int __H,
     int __I, int __J, int __K, int __L,
     int __M, int __N, int __O, int __P)
{
  return __extension__ (__m512i)(__v16si)
  { __P, __O, __N, __M, __L, __K, __J, __I,
    __H, __G, __F, __E, __D, __C, __B, __A };
}
   9803 
/* "Reverse" form: arguments listed low-to-high; forwards to
   _mm512_set_epi32 with the order flipped. */
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,           \
       e8,e9,e10,e11,e12,e13,e14,e15)          \
  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
                   (e5),(e4),(e3),(e2),(e1),(e0))
   9808 
/* Build a 512-bit vector from 8 long longs, arguments high-to-low (__H
   becomes element 0). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi64 (long long __A, long long __B, long long __C,
     long long __D, long long __E, long long __F,
     long long __G, long long __H)
{
  return __extension__ (__m512i) (__v8di)
  { __H, __G, __F, __E, __D, __C, __B, __A };
}
   9817 
/* "Reverse" form: arguments listed low-to-high. */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)           \
  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
   9820 
/* Build a 512-bit double vector, arguments high-to-low (__H is element 0). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_set_pd (double __A, double __B, double __C, double __D,
        double __E, double __F, double __G, double __H)
{
  return __extension__ (__m512d)
  { __H, __G, __F, __E, __D, __C, __B, __A };
}
   9828 
/* "Reverse" form: arguments listed low-to-high. */
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)              \
  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
   9831 
/* Build a 512-bit float vector, arguments high-to-low (__P is element 0). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_set_ps (float __A, float __B, float __C, float __D,
        float __E, float __F, float __G, float __H,
        float __I, float __J, float __K, float __L,
        float __M, float __N, float __O, float __P)
{
  return __extension__ (__m512)
  { __P, __O, __N, __M, __L, __K, __J, __I,
    __H, __G, __F, __E, __D, __C, __B, __A };
}
   9842 
/* "Reverse" form: arguments listed low-to-high. */
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
                (e4),(e3),(e2),(e1),(e0))
   9846 
   9847 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9848 _mm512_abs_ps(__m512 __A)
   9849 {
   9850   return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
   9851 }
   9852 
   9853 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9854 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
   9855 {
   9856   return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
   9857 }
   9858 
   9859 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9860 _mm512_abs_pd(__m512d __A)
   9861 {
   9862   return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
   9863 }
   9864 
   9865 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9866 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
   9867 {
   9868   return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
   9869 }
   9870 
   9871 // Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
   9872 // outputs. This class of vector operation forms the basis of many scientific
// computations. In vector-reduction arithmetic, the evaluation is
   9874 // independent of the order of the input elements of V.
   9875 
   9876 // Used bisection method. At each step, we partition the vector with previous
   9877 // step in half, and the operation is performed on its two halves.
   9878 // This takes log2(n) steps where n is the number of elements in the vector.
   9879 
   9880 // Vec512 - Vector with size 512.
   9881 // Operator - Can be one of following: +,*,&,|
   9882 // T2  - Can get 'i' for int and 'f' for float.
   9883 // T1 - Can get 'i' for int and 'd' for double.
   9884 
// Reduces eight 64-bit elements by repeated halving: 512 -> 256 -> 128 bits,
// then combines the two remaining scalars.  Note the final "return Vec128[0];"
// executes inside a GNU statement expression, so it returns from the intrinsic
// function that expands this macro — the wrappers below therefore need no
// return statement of their own.
#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1)         \
  __extension__({                                                      \
    __m256##T1 Vec256 = __builtin_shufflevector(                       \
                            (__v8d##T2)Vec512,                         \
                            (__v8d##T2)Vec512,                         \
                            0, 1, 2, 3)                                \
                        Operator                                       \
                        __builtin_shufflevector(                       \
                            (__v8d##T2)Vec512,                         \
                            (__v8d##T2)Vec512,                         \
                            4, 5, 6, 7);                               \
    __m128##T1 Vec128 = __builtin_shufflevector(                       \
                            (__v4d##T2)Vec256,                         \
                            (__v4d##T2)Vec256,                         \
                            0, 1)                                      \
                        Operator                                       \
                        __builtin_shufflevector(                       \
                            (__v4d##T2)Vec256,                         \
                            (__v4d##T2)Vec256,                         \
                            2, 3);                                     \
    Vec128 = __builtin_shufflevector((__v2d##T2)Vec128,                \
                                     (__v2d##T2)Vec128, 0, -1)         \
             Operator                                                  \
             __builtin_shufflevector((__v2d##T2)Vec128,                \
                                     (__v2d##T2)Vec128, 1, -1);        \
    return Vec128[0];                                                  \
  })
   9912 
// Sum of the eight signed 64-bit elements of __W.
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, +, i, i);
}
   9916 
// Product of the eight signed 64-bit elements of __W.
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, *, i, i);
}
   9920 
// Bitwise AND of the eight 64-bit elements of __W.
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, &, i, i);
}
   9924 
// Bitwise OR of the eight 64-bit elements of __W.
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, |, i, i);
}
   9928 
// Sum of the eight double-precision elements of __W.
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) {
  _mm512_reduce_operator_64bit(__W, +, f, d);
}
   9932 
// Product of the eight double-precision elements of __W.
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) {
  _mm512_reduce_operator_64bit(__W, *, f, d);
}
   9936 
   9937 // Vec512 - Vector with size 512.
   9938 // Vec512Neutral - All vector elements set to the identity element.
   9939 // Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0}
   9940 // Operator - Can be one of following: +,*,&,|
   9941 // Mask - Intrinsic Mask
   9942 // T2  - Can get 'i' for int and 'f' for float.
   9943 // T1 - Can get 'i' for int and 'd' for packed double-precision.
// T3 - Can be pd for packed double or q for q-word.
   9945 
   9946 #define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator,     \
   9947                                           Mask, T2, T1, T3)                    \
   9948   __extension__({                                                              \
   9949     Vec512 = __builtin_ia32_select##T3##_512(                                  \
   9950                  (__mmask8)Mask,                                               \
   9951                  (__v8d##T2)Vec512,                                            \
   9952                  (__v8d##T2)Vec512Neutral);                                    \
   9953     _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1);                    \
   9954   })
   9955 
// Masked sum of 64-bit elements; masked-off lanes contribute the identity 0.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), +, __M, i, i, q);
}
   9960 
// Masked product of 64-bit elements; masked-off lanes contribute the identity 1.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(1), *, __M, i, i, q);
}
   9965 
// Masked AND of 64-bit elements; masked-off lanes contribute all-ones.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
                                    &, __M,  i, i, q);
}
   9971 
// Masked OR of 64-bit elements; masked-off lanes contribute the identity 0.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M,
                                    i, i, q);
}
   9977 
// Masked sum of doubles; masked-off lanes contribute the identity +0.0.
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M,
                                    f, d, pd);
}
   9983 
// Masked product of doubles; masked-off lanes contribute the identity 1.0.
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M,
                                    f, d, pd);
}
   9989 
   9990 // Vec512 - Vector with size 512.
   9991 // Operator - Can be one of following: +,*,&,|
   9992 // T2 - Can get 'i' for int and ' ' for packed single.
   9993 // T1 - Can get 'i' for int and 'f' for float.
   9994 
// Reduces sixteen 32-bit elements by repeated halving: 512 -> 256 -> 128 ->
// 64 -> 32 bits.  The -1 shuffle indices mark don't-care lanes.  As with the
// 64-bit variant, the trailing "return Vec128[0];" returns from the intrinsic
// function that expands this macro.
#define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \
    __m256##T1 Vec256 =                                                        \
            (__m256##T1)(__builtin_shufflevector(                              \
                                    (__v16s##T2)Vec512,                        \
                                    (__v16s##T2)Vec512,                        \
                                    0, 1, 2, 3, 4, 5, 6, 7)                    \
                                Operator                                       \
                         __builtin_shufflevector(                              \
                                    (__v16s##T2)Vec512,                        \
                                    (__v16s##T2)Vec512,                        \
                                    8, 9, 10, 11, 12, 13, 14, 15));            \
    __m128##T1 Vec128 =                                                        \
             (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v8s##T2)Vec256,                         \
                                    (__v8s##T2)Vec256,                         \
                                    0, 1, 2, 3)                                \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v8s##T2)Vec256,                         \
                                    (__v8s##T2)Vec256,                         \
                                    4, 5, 6, 7));                              \
    Vec128 = (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    0, 1, -1, -1)                              \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    2, 3, -1, -1));                            \
    Vec128 = (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    0, -1, -1, -1)                             \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    1, -1, -1, -1));                           \
    return Vec128[0];                                                          \
  })
   10036 
// Sum of the sixteen signed 32-bit elements of __W.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_add_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, +, i, i);
}
   10041 
// Product of the sixteen signed 32-bit elements of __W.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_mul_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, *, i, i);
}
   10046 
// Bitwise AND of the sixteen 32-bit elements of __W.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_and_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, &, i, i);
}
   10051 
// Bitwise OR of the sixteen 32-bit elements of __W.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_or_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, |, i, i);
}
   10056 
// Sum of the sixteen single-precision elements of __W.
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_reduce_add_ps(__m512 __W) {
  _mm512_reduce_operator_32bit(__W, +, f, );
}
   10061 
// Product of the sixteen single-precision elements of __W.
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_reduce_mul_ps(__m512 __W) {
  _mm512_reduce_operator_32bit(__W, *, f, );
}
   10066 
   10067 // Vec512 - Vector with size 512.
   10068 // Vec512Neutral - All vector elements set to the identity element.
   10069 // Identity element: {+,0},{*,1},{&,0xFFFFFFFF},{|,0}
   10070 // Operator - Can be one of following: +,*,&,|
   10071 // Mask - Intrinsic Mask
   10072 // T2  - Can get 'i' for int and 'f' for float.
   10073 // T1 - Can get 'i' for int and 'd' for double.
// T3 - Can be ps for packed single or d for d-word.
   10075 
// Masked reduction: lanes whose mask bit is clear are first replaced with the
// identity element so they cannot affect the result, then the unmasked
// 32-bit reduction runs.
#define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator,     \
                                          Mask, T2, T1, T3)                    \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                             (__mmask16)Mask,                                  \
                             (__v16s##T2)Vec512,                               \
                             (__v16s##T2)Vec512Neutral);                       \
    _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1);                    \
  })
   10085 
// Masked sum of 32-bit elements; masked-off lanes contribute the identity 0.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), +, __M, i, i, d);
}
   10090 
// Masked product of 32-bit elements; masked-off lanes contribute the identity 1.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(1), *, __M, i, i, d);
}
   10095 
// Masked AND of 32-bit elements; masked-off lanes contribute all-ones.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M,
                                    i, i, d);
}
   10101 
// Masked OR of 32-bit elements; masked-off lanes contribute the identity 0.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), |, __M, i, i, d);
}
   10106 
// Masked sum of floats; masked-off lanes contribute the identity +0.0f.
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(0), +, __M, f, , ps);
}
   10111 
// Masked product of floats; masked-off lanes contribute the identity 1.0f.
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps);
}
   10116 
   10117 // Used bisection method. At each step, we partition the vector with previous
   10118 // step in half, and the operation is performed on its two halves.
   10119 // This takes log2(n) steps where n is the number of elements in the vector.
   10120 // This macro uses only intrinsics from the AVX512F feature.
   10121 
   10122 // Vec512 - Vector with size of 512.
   10123 // IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
   10124 //              __mm512_max_epi64
   10125 // T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
   10126 // T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
   10127 
// Max/min reduction of eight 64-bit elements using only AVX512F intrinsics:
// the vector is repeatedly folded in half with shuffles (-1 = don't-care
// lane) and combined with the given max/min intrinsic until element 0 holds
// the result.  The "return Vec512[0];" returns from the expanding function.
#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \
        Vec512 = _mm512_##IntrinName(                                          \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 0, 1, 2, 3, -1, -1, -1, -1),  \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 4, 5, 6, 7, -1, -1, -1, -1)); \
        Vec512 = _mm512_##IntrinName(                                          \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 0, 1, -1, -1, -1, -1, -1, -1),\
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 2, 3, -1, -1, -1, -1, -1,     \
                                                 -1));                         \
        Vec512 = _mm512_##IntrinName(                                          \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                0, -1, -1, -1, -1, -1, -1, -1),\
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                1, -1, -1, -1, -1, -1, -1, -1))\
                                                ;                              \
    return Vec512[0];                                                          \
  })
   10160 
// Horizontal maximum of the eight signed 64-bit elements of __V.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_reduce_max_epi64(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i);
}
   10165 
// Horizontal maximum of the eight unsigned 64-bit elements of __V.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_reduce_max_epu64(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i);
}
   10170 
// Horizontal maximum of the eight double-precision elements of __V.
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_reduce_max_pd(__m512d __V) {
  _mm512_reduce_maxMin_64bit(__V, max_pd, d, f);
}
   10175 
   10176 static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epi64
   10177 (__m512i __V) {
   10178   _mm512_reduce_maxMin_64bit(__V, min_epi64, i, i);
   10179 }
   10180 
// Horizontal minimum of the eight unsigned 64-bit elements of __V.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_reduce_min_epu64(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i);
}
   10185 
// Horizontal minimum of the eight double-precision elements of __V.
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_reduce_min_pd(__m512d __V) {
  _mm512_reduce_maxMin_64bit(__V, min_pd, d, f);
}
   10190 
   10191 // Vec512 - Vector with size 512.
   10192 // Vec512Neutral - A 512 length vector with elements set to the identity element
   10193 // Identity element: {max_epi,0x8000000000000000}
   10194 //                   {max_epu,0x0000000000000000}
   10195 //                   {max_pd, 0xFFF0000000000000}
   10196 //                   {min_epi,0x7FFFFFFFFFFFFFFF}
   10197 //                   {min_epu,0xFFFFFFFFFFFFFFFF}
   10198 //                   {min_pd, 0x7FF0000000000000}
   10199 //
   10200 // IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
   10201 //              __mm512_max_epi64
   10202 // T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
   10203 // T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
   10204 // T3 - Can get 'q' q word and 'pd' for packed double.
   10205 //      [__builtin_ia32_select{q|pd}_512]
   10206 // Mask - Intrinsic Mask
   10207 
// Masked 64-bit max/min reduction: masked-off lanes are first replaced with
// the identity element listed above, then the unmasked reduction runs.
#define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask)                          \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                             (__mmask8)Mask,                                   \
                             (__v8d##T2)Vec512,                                \
                             (__v8d##T2)Vec512Neutral);                        \
    _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2);                    \
  })
   10217 
// Masked signed 64-bit max; identity for masked-off lanes is INT64_MIN.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x8000000000000000),
                                  max_epi64, i, i, q, __M);
}
   10223 
// Masked unsigned 64-bit max; identity for masked-off lanes is 0.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x0000000000000000),
                                  max_epu64, i, i, q, __M);
}
   10229 
// Masked double max; identity for masked-off lanes is -infinity.
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, -_mm512_set1_pd(__builtin_inf()),
                                  max_pd, d, f, pd, __M);
}
   10235 
// Masked signed 64-bit min; identity for masked-off lanes is INT64_MAX.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),
                                  min_epi64, i, i, q, __M);
}
   10241 
// Masked unsigned 64-bit min; identity for masked-off lanes is UINT64_MAX.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
                                  min_epu64, i, i, q, __M);
}
   10247 
// Masked double min; identity for masked-off lanes is +infinity.
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()),
                                  min_pd, d, f, pd, __M);
}
   10253 
   10254 // Vec512 - Vector with size 512.
   10255 // IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example:
   10256 //              __mm512_max_epi32
   10257 // T1 - Can get 'i' for int and ' ' .[__m512{i|}]
   10258 // T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}]
   10259 
// Max/min reduction of sixteen 32-bit elements: fold the vector in half four
// times (-1 = don't-care lane) with the given max/min intrinsic until element
// 0 holds the result.  "return Vec512[0];" returns from the expanding
// function.
#define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2) __extension__({ \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, 1, 2, 3, 4, 5, 6, 7,                      \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  8, 9, 10, 11, 12, 13, 14, 15,                \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, 1, 2, 3, -1, -1, -1, -1,                  \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  4, 5, 6, 7, -1, -1, -1, -1,                  \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, 1, -1, -1, -1, -1, -1, -1,                \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  2, 3, -1, -1, -1, -1, -1, -1,                \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0,  -1, -1, -1, -1, -1, -1, -1,              \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  1, -1, -1, -1, -1, -1, -1, -1,               \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    return Vec512[0];                                                          \
  })
   10307 
   10308 static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i a) {
   10309   _mm512_reduce_maxMin_32bit(a, max_epi32, i, i);
   10310 }
   10311 
   10312 static __inline__ unsigned int __DEFAULT_FN_ATTRS
   10313 _mm512_reduce_max_epu32(__m512i a) {
   10314   _mm512_reduce_maxMin_32bit(a, max_epu32, i, i);
   10315 }
   10316 
   10317 static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 a) {
   10318   _mm512_reduce_maxMin_32bit(a, max_ps, , f);
   10319 }
   10320 
   10321 static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i a) {
   10322   _mm512_reduce_maxMin_32bit(a, min_epi32, i, i);
   10323 }
   10324 
   10325 static __inline__ unsigned int __DEFAULT_FN_ATTRS
   10326 _mm512_reduce_min_epu32(__m512i a) {
   10327   _mm512_reduce_maxMin_32bit(a, min_epu32, i, i);
   10328 }
   10329 
   10330 static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 a) {
   10331   _mm512_reduce_maxMin_32bit(a, min_ps, , f);
   10332 }
   10333 
   10334 // Vec512 - Vector with size 512.
   10335 // Vec512Neutral - A 512 length vector with elements set to the identity element
   10336 // Identity element: {max_epi,0x80000000}
   10337 //                   {max_epu,0x00000000}
   10338 //                   {max_ps, 0xFF800000}
   10339 //                   {min_epi,0x7FFFFFFF}
   10340 //                   {min_epu,0xFFFFFFFF}
   10341 //                   {min_ps, 0x7F800000}
   10342 //
   10343 // IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example:
   10344 //              __mm512_max_epi32
   10345 // T1 - Can get 'i' for int and ' ' .[__m512{i|}]
   10346 // T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}]
// T3 - Can get 'd' for d-word and 'ps' for packed single.
//      [__builtin_ia32_select{d|ps}_512]
   10349 // Mask - Intrinsic Mask
   10350 
// Masked 32-bit max/min reduction: masked-off lanes are first replaced with
// the identity element listed above, then the unmasked reduction runs.
#define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask)                          \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                                        (__mmask16)Mask,                       \
                                        (__v16s##T2)Vec512,                    \
                                        (__v16s##T2)Vec512Neutral);            \
   _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2);                     \
   })
   10360 
// Masked signed 32-bit max; identity for masked-off lanes is INT32_MIN.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x80000000), max_epi32,
                                  i, i, d, __M);
}
   10366 
// Masked unsigned 32-bit max; identity for masked-off lanes is 0.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x00000000), max_epu32,
                                  i, i, d, __M);
}
   10372 
// Masked float max; identity for masked-off lanes is -infinity.
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
  _mm512_mask_reduce_maxMin_32bit(__V,-_mm512_set1_ps(__builtin_inff()), max_ps, , f,
                                  ps, __M);
}
   10378 
// Masked signed 32-bit min; identity for masked-off lanes is INT32_MAX.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x7FFFFFFF), min_epi32,
                                  i, i, d, __M);
}
   10384 
// Masked unsigned 32-bit min; identity for masked-off lanes is UINT32_MAX.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0xFFFFFFFF), min_epu32,
                                  i, i, d, __M);
}
   10390 
// Masked float min; identity for masked-off lanes is +infinity.
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()), min_ps, , f,
                                  ps, __M);
}
   10396 
   10397 #undef __DEFAULT_FN_ATTRS
   10398 
   10399 #endif // __AVX512FINTRIN_H
   10400