      1 /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 #ifndef __IMMINTRIN_H
     24 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
     25 #endif
     26 
     27 #ifndef __AVX512FINTRIN_H
     28 #define __AVX512FINTRIN_H
     29 
     30 typedef char __v64qi __attribute__((__vector_size__(64)));
     31 typedef short __v32hi __attribute__((__vector_size__(64)));
     32 typedef double __v8df __attribute__((__vector_size__(64)));
     33 typedef float __v16sf __attribute__((__vector_size__(64)));
     34 typedef long long __v8di __attribute__((__vector_size__(64)));
     35 typedef int __v16si __attribute__((__vector_size__(64)));
     36 
     37 /* Unsigned types */
     38 typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
     39 typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
     40 typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
     41 typedef unsigned int __v16su __attribute__((__vector_size__(64)));
     42 
     43 typedef float __m512 __attribute__((__vector_size__(64)));
     44 typedef double __m512d __attribute__((__vector_size__(64)));
     45 typedef long long __m512i __attribute__((__vector_size__(64)));
     46 
     47 typedef unsigned char __mmask8;
     48 typedef unsigned short __mmask16;
     49 
     50 /* Rounding mode macros.  */
     51 #define _MM_FROUND_TO_NEAREST_INT   0x00
     52 #define _MM_FROUND_TO_NEG_INF       0x01
     53 #define _MM_FROUND_TO_POS_INF       0x02
     54 #define _MM_FROUND_TO_ZERO          0x03
     55 #define _MM_FROUND_CUR_DIRECTION    0x04
     56 
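/* These macros supply the rounding-control argument of the *_round_* forms
 * of the arithmetic intrinsics (for example _mm512_add_round_pd, declared
 * later in this header).  A sketch of typical use, with a and b being
 * __m512d values; _MM_FROUND_NO_EXC comes from <smmintrin.h>:
 *
 *   __m512d r = _mm512_add_round_pd(a, b,
 *                                   _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 *
 * _MM_FROUND_CUR_DIRECTION selects whatever rounding mode MXCSR currently
 * specifies; the other values request a specific rounding mode for the one
 * operation (static rounding also suppresses exceptions).
 */
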
     57 /* Constants for integer comparison predicates */
     58 typedef enum {
     59     _MM_CMPINT_EQ,      /* Equal */
     60     _MM_CMPINT_LT,      /* Less than */
     61     _MM_CMPINT_LE,      /* Less than or Equal */
      62     _MM_CMPINT_UNUSED,  /* Not used; encodes an always-false predicate */
     63     _MM_CMPINT_NE,      /* Not Equal */
     64     _MM_CMPINT_NLT,     /* Not Less than */
     65 #define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
     66     _MM_CMPINT_NLE      /* Not Less than or Equal */
     67 #define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
     68 } _MM_CMPINT_ENUM;
     69 
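/* These predicates form the immediate argument of the integer comparison
 * intrinsics (for example _mm512_cmp_epi32_mask, declared later in this
 * header).  A sketch, with a and b being __m512i values:
 *
 *   __mmask16 m = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
 *
 * Bit i of m is set when element i of a compares less-than element i of b.
 */
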
     70 typedef enum
     71 {
     72   _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
     73   _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
     74   _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
     75   _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
     76   _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
     77   _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
     78   _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
     79   _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
     80   _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
     81   _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
     82   _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
     83   _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
     84   _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
     85   _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
     86   _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
     87   _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
     88   _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
     89   _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
     90   _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
     91   _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
     92   _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
     93   _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
     94   _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
     95   _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
     96   _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
     97   _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
     98   _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
     99   _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
    100   _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
    101   _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
    102   _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
    103   _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
    104   _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
    105   _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
    106   _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
    107   _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
    108   _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
    109   _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
    110   _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
    111   _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
    112   _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
    113   _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
    114   _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
    115   _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
    116   _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
    117   _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
    118   _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
    119   _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
    120   _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
    121   _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
    122   _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
    123   _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
    124   _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
    125   _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
    126   _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
    127   _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
    128   _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
    129   _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
    130   _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
    131   _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
    132   _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
    133   _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
    134   _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
    135   _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
    136   _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
    137   _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
    138   _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
    139   _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
    140   _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
    141   _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
    142   _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
    143   _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
    144   _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
    145   _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
    146   _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
    147   _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
    148   _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
    149   _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
    150   _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
    151   _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
    152   _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
    153   _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
    154   _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
    155   _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
    156   _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
    157   _MM_PERM_DDDD = 0xFF
    158 } _MM_PERM_ENUM;
    159 
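/* Each _MM_PERM_XXXX value encodes a four-element selection pattern, two
 * bits per destination element (A selects source element 0 ... D selects
 * source element 3, listed from the highest destination element down to the
 * lowest).  A sketch using _mm512_shuffle_epi32 (declared later in this
 * header):
 *
 *   __m512i r = _mm512_shuffle_epi32(v, _MM_PERM_AAAA);
 *
 * replicates element 0 of each 128-bit lane of v, while _MM_PERM_DCBA (0xE4)
 * is the identity pattern.
 */
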
    160 typedef enum
    161 {
    162   _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
    163   _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
    164   _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
    165   _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
    166 } _MM_MANTISSA_NORM_ENUM;
    167 
    168 typedef enum
    169 {
    170   _MM_MANT_SIGN_src,    /* sign = sign(SRC)     */
    171   _MM_MANT_SIGN_zero,   /* sign = 0             */
    172   _MM_MANT_SIGN_nan   /* DEST = NaN if sign(SRC) = 1 */
    173 } _MM_MANTISSA_SIGN_ENUM;
    174 
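/* These two enums parameterize the getmant family (for example
 * _mm512_getmant_pd, declared later in this header): the first selects the
 * interval the extracted mantissa is normalized to, the second how its sign
 * is produced.  A sketch:
 *
 *   __m512d m = _mm512_getmant_pd(x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
 *
 * yields each mantissa in [1, 2) with the sign of the corresponding source
 * element.
 */
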
    175 /* Define the default attributes for the functions in this file. */
    176 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
    177 
    178 /* Create vectors with repeated elements */
    179 
    180 static  __inline __m512i __DEFAULT_FN_ATTRS
    181 _mm512_setzero_si512(void)
    182 {
    183   return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
    184 }
    185 
    186 #define _mm512_setzero_epi32 _mm512_setzero_si512
    187 
    188 static __inline__ __m512d __DEFAULT_FN_ATTRS
    189 _mm512_undefined_pd(void)
    190 {
    191   return (__m512d)__builtin_ia32_undef512();
    192 }
    193 
    194 static __inline__ __m512 __DEFAULT_FN_ATTRS
    195 _mm512_undefined(void)
    196 {
    197   return (__m512)__builtin_ia32_undef512();
    198 }
    199 
    200 static __inline__ __m512 __DEFAULT_FN_ATTRS
    201 _mm512_undefined_ps(void)
    202 {
    203   return (__m512)__builtin_ia32_undef512();
    204 }
    205 
    206 static __inline__ __m512i __DEFAULT_FN_ATTRS
    207 _mm512_undefined_epi32(void)
    208 {
    209   return (__m512i)__builtin_ia32_undef512();
    210 }
    211 
    212 static __inline__ __m512i __DEFAULT_FN_ATTRS
    213 _mm512_broadcastd_epi32 (__m128i __A)
    214 {
    215   return (__m512i)__builtin_shufflevector((__v4si) __A,
    216                                           (__v4si)_mm_undefined_si128(),
    217                                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    218 }
    219 
    220 static __inline__ __m512i __DEFAULT_FN_ATTRS
    221 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
    222 {
    223   return (__m512i)__builtin_ia32_selectd_512(__M,
    224                                              (__v16si) _mm512_broadcastd_epi32(__A),
    225                                              (__v16si) __O);
    226 }
    227 
    228 static __inline__ __m512i __DEFAULT_FN_ATTRS
    229 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
    230 {
    231   return (__m512i)__builtin_ia32_selectd_512(__M,
    232                                              (__v16si) _mm512_broadcastd_epi32(__A),
    233                                              (__v16si) _mm512_setzero_si512());
    234 }
    235 
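/* In the masked broadcasts above, bit i of the mask controls destination
 * element i: a set bit takes the broadcast value, a clear bit keeps the
 * corresponding element of __O (_mask_ form) or produces zero (_maskz_
 * form).  A sketch:
 *
 *   __m512i r = _mm512_mask_broadcastd_epi32(old, (__mmask16)0x00FF, x);
 *
 * fills the low eight 32-bit elements with element 0 of x and leaves the
 * high eight elements of old unchanged.
 */
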
    236 static __inline__ __m512i __DEFAULT_FN_ATTRS
    237 _mm512_broadcastq_epi64 (__m128i __A)
    238 {
    239   return (__m512i)__builtin_shufflevector((__v2di) __A,
    240                                           (__v2di) _mm_undefined_si128(),
    241                                           0, 0, 0, 0, 0, 0, 0, 0);
    242 }
    243 
    244 static __inline__ __m512i __DEFAULT_FN_ATTRS
    245 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
    246 {
    247   return (__m512i)__builtin_ia32_selectq_512(__M,
    248                                              (__v8di) _mm512_broadcastq_epi64(__A),
    249                                              (__v8di) __O);
    250 
    251 }
    252 
    253 static __inline__ __m512i __DEFAULT_FN_ATTRS
    254 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
    255 {
    256   return (__m512i)__builtin_ia32_selectq_512(__M,
    257                                              (__v8di) _mm512_broadcastq_epi64(__A),
    258                                              (__v8di) _mm512_setzero_si512());
    259 }
    260 
    261 static __inline __m512i __DEFAULT_FN_ATTRS
    262 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
    263 {
    264   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
    265                  (__v16si)
    266                  _mm512_setzero_si512 (),
    267                  __M);
    268 }
    269 
    270 static __inline __m512i __DEFAULT_FN_ATTRS
    271 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
    272 {
    273 #ifdef __x86_64__
    274   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
    275                  (__v8di)
    276                  _mm512_setzero_si512 (),
    277                  __M);
    278 #else
    279   return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
    280                  (__v8di)
    281                  _mm512_setzero_si512 (),
    282                  __M);
    283 #endif
    284 }
    285 
    286 static __inline __m512 __DEFAULT_FN_ATTRS
    287 _mm512_setzero_ps(void)
    288 {
    289   return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    290                    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
    291 }
    292 
    293 #define _mm512_setzero _mm512_setzero_ps
    294 
    295 static  __inline __m512d __DEFAULT_FN_ATTRS
    296 _mm512_setzero_pd(void)
    297 {
    298   return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
    299 }
    300 
    301 static __inline __m512 __DEFAULT_FN_ATTRS
    302 _mm512_set1_ps(float __w)
    303 {
    304   return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
    305                    __w, __w, __w, __w, __w, __w, __w, __w  };
    306 }
    307 
    308 static __inline __m512d __DEFAULT_FN_ATTRS
    309 _mm512_set1_pd(double __w)
    310 {
    311   return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
    312 }
    313 
    314 static __inline __m512i __DEFAULT_FN_ATTRS
    315 _mm512_set1_epi8(char __w)
    316 {
    317   return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
    318                              __w, __w, __w, __w, __w, __w, __w, __w,
    319                              __w, __w, __w, __w, __w, __w, __w, __w,
    320                              __w, __w, __w, __w, __w, __w, __w, __w,
    321                              __w, __w, __w, __w, __w, __w, __w, __w,
    322                              __w, __w, __w, __w, __w, __w, __w, __w,
    323                              __w, __w, __w, __w, __w, __w, __w, __w,
    324                              __w, __w, __w, __w, __w, __w, __w, __w  };
    325 }
    326 
    327 static __inline __m512i __DEFAULT_FN_ATTRS
    328 _mm512_set1_epi16(short __w)
    329 {
    330   return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
    331                              __w, __w, __w, __w, __w, __w, __w, __w,
    332                              __w, __w, __w, __w, __w, __w, __w, __w,
    333                              __w, __w, __w, __w, __w, __w, __w, __w };
    334 }
    335 
    336 static __inline __m512i __DEFAULT_FN_ATTRS
    337 _mm512_set1_epi32(int __s)
    338 {
    339   return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
    340                              __s, __s, __s, __s, __s, __s, __s, __s };
    341 }
    342 
    343 static __inline __m512i __DEFAULT_FN_ATTRS
    344 _mm512_set1_epi64(long long __d)
    345 {
    346   return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
    347 }
    348 
    349 static __inline__ __m512 __DEFAULT_FN_ATTRS
    350 _mm512_broadcastss_ps(__m128 __A)
    351 {
    352   return (__m512)__builtin_shufflevector((__v4sf) __A,
    353                                          (__v4sf)_mm_undefined_ps(),
    354                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    355 }
    356 
    357 static __inline __m512i __DEFAULT_FN_ATTRS
    358 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
    359 {
    360   return  (__m512i)(__v16si)
    361    { __D, __C, __B, __A, __D, __C, __B, __A,
    362      __D, __C, __B, __A, __D, __C, __B, __A };
    363 }
    364 
    365 static __inline __m512i __DEFAULT_FN_ATTRS
    366 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
    367        long long __D)
    368 {
    369   return  (__m512i) (__v8di)
    370    { __D, __C, __B, __A, __D, __C, __B, __A };
    371 }
    372 
    373 static __inline __m512d __DEFAULT_FN_ATTRS
    374 _mm512_set4_pd (double __A, double __B, double __C, double __D)
    375 {
    376   return  (__m512d)
    377    { __D, __C, __B, __A, __D, __C, __B, __A };
    378 }
    379 
    380 static __inline __m512 __DEFAULT_FN_ATTRS
    381 _mm512_set4_ps (float __A, float __B, float __C, float __D)
    382 {
    383   return  (__m512)
    384    { __D, __C, __B, __A, __D, __C, __B, __A,
    385      __D, __C, __B, __A, __D, __C, __B, __A };
    386 }
    387 
    388 #define _mm512_setr4_epi32(e0,e1,e2,e3)               \
    389   _mm512_set4_epi32((e3),(e2),(e1),(e0))
    390 
    391 #define _mm512_setr4_epi64(e0,e1,e2,e3)               \
    392   _mm512_set4_epi64((e3),(e2),(e1),(e0))
    393 
    394 #define _mm512_setr4_pd(e0,e1,e2,e3)                \
    395   _mm512_set4_pd((e3),(e2),(e1),(e0))
    396 
    397 #define _mm512_setr4_ps(e0,e1,e2,e3)                \
    398   _mm512_set4_ps((e3),(e2),(e1),(e0))
    399 
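/* The set4 forms list their arguments from the highest element of each group
 * of four down to element 0, while the setr4 macros list them in memory (low
 * to high) order.  For example:
 *
 *   _mm512_setr4_ps(1.0f, 2.0f, 3.0f, 4.0f)
 *
 * places 1.0f in element 0 and repeats the pattern 1,2,3,4 across all
 * sixteen elements; _mm512_set4_ps with the same arguments repeats 4,3,2,1.
 */
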
    400 static __inline__ __m512d __DEFAULT_FN_ATTRS
    401 _mm512_broadcastsd_pd(__m128d __A)
    402 {
    403   return (__m512d)__builtin_shufflevector((__v2df) __A,
    404                                           (__v2df) _mm_undefined_pd(),
    405                                           0, 0, 0, 0, 0, 0, 0, 0);
    406 }
    407 
    408 /* Cast between vector types */
    409 
    410 static __inline __m512d __DEFAULT_FN_ATTRS
    411 _mm512_castpd256_pd512(__m256d __a)
    412 {
    413   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
    414 }
    415 
    416 static __inline __m512 __DEFAULT_FN_ATTRS
    417 _mm512_castps256_ps512(__m256 __a)
    418 {
    419   return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
    420                                           -1, -1, -1, -1, -1, -1, -1, -1);
    421 }
    422 
    423 static __inline __m128d __DEFAULT_FN_ATTRS
    424 _mm512_castpd512_pd128(__m512d __a)
    425 {
    426   return __builtin_shufflevector(__a, __a, 0, 1);
    427 }
    428 
    429 static __inline __m256d __DEFAULT_FN_ATTRS
    430 _mm512_castpd512_pd256 (__m512d __A)
    431 {
    432   return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
    433 }
    434 
    435 static __inline __m128 __DEFAULT_FN_ATTRS
    436 _mm512_castps512_ps128(__m512 __a)
    437 {
    438   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
    439 }
    440 
    441 static __inline __m256 __DEFAULT_FN_ATTRS
    442 _mm512_castps512_ps256 (__m512 __A)
    443 {
    444   return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
    445 }
    446 
    447 static __inline __m512 __DEFAULT_FN_ATTRS
    448 _mm512_castpd_ps (__m512d __A)
    449 {
    450   return (__m512) (__A);
    451 }
    452 
    453 static __inline __m512i __DEFAULT_FN_ATTRS
    454 _mm512_castpd_si512 (__m512d __A)
    455 {
    456   return (__m512i) (__A);
    457 }
    458 
    459 static __inline__ __m512d __DEFAULT_FN_ATTRS
    460 _mm512_castpd128_pd512 (__m128d __A)
    461 {
    462   return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
    463 }
    464 
    465 static __inline __m512d __DEFAULT_FN_ATTRS
    466 _mm512_castps_pd (__m512 __A)
    467 {
    468   return (__m512d) (__A);
    469 }
    470 
    471 static __inline __m512i __DEFAULT_FN_ATTRS
    472 _mm512_castps_si512 (__m512 __A)
    473 {
    474   return (__m512i) (__A);
    475 }
    476 
    477 static __inline__ __m512 __DEFAULT_FN_ATTRS
    478 _mm512_castps128_ps512 (__m128 __A)
    479 {
    480     return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
    481 }
    482 
    483 static __inline__ __m512i __DEFAULT_FN_ATTRS
    484 _mm512_castsi128_si512 (__m128i __A)
    485 {
    486    return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
    487 }
    488 
    489 static __inline__ __m512i __DEFAULT_FN_ATTRS
    490 _mm512_castsi256_si512 (__m256i __A)
    491 {
    492    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
    493 }
    494 
    495 static __inline __m512 __DEFAULT_FN_ATTRS
    496 _mm512_castsi512_ps (__m512i __A)
    497 {
    498   return (__m512) (__A);
    499 }
    500 
    501 static __inline __m512d __DEFAULT_FN_ATTRS
    502 _mm512_castsi512_pd (__m512i __A)
    503 {
    504   return (__m512d) (__A);
    505 }
    506 
    507 static __inline __m128i __DEFAULT_FN_ATTRS
    508 _mm512_castsi512_si128 (__m512i __A)
    509 {
    510   return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
    511 }
    512 
    513 static __inline __m256i __DEFAULT_FN_ATTRS
    514 _mm512_castsi512_si256 (__m512i __A)
    515 {
    516   return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
    517 }
    518 
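/* These casts only reinterpret a register and typically generate no
 * instructions.  When widening (for example _mm512_castpd256_pd512) the
 * upper elements of the 512-bit result are undefined and must not be relied
 * upon; when narrowing, the low elements are returned.  A sketch:
 *
 *   __m512d wide = _mm512_castpd256_pd512(v256);   // low half == v256
 *   __m256d back = _mm512_castpd512_pd256(wide);   // recovers v256
 */
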
    519 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
    520 _mm512_int2mask(int __a)
    521 {
    522   return (__mmask16)__a;
    523 }
    524 
    525 static __inline__ int __DEFAULT_FN_ATTRS
    526 _mm512_mask2int(__mmask16 __a)
    527 {
    528   return (int)__a;
    529 }
    530 
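/* _mm512_int2mask and _mm512_mask2int are thin conversions between a plain
 * int and the 16-bit mask type.  For example:
 *
 *   __mmask16 m = _mm512_int2mask(0x00FF);
 *   int bits    = _mm512_mask2int(m);       // bits == 0x00FF
 */
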
    531 /* Bitwise operators */
    532 static __inline__ __m512i __DEFAULT_FN_ATTRS
    533 _mm512_and_epi32(__m512i __a, __m512i __b)
    534 {
    535   return (__m512i)((__v16su)__a & (__v16su)__b);
    536 }
    537 
    538 static __inline__ __m512i __DEFAULT_FN_ATTRS
    539 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
    540 {
    541   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
    542                 (__v16si) _mm512_and_epi32(__a, __b),
    543                 (__v16si) __src);
    544 }
    545 
    546 static __inline__ __m512i __DEFAULT_FN_ATTRS
    547 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
    548 {
    549   return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
    550                                          __k, __a, __b);
    551 }
    552 
    553 static __inline__ __m512i __DEFAULT_FN_ATTRS
    554 _mm512_and_epi64(__m512i __a, __m512i __b)
    555 {
    556   return (__m512i)((__v8du)__a & (__v8du)__b);
    557 }
    558 
    559 static __inline__ __m512i __DEFAULT_FN_ATTRS
    560 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
    561 {
    562     return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
    563                 (__v8di) _mm512_and_epi64(__a, __b),
    564                 (__v8di) __src);
    565 }
    566 
    567 static __inline__ __m512i __DEFAULT_FN_ATTRS
    568 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
    569 {
    570   return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
    571                                          __k, __a, __b);
    572 }
    573 
    574 static __inline__ __m512i __DEFAULT_FN_ATTRS
    575 _mm512_andnot_si512 (__m512i __A, __m512i __B)
    576 {
    577   return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
    578 }
    579 
    580 static __inline__ __m512i __DEFAULT_FN_ATTRS
    581 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
    582 {
    583   return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
    584 }
    585 
    586 static __inline__ __m512i __DEFAULT_FN_ATTRS
    587 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
    588 {
    589   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
    590                                          (__v16si)_mm512_andnot_epi32(__A, __B),
    591                                          (__v16si)__W);
    592 }
    593 
    594 static __inline__ __m512i __DEFAULT_FN_ATTRS
    595 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
    596 {
    597   return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
    598                                            __U, __A, __B);
    599 }
    600 
    601 static __inline__ __m512i __DEFAULT_FN_ATTRS
    602 _mm512_andnot_epi64(__m512i __A, __m512i __B)
    603 {
    604   return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
    605 }
    606 
    607 static __inline__ __m512i __DEFAULT_FN_ATTRS
    608 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
    609 {
    610   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
    611                                           (__v8di)_mm512_andnot_epi64(__A, __B),
    612                                           (__v8di)__W);
    613 }
    614 
    615 static __inline__ __m512i __DEFAULT_FN_ATTRS
    616 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
    617 {
    618   return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
    619                                            __U, __A, __B);
    620 }
    621 
    622 static __inline__ __m512i __DEFAULT_FN_ATTRS
    623 _mm512_or_epi32(__m512i __a, __m512i __b)
    624 {
    625   return (__m512i)((__v16su)__a | (__v16su)__b);
    626 }
    627 
    628 static __inline__ __m512i __DEFAULT_FN_ATTRS
    629 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
    630 {
    631   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
    632                                              (__v16si)_mm512_or_epi32(__a, __b),
    633                                              (__v16si)__src);
    634 }
    635 
    636 static __inline__ __m512i __DEFAULT_FN_ATTRS
    637 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
    638 {
    639   return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
    640 }
    641 
    642 static __inline__ __m512i __DEFAULT_FN_ATTRS
    643 _mm512_or_epi64(__m512i __a, __m512i __b)
    644 {
    645   return (__m512i)((__v8du)__a | (__v8du)__b);
    646 }
    647 
    648 static __inline__ __m512i __DEFAULT_FN_ATTRS
    649 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
    650 {
    651   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
    652                                              (__v8di)_mm512_or_epi64(__a, __b),
    653                                              (__v8di)__src);
    654 }
    655 
    656 static __inline__ __m512i __DEFAULT_FN_ATTRS
    657 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
    658 {
    659   return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
    660 }
    661 
    662 static __inline__ __m512i __DEFAULT_FN_ATTRS
    663 _mm512_xor_epi32(__m512i __a, __m512i __b)
    664 {
    665   return (__m512i)((__v16su)__a ^ (__v16su)__b);
    666 }
    667 
    668 static __inline__ __m512i __DEFAULT_FN_ATTRS
    669 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
    670 {
    671   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
    672                                             (__v16si)_mm512_xor_epi32(__a, __b),
    673                                             (__v16si)__src);
    674 }
    675 
    676 static __inline__ __m512i __DEFAULT_FN_ATTRS
    677 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
    678 {
    679   return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
    680 }
    681 
    682 static __inline__ __m512i __DEFAULT_FN_ATTRS
    683 _mm512_xor_epi64(__m512i __a, __m512i __b)
    684 {
    685   return (__m512i)((__v8du)__a ^ (__v8du)__b);
    686 }
    687 
    688 static __inline__ __m512i __DEFAULT_FN_ATTRS
    689 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
    690 {
    691   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
    692                                              (__v8di)_mm512_xor_epi64(__a, __b),
    693                                              (__v8di)__src);
    694 }
    695 
    696 static __inline__ __m512i __DEFAULT_FN_ATTRS
    697 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
    698 {
    699   return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
    700 }
    701 
    702 static __inline__ __m512i __DEFAULT_FN_ATTRS
    703 _mm512_and_si512(__m512i __a, __m512i __b)
    704 {
    705   return (__m512i)((__v8du)__a & (__v8du)__b);
    706 }
    707 
    708 static __inline__ __m512i __DEFAULT_FN_ATTRS
    709 _mm512_or_si512(__m512i __a, __m512i __b)
    710 {
    711   return (__m512i)((__v8du)__a | (__v8du)__b);
    712 }
    713 
    714 static __inline__ __m512i __DEFAULT_FN_ATTRS
    715 _mm512_xor_si512(__m512i __a, __m512i __b)
    716 {
    717   return (__m512i)((__v8du)__a ^ (__v8du)__b);
    718 }
    719 
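/* _mm512_and_si512, _mm512_and_epi32 and _mm512_and_epi64 (and likewise the
 * or/xor forms) all compute the same 512-bit result; the epi32/epi64 names
 * exist so that the _mask_/_maskz_ variants can merge or zero at 32-bit or
 * 64-bit granularity.  For example:
 *
 *   __m512i low8 = _mm512_and_epi32(v, _mm512_set1_epi32(0xFF));
 *
 * keeps only the low byte of each 32-bit element of v.
 */
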
    720 /* Arithmetic */
    721 
    722 static __inline __m512d __DEFAULT_FN_ATTRS
    723 _mm512_add_pd(__m512d __a, __m512d __b)
    724 {
    725   return (__m512d)((__v8df)__a + (__v8df)__b);
    726 }
    727 
    728 static __inline __m512 __DEFAULT_FN_ATTRS
    729 _mm512_add_ps(__m512 __a, __m512 __b)
    730 {
    731   return (__m512)((__v16sf)__a + (__v16sf)__b);
    732 }
    733 
    734 static __inline __m512d __DEFAULT_FN_ATTRS
    735 _mm512_mul_pd(__m512d __a, __m512d __b)
    736 {
    737   return (__m512d)((__v8df)__a * (__v8df)__b);
    738 }
    739 
    740 static __inline __m512 __DEFAULT_FN_ATTRS
    741 _mm512_mul_ps(__m512 __a, __m512 __b)
    742 {
    743   return (__m512)((__v16sf)__a * (__v16sf)__b);
    744 }
    745 
    746 static __inline __m512d __DEFAULT_FN_ATTRS
    747 _mm512_sub_pd(__m512d __a, __m512d __b)
    748 {
    749   return (__m512d)((__v8df)__a - (__v8df)__b);
    750 }
    751 
    752 static __inline __m512 __DEFAULT_FN_ATTRS
    753 _mm512_sub_ps(__m512 __a, __m512 __b)
    754 {
    755   return (__m512)((__v16sf)__a - (__v16sf)__b);
    756 }
    757 
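/* These unmasked operations are expressed with plain vector operators, so
 * the compiler is free to fold them with surrounding arithmetic.  A sketch
 * combining them with the set1 helpers above:
 *
 *   __m512 y = _mm512_add_ps(_mm512_mul_ps(x, _mm512_set1_ps(2.0f)),
 *                            _mm512_set1_ps(1.0f));   // y = 2*x + 1
 */
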
    758 static __inline__ __m512i __DEFAULT_FN_ATTRS
    759 _mm512_add_epi64 (__m512i __A, __m512i __B)
    760 {
    761   return (__m512i) ((__v8du) __A + (__v8du) __B);
    762 }
    763 
    764 static __inline__ __m512i __DEFAULT_FN_ATTRS
    765 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
    766 {
    767   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
    768                                              (__v8di)_mm512_add_epi64(__A, __B),
    769                                              (__v8di)__W);
    770 }
    771 
    772 static __inline__ __m512i __DEFAULT_FN_ATTRS
    773 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
    774 {
    775   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
    776                                              (__v8di)_mm512_add_epi64(__A, __B),
    777                                              (__v8di)_mm512_setzero_si512());
    778 }
    779 
    780 static __inline__ __m512i __DEFAULT_FN_ATTRS
    781 _mm512_sub_epi64 (__m512i __A, __m512i __B)
    782 {
    783   return (__m512i) ((__v8du) __A - (__v8du) __B);
    784 }
    785 
    786 static __inline__ __m512i __DEFAULT_FN_ATTRS
    787 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
    788 {
    789   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
    790                                              (__v8di)_mm512_sub_epi64(__A, __B),
    791                                              (__v8di)__W);
    792 }
    793 
    794 static __inline__ __m512i __DEFAULT_FN_ATTRS
    795 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
    796 {
    797   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
    798                                              (__v8di)_mm512_sub_epi64(__A, __B),
    799                                              (__v8di)_mm512_setzero_si512());
    800 }
    801 
    802 static __inline__ __m512i __DEFAULT_FN_ATTRS
    803 _mm512_add_epi32 (__m512i __A, __m512i __B)
    804 {
    805   return (__m512i) ((__v16su) __A + (__v16su) __B);
    806 }
    807 
    808 static __inline__ __m512i __DEFAULT_FN_ATTRS
    809 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
    810 {
    811   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
    812                                              (__v16si)_mm512_add_epi32(__A, __B),
    813                                              (__v16si)__W);
    814 }
    815 
    816 static __inline__ __m512i __DEFAULT_FN_ATTRS
    817 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
    818 {
    819   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
    820                                              (__v16si)_mm512_add_epi32(__A, __B),
    821                                              (__v16si)_mm512_setzero_si512());
    822 }
    823 
    824 static __inline__ __m512i __DEFAULT_FN_ATTRS
    825 _mm512_sub_epi32 (__m512i __A, __m512i __B)
    826 {
    827   return (__m512i) ((__v16su) __A - (__v16su) __B);
    828 }
    829 
    830 static __inline__ __m512i __DEFAULT_FN_ATTRS
    831 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
    832 {
    833   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
    834                                              (__v16si)_mm512_sub_epi32(__A, __B),
    835                                              (__v16si)__W);
    836 }
    837 
    838 static __inline__ __m512i __DEFAULT_FN_ATTRS
    839 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
    840 {
    841   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
    842                                              (__v16si)_mm512_sub_epi32(__A, __B),
    843                                              (__v16si)_mm512_setzero_si512());
    844 }
    845 
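/* The masked forms above implement conditional arithmetic without branches:
 * elements whose mask bit is clear keep the pass-through value (__W) or
 * become zero (_maskz_).  A common accumulation idiom, with m a precomputed
 * __mmask16:
 *
 *   acc = _mm512_mask_add_epi32(acc, m, acc, x);   // add x only where m is set
 */
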
    846 #define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
    847   (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
    848                                         (__v8df)(__m512d)(B), \
    849                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
    850                                         (int)(R)); })
    851 
    852 #define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
    853   (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
    854                                         (__v8df)(__m512d)(B), \
    855                                         (__v8df)_mm512_setzero_pd(), \
    856                                         (__mmask8)(U), (int)(R)); })
    857 
    858 #define _mm512_max_round_pd(A, B, R) __extension__ ({ \
    859   (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
    860                                         (__v8df)(__m512d)(B), \
    861                                         (__v8df)_mm512_undefined_pd(), \
    862                                         (__mmask8)-1, (int)(R)); })
    863 
    864 static  __inline__ __m512d __DEFAULT_FN_ATTRS
    865 _mm512_max_pd(__m512d __A, __m512d __B)
    866 {
    867   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
    868              (__v8df) __B,
    869              (__v8df)
    870              _mm512_setzero_pd (),
    871              (__mmask8) -1,
    872              _MM_FROUND_CUR_DIRECTION);
    873 }
    874 
    875 static __inline__ __m512d __DEFAULT_FN_ATTRS
    876 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
    877 {
    878   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
    879                   (__v8df) __B,
    880                   (__v8df) __W,
    881                   (__mmask8) __U,
    882                   _MM_FROUND_CUR_DIRECTION);
    883 }
    884 
    885 static __inline__ __m512d __DEFAULT_FN_ATTRS
    886 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
    887 {
    888   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
    889                   (__v8df) __B,
    890                   (__v8df)
    891                   _mm512_setzero_pd (),
    892                   (__mmask8) __U,
    893                   _MM_FROUND_CUR_DIRECTION);
    894 }
    895 
    896 #define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
    897   (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
    898                                        (__v16sf)(__m512)(B), \
    899                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
    900                                        (int)(R)); })
    901 
    902 #define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
    903   (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
    904                                        (__v16sf)(__m512)(B), \
    905                                        (__v16sf)_mm512_setzero_ps(), \
    906                                        (__mmask16)(U), (int)(R)); })
    907 
    908 #define _mm512_max_round_ps(A, B, R) __extension__ ({ \
    909   (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
    910                                        (__v16sf)(__m512)(B), \
    911                                        (__v16sf)_mm512_undefined_ps(), \
    912                                        (__mmask16)-1, (int)(R)); })
    913 
    914 static  __inline__ __m512 __DEFAULT_FN_ATTRS
    915 _mm512_max_ps(__m512 __A, __m512 __B)
    916 {
    917   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
    918             (__v16sf) __B,
    919             (__v16sf)
    920             _mm512_setzero_ps (),
    921             (__mmask16) -1,
    922             _MM_FROUND_CUR_DIRECTION);
    923 }
    924 
    925 static __inline__ __m512 __DEFAULT_FN_ATTRS
    926 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
    927 {
    928   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
    929                  (__v16sf) __B,
    930                  (__v16sf) __W,
    931                  (__mmask16) __U,
    932                  _MM_FROUND_CUR_DIRECTION);
    933 }
    934 
    935 static __inline__ __m512 __DEFAULT_FN_ATTRS
    936 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
    937 {
    938   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
    939                  (__v16sf) __B,
    940                  (__v16sf)
    941                  _mm512_setzero_ps (),
    942                  (__mmask16) __U,
    943                  _MM_FROUND_CUR_DIRECTION);
    944 }
    945 
    946 static __inline__ __m128 __DEFAULT_FN_ATTRS
    947 _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
    948   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
    949                 (__v4sf) __B,
    950                 (__v4sf) __W,
    951                 (__mmask8) __U,
    952                 _MM_FROUND_CUR_DIRECTION);
    953 }
    954 
    955 static __inline__ __m128 __DEFAULT_FN_ATTRS
    956 _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
    957   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
    958                 (__v4sf) __B,
    959                 (__v4sf)  _mm_setzero_ps (),
    960                 (__mmask8) __U,
    961                 _MM_FROUND_CUR_DIRECTION);
    962 }
    963 
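/* In these scalar forms only element 0 participates: if bit 0 of the mask is
 * set, element 0 of the result is the maximum of the low elements of __A and
 * __B; otherwise it is element 0 of __W (or zero for the _maskz_ form).  The
 * upper elements of the result are copied from __A.
 */
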
    964 #define _mm_max_round_ss(A, B, R) __extension__ ({ \
    965   (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
    966                                           (__v4sf)(__m128)(B), \
    967                                           (__v4sf)_mm_setzero_ps(), \
    968                                           (__mmask8)-1, (int)(R)); })
    969 
    970 #define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \
    971   (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
    972                                           (__v4sf)(__m128)(B), \
    973                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
    974                                           (int)(R)); })
    975 
    976 #define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \
    977   (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
    978                                           (__v4sf)(__m128)(B), \
    979                                           (__v4sf)_mm_setzero_ps(), \
    980                                           (__mmask8)(U), (int)(R)); })
    981 
    982 static __inline__ __m128d __DEFAULT_FN_ATTRS
    983 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
    984   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
    985                 (__v2df) __B,
    986                 (__v2df) __W,
    987                 (__mmask8) __U,
    988                 _MM_FROUND_CUR_DIRECTION);
    989 }
    990 
    991 static __inline__ __m128d __DEFAULT_FN_ATTRS
    992 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
    993   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
    994                 (__v2df) __B,
    995                 (__v2df)  _mm_setzero_pd (),
    996                 (__mmask8) __U,
    997                 _MM_FROUND_CUR_DIRECTION);
    998 }
    999 
   1000 #define _mm_max_round_sd(A, B, R) __extension__ ({ \
   1001   (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
   1002                                            (__v2df)(__m128d)(B), \
   1003                                            (__v2df)_mm_setzero_pd(), \
   1004                                            (__mmask8)-1, (int)(R)); })
   1005 
   1006 #define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \
   1007   (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
   1008                                            (__v2df)(__m128d)(B), \
   1009                                            (__v2df)(__m128d)(W), \
   1010                                            (__mmask8)(U), (int)(R)); })
   1011 
   1012 #define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \
   1013   (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
   1014                                            (__v2df)(__m128d)(B), \
   1015                                            (__v2df)_mm_setzero_pd(), \
   1016                                            (__mmask8)(U), (int)(R)); })
   1017 
   1018 static __inline __m512i
   1019 __DEFAULT_FN_ATTRS
   1020 _mm512_max_epi32(__m512i __A, __m512i __B)
   1021 {
   1022   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
   1023               (__v16si) __B,
   1024               (__v16si)
   1025               _mm512_setzero_si512 (),
   1026               (__mmask16) -1);
   1027 }
   1028 
   1029 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1030 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   1031 {
   1032   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
   1033                    (__v16si) __B,
   1034                    (__v16si) __W, __M);
   1035 }
   1036 
   1037 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1038 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
   1039 {
   1040   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
   1041                    (__v16si) __B,
   1042                    (__v16si)
   1043                    _mm512_setzero_si512 (),
   1044                    __M);
   1045 }
   1046 
   1047 static __inline __m512i __DEFAULT_FN_ATTRS
   1048 _mm512_max_epu32(__m512i __A, __m512i __B)
   1049 {
   1050   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
   1051               (__v16si) __B,
   1052               (__v16si)
   1053               _mm512_setzero_si512 (),
   1054               (__mmask16) -1);
   1055 }
   1056 
   1057 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1058 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   1059 {
   1060   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
   1061                    (__v16si) __B,
   1062                    (__v16si) __W, __M);
   1063 }
   1064 
   1065 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1066 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
   1067 {
   1068   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
   1069                    (__v16si) __B,
   1070                    (__v16si)
   1071                    _mm512_setzero_si512 (),
   1072                    __M);
   1073 }
   1074 
   1075 static __inline __m512i __DEFAULT_FN_ATTRS
   1076 _mm512_max_epi64(__m512i __A, __m512i __B)
   1077 {
   1078   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
   1079               (__v8di) __B,
   1080               (__v8di)
   1081               _mm512_setzero_si512 (),
   1082               (__mmask8) -1);
   1083 }
   1084 
   1085 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1086 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
   1087 {
   1088   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
   1089                    (__v8di) __B,
   1090                    (__v8di) __W, __M);
   1091 }
   1092 
   1093 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1094 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
   1095 {
   1096   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
   1097                    (__v8di) __B,
   1098                    (__v8di)
   1099                    _mm512_setzero_si512 (),
   1100                    __M);
   1101 }
   1102 
   1103 static __inline __m512i __DEFAULT_FN_ATTRS
   1104 _mm512_max_epu64(__m512i __A, __m512i __B)
   1105 {
   1106   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
   1107               (__v8di) __B,
   1108               (__v8di)
   1109               _mm512_setzero_si512 (),
   1110               (__mmask8) -1);
   1111 }
   1112 
   1113 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1114 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
   1115 {
   1116   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
   1117                    (__v8di) __B,
   1118                    (__v8di) __W, __M);
   1119 }
   1120 
   1121 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1122 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
   1123 {
   1124   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
   1125                    (__v8di) __B,
   1126                    (__v8di)
   1127                    _mm512_setzero_si512 (),
   1128                    __M);
   1129 }
   1130 
   1131 #define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
   1132   (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
   1133                                         (__v8df)(__m512d)(B), \
   1134                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
   1135                                         (int)(R)); })
   1136 
   1137 #define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
   1138   (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
   1139                                         (__v8df)(__m512d)(B), \
   1140                                         (__v8df)_mm512_setzero_pd(), \
   1141                                         (__mmask8)(U), (int)(R)); })
   1142 
   1143 #define _mm512_min_round_pd(A, B, R) __extension__ ({ \
   1144   (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
   1145                                         (__v8df)(__m512d)(B), \
   1146                                         (__v8df)_mm512_undefined_pd(), \
   1147                                         (__mmask8)-1, (int)(R)); })
   1148 
   1149 static  __inline__ __m512d __DEFAULT_FN_ATTRS
   1150 _mm512_min_pd(__m512d __A, __m512d __B)
   1151 {
   1152   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
   1153              (__v8df) __B,
   1154              (__v8df)
   1155              _mm512_setzero_pd (),
   1156              (__mmask8) -1,
   1157              _MM_FROUND_CUR_DIRECTION);
   1158 }
   1159 
   1160 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1161 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   1162 {
   1163   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
   1164                   (__v8df) __B,
   1165                   (__v8df) __W,
   1166                   (__mmask8) __U,
   1167                   _MM_FROUND_CUR_DIRECTION);
   1168 }
   1169 
   1170 #define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
   1171   (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
   1172                                        (__v16sf)(__m512)(B), \
   1173                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   1174                                        (int)(R)); })
   1175 
   1176 #define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
   1177   (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
   1178                                        (__v16sf)(__m512)(B), \
   1179                                        (__v16sf)_mm512_setzero_ps(), \
   1180                                        (__mmask16)(U), (int)(R)); })
   1181 
   1182 #define _mm512_min_round_ps(A, B, R) __extension__ ({ \
   1183   (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
   1184                                        (__v16sf)(__m512)(B), \
   1185                                        (__v16sf)_mm512_undefined_ps(), \
   1186                                        (__mmask16)-1, (int)(R)); })
   1187 
   1188 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1189 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
   1190 {
   1191   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
   1192                   (__v8df) __B,
   1193                   (__v8df)
   1194                   _mm512_setzero_pd (),
   1195                   (__mmask8) __U,
   1196                   _MM_FROUND_CUR_DIRECTION);
   1197 }
   1198 
   1199 static  __inline__ __m512 __DEFAULT_FN_ATTRS
   1200 _mm512_min_ps(__m512 __A, __m512 __B)
   1201 {
   1202   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
   1203             (__v16sf) __B,
   1204             (__v16sf)
   1205             _mm512_setzero_ps (),
   1206             (__mmask16) -1,
   1207             _MM_FROUND_CUR_DIRECTION);
   1208 }
   1209 
   1210 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1211 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   1212 {
   1213   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
   1214                  (__v16sf) __B,
   1215                  (__v16sf) __W,
   1216                  (__mmask16) __U,
   1217                  _MM_FROUND_CUR_DIRECTION);
   1218 }
   1219 
   1220 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1221 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
   1222 {
   1223   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
   1224                  (__v16sf) __B,
   1225                  (__v16sf)
   1226                  _mm512_setzero_ps (),
   1227                  (__mmask16) __U,
   1228                  _MM_FROUND_CUR_DIRECTION);
   1229 }
   1230 
   1231 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1232 _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   1233   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
   1234                 (__v4sf) __B,
   1235                 (__v4sf) __W,
   1236                 (__mmask8) __U,
   1237                 _MM_FROUND_CUR_DIRECTION);
   1238 }
   1239 
   1240 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1241 _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   1242   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
   1243                 (__v4sf) __B,
   1244                 (__v4sf)  _mm_setzero_ps (),
   1245                 (__mmask8) __U,
   1246                 _MM_FROUND_CUR_DIRECTION);
   1247 }
   1248 
   1249 #define _mm_min_round_ss(A, B, R) __extension__ ({ \
   1250   (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
   1251                                           (__v4sf)(__m128)(B), \
   1252                                           (__v4sf)_mm_setzero_ps(), \
   1253                                           (__mmask8)-1, (int)(R)); })
   1254 
   1255 #define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \
   1256   (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
   1257                                           (__v4sf)(__m128)(B), \
   1258                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
   1259                                           (int)(R)); })
   1260 
   1261 #define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \
   1262   (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
   1263                                           (__v4sf)(__m128)(B), \
   1264                                           (__v4sf)_mm_setzero_ps(), \
   1265                                           (__mmask8)(U), (int)(R)); })
   1266 
   1267 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1268 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   1269   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
   1270                 (__v2df) __B,
   1271                 (__v2df) __W,
   1272                 (__mmask8) __U,
   1273                 _MM_FROUND_CUR_DIRECTION);
   1274 }
   1275 
   1276 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1277 _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   1278   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
   1279                 (__v2df) __B,
   1280                 (__v2df)  _mm_setzero_pd (),
   1281                 (__mmask8) __U,
   1282                 _MM_FROUND_CUR_DIRECTION);
   1283 }
   1284 
   1285 #define _mm_min_round_sd(A, B, R) __extension__ ({ \
   1286   (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
   1287                                            (__v2df)(__m128d)(B), \
   1288                                            (__v2df)_mm_setzero_pd(), \
   1289                                            (__mmask8)-1, (int)(R)); })
   1290 
   1291 #define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \
   1292   (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
   1293                                            (__v2df)(__m128d)(B), \
   1294                                            (__v2df)(__m128d)(W), \
   1295                                            (__mmask8)(U), (int)(R)); })
   1296 
   1297 #define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \
   1298   (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
   1299                                            (__v2df)(__m128d)(B), \
   1300                                            (__v2df)_mm_setzero_pd(), \
   1301                                            (__mmask8)(U), (int)(R)); })
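
/* Usage sketch (illustrative): the scalar masked forms operate only on
 * element 0; the upper elements of the result are copied from __A.  When the
 * low mask bit is clear, element 0 comes from __W (mask form) or is zeroed
 * (maskz form).
 *
 *   __m128 a = _mm_set_ss(2.0f), b = _mm_set_ss(5.0f), w = _mm_set_ss(-1.0f);
 *   __m128 r0 = _mm_mask_min_ss(w, 0x1, a, b);   // element 0 = 2.0f
 *   __m128 r1 = _mm_mask_min_ss(w, 0x0, a, b);   // element 0 = -1.0f
 */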
   1302 
   1303 static __inline __m512i
   1304 __DEFAULT_FN_ATTRS
   1305 _mm512_min_epi32(__m512i __A, __m512i __B)
   1306 {
   1307   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
   1308               (__v16si) __B,
   1309               (__v16si)
   1310               _mm512_setzero_si512 (),
   1311               (__mmask16) -1);
   1312 }
   1313 
   1314 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1315 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   1316 {
   1317   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
   1318                    (__v16si) __B,
   1319                    (__v16si) __W, __M);
   1320 }
   1321 
   1322 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1323 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
   1324 {
   1325   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
   1326                    (__v16si) __B,
   1327                    (__v16si)
   1328                    _mm512_setzero_si512 (),
   1329                    __M);
   1330 }
   1331 
   1332 static __inline __m512i __DEFAULT_FN_ATTRS
   1333 _mm512_min_epu32(__m512i __A, __m512i __B)
   1334 {
   1335   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
   1336               (__v16si) __B,
   1337               (__v16si)
   1338               _mm512_setzero_si512 (),
   1339               (__mmask16) -1);
   1340 }
   1341 
   1342 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1343 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   1344 {
   1345   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
   1346                    (__v16si) __B,
   1347                    (__v16si) __W, __M);
   1348 }
   1349 
   1350 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1351 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
   1352 {
   1353   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
   1354                    (__v16si) __B,
   1355                    (__v16si)
   1356                    _mm512_setzero_si512 (),
   1357                    __M);
   1358 }
   1359 
   1360 static __inline __m512i __DEFAULT_FN_ATTRS
   1361 _mm512_min_epi64(__m512i __A, __m512i __B)
   1362 {
   1363   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
   1364               (__v8di) __B,
   1365               (__v8di)
   1366               _mm512_setzero_si512 (),
   1367               (__mmask8) -1);
   1368 }
   1369 
   1370 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1371 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
   1372 {
   1373   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
   1374                    (__v8di) __B,
   1375                    (__v8di) __W, __M);
   1376 }
   1377 
   1378 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1379 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
   1380 {
   1381   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
   1382                    (__v8di) __B,
   1383                    (__v8di)
   1384                    _mm512_setzero_si512 (),
   1385                    __M);
   1386 }
   1387 
   1388 static __inline __m512i __DEFAULT_FN_ATTRS
   1389 _mm512_min_epu64(__m512i __A, __m512i __B)
   1390 {
   1391   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
   1392               (__v8di) __B,
   1393               (__v8di)
   1394               _mm512_setzero_si512 (),
   1395               (__mmask8) -1);
   1396 }
   1397 
   1398 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1399 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
   1400 {
   1401   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
   1402                    (__v8di) __B,
   1403                    (__v8di) __W, __M);
   1404 }
   1405 
   1406 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1407 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
   1408 {
   1409   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
   1410                    (__v8di) __B,
   1411                    (__v8di)
   1412                    _mm512_setzero_si512 (),
   1413                    __M);
   1414 }
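
/* Usage sketch (illustrative): per-element integer minimum with merge- and
 * zero-masking.  Lanes whose mask bit is clear keep the value of __W
 * (mask form) or become zero (maskz form).
 *
 *   __m512i a = _mm512_set1_epi32(3), b = _mm512_set1_epi32(7);
 *   __m512i w = _mm512_set1_epi32(-1);
 *   __m512i r = _mm512_mask_min_epi32(w, 0x00FF, a, b); // low 8 lanes 3, high 8 lanes -1
 *   __m512i z = _mm512_maskz_min_epi32(0x00FF, a, b);   // low 8 lanes 3, high 8 lanes 0
 */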
   1415 
   1416 static __inline __m512i __DEFAULT_FN_ATTRS
   1417 _mm512_mul_epi32(__m512i __X, __m512i __Y)
   1418 {
   1419   return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
   1420 }
   1421 
   1422 static __inline __m512i __DEFAULT_FN_ATTRS
   1423 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
   1424 {
   1425   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
   1426                                              (__v8di)_mm512_mul_epi32(__X, __Y),
   1427                                              (__v8di)__W);
   1428 }
   1429 
   1430 static __inline __m512i __DEFAULT_FN_ATTRS
   1431 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
   1432 {
   1433   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
   1434                                              (__v8di)_mm512_mul_epi32(__X, __Y),
   1435                                              (__v8di)_mm512_setzero_si512 ());
   1436 }
   1437 
   1438 static __inline __m512i __DEFAULT_FN_ATTRS
   1439 _mm512_mul_epu32(__m512i __X, __m512i __Y)
   1440 {
   1441   return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
   1442 }
   1443 
   1444 static __inline __m512i __DEFAULT_FN_ATTRS
   1445 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
   1446 {
   1447   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
   1448                                              (__v8di)_mm512_mul_epu32(__X, __Y),
   1449                                              (__v8di)__W);
   1450 }
   1451 
   1452 static __inline __m512i __DEFAULT_FN_ATTRS
   1453 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
   1454 {
   1455   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
   1456                                              (__v8di)_mm512_mul_epu32(__X, __Y),
   1457                                              (__v8di)_mm512_setzero_si512 ());
   1458 }
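
/* Usage sketch (illustrative): _mm512_mul_epi32/_mm512_mul_epu32 follow the
 * PMULDQ/PMULUDQ convention: the low 32-bit element of each 64-bit lane is
 * sign- or zero-extended and multiplied, giving eight 64-bit products.
 *
 *   __m512i x = _mm512_set1_epi32(-2);
 *   __m512i y = _mm512_set1_epi32(3);
 *   __m512i s = _mm512_mul_epi32(x, y);  // eight 64-bit lanes of -6
 *   __m512i u = _mm512_mul_epu32(x, y);  // eight 64-bit lanes of 3*0xFFFFFFFE
 */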
   1459 
   1460 static __inline __m512i __DEFAULT_FN_ATTRS
   1461 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
   1462 {
   1463   return (__m512i) ((__v16su) __A * (__v16su) __B);
   1464 }
   1465 
   1466 static __inline __m512i __DEFAULT_FN_ATTRS
   1467 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
   1468 {
   1469   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
   1470                                              (__v16si)_mm512_mullo_epi32(__A, __B),
   1471                                              (__v16si)_mm512_setzero_si512());
   1472 }
   1473 
   1474 static __inline __m512i __DEFAULT_FN_ATTRS
   1475 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   1476 {
   1477   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
   1478                                              (__v16si)_mm512_mullo_epi32(__A, __B),
   1479                                              (__v16si)__W);
   1480 }
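
/* Usage sketch (illustrative): _mm512_mullo_epi32 keeps only the low 32 bits
 * of each 32-bit product, so it behaves identically for signed and unsigned
 * operands (the cast through __v16su above avoids signed-overflow UB).
 *
 *   __m512i p = _mm512_mullo_epi32(_mm512_set1_epi32(0x10000),
 *                                  _mm512_set1_epi32(0x10000));
 *   // every lane is 0: the product 2^32 is truncated to 32 bits
 */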
   1481 
   1482 #define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
   1483   (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
   1484                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
   1485                                          (int)(R)); })
   1486 
   1487 #define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
   1488   (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
   1489                                          (__v8df)_mm512_setzero_pd(), \
   1490                                          (__mmask8)(U), (int)(R)); })
   1491 
   1492 #define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
   1493   (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
   1494                                          (__v8df)_mm512_undefined_pd(), \
   1495                                          (__mmask8)-1, (int)(R)); })
   1496 
   1497 static  __inline__ __m512d __DEFAULT_FN_ATTRS
   1498 _mm512_sqrt_pd(__m512d __a)
   1499 {
   1500   return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
   1501                                                 (__v8df) _mm512_setzero_pd (),
   1502                                                 (__mmask8) -1,
   1503                                                 _MM_FROUND_CUR_DIRECTION);
   1504 }
   1505 
   1506 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1507 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
   1508 {
   1509   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
   1510                    (__v8df) __W,
   1511                    (__mmask8) __U,
   1512                    _MM_FROUND_CUR_DIRECTION);
   1513 }
   1514 
   1515 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1516 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
   1517 {
   1518   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
   1519                    (__v8df)
   1520                    _mm512_setzero_pd (),
   1521                    (__mmask8) __U,
   1522                    _MM_FROUND_CUR_DIRECTION);
   1523 }
   1524 
   1525 #define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
   1526   (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
   1527                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
   1528                                         (int)(R)); })
   1529 
   1530 #define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
   1531   (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
   1532                                         (__v16sf)_mm512_setzero_ps(), \
   1533                                         (__mmask16)(U), (int)(R)); })
   1534 
   1535 #define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
   1536   (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
   1537                                         (__v16sf)_mm512_undefined_ps(), \
   1538                                         (__mmask16)-1, (int)(R)); })
   1539 
   1540 static  __inline__ __m512 __DEFAULT_FN_ATTRS
   1541 _mm512_sqrt_ps(__m512 __a)
   1542 {
   1543   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
   1544                                                (__v16sf) _mm512_setzero_ps (),
   1545                                                (__mmask16) -1,
   1546                                                _MM_FROUND_CUR_DIRECTION);
   1547 }
   1548 
   1549 static  __inline__ __m512 __DEFAULT_FN_ATTRS
   1550 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
   1551 {
   1552   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
   1553                                                (__v16sf) __W,
   1554                                                (__mmask16) __U,
   1555                                                _MM_FROUND_CUR_DIRECTION);
   1556 }
   1557 
   1558 static  __inline__ __m512 __DEFAULT_FN_ATTRS
   1559 _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
   1560 {
   1561   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
   1562                                                (__v16sf) _mm512_setzero_ps (),
   1563                                                (__mmask16) __U,
   1564                                                _MM_FROUND_CUR_DIRECTION);
   1565 }
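
/* Usage sketch (illustrative): element-wise square root with merge- or
 * zero-masking; the non-_round forms round according to MXCSR
 * (_MM_FROUND_CUR_DIRECTION).
 *
 *   __m512 v = _mm512_set1_ps(4.0f);
 *   __m512 r = _mm512_maskz_sqrt_ps(0x000F, v);  // low 4 lanes 2.0f, rest 0
 */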
   1566 
   1567 static  __inline__ __m512d __DEFAULT_FN_ATTRS
   1568 _mm512_rsqrt14_pd(__m512d __A)
   1569 {
   1570   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
   1571                  (__v8df)
   1572                  _mm512_setzero_pd (),
   1573                  (__mmask8) -1);
        }
   1574 
   1575 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1576 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
   1577 {
   1578   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
   1579                   (__v8df) __W,
   1580                   (__mmask8) __U);
   1581 }
   1582 
   1583 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1584 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
   1585 {
   1586   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
   1587                   (__v8df)
   1588                   _mm512_setzero_pd (),
   1589                   (__mmask8) __U);
   1590 }
   1591 
   1592 static  __inline__ __m512 __DEFAULT_FN_ATTRS
   1593 _mm512_rsqrt14_ps(__m512 __A)
   1594 {
   1595   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
   1596                 (__v16sf)
   1597                 _mm512_setzero_ps (),
   1598                 (__mmask16) -1);
   1599 }
   1600 
   1601 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1602 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
   1603 {
   1604   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
   1605                  (__v16sf) __W,
   1606                  (__mmask16) __U);
   1607 }
   1608 
   1609 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1610 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
   1611 {
   1612   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
   1613                  (__v16sf)
   1614                  _mm512_setzero_ps (),
   1615                  (__mmask16) __U);
   1616 }
   1617 
   1618 static  __inline__ __m128 __DEFAULT_FN_ATTRS
   1619 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
   1620 {
   1621   return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
   1622              (__v4sf) __B,
   1623              (__v4sf)
   1624              _mm_setzero_ps (),
   1625              (__mmask8) -1);
   1626 }
   1627 
   1628 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1629 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   1630 {
   1631  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
   1632           (__v4sf) __B,
   1633           (__v4sf) __W,
   1634           (__mmask8) __U);
   1635 }
   1636 
   1637 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1638 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
   1639 {
   1640  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
   1641           (__v4sf) __B,
   1642           (__v4sf) _mm_setzero_ps (),
   1643           (__mmask8) __U);
   1644 }
   1645 
   1646 static  __inline__ __m128d __DEFAULT_FN_ATTRS
   1647 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
   1648 {
   1649   return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
   1650               (__v2df) __B,
   1651               (__v2df)
   1652               _mm_setzero_pd (),
   1653               (__mmask8) -1);
   1654 }
   1655 
   1656 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1657 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   1658 {
   1659  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
   1660           (__v2df) __B,
   1661           (__v2df) __W,
   1662           (__mmask8) __U);
   1663 }
   1664 
   1665 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1666 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
   1667 {
   1668  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
   1669           (__v2df) __B,
   1670           (__v2df) _mm_setzero_pd (),
   1671           (__mmask8) __U);
   1672 }
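
/* Usage sketch (illustrative): the rsqrt14 family returns an approximation of
 * 1/sqrt(x) with a maximum relative error of 2^-14 and takes no rounding
 * argument.  A Newton-Raphson step can refine the estimate when more
 * precision is required.
 *
 *   __m512 x = _mm512_set1_ps(16.0f);
 *   __m512 e = _mm512_rsqrt14_ps(x);   // approximately 0.25f in every lane
 */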
   1673 
   1674 static  __inline__ __m512d __DEFAULT_FN_ATTRS
   1675 _mm512_rcp14_pd(__m512d __A)
   1676 {
   1677   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
   1678                (__v8df)
   1679                _mm512_setzero_pd (),
   1680                (__mmask8) -1);
   1681 }
   1682 
   1683 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1684 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
   1685 {
   1686   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
   1687                 (__v8df) __W,
   1688                 (__mmask8) __U);
   1689 }
   1690 
   1691 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1692 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
   1693 {
   1694   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
   1695                 (__v8df)
   1696                 _mm512_setzero_pd (),
   1697                 (__mmask8) __U);
   1698 }
   1699 
   1700 static  __inline__ __m512 __DEFAULT_FN_ATTRS
   1701 _mm512_rcp14_ps(__m512 __A)
   1702 {
   1703   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
   1704               (__v16sf)
   1705               _mm512_setzero_ps (),
   1706               (__mmask16) -1);
   1707 }
   1708 
   1709 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1710 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
   1711 {
   1712   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
   1713                    (__v16sf) __W,
   1714                    (__mmask16) __U);
   1715 }
   1716 
   1717 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1718 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
   1719 {
   1720   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
   1721                    (__v16sf)
   1722                    _mm512_setzero_ps (),
   1723                    (__mmask16) __U);
   1724 }
   1725 
   1726 static  __inline__ __m128 __DEFAULT_FN_ATTRS
   1727 _mm_rcp14_ss(__m128 __A, __m128 __B)
   1728 {
   1729   return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
   1730                  (__v4sf) __B,
   1731                  (__v4sf)
   1732                  _mm_setzero_ps (),
   1733                  (__mmask8) -1);
   1734 }
   1735 
   1736 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1737 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   1738 {
   1739  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
   1740           (__v4sf) __B,
   1741           (__v4sf) __W,
   1742           (__mmask8) __U);
   1743 }
   1744 
   1745 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1746 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
   1747 {
   1748  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
   1749           (__v4sf) __B,
   1750           (__v4sf) _mm_setzero_ps (),
   1751           (__mmask8) __U);
   1752 }
   1753 
   1754 static  __inline__ __m128d __DEFAULT_FN_ATTRS
   1755 _mm_rcp14_sd(__m128d __A, __m128d __B)
   1756 {
   1757   return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
   1758             (__v2df) __B,
   1759             (__v2df)
   1760             _mm_setzero_pd (),
   1761             (__mmask8) -1);
   1762 }
   1763 
   1764 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1765 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   1766 {
   1767  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
   1768           (__v2df) __B,
   1769           (__v2df) __W,
   1770           (__mmask8) __U);
   1771 }
   1772 
   1773 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1774 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
   1775 {
   1776  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
   1777           (__v2df) __B,
   1778           (__v2df) _mm_setzero_pd (),
   1779           (__mmask8) __U);
   1780 }
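
/* Usage sketch (illustrative): the rcp14 family returns an approximation of
 * 1/x, likewise accurate to a maximum relative error of 2^-14.
 *
 *   __m512d d = _mm512_set1_pd(8.0);
 *   __m512d e = _mm512_maskz_rcp14_pd(0x0F, d);  // low 4 lanes ~0.125, rest 0
 */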
   1781 
   1782 static __inline __m512 __DEFAULT_FN_ATTRS
   1783 _mm512_floor_ps(__m512 __A)
   1784 {
   1785   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
   1786                                                   _MM_FROUND_FLOOR,
   1787                                                   (__v16sf) __A, -1,
   1788                                                   _MM_FROUND_CUR_DIRECTION);
   1789 }
   1790 
   1791 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1792 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
   1793 {
   1794   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
   1795                    _MM_FROUND_FLOOR,
   1796                    (__v16sf) __W, __U,
   1797                    _MM_FROUND_CUR_DIRECTION);
   1798 }
   1799 
   1800 static __inline __m512d __DEFAULT_FN_ATTRS
   1801 _mm512_floor_pd(__m512d __A)
   1802 {
   1803   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
   1804                                                    _MM_FROUND_FLOOR,
   1805                                                    (__v8df) __A, -1,
   1806                                                    _MM_FROUND_CUR_DIRECTION);
   1807 }
   1808 
   1809 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1810 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
   1811 {
   1812   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
   1813                 _MM_FROUND_FLOOR,
   1814                 (__v8df) __W, __U,
   1815                 _MM_FROUND_CUR_DIRECTION);
   1816 }
   1817 
   1818 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1819 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
   1820 {
   1821   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
   1822                    _MM_FROUND_CEIL,
   1823                    (__v16sf) __W, __U,
   1824                    _MM_FROUND_CUR_DIRECTION);
   1825 }
   1826 
   1827 static __inline __m512 __DEFAULT_FN_ATTRS
   1828 _mm512_ceil_ps(__m512 __A)
   1829 {
   1830   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
   1831                                                   _MM_FROUND_CEIL,
   1832                                                   (__v16sf) __A, -1,
   1833                                                   _MM_FROUND_CUR_DIRECTION);
   1834 }
   1835 
   1836 static __inline __m512d __DEFAULT_FN_ATTRS
   1837 _mm512_ceil_pd(__m512d __A)
   1838 {
   1839   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
   1840                                                    _MM_FROUND_CEIL,
   1841                                                    (__v8df) __A, -1,
   1842                                                    _MM_FROUND_CUR_DIRECTION);
   1843 }
   1844 
   1845 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1846 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
   1847 {
   1848   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
   1849                 _MM_FROUND_CEIL,
   1850                 (__v8df) __W, __U,
   1851                 _MM_FROUND_CUR_DIRECTION);
   1852 }
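
/* Usage sketch (illustrative): floor and ceil are implemented as round-scale
 * operations with the _MM_FROUND_FLOOR / _MM_FROUND_CEIL immediates, so they
 * round to integral values without changing the element type.
 *
 *   __m512d v = _mm512_set1_pd(2.5);
 *   __m512d f = _mm512_floor_pd(v);   // 2.0 in every lane
 *   __m512d c = _mm512_ceil_pd(v);    // 3.0 in every lane
 */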
   1853 
   1854 static __inline __m512i __DEFAULT_FN_ATTRS
   1855 _mm512_abs_epi64(__m512i __A)
   1856 {
   1857   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
   1858              (__v8di)
   1859              _mm512_setzero_si512 (),
   1860              (__mmask8) -1);
   1861 }
   1862 
   1863 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1864 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
   1865 {
   1866   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
   1867                   (__v8di) __W,
   1868                   (__mmask8) __U);
   1869 }
   1870 
   1871 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1872 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
   1873 {
   1874   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
   1875                   (__v8di)
   1876                   _mm512_setzero_si512 (),
   1877                   (__mmask8) __U);
   1878 }
   1879 
   1880 static __inline __m512i __DEFAULT_FN_ATTRS
   1881 _mm512_abs_epi32(__m512i __A)
   1882 {
   1883   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
   1884              (__v16si)
   1885              _mm512_setzero_si512 (),
   1886              (__mmask16) -1);
   1887 }
   1888 
   1889 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1890 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
   1891 {
   1892   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
   1893                   (__v16si) __W,
   1894                   (__mmask16) __U);
   1895 }
   1896 
   1897 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1898 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
   1899 {
   1900   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
   1901                   (__v16si)
   1902                   _mm512_setzero_si512 (),
   1903                   (__mmask16) __U);
   1904 }
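
/* Usage sketch (illustrative): per-element absolute value; the masked forms
 * merge with __W or zero the inactive lanes as usual.
 *
 *   __m512i v = _mm512_set1_epi32(-5);
 *   __m512i r = _mm512_abs_epi32(v);                // 5 in every lane
 *   __m512i z = _mm512_maskz_abs_epi32(0x00FF, v);  // high 8 lanes are 0
 */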
   1905 
   1906 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1907 _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   1908   return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
   1909                 (__v4sf) __B,
   1910                 (__v4sf) __W,
   1911                 (__mmask8) __U,
   1912                 _MM_FROUND_CUR_DIRECTION);
   1913 }
   1914 
   1915 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1916 _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   1917   return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
   1918                 (__v4sf) __B,
   1919                 (__v4sf)  _mm_setzero_ps (),
   1920                 (__mmask8) __U,
   1921                 _MM_FROUND_CUR_DIRECTION);
   1922 }
   1923 
   1924 #define _mm_add_round_ss(A, B, R) __extension__ ({ \
   1925   (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
   1926                                           (__v4sf)(__m128)(B), \
   1927                                           (__v4sf)_mm_setzero_ps(), \
   1928                                           (__mmask8)-1, (int)(R)); })
   1929 
   1930 #define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \
   1931   (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
   1932                                           (__v4sf)(__m128)(B), \
   1933                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
   1934                                           (int)(R)); })
   1935 
   1936 #define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \
   1937   (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
   1938                                           (__v4sf)(__m128)(B), \
   1939                                           (__v4sf)_mm_setzero_ps(), \
   1940                                           (__mmask8)(U), (int)(R)); })
   1941 
   1942 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1943 _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   1944   return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
   1945                 (__v2df) __B,
   1946                 (__v2df) __W,
   1947                 (__mmask8) __U,
   1948                 _MM_FROUND_CUR_DIRECTION);
   1949 }
   1950 
   1951 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1952 _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   1953   return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
   1954                 (__v2df) __B,
   1955                 (__v2df)  _mm_setzero_pd (),
   1956                 (__mmask8) __U,
   1957                 _MM_FROUND_CUR_DIRECTION);
   1958 }
   1959 #define _mm_add_round_sd(A, B, R) __extension__ ({ \
   1960   (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
   1961                                            (__v2df)(__m128d)(B), \
   1962                                            (__v2df)_mm_setzero_pd(), \
   1963                                            (__mmask8)-1, (int)(R)); })
   1964 
   1965 #define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \
   1966   (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
   1967                                            (__v2df)(__m128d)(B), \
   1968                                            (__v2df)(__m128d)(W), \
   1969                                            (__mmask8)(U), (int)(R)); })
   1970 
   1971 #define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \
   1972   (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
   1973                                            (__v2df)(__m128d)(B), \
   1974                                            (__v2df)_mm_setzero_pd(), \
   1975                                            (__mmask8)(U), (int)(R)); })
   1976 
   1977 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1978 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   1979   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
   1980                                               (__v8df)_mm512_add_pd(__A, __B),
   1981                                               (__v8df)__W);
   1982 }
   1983 
   1984 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1985 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   1986   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
   1987                                               (__v8df)_mm512_add_pd(__A, __B),
   1988                                               (__v8df)_mm512_setzero_pd());
   1989 }
   1990 
   1991 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1992 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   1993   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
   1994                                              (__v16sf)_mm512_add_ps(__A, __B),
   1995                                              (__v16sf)__W);
   1996 }
   1997 
   1998 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1999 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   2000   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
   2001                                              (__v16sf)_mm512_add_ps(__A, __B),
   2002                                              (__v16sf)_mm512_setzero_ps());
   2003 }
   2004 
   2005 #define _mm512_add_round_pd(A, B, R) __extension__ ({ \
   2006   (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
   2007                                         (__v8df)(__m512d)(B), \
   2008                                         (__v8df)_mm512_setzero_pd(), \
   2009                                         (__mmask8)-1, (int)(R)); })
   2010 
   2011 #define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \
   2012   (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
   2013                                         (__v8df)(__m512d)(B), \
   2014                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
   2015                                         (int)(R)); })
   2016 
   2017 #define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \
   2018   (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
   2019                                         (__v8df)(__m512d)(B), \
   2020                                         (__v8df)_mm512_setzero_pd(), \
   2021                                         (__mmask8)(U), (int)(R)); })
   2022 
   2023 #define _mm512_add_round_ps(A, B, R) __extension__ ({ \
   2024   (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
   2025                                        (__v16sf)(__m512)(B), \
   2026                                        (__v16sf)_mm512_setzero_ps(), \
   2027                                        (__mmask16)-1, (int)(R)); })
   2028 
   2029 #define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \
   2030   (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
   2031                                        (__v16sf)(__m512)(B), \
   2032                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   2033                                        (int)(R)); })
   2034 
   2035 #define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \
   2036   (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
   2037                                        (__v16sf)(__m512)(B), \
   2038                                        (__v16sf)_mm512_setzero_ps(), \
   2039                                        (__mmask16)(U), (int)(R)); })
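
/* Usage sketch (illustrative): the *_round_* forms take an explicit rounding
 * argument.  Pass _MM_FROUND_CUR_DIRECTION to use the MXCSR rounding mode, or
 * a static mode OR'ed with _MM_FROUND_NO_EXC for embedded rounding with
 * exceptions suppressed.
 *
 *   __m512d a = _mm512_set1_pd(1.0), b = _mm512_set1_pd(2.0);
 *   __m512d s = _mm512_add_round_pd(a, b,
 *                   _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 */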
   2040 
   2041 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2042 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   2043   return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
   2044                 (__v4sf) __B,
   2045                 (__v4sf) __W,
   2046                 (__mmask8) __U,
   2047                 _MM_FROUND_CUR_DIRECTION);
   2048 }
   2049 
   2050 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2051 _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   2052   return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
   2053                 (__v4sf) __B,
   2054                 (__v4sf)  _mm_setzero_ps (),
   2055                 (__mmask8) __U,
   2056                 _MM_FROUND_CUR_DIRECTION);
   2057 }
   2058 #define _mm_sub_round_ss(A, B, R) __extension__ ({ \
   2059   (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
   2060                                           (__v4sf)(__m128)(B), \
   2061                                           (__v4sf)_mm_setzero_ps(), \
   2062                                           (__mmask8)-1, (int)(R)); })
   2063 
   2064 #define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \
   2065   (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
   2066                                           (__v4sf)(__m128)(B), \
   2067                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
   2068                                           (int)(R)); })
   2069 
   2070 #define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \
   2071   (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
   2072                                           (__v4sf)(__m128)(B), \
   2073                                           (__v4sf)_mm_setzero_ps(), \
   2074                                           (__mmask8)(U), (int)(R)); })
   2075 
   2076 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2077 _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   2078   return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
   2079                 (__v2df) __B,
   2080                 (__v2df) __W,
   2081                 (__mmask8) __U,
   2082                 _MM_FROUND_CUR_DIRECTION);
   2083 }
   2084 
   2085 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2086 _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   2087   return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
   2088                 (__v2df) __B,
   2089                 (__v2df)  _mm_setzero_pd (),
   2090                 (__mmask8) __U,
   2091                 _MM_FROUND_CUR_DIRECTION);
   2092 }
   2093 
   2094 #define _mm_sub_round_sd(A, B, R) __extension__ ({ \
   2095   (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
   2096                                            (__v2df)(__m128d)(B), \
   2097                                            (__v2df)_mm_setzero_pd(), \
   2098                                            (__mmask8)-1, (int)(R)); })
   2099 
   2100 #define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \
   2101   (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
   2102                                            (__v2df)(__m128d)(B), \
   2103                                            (__v2df)(__m128d)(W), \
   2104                                            (__mmask8)(U), (int)(R)); })
   2105 
   2106 #define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \
   2107   (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
   2108                                            (__v2df)(__m128d)(B), \
   2109                                            (__v2df)_mm_setzero_pd(), \
   2110                                            (__mmask8)(U), (int)(R)); })
   2111 
   2112 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2113 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   2114   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
   2115                                               (__v8df)_mm512_sub_pd(__A, __B),
   2116                                               (__v8df)__W);
   2117 }
   2118 
   2119 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2120 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   2121   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
   2122                                               (__v8df)_mm512_sub_pd(__A, __B),
   2123                                               (__v8df)_mm512_setzero_pd());
   2124 }
   2125 
   2126 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2127 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   2128   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
   2129                                              (__v16sf)_mm512_sub_ps(__A, __B),
   2130                                              (__v16sf)__W);
   2131 }
   2132 
   2133 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2134 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   2135   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
   2136                                              (__v16sf)_mm512_sub_ps(__A, __B),
   2137                                              (__v16sf)_mm512_setzero_ps());
   2138 }
   2139 
   2140 #define _mm512_sub_round_pd(A, B, R) __extension__ ({ \
   2141   (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
   2142                                         (__v8df)(__m512d)(B), \
   2143                                         (__v8df)_mm512_setzero_pd(), \
   2144                                         (__mmask8)-1, (int)(R)); })
   2145 
   2146 #define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \
   2147   (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
   2148                                         (__v8df)(__m512d)(B), \
   2149                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
   2150                                         (int)(R)); })
   2151 
   2152 #define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \
   2153   (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
   2154                                         (__v8df)(__m512d)(B), \
   2155                                         (__v8df)_mm512_setzero_pd(), \
   2156                                         (__mmask8)(U), (int)(R)); })
   2157 
   2158 #define _mm512_sub_round_ps(A, B, R) __extension__ ({ \
   2159   (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
   2160                                        (__v16sf)(__m512)(B), \
   2161                                        (__v16sf)_mm512_setzero_ps(), \
   2162                                        (__mmask16)-1, (int)(R)); })
   2163 
   2164 #define _mm512_mask_sub_round_ps(W, U, A, B, R)  __extension__ ({ \
   2165   (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
   2166                                        (__v16sf)(__m512)(B), \
   2167                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   2168                                        (int)(R)); })
   2169 
   2170 #define _mm512_maskz_sub_round_ps(U, A, B, R)  __extension__ ({ \
   2171   (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
   2172                                        (__v16sf)(__m512)(B), \
   2173                                        (__v16sf)_mm512_setzero_ps(), \
   2174                                        (__mmask16)(U), (int)(R)); })
   2175 
   2176 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2177 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   2178   return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
   2179                 (__v4sf) __B,
   2180                 (__v4sf) __W,
   2181                 (__mmask8) __U,
   2182                 _MM_FROUND_CUR_DIRECTION);
   2183 }
   2184 
   2185 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2186 _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   2187   return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
   2188                 (__v4sf) __B,
   2189                 (__v4sf)  _mm_setzero_ps (),
   2190                 (__mmask8) __U,
   2191                 _MM_FROUND_CUR_DIRECTION);
   2192 }
   2193 #define _mm_mul_round_ss(A, B, R) __extension__ ({ \
   2194   (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
   2195                                           (__v4sf)(__m128)(B), \
   2196                                           (__v4sf)_mm_setzero_ps(), \
   2197                                           (__mmask8)-1, (int)(R)); })
   2198 
   2199 #define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \
   2200   (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
   2201                                           (__v4sf)(__m128)(B), \
   2202                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
   2203                                           (int)(R)); })
   2204 
   2205 #define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \
   2206   (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
   2207                                           (__v4sf)(__m128)(B), \
   2208                                           (__v4sf)_mm_setzero_ps(), \
   2209                                           (__mmask8)(U), (int)(R)); })
   2210 
   2211 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2212 _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   2213   return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
   2214                 (__v2df) __B,
   2215                 (__v2df) __W,
   2216                 (__mmask8) __U,
   2217                 _MM_FROUND_CUR_DIRECTION);
   2218 }
   2219 
   2220 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2221 _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   2222   return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
   2223                 (__v2df) __B,
   2224                 (__v2df)  _mm_setzero_pd (),
   2225                 (__mmask8) __U,
   2226                 _MM_FROUND_CUR_DIRECTION);
   2227 }
   2228 
   2229 #define _mm_mul_round_sd(A, B, R) __extension__ ({ \
   2230   (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
   2231                                            (__v2df)(__m128d)(B), \
   2232                                            (__v2df)_mm_setzero_pd(), \
   2233                                            (__mmask8)-1, (int)(R)); })
   2234 
   2235 #define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \
   2236   (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
   2237                                            (__v2df)(__m128d)(B), \
   2238                                            (__v2df)(__m128d)(W), \
   2239                                            (__mmask8)(U), (int)(R)); })
   2240 
   2241 #define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \
   2242   (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
   2243                                            (__v2df)(__m128d)(B), \
   2244                                            (__v2df)_mm_setzero_pd(), \
   2245                                            (__mmask8)(U), (int)(R)); })
   2246 
   2247 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2248 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   2249   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
   2250                                               (__v8df)_mm512_mul_pd(__A, __B),
   2251                                               (__v8df)__W);
   2252 }
   2253 
   2254 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2255 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   2256   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
   2257                                               (__v8df)_mm512_mul_pd(__A, __B),
   2258                                               (__v8df)_mm512_setzero_pd());
   2259 }
   2260 
   2261 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2262 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   2263   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
   2264                                              (__v16sf)_mm512_mul_ps(__A, __B),
   2265                                              (__v16sf)__W);
   2266 }
   2267 
   2268 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2269 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   2270   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
   2271                                              (__v16sf)_mm512_mul_ps(__A, __B),
   2272                                              (__v16sf)_mm512_setzero_ps());
   2273 }
   2274 
   2275 #define _mm512_mul_round_pd(A, B, R) __extension__ ({ \
   2276   (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
   2277                                         (__v8df)(__m512d)(B), \
   2278                                         (__v8df)_mm512_setzero_pd(), \
   2279                                         (__mmask8)-1, (int)(R)); })
   2280 
   2281 #define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \
   2282   (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
   2283                                         (__v8df)(__m512d)(B), \
   2284                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
   2285                                         (int)(R)); })
   2286 
   2287 #define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \
   2288   (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
   2289                                         (__v8df)(__m512d)(B), \
   2290                                         (__v8df)_mm512_setzero_pd(), \
   2291                                         (__mmask8)(U), (int)(R)); })
   2292 
   2293 #define _mm512_mul_round_ps(A, B, R) __extension__ ({ \
   2294   (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
   2295                                        (__v16sf)(__m512)(B), \
   2296                                        (__v16sf)_mm512_setzero_ps(), \
   2297                                        (__mmask16)-1, (int)(R)); })
   2298 
   2299 #define _mm512_mask_mul_round_ps(W, U, A, B, R)  __extension__ ({ \
   2300   (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
   2301                                        (__v16sf)(__m512)(B), \
   2302                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   2303                                        (int)(R)); })
   2304 
   2305 #define _mm512_maskz_mul_round_ps(U, A, B, R)  __extension__ ({ \
   2306   (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
   2307                                        (__v16sf)(__m512)(B), \
   2308                                        (__v16sf)_mm512_setzero_ps(), \
   2309                                        (__mmask16)(U), (int)(R)); })
   2310 
   2311 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2312 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   2313   return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
   2314                 (__v4sf) __B,
   2315                 (__v4sf) __W,
   2316                 (__mmask8) __U,
   2317                 _MM_FROUND_CUR_DIRECTION);
   2318 }
   2319 
   2320 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2321 _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   2322   return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
   2323                 (__v4sf) __B,
   2324                 (__v4sf)  _mm_setzero_ps (),
   2325                 (__mmask8) __U,
   2326                 _MM_FROUND_CUR_DIRECTION);
   2327 }
   2328 
   2329 #define _mm_div_round_ss(A, B, R) __extension__ ({ \
   2330   (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
   2331                                           (__v4sf)(__m128)(B), \
   2332                                           (__v4sf)_mm_setzero_ps(), \
   2333                                           (__mmask8)-1, (int)(R)); })
   2334 
   2335 #define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \
   2336   (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
   2337                                           (__v4sf)(__m128)(B), \
   2338                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
   2339                                           (int)(R)); })
   2340 
   2341 #define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \
   2342   (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
   2343                                           (__v4sf)(__m128)(B), \
   2344                                           (__v4sf)_mm_setzero_ps(), \
   2345                                           (__mmask8)(U), (int)(R)); })
   2346 
   2347 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2348 _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   2349   return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
   2350                 (__v2df) __B,
   2351                 (__v2df) __W,
   2352                 (__mmask8) __U,
   2353                 _MM_FROUND_CUR_DIRECTION);
   2354 }
   2355 
   2356 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2357 _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   2358   return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
   2359                 (__v2df) __B,
   2360                 (__v2df)  _mm_setzero_pd (),
   2361                 (__mmask8) __U,
   2362                 _MM_FROUND_CUR_DIRECTION);
   2363 }
   2364 
   2365 #define _mm_div_round_sd(A, B, R) __extension__ ({ \
   2366   (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
   2367                                            (__v2df)(__m128d)(B), \
   2368                                            (__v2df)_mm_setzero_pd(), \
   2369                                            (__mmask8)-1, (int)(R)); })
   2370 
   2371 #define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \
   2372   (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
   2373                                            (__v2df)(__m128d)(B), \
   2374                                            (__v2df)(__m128d)(W), \
   2375                                            (__mmask8)(U), (int)(R)); })
   2376 
   2377 #define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \
   2378   (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
   2379                                            (__v2df)(__m128d)(B), \
   2380                                            (__v2df)_mm_setzero_pd(), \
   2381                                            (__mmask8)(U), (int)(R)); })
   2382 
   2383 static __inline __m512d __DEFAULT_FN_ATTRS
   2384 _mm512_div_pd(__m512d __a, __m512d __b)
   2385 {
   2386   return (__m512d)((__v8df)__a/(__v8df)__b);
   2387 }
   2388 
   2389 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2390 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   2391   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
   2392                                               (__v8df)_mm512_div_pd(__A, __B),
   2393                                               (__v8df)__W);
   2394 }
   2395 
   2396 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2397 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   2398   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
   2399                                               (__v8df)_mm512_div_pd(__A, __B),
   2400                                               (__v8df)_mm512_setzero_pd());
   2401 }
   2402 
   2403 static __inline __m512 __DEFAULT_FN_ATTRS
   2404 _mm512_div_ps(__m512 __a, __m512 __b)
   2405 {
   2406   return (__m512)((__v16sf)__a/(__v16sf)__b);
   2407 }
   2408 
   2409 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2410 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   2411   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
   2412                                              (__v16sf)_mm512_div_ps(__A, __B),
   2413                                              (__v16sf)__W);
   2414 }
   2415 
   2416 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2417 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   2418   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
   2419                                              (__v16sf)_mm512_div_ps(__A, __B),
   2420                                              (__v16sf)_mm512_setzero_ps());
   2421 }
   2422 
   2423 #define _mm512_div_round_pd(A, B, R) __extension__ ({ \
   2424   (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
   2425                                         (__v8df)(__m512d)(B), \
   2426                                         (__v8df)_mm512_setzero_pd(), \
   2427                                         (__mmask8)-1, (int)(R)); })
   2428 
   2429 #define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \
   2430   (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
   2431                                         (__v8df)(__m512d)(B), \
   2432                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
   2433                                         (int)(R)); })
   2434 
   2435 #define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \
   2436   (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
   2437                                         (__v8df)(__m512d)(B), \
   2438                                         (__v8df)_mm512_setzero_pd(), \
   2439                                         (__mmask8)(U), (int)(R)); })
   2440 
   2441 #define _mm512_div_round_ps(A, B, R) __extension__ ({ \
   2442   (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
   2443                                        (__v16sf)(__m512)(B), \
   2444                                        (__v16sf)_mm512_setzero_ps(), \
   2445                                        (__mmask16)-1, (int)(R)); })
   2446 
   2447 #define _mm512_mask_div_round_ps(W, U, A, B, R)  __extension__ ({ \
   2448   (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
   2449                                        (__v16sf)(__m512)(B), \
   2450                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   2451                                        (int)(R)); })
   2452 
   2453 #define _mm512_maskz_div_round_ps(U, A, B, R)  __extension__ ({ \
   2454   (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
   2455                                        (__v16sf)(__m512)(B), \
   2456                                        (__v16sf)_mm512_setzero_ps(), \
   2457                                        (__mmask16)(U), (int)(R)); })
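
/* Usage sketch (illustrative): _mm512_div_pd/_mm512_div_ps are plain IEEE
 * element-wise divisions expressed with the vector / operator; the masked and
 * *_round_* forms wrap them in the usual select/rounding machinery.
 *
 *   __m512 n = _mm512_set1_ps(1.0f), d = _mm512_set1_ps(3.0f);
 *   __m512 q = _mm512_maskz_div_ps(0x00FF, n, d);  // low 8 lanes ~0.3333f
 */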
   2458 
   2459 #define _mm512_roundscale_ps(A, B) __extension__ ({ \
   2460   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
   2461                                          (__v16sf)(__m512)(A), (__mmask16)-1, \
   2462                                          _MM_FROUND_CUR_DIRECTION); })
   2463 
   2464 #define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({\
   2465   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
   2466                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
   2467                                          _MM_FROUND_CUR_DIRECTION); })
   2468 
   2469 #define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({\
   2470   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
   2471                                          (__v16sf)_mm512_setzero_ps(), \
   2472                                          (__mmask16)(A), \
   2473                                          _MM_FROUND_CUR_DIRECTION); })
   2474 
   2475 #define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \
   2476   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
   2477                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
   2478                                          (int)(R)); })
   2479 
   2480 #define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \
   2481   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
   2482                                          (__v16sf)_mm512_setzero_ps(), \
   2483                                          (__mmask16)(A), (int)(R)); })
   2484 
   2485 #define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \
   2486   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
   2487                                          (__v16sf)_mm512_undefined_ps(), \
   2488                                          (__mmask16)-1, (int)(R)); })
   2489 
   2490 #define _mm512_roundscale_pd(A, B) __extension__ ({ \
   2491   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
   2492                                           (__v8df)(__m512d)(A), (__mmask8)-1, \
   2493                                           _MM_FROUND_CUR_DIRECTION); })
   2494 
   2495 #define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({\
   2496   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
   2497                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
   2498                                           _MM_FROUND_CUR_DIRECTION); })
   2499 
   2500 #define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({\
   2501   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
   2502                                           (__v8df)_mm512_setzero_pd(), \
   2503                                           (__mmask8)(A), \
   2504                                           _MM_FROUND_CUR_DIRECTION); })
   2505 
   2506 #define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \
   2507   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
   2508                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
   2509                                           (int)(R)); })
   2510 
   2511 #define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \
   2512   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
   2513                                           (__v8df)_mm512_setzero_pd(), \
   2514                                           (__mmask8)(A), (int)(R)); })
   2515 
   2516 #define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \
   2517   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
   2518                                           (__v8df)_mm512_undefined_pd(), \
   2519                                           (__mmask8)-1, (int)(R)); })
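
/* Usage sketch (not part of the original header): for the roundscale family
 * the immediate follows the VRNDSCALE encoding -- bits [7:4] give the number
 * of fraction bits M to keep (the result is a multiple of 2^-M) and bits
 * [1:0] give the rounding mode, so the _MM_FROUND_* macros above can supply
 * the low bits.  Variable names are illustrative only.
 *
 *   __m512 ints     = _mm512_roundscale_ps(x, _MM_FROUND_TO_NEG_INF); // floor
 *   __m512 quarters = _mm512_roundscale_ps(x, (2 << 4) | _MM_FROUND_TO_ZERO);
 *                     // truncate toward zero to multiples of 0.25
 */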
   2520 
   2521 #define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
   2522   (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
   2523                                            (__v8df)(__m512d)(B), \
   2524                                            (__v8df)(__m512d)(C), (__mmask8)-1, \
   2525                                            (int)(R)); })
   2526 
   2527 
   2528 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
   2529   (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
   2530                                            (__v8df)(__m512d)(B), \
   2531                                            (__v8df)(__m512d)(C), \
   2532                                            (__mmask8)(U), (int)(R)); })
   2533 
   2534 
   2535 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
   2536   (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
   2537                                             (__v8df)(__m512d)(B), \
   2538                                             (__v8df)(__m512d)(C), \
   2539                                             (__mmask8)(U), (int)(R)); })
   2540 
   2541 
   2542 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
   2543   (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
   2544                                             (__v8df)(__m512d)(B), \
   2545                                             (__v8df)(__m512d)(C), \
   2546                                             (__mmask8)(U), (int)(R)); })
   2547 
   2548 
   2549 #define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
   2550   (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
   2551                                            (__v8df)(__m512d)(B), \
   2552                                            -(__v8df)(__m512d)(C), \
   2553                                            (__mmask8)-1, (int)(R)); })
   2554 
   2555 
   2556 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
   2557   (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
   2558                                            (__v8df)(__m512d)(B), \
   2559                                            -(__v8df)(__m512d)(C), \
   2560                                            (__mmask8)(U), (int)(R)); })
   2561 
   2562 
   2563 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
   2564   (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
   2565                                             (__v8df)(__m512d)(B), \
   2566                                             -(__v8df)(__m512d)(C), \
   2567                                             (__mmask8)(U), (int)(R)); })
   2568 
   2569 
   2570 #define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
   2571   (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
   2572                                            (__v8df)(__m512d)(B), \
   2573                                            (__v8df)(__m512d)(C), (__mmask8)-1, \
   2574                                            (int)(R)); })
   2575 
   2576 
   2577 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
   2578   (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
   2579                                             (__v8df)(__m512d)(B), \
   2580                                             (__v8df)(__m512d)(C), \
   2581                                             (__mmask8)(U), (int)(R)); })
   2582 
   2583 
   2584 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
   2585   (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
   2586                                             (__v8df)(__m512d)(B), \
   2587                                             (__v8df)(__m512d)(C), \
   2588                                             (__mmask8)(U), (int)(R)); })
   2589 
   2590 
   2591 #define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
   2592   (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
   2593                                            (__v8df)(__m512d)(B), \
   2594                                            -(__v8df)(__m512d)(C), \
   2595                                            (__mmask8)-1, (int)(R)); })
   2596 
   2597 
   2598 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
   2599   (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
   2600                                             (__v8df)(__m512d)(B), \
   2601                                             -(__v8df)(__m512d)(C), \
   2602                                             (__mmask8)(U), (int)(R)); })
   2603 
   2604 
   2605 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2606 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
   2607 {
   2608   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   2609                                                     (__v8df) __B,
   2610                                                     (__v8df) __C,
   2611                                                     (__mmask8) -1,
   2612                                                     _MM_FROUND_CUR_DIRECTION);
   2613 }
   2614 
   2615 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2616 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   2617 {
   2618   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   2619                                                     (__v8df) __B,
   2620                                                     (__v8df) __C,
   2621                                                     (__mmask8) __U,
   2622                                                     _MM_FROUND_CUR_DIRECTION);
   2623 }
   2624 
   2625 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2626 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   2627 {
   2628   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
   2629                                                      (__v8df) __B,
   2630                                                      (__v8df) __C,
   2631                                                      (__mmask8) __U,
   2632                                                      _MM_FROUND_CUR_DIRECTION);
   2633 }
   2634 
   2635 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2636 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   2637 {
   2638   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
   2639                                                      (__v8df) __B,
   2640                                                      (__v8df) __C,
   2641                                                      (__mmask8) __U,
   2642                                                      _MM_FROUND_CUR_DIRECTION);
   2643 }
   2644 
   2645 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2646 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
   2647 {
   2648   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   2649                                                     (__v8df) __B,
   2650                                                     -(__v8df) __C,
   2651                                                     (__mmask8) -1,
   2652                                                     _MM_FROUND_CUR_DIRECTION);
   2653 }
   2654 
   2655 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2656 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   2657 {
   2658   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   2659                                                     (__v8df) __B,
   2660                                                     -(__v8df) __C,
   2661                                                     (__mmask8) __U,
   2662                                                     _MM_FROUND_CUR_DIRECTION);
   2663 }
   2664 
   2665 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2666 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   2667 {
   2668   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
   2669                                                      (__v8df) __B,
   2670                                                      -(__v8df) __C,
   2671                                                      (__mmask8) __U,
   2672                                                      _MM_FROUND_CUR_DIRECTION);
   2673 }
   2674 
   2675 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2676 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
   2677 {
   2678   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
   2679                                                     (__v8df) __B,
   2680                                                     (__v8df) __C,
   2681                                                     (__mmask8) -1,
   2682                                                     _MM_FROUND_CUR_DIRECTION);
   2683 }
   2684 
   2685 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2686 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   2687 {
   2688   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
   2689                                                      (__v8df) __B,
   2690                                                      (__v8df) __C,
   2691                                                      (__mmask8) __U,
   2692                                                      _MM_FROUND_CUR_DIRECTION);
   2693 }
   2694 
   2695 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2696 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   2697 {
   2698   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
   2699                                                      (__v8df) __B,
   2700                                                      (__v8df) __C,
   2701                                                      (__mmask8) __U,
   2702                                                      _MM_FROUND_CUR_DIRECTION);
   2703 }
   2704 
   2705 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2706 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
   2707 {
   2708   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
   2709                                                     (__v8df) __B,
   2710                                                     -(__v8df) __C,
   2711                                                     (__mmask8) -1,
   2712                                                     _MM_FROUND_CUR_DIRECTION);
   2713 }
   2714 
   2715 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2716 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   2717 {
   2718   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
   2719                                                      (__v8df) __B,
   2720                                                      -(__v8df) __C,
   2721                                                      (__mmask8) __U,
   2722                                                      _MM_FROUND_CUR_DIRECTION);
   2723 }
   2724 
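/* Summary sketch (not part of the original header): the negations applied to
 * the builtin operands above give the usual FMA sign conventions, i.e. per
 * lane
 *
 *   fmadd  = a*b + c        fmsub  = a*b - c
 *   fnmadd = -(a*b) + c     fnmsub = -(a*b) - c
 *
 * with a single rounding of the final result.  An illustrative call:
 *
 *   __m512d r = _mm512_fnmadd_pd(a, b, c);   // r[i] = c[i] - a[i]*b[i]
 */
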
   2725 #define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
   2726   (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
   2727                                           (__v16sf)(__m512)(B), \
   2728                                           (__v16sf)(__m512)(C), (__mmask16)-1, \
   2729                                           (int)(R)); })
   2730 
   2731 
   2732 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
   2733   (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
   2734                                           (__v16sf)(__m512)(B), \
   2735                                           (__v16sf)(__m512)(C), \
   2736                                           (__mmask16)(U), (int)(R)); })
   2737 
   2738 
   2739 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
   2740   (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
   2741                                            (__v16sf)(__m512)(B), \
   2742                                            (__v16sf)(__m512)(C), \
   2743                                            (__mmask16)(U), (int)(R)); })
   2744 
   2745 
   2746 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
   2747   (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
   2748                                            (__v16sf)(__m512)(B), \
   2749                                            (__v16sf)(__m512)(C), \
   2750                                            (__mmask16)(U), (int)(R)); })
   2751 
   2752 
   2753 #define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
   2754   (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
   2755                                           (__v16sf)(__m512)(B), \
   2756                                           -(__v16sf)(__m512)(C), \
   2757                                           (__mmask16)-1, (int)(R)); })
   2758 
   2759 
   2760 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
   2761   (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
   2762                                           (__v16sf)(__m512)(B), \
   2763                                           -(__v16sf)(__m512)(C), \
   2764                                           (__mmask16)(U), (int)(R)); })
   2765 
   2766 
   2767 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
   2768   (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
   2769                                            (__v16sf)(__m512)(B), \
   2770                                            -(__v16sf)(__m512)(C), \
   2771                                            (__mmask16)(U), (int)(R)); })
   2772 
   2773 
   2774 #define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
   2775   (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
   2776                                           (__v16sf)(__m512)(B), \
   2777                                           (__v16sf)(__m512)(C), (__mmask16)-1, \
   2778                                           (int)(R)); })
   2779 
   2780 
   2781 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
   2782   (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
   2783                                            (__v16sf)(__m512)(B), \
   2784                                            (__v16sf)(__m512)(C), \
   2785                                            (__mmask16)(U), (int)(R)); })
   2786 
   2787 
   2788 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
   2789   (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
   2790                                            (__v16sf)(__m512)(B), \
   2791                                            (__v16sf)(__m512)(C), \
   2792                                            (__mmask16)(U), (int)(R)); })
   2793 
   2794 
   2795 #define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
   2796   (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
   2797                                           (__v16sf)(__m512)(B), \
   2798                                           -(__v16sf)(__m512)(C), \
   2799                                           (__mmask16)-1, (int)(R)); })
   2800 
   2801 
   2802 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
   2803   (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
   2804                                            (__v16sf)(__m512)(B), \
   2805                                            -(__v16sf)(__m512)(C), \
   2806                                            (__mmask16)(U), (int)(R)); })
   2807 
   2808 
   2809 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2810 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
   2811 {
   2812   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2813                                                    (__v16sf) __B,
   2814                                                    (__v16sf) __C,
   2815                                                    (__mmask16) -1,
   2816                                                    _MM_FROUND_CUR_DIRECTION);
   2817 }
   2818 
   2819 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2820 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   2821 {
   2822   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2823                                                    (__v16sf) __B,
   2824                                                    (__v16sf) __C,
   2825                                                    (__mmask16) __U,
   2826                                                    _MM_FROUND_CUR_DIRECTION);
   2827 }
   2828 
   2829 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2830 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   2831 {
   2832   return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
   2833                                                     (__v16sf) __B,
   2834                                                     (__v16sf) __C,
   2835                                                     (__mmask16) __U,
   2836                                                     _MM_FROUND_CUR_DIRECTION);
   2837 }
   2838 
   2839 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2840 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   2841 {
   2842   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
   2843                                                     (__v16sf) __B,
   2844                                                     (__v16sf) __C,
   2845                                                     (__mmask16) __U,
   2846                                                     _MM_FROUND_CUR_DIRECTION);
   2847 }
   2848 
   2849 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2850 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
   2851 {
   2852   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2853                                                    (__v16sf) __B,
   2854                                                    -(__v16sf) __C,
   2855                                                    (__mmask16) -1,
   2856                                                    _MM_FROUND_CUR_DIRECTION);
   2857 }
   2858 
   2859 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2860 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   2861 {
   2862   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2863                                                    (__v16sf) __B,
   2864                                                    -(__v16sf) __C,
   2865                                                    (__mmask16) __U,
   2866                                                    _MM_FROUND_CUR_DIRECTION);
   2867 }
   2868 
   2869 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2870 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   2871 {
   2872   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
   2873                                                     (__v16sf) __B,
   2874                                                     -(__v16sf) __C,
   2875                                                     (__mmask16) __U,
   2876                                                     _MM_FROUND_CUR_DIRECTION);
   2877 }
   2878 
   2879 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2880 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
   2881 {
   2882   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
   2883                                                    (__v16sf) __B,
   2884                                                    (__v16sf) __C,
   2885                                                    (__mmask16) -1,
   2886                                                    _MM_FROUND_CUR_DIRECTION);
   2887 }
   2888 
   2889 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2890 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   2891 {
   2892   return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
   2893                                                     (__v16sf) __B,
   2894                                                     (__v16sf) __C,
   2895                                                     (__mmask16) __U,
   2896                                                     _MM_FROUND_CUR_DIRECTION);
   2897 }
   2898 
   2899 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2900 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   2901 {
   2902   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
   2903                                                     (__v16sf) __B,
   2904                                                     (__v16sf) __C,
   2905                                                     (__mmask16) __U,
   2906                                                     _MM_FROUND_CUR_DIRECTION);
   2907 }
   2908 
   2909 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2910 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
   2911 {
   2912   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
   2913                                                    (__v16sf) __B,
   2914                                                    -(__v16sf) __C,
   2915                                                    (__mmask16) -1,
   2916                                                    _MM_FROUND_CUR_DIRECTION);
   2917 }
   2918 
   2919 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2920 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   2921 {
   2922   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
   2923                                                     (__v16sf) __B,
   2924                                                     -(__v16sf) __C,
   2925                                                     (__mmask16) __U,
   2926                                                     _MM_FROUND_CUR_DIRECTION);
   2927 }
   2928 
   2929 #define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
   2930   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
   2931                                               (__v8df)(__m512d)(B), \
   2932                                               (__v8df)(__m512d)(C), \
   2933                                               (__mmask8)-1, (int)(R)); })
   2934 
   2935 
   2936 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
   2937   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
   2938                                               (__v8df)(__m512d)(B), \
   2939                                               (__v8df)(__m512d)(C), \
   2940                                               (__mmask8)(U), (int)(R)); })
   2941 
   2942 
   2943 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
   2944   (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
   2945                                                (__v8df)(__m512d)(B), \
   2946                                                (__v8df)(__m512d)(C), \
   2947                                                (__mmask8)(U), (int)(R)); })
   2948 
   2949 
   2950 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
   2951   (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
   2952                                                (__v8df)(__m512d)(B), \
   2953                                                (__v8df)(__m512d)(C), \
   2954                                                (__mmask8)(U), (int)(R)); })
   2955 
   2956 
   2957 #define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
   2958   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
   2959                                               (__v8df)(__m512d)(B), \
   2960                                               -(__v8df)(__m512d)(C), \
   2961                                               (__mmask8)-1, (int)(R)); })
   2962 
   2963 
   2964 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
   2965   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
   2966                                               (__v8df)(__m512d)(B), \
   2967                                               -(__v8df)(__m512d)(C), \
   2968                                               (__mmask8)(U), (int)(R)); })
   2969 
   2970 
   2971 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
   2972   (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
   2973                                                (__v8df)(__m512d)(B), \
   2974                                                -(__v8df)(__m512d)(C), \
   2975                                                (__mmask8)(U), (int)(R)); })
   2976 
   2977 
   2978 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2979 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
   2980 {
   2981   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   2982                                                        (__v8df) __B,
   2983                                                        (__v8df) __C,
   2984                                                        (__mmask8) -1,
   2985                                                        _MM_FROUND_CUR_DIRECTION);
   2986 }
   2987 
   2988 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2989 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   2990 {
   2991   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   2992                                                        (__v8df) __B,
   2993                                                        (__v8df) __C,
   2994                                                        (__mmask8) __U,
   2995                                                        _MM_FROUND_CUR_DIRECTION);
   2996 }
   2997 
   2998 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2999 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   3000 {
   3001   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
   3002                                                         (__v8df) __B,
   3003                                                         (__v8df) __C,
   3004                                                         (__mmask8) __U,
   3005                                                         _MM_FROUND_CUR_DIRECTION);
   3006 }
   3007 
   3008 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3009 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   3010 {
   3011   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
   3012                                                         (__v8df) __B,
   3013                                                         (__v8df) __C,
   3014                                                         (__mmask8) __U,
   3015                                                         _MM_FROUND_CUR_DIRECTION);
   3016 }
   3017 
   3018 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3019 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
   3020 {
   3021   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   3022                                                        (__v8df) __B,
   3023                                                        -(__v8df) __C,
   3024                                                        (__mmask8) -1,
   3025                                                        _MM_FROUND_CUR_DIRECTION);
   3026 }
   3027 
   3028 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3029 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   3030 {
   3031   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   3032                                                        (__v8df) __B,
   3033                                                        -(__v8df) __C,
   3034                                                        (__mmask8) __U,
   3035                                                        _MM_FROUND_CUR_DIRECTION);
   3036 }
   3037 
   3038 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3039 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   3040 {
   3041   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
   3042                                                         (__v8df) __B,
   3043                                                         -(__v8df) __C,
   3044                                                         (__mmask8) __U,
   3045                                                         _MM_FROUND_CUR_DIRECTION);
   3046 }
   3047 
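/* Summary sketch (not part of the original header): fmaddsub alternates the
 * sign of the addend per lane -- even-indexed lanes compute a*b - c and
 * odd-indexed lanes compute a*b + c -- while fmsubadd does the opposite,
 * which is convenient for interleaved complex arithmetic.  Variable names
 * are illustrative only.
 *
 *   __m512d r = _mm512_fmaddsub_pd(a, b, c);
 *   // r[0] = a[0]*b[0] - c[0],  r[1] = a[1]*b[1] + c[1],  ...
 */
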
   3048 #define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
   3049   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
   3050                                              (__v16sf)(__m512)(B), \
   3051                                              (__v16sf)(__m512)(C), \
   3052                                              (__mmask16)-1, (int)(R)); })
   3053 
   3054 
   3055 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
   3056   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
   3057                                              (__v16sf)(__m512)(B), \
   3058                                              (__v16sf)(__m512)(C), \
   3059                                              (__mmask16)(U), (int)(R)); })
   3060 
   3061 
   3062 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
   3063   (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
   3064                                               (__v16sf)(__m512)(B), \
   3065                                               (__v16sf)(__m512)(C), \
   3066                                               (__mmask16)(U), (int)(R)); })
   3067 
   3068 
   3069 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
   3070   (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
   3071                                               (__v16sf)(__m512)(B), \
   3072                                               (__v16sf)(__m512)(C), \
   3073                                               (__mmask16)(U), (int)(R)); })
   3074 
   3075 
   3076 #define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
   3077   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
   3078                                              (__v16sf)(__m512)(B), \
   3079                                              -(__v16sf)(__m512)(C), \
   3080                                              (__mmask16)-1, (int)(R)); })
   3081 
   3082 
   3083 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
   3084   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
   3085                                              (__v16sf)(__m512)(B), \
   3086                                              -(__v16sf)(__m512)(C), \
   3087                                              (__mmask16)(U), (int)(R)); })
   3088 
   3089 
   3090 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
   3091   (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
   3092                                               (__v16sf)(__m512)(B), \
   3093                                               -(__v16sf)(__m512)(C), \
   3094                                               (__mmask16)(U), (int)(R)); })
   3095 
   3096 
   3097 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3098 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
   3099 {
   3100   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   3101                                                       (__v16sf) __B,
   3102                                                       (__v16sf) __C,
   3103                                                       (__mmask16) -1,
   3104                                                       _MM_FROUND_CUR_DIRECTION);
   3105 }
   3106 
   3107 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3108 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   3109 {
   3110   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   3111                                                       (__v16sf) __B,
   3112                                                       (__v16sf) __C,
   3113                                                       (__mmask16) __U,
   3114                                                       _MM_FROUND_CUR_DIRECTION);
   3115 }
   3116 
   3117 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3118 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   3119 {
   3120   return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
   3121                                                        (__v16sf) __B,
   3122                                                        (__v16sf) __C,
   3123                                                        (__mmask16) __U,
   3124                                                        _MM_FROUND_CUR_DIRECTION);
   3125 }
   3126 
   3127 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3128 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   3129 {
   3130   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
   3131                                                        (__v16sf) __B,
   3132                                                        (__v16sf) __C,
   3133                                                        (__mmask16) __U,
   3134                                                        _MM_FROUND_CUR_DIRECTION);
   3135 }
   3136 
   3137 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3138 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
   3139 {
   3140   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   3141                                                       (__v16sf) __B,
   3142                                                       -(__v16sf) __C,
   3143                                                       (__mmask16) -1,
   3144                                                       _MM_FROUND_CUR_DIRECTION);
   3145 }
   3146 
   3147 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3148 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   3149 {
   3150   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   3151                                                       (__v16sf) __B,
   3152                                                       -(__v16sf) __C,
   3153                                                       (__mmask16) __U,
   3154                                                       _MM_FROUND_CUR_DIRECTION);
   3155 }
   3156 
   3157 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3158 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   3159 {
   3160   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
   3161                                                        (__v16sf) __B,
   3162                                                        -(__v16sf) __C,
   3163                                                        (__mmask16) __U,
   3164                                                        _MM_FROUND_CUR_DIRECTION);
   3165 }
   3166 
   3167 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
   3168   (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
   3169                                             (__v8df)(__m512d)(B), \
   3170                                             (__v8df)(__m512d)(C), \
   3171                                             (__mmask8)(U), (int)(R)); })
   3172 
   3173 
   3174 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3175 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   3176 {
   3177   return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
   3178                                                      (__v8df) __B,
   3179                                                      (__v8df) __C,
   3180                                                      (__mmask8) __U,
   3181                                                      _MM_FROUND_CUR_DIRECTION);
   3182 }
   3183 
   3184 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
   3185   (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
   3186                                            (__v16sf)(__m512)(B), \
   3187                                            (__v16sf)(__m512)(C), \
   3188                                            (__mmask16)(U), (int)(R)); })
   3189 
   3190 
   3191 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3192 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   3193 {
   3194   return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
   3195                                                     (__v16sf) __B,
   3196                                                     (__v16sf) __C,
   3197                                                     (__mmask16) __U,
   3198                                                     _MM_FROUND_CUR_DIRECTION);
   3199 }
   3200 
   3201 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
   3202   (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
   3203                                                (__v8df)(__m512d)(B), \
   3204                                                (__v8df)(__m512d)(C), \
   3205                                                (__mmask8)(U), (int)(R)); })
   3206 
   3207 
   3208 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3209 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   3210 {
   3211   return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
   3212                                                         (__v8df) __B,
   3213                                                         (__v8df) __C,
   3214                                                         (__mmask8) __U,
   3215                                                         _MM_FROUND_CUR_DIRECTION);
   3216 }
   3217 
   3218 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
   3219   (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
   3220                                               (__v16sf)(__m512)(B), \
   3221                                               (__v16sf)(__m512)(C), \
   3222                                               (__mmask16)(U), (int)(R)); })
   3223 
   3224 
   3225 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3226 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   3227 {
   3228   return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
   3229                                                        (__v16sf) __B,
   3230                                                        (__v16sf) __C,
   3231                                                        (__mmask16) __U,
   3232                                                        _MM_FROUND_CUR_DIRECTION);
   3233 }
   3234 
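/* Summary sketch (not part of the original header): the _mm512_mask3_*
 * variants place the mask after the third source and merge inactive lanes
 * from that third operand (the addend), rather than from the first operand
 * as _mm512_mask_* does.  Variable names are illustrative only.
 *
 *   __m512d r = _mm512_mask3_fmsub_pd(a, b, acc, k);
 *   // active lanes: a*b - acc;  inactive lanes: copied from acc
 */
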
   3235 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
   3236   (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \
   3237                                             (__v8df)(__m512d)(B), \
   3238                                             (__v8df)(__m512d)(C), \
   3239                                             (__mmask8)(U), (int)(R)); })
   3240 
   3241 
   3242 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3243 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   3244 {
   3245   return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
   3246                                                      (__v8df) __B,
   3247                                                      (__v8df) __C,
   3248                                                      (__mmask8) __U,
   3249                                                      _MM_FROUND_CUR_DIRECTION);
   3250 }
   3251 
   3252 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
   3253   (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \
   3254                                            (__v16sf)(__m512)(B), \
   3255                                            (__v16sf)(__m512)(C), \
   3256                                            (__mmask16)(U), (int)(R)); })
   3257 
   3258 
   3259 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3260 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   3261 {
   3262   return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
   3263                                                     (__v16sf) __B,
   3264                                                     (__v16sf) __C,
   3265                                                     (__mmask16) __U,
   3266                                                     _MM_FROUND_CUR_DIRECTION);
   3267 }
   3268 
   3269 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
   3270   (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \
   3271                                             (__v8df)(__m512d)(B), \
   3272                                             (__v8df)(__m512d)(C), \
   3273                                             (__mmask8)(U), (int)(R)); })
   3274 
   3275 
   3276 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
   3277   (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \
   3278                                              (__v8df)(__m512d)(B), \
   3279                                              (__v8df)(__m512d)(C), \
   3280                                              (__mmask8)(U), (int)(R)); })
   3281 
   3282 
   3283 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3284 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   3285 {
   3286   return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
   3287                                                      (__v8df) __B,
   3288                                                      (__v8df) __C,
   3289                                                      (__mmask8) __U,
   3290                                                      _MM_FROUND_CUR_DIRECTION);
   3291 }
   3292 
   3293 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3294 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   3295 {
   3296   return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
   3297                                                       (__v8df) __B,
   3298                                                       (__v8df) __C,
   3299                                                       (__mmask8) __U,
   3300                                                       _MM_FROUND_CUR_DIRECTION);
   3301 }
   3302 
   3303 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
   3304   (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \
   3305                                            (__v16sf)(__m512)(B), \
   3306                                            (__v16sf)(__m512)(C), \
   3307                                            (__mmask16)(U), (int)(R)); })
   3308 
   3309 
   3310 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
   3311   (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \
   3312                                             (__v16sf)(__m512)(B), \
   3313                                             (__v16sf)(__m512)(C), \
   3314                                             (__mmask16)(U), (int)(R)); })
   3315 
   3316 
   3317 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3318 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   3319 {
   3320   return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
   3321                                                     (__v16sf) __B,
   3322                                                     (__v16sf) __C,
   3323                                                     (__mmask16) __U,
   3324                                                     _MM_FROUND_CUR_DIRECTION);
   3325 }
   3326 
   3327 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3328 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   3329 {
   3330   return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
   3331                                                      (__v16sf) __B,
   3332                                                      (__v16sf) __C,
   3333                                                      (__mmask16) __U,
   3334                                                      _MM_FROUND_CUR_DIRECTION);
   3335 }
   3336 
   3337 
   3338 
   3339 /* Vector permutations */
   3340 
   3341 static __inline __m512i __DEFAULT_FN_ATTRS
   3342 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
   3343 {
   3344   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
   3345                                                        /* idx */ ,
   3346                                                        (__v16si) __A,
   3347                                                        (__v16si) __B,
   3348                                                        (__mmask16) -1);
   3349 }
   3350 
   3351 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3352 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
   3353                                 __m512i __I, __m512i __B)
   3354 {
   3355   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
   3356                                                         /* idx */ ,
   3357                                                         (__v16si) __A,
   3358                                                         (__v16si) __B,
   3359                                                         (__mmask16) __U);
   3360 }
   3361 
   3362 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3363 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
   3364                                  __m512i __I, __m512i __B)
   3365 {
   3366   return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
   3367                                                         /* idx */ ,
   3368                                                         (__v16si) __A,
   3369                                                         (__v16si) __B,
   3370                                                         (__mmask16) __U);
   3371 }
   3372 
   3373 static __inline __m512i __DEFAULT_FN_ATTRS
   3374 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
   3375 {
   3376   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
   3377                                                        /* idx */ ,
   3378                                                        (__v8di) __A,
   3379                                                        (__v8di) __B,
   3380                                                        (__mmask8) -1);
   3381 }
   3382 
   3383 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3384 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
   3385                                 __m512i __B)
   3386 {
   3387   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
   3388                                                        /* idx */ ,
   3389                                                        (__v8di) __A,
   3390                                                        (__v8di) __B,
   3391                                                        (__mmask8) __U);
   3392 }
   3393 
   3394 
   3395 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3396 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
   3397          __m512i __I, __m512i __B)
   3398 {
   3399   return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
   3400                                                         /* idx */ ,
   3401                                                         (__v8di) __A,
   3402                                                         (__v8di) __B,
   3403                                                         (__mmask8) __U);
   3404 }
   3405 
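/* Usage sketch (not part of the original header): permutex2var performs a
 * full two-source permute.  For the epi32 form each 32-bit index selects one
 * of 32 source elements: the low four bits pick the element and the next bit
 * chooses between the first operand (bit clear) and the second operand (bit
 * set), per the VPERMT2D definition.  Variable names are illustrative only.
 *
 *   __m512i idx = _mm512_set1_epi32(16);               // element 0 of `b`
 *   __m512i r   = _mm512_permutex2var_epi32(a, idx, b);
 */
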
   3406 #define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
   3407   (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \
   3408                                    (__v8di)(__m512i)(A), \
   3409                                    ((int)(I) & 0x7) + 0, \
   3410                                    ((int)(I) & 0x7) + 1, \
   3411                                    ((int)(I) & 0x7) + 2, \
   3412                                    ((int)(I) & 0x7) + 3, \
   3413                                    ((int)(I) & 0x7) + 4, \
   3414                                    ((int)(I) & 0x7) + 5, \
   3415                                    ((int)(I) & 0x7) + 6, \
   3416                                    ((int)(I) & 0x7) + 7); })
   3417 
   3418 #define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\
   3419   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
   3420                                  (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
   3421                                  (__v8di)(__m512i)(W)); })
   3422 
   3423 #define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\
   3424   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
   3425                                  (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
   3426                                  (__v8di)_mm512_setzero_si512()); })
   3427 
   3428 #define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
   3429   (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \
   3430                                    (__v16si)(__m512i)(A), \
   3431                                    ((int)(I) & 0xf) + 0, \
   3432                                    ((int)(I) & 0xf) + 1, \
   3433                                    ((int)(I) & 0xf) + 2, \
   3434                                    ((int)(I) & 0xf) + 3, \
   3435                                    ((int)(I) & 0xf) + 4, \
   3436                                    ((int)(I) & 0xf) + 5, \
   3437                                    ((int)(I) & 0xf) + 6, \
   3438                                    ((int)(I) & 0xf) + 7, \
   3439                                    ((int)(I) & 0xf) + 8, \
   3440                                    ((int)(I) & 0xf) + 9, \
   3441                                    ((int)(I) & 0xf) + 10, \
   3442                                    ((int)(I) & 0xf) + 11, \
   3443                                    ((int)(I) & 0xf) + 12, \
   3444                                    ((int)(I) & 0xf) + 13, \
   3445                                    ((int)(I) & 0xf) + 14, \
   3446                                    ((int)(I) & 0xf) + 15); })
   3447 
   3448 #define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\
   3449   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
   3450                                 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
   3451                                 (__v16si)(__m512i)(W)); })
   3452 
   3453 #define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\
   3454   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
   3455                                 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
   3456                                 (__v16si)_mm512_setzero_si512()); })
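
/* Usage sketch (not part of the original header): alignr treats the pair as
 * one 1024-bit value with the second operand in the low half, shifts right
 * by the immediate number of elements, and returns the low 512 bits, so it
 * can rotate elements across the two sources.  Variable names are
 * illustrative only.
 *
 *   __m512i r = _mm512_alignr_epi64(a, b, 3);
 *   // r = { b[3], b[4], ..., b[7], a[0], a[1], a[2] }
 */
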
   3457 /* Vector Extract */
   3458 
   3459 #define _mm512_extractf64x4_pd(A, I) __extension__ ({             \
   3460   (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A),          \
   3461                                    (__v8df)_mm512_undefined_pd(), \
   3462                                    ((I) & 1) ? 4 : 0,             \
   3463                                    ((I) & 1) ? 5 : 1,             \
   3464                                    ((I) & 1) ? 6 : 2,             \
   3465                                    ((I) & 1) ? 7 : 3); })
   3466 
   3467 #define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\
   3468   (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
   3469                                    (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
   3470                                    (__v4df)(W)); })
   3471 
   3472 #define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\
   3473   (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
   3474                                    (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
   3475                                    (__v4df)_mm256_setzero_pd()); })
   3476 
   3477 #define _mm512_extractf32x4_ps(A, I) __extension__ ({             \
   3478   (__m128)__builtin_shufflevector((__v16sf)(__m512)(A),           \
   3479                                   (__v16sf)_mm512_undefined_ps(), \
   3480                                   0 + ((I) & 0x3) * 4,            \
   3481                                   1 + ((I) & 0x3) * 4,            \
   3482                                   2 + ((I) & 0x3) * 4,            \
   3483                                   3 + ((I) & 0x3) * 4); })
   3484 
   3485 #define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\
   3486   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
   3487                                    (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
   3488                                    (__v4sf)(W)); })
   3489 
   3490 #define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\
   3491   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
   3492                                    (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
   3493                                    (__v4sf)_mm_setzero_ps()); })
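
         /* Illustrative usage sketch (not part of the original header): extracting a
          * 128-bit lane, optionally under a zeroing mask.  Helper names are
          * hypothetical.
          *
          *   #include <immintrin.h>
          *
          *   static inline __m128 third_lane(__m512 v) {
          *     return _mm512_extractf32x4_ps(v, 2);             // elements 8..11
          *   }
          *   static inline __m128 third_lane_partial(__m512 v) {
          *     return _mm512_maskz_extractf32x4_ps(0x5, v, 2);  // keep result lanes 0 and 2, zero 1 and 3
          *   }
          */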
   3494 
   3495 /* Vector Blend */
   3496 
   3497 static __inline __m512d __DEFAULT_FN_ATTRS
   3498 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
   3499 {
   3500   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
   3501                  (__v8df) __W,
   3502                  (__v8df) __A);
   3503 }
   3504 
   3505 static __inline __m512 __DEFAULT_FN_ATTRS
   3506 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
   3507 {
   3508   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
   3509                 (__v16sf) __W,
   3510                 (__v16sf) __A);
   3511 }
   3512 
   3513 static __inline __m512i __DEFAULT_FN_ATTRS
   3514 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
   3515 {
   3516   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
   3517                 (__v8di) __W,
   3518                 (__v8di) __A);
   3519 }
   3520 
   3521 static __inline __m512i __DEFAULT_FN_ATTRS
   3522 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
   3523 {
   3524   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
   3525                 (__v16si) __W,
   3526                 (__v16si) __A);
   3527 }
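
         /* Illustrative usage sketch (not part of the original header): blend returns
          * the second vector operand in the lanes whose mask bit is set and the first
          * operand elsewhere.  The helper name is hypothetical.
          *
          *   #include <immintrin.h>
          *
          *   static inline __m512d take_low_half_from_b(__m512d a, __m512d b) {
          *     return _mm512_mask_blend_pd((__mmask8)0x0F, a, b);  // lanes 0..3 from b, 4..7 from a
          *   }
          */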
   3528 
   3529 /* Compare */
   3530 
   3531 #define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
   3532   (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
   3533                                           (__v16sf)(__m512)(B), (int)(P), \
   3534                                           (__mmask16)-1, (int)(R)); })
   3535 
   3536 #define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
   3537   (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
   3538                                           (__v16sf)(__m512)(B), (int)(P), \
   3539                                           (__mmask16)(U), (int)(R)); })
   3540 
   3541 #define _mm512_cmp_ps_mask(A, B, P) \
   3542   _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
   3543 #define _mm512_mask_cmp_ps_mask(U, A, B, P) \
   3544   _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
   3545 
   3546 #define _mm512_cmpeq_ps_mask(A, B) \
   3547     _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
   3548 #define _mm512_mask_cmpeq_ps_mask(k, A, B) \
   3549     _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)
   3550 
   3551 #define _mm512_cmplt_ps_mask(A, B) \
   3552     _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
   3553 #define _mm512_mask_cmplt_ps_mask(k, A, B) \
   3554     _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)
   3555 
   3556 #define _mm512_cmple_ps_mask(A, B) \
   3557     _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
   3558 #define _mm512_mask_cmple_ps_mask(k, A, B) \
   3559     _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)
   3560 
   3561 #define _mm512_cmpunord_ps_mask(A, B) \
   3562     _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
   3563 #define _mm512_mask_cmpunord_ps_mask(k, A, B) \
   3564     _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)
   3565 
   3566 #define _mm512_cmpneq_ps_mask(A, B) \
   3567     _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
   3568 #define _mm512_mask_cmpneq_ps_mask(k, A, B) \
   3569     _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)
   3570 
   3571 #define _mm512_cmpnlt_ps_mask(A, B) \
   3572     _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
   3573 #define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
   3574     _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)
   3575 
   3576 #define _mm512_cmpnle_ps_mask(A, B) \
   3577     _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
   3578 #define _mm512_mask_cmpnle_ps_mask(k, A, B) \
   3579     _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)
   3580 
   3581 #define _mm512_cmpord_ps_mask(A, B) \
   3582     _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
   3583 #define _mm512_mask_cmpord_ps_mask(k, A, B) \
   3584     _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
   3585 
   3586 #define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
   3587   (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
   3588                                          (__v8df)(__m512d)(B), (int)(P), \
   3589                                          (__mmask8)-1, (int)(R)); })
   3590 
   3591 #define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
   3592   (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
   3593                                          (__v8df)(__m512d)(B), (int)(P), \
   3594                                          (__mmask8)(U), (int)(R)); })
   3595 
   3596 #define _mm512_cmp_pd_mask(A, B, P) \
   3597   _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
   3598 #define _mm512_mask_cmp_pd_mask(U, A, B, P) \
   3599   _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
   3600 
   3601 #define _mm512_cmpeq_pd_mask(A, B) \
   3602     _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
   3603 #define _mm512_mask_cmpeq_pd_mask(k, A, B) \
   3604     _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)
   3605 
   3606 #define _mm512_cmplt_pd_mask(A, B) \
   3607     _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
   3608 #define _mm512_mask_cmplt_pd_mask(k, A, B) \
   3609     _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)
   3610 
   3611 #define _mm512_cmple_pd_mask(A, B) \
   3612     _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
   3613 #define _mm512_mask_cmple_pd_mask(k, A, B) \
   3614     _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)
   3615 
   3616 #define _mm512_cmpunord_pd_mask(A, B) \
   3617     _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
   3618 #define _mm512_mask_cmpunord_pd_mask(k, A, B) \
   3619     _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)
   3620 
   3621 #define _mm512_cmpneq_pd_mask(A, B) \
   3622     _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
   3623 #define _mm512_mask_cmpneq_pd_mask(k, A, B) \
   3624     _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)
   3625 
   3626 #define _mm512_cmpnlt_pd_mask(A, B) \
   3627     _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
   3628 #define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
   3629     _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)
   3630 
   3631 #define _mm512_cmpnle_pd_mask(A, B) \
   3632     _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
   3633 #define _mm512_mask_cmpnle_pd_mask(k, A, B) \
   3634     _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)
   3635 
   3636 #define _mm512_cmpord_pd_mask(A, B) \
   3637     _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
   3638 #define _mm512_mask_cmpord_pd_mask(k, A, B) \
   3639     _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
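
         /* Illustrative usage sketch (not part of the original header): a comparison
          * yields a mask, which can then drive any masked operation, e.g. a blend.
          * The helper name is hypothetical.
          *
          *   #include <immintrin.h>
          *
          *   static inline __m512d clamp_from_below(__m512d v, __m512d lo) {
          *     __mmask8 too_small = _mm512_cmplt_pd_mask(v, lo);  // bit i set where v[i] < lo[i]
          *     return _mm512_mask_blend_pd(too_small, v, lo);     // replace those lanes with lo
          *   }
          */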
   3640 
   3641 /* Conversion */
   3642 
   3643 #define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
   3644   (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
   3645                                              (__v16si)_mm512_undefined_epi32(), \
   3646                                              (__mmask16)-1, (int)(R)); })
   3647 
   3648 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \
   3649   (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
   3650                                              (__v16si)(__m512i)(W), \
   3651                                              (__mmask16)(U), (int)(R)); })
   3652 
   3653 #define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \
   3654   (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
   3655                                              (__v16si)_mm512_setzero_si512(), \
   3656                                              (__mmask16)(U), (int)(R)); })
   3657 
   3658 
   3659 static __inline __m512i __DEFAULT_FN_ATTRS
   3660 _mm512_cvttps_epu32(__m512 __A)
   3661 {
   3662   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   3663                   (__v16si)
   3664                   _mm512_setzero_si512 (),
   3665                   (__mmask16) -1,
   3666                   _MM_FROUND_CUR_DIRECTION);
   3667 }
   3668 
   3669 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3670 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
   3671 {
   3672   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   3673                    (__v16si) __W,
   3674                    (__mmask16) __U,
   3675                    _MM_FROUND_CUR_DIRECTION);
   3676 }
   3677 
   3678 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3679 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
   3680 {
   3681   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   3682                    (__v16si) _mm512_setzero_si512 (),
   3683                    (__mmask16) __U,
   3684                    _MM_FROUND_CUR_DIRECTION);
   3685 }
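
         /* Illustrative usage sketch (not part of the original header): truncating
          * conversion to unsigned 32-bit integers, zeroing the lanes whose mask bit
          * is clear.  The helper name is hypothetical.
          *
          *   #include <immintrin.h>
          *
          *   static inline __m512i trunc_low_eight(__m512 v) {
          *     return _mm512_maskz_cvttps_epu32((__mmask16)0x00FF, v);  // lanes 8..15 are zeroed
          *   }
          */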
   3686 
   3687 #define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
   3688   (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
   3689                                           (__v16sf)_mm512_setzero_ps(), \
   3690                                           (__mmask16)-1, (int)(R)); })
   3691 
   3692 #define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \
   3693   (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
   3694                                           (__v16sf)(__m512)(W), \
   3695                                           (__mmask16)(U), (int)(R)); })
   3696 
   3697 #define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \
   3698   (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
   3699                                           (__v16sf)_mm512_setzero_ps(), \
   3700                                           (__mmask16)(U), (int)(R)); })
   3701 
   3702 #define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
   3703   (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
   3704                                            (__v16sf)_mm512_setzero_ps(), \
   3705                                            (__mmask16)-1, (int)(R)); })
   3706 
   3707 #define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \
   3708   (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
   3709                                            (__v16sf)(__m512)(W), \
   3710                                            (__mmask16)(U), (int)(R)); })
   3711 
   3712 #define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \
   3713   (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
   3714                                            (__v16sf)_mm512_setzero_ps(), \
   3715                                            (__mmask16)(U), (int)(R)); })
   3716 
   3717 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3718 _mm512_cvtepu32_ps (__m512i __A)
   3719 {
   3720   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
   3721                  (__v16sf) _mm512_undefined_ps (),
   3722                  (__mmask16) -1,
   3723                  _MM_FROUND_CUR_DIRECTION);
   3724 }
   3725 
   3726 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3727 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
   3728 {
   3729   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
   3730                  (__v16sf) __W,
   3731                  (__mmask16) __U,
   3732                  _MM_FROUND_CUR_DIRECTION);
   3733 }
   3734 
   3735 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3736 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
   3737 {
   3738   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
   3739                  (__v16sf) _mm512_setzero_ps (),
   3740                  (__mmask16) __U,
   3741                  _MM_FROUND_CUR_DIRECTION);
   3742 }
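
         /* Illustrative usage sketch (not part of the original header): the _round_
          * forms take an explicit rounding mode instead of the one currently in
          * MXCSR.  A static mode is combined with _MM_FROUND_NO_EXC (from
          * <smmintrin.h>, pulled in by <immintrin.h>), per the Intel intrinsics
          * guide.  Helper names are hypothetical.
          *
          *   #include <immintrin.h>
          *
          *   static inline __m512 u32_to_float(__m512i v) {
          *     return _mm512_cvtepu32_ps(v);  // rounds per MXCSR
          *   }
          *   static inline __m512 u32_to_float_rz(__m512i v) {
          *     return _mm512_cvt_roundepu32_ps(v, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
          *   }
          */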
   3743 
   3744 static __inline __m512d __DEFAULT_FN_ATTRS
   3745 _mm512_cvtepi32_pd(__m256i __A)
   3746 {
   3747   return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
   3748 }
   3749 
   3750 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3751 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
   3752 {
   3753   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   3754                                               (__v8df)_mm512_cvtepi32_pd(__A),
   3755                                               (__v8df)__W);
   3756 }
   3757 
   3758 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3759 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
   3760 {
   3761   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   3762                                               (__v8df)_mm512_cvtepi32_pd(__A),
   3763                                               (__v8df)_mm512_setzero_pd());
   3764 }
   3765 
   3766 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3767 _mm512_cvtepi32lo_pd(__m512i __A)
   3768 {
   3769   return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
   3770 }
   3771 
   3772 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3773 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
   3774 {
   3775   return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
   3776 }
   3777 
   3778 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3779 _mm512_cvtepi32_ps (__m512i __A)
   3780 {
   3781   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
   3782                 (__v16sf) _mm512_undefined_ps (),
   3783                 (__mmask16) -1,
   3784                 _MM_FROUND_CUR_DIRECTION);
   3785 }
   3786 
   3787 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3788 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
   3789 {
   3790   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
   3791                 (__v16sf) __W,
   3792                 (__mmask16) __U,
   3793                 _MM_FROUND_CUR_DIRECTION);
   3794 }
   3795 
   3796 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3797 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
   3798 {
   3799   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
   3800                 (__v16sf) _mm512_setzero_ps (),
   3801                 (__mmask16) __U,
   3802                 _MM_FROUND_CUR_DIRECTION);
   3803 }
   3804 
   3805 static __inline __m512d __DEFAULT_FN_ATTRS
   3806 _mm512_cvtepu32_pd(__m256i __A)
   3807 {
   3808   return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
   3809 }
   3810 
   3811 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3812 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
   3813 {
   3814   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   3815                                               (__v8df)_mm512_cvtepu32_pd(__A),
   3816                                               (__v8df)__W);
   3817 }
   3818 
   3819 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3820 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
   3821 {
   3822   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   3823                                               (__v8df)_mm512_cvtepu32_pd(__A),
   3824                                               (__v8df)_mm512_setzero_pd());
   3825 }
   3826 
   3827 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3828 _mm512_cvtepu32lo_pd(__m512i __A)
   3829 {
   3830   return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
   3831 }
   3832 
   3833 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3834 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
   3835 {
   3836   return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
   3837 }
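
         /* Illustrative usage sketch (not part of the original header): the epi32/
          * epu32 to pd conversions widen eight 32-bit integers (a __m256i) to eight
          * doubles; the *lo variants take the low half of a __m512i instead.  The
          * helper name is hypothetical.
          *
          *   #include <immintrin.h>
          *
          *   static inline __m512d widen_low_half(__m512i v) {
          *     return _mm512_cvtepu32lo_pd(v);  // == _mm512_cvtepu32_pd(_mm512_castsi512_si256(v))
          *   }
          */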
   3838 
   3839 #define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
   3840   (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
   3841                                           (__v8sf)_mm256_setzero_ps(), \
   3842                                           (__mmask8)-1, (int)(R)); })
   3843 
   3844 #define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \
   3845   (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
   3846                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
   3847                                           (int)(R)); })
   3848 
   3849 #define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \
   3850   (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
   3851                                           (__v8sf)_mm256_setzero_ps(), \
   3852                                           (__mmask8)(U), (int)(R)); })
   3853 
   3854 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3855 _mm512_cvtpd_ps (__m512d __A)
   3856 {
   3857   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
   3858                 (__v8sf) _mm256_undefined_ps (),
   3859                 (__mmask8) -1,
   3860                 _MM_FROUND_CUR_DIRECTION);
   3861 }
   3862 
   3863 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3864 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
   3865 {
   3866   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
   3867                 (__v8sf) __W,
   3868                 (__mmask8) __U,
   3869                 _MM_FROUND_CUR_DIRECTION);
   3870 }
   3871 
   3872 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3873 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
   3874 {
   3875   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
   3876                 (__v8sf) _mm256_setzero_ps (),
   3877                 (__mmask8) __U,
   3878                 _MM_FROUND_CUR_DIRECTION);
   3879 }
   3880 
   3881 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3882 _mm512_cvtpd_pslo (__m512d __A)
   3883 {
   3884   return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
   3885                 (__v8sf) _mm256_setzero_ps (),
   3886                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
   3887 }
   3888 
   3889 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3890 _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
   3891 {
   3892   return (__m512) __builtin_shufflevector (
   3893                 (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
   3894                                                __U, __A),
   3895                 (__v8sf) _mm256_setzero_ps (),
   3896                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
   3897 }
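
         /* Illustrative usage sketch (not part of the original header): narrowing
          * eight doubles to eight floats.  _mm512_cvtpd_pslo performs the same
          * conversion but widens the result back to a __m512 with the upper eight
          * float lanes zeroed.  The helper name is hypothetical.
          *
          *   #include <immintrin.h>
          *
          *   static inline __m256 narrow_pd(__m512d v) {
          *     return _mm512_cvtpd_ps(v);
          *   }
          */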
   3898 
   3899 #define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \
   3900   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   3901                                             (__v16hi)_mm256_undefined_si256(), \
   3902                                             (__mmask16)-1); })
   3903 
    3904 #define _mm512_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
    3905   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
    3906                                             (__v16hi)(__m256i)(W), \
    3907                                             (__mmask16)(U)); })
   3908 
    3909 #define _mm512_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
    3910   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
    3911                                             (__v16hi)_mm256_setzero_si256(), \
    3912                                             (__mmask16)(U)); })
   3913 
   3914 #define _mm512_cvtps_ph(A, I) __extension__ ({ \
   3915   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   3916                                             (__v16hi)_mm256_setzero_si256(), \
   3917                                             (__mmask16)-1); })
   3918 
    3919 #define _mm512_mask_cvtps_ph(W, U, A, I) __extension__ ({ \
    3920   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
    3921                                             (__v16hi)(__m256i)(W), \
    3922                                             (__mmask16)(U)); })
   3923 
    3924 #define _mm512_maskz_cvtps_ph(U, A, I) __extension__({\
    3925   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
    3926                                             (__v16hi)_mm256_setzero_si256(), \
    3927                                             (__mmask16)(U)); })
   3928 
   3929 #define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \
   3930   (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
   3931                                            (__v16sf)_mm512_undefined_ps(), \
   3932                                            (__mmask16)-1, (int)(R)); })
   3933 
   3934 #define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \
   3935   (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
   3936                                            (__v16sf)(__m512)(W), \
   3937                                            (__mmask16)(U), (int)(R)); })
   3938 
   3939 #define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \
   3940   (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
   3941                                            (__v16sf)_mm512_setzero_ps(), \
   3942                                            (__mmask16)(U), (int)(R)); })
   3943 
   3944 
   3945 static  __inline __m512 __DEFAULT_FN_ATTRS
   3946 _mm512_cvtph_ps(__m256i __A)
   3947 {
   3948   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
   3949                 (__v16sf)
   3950                 _mm512_setzero_ps (),
   3951                 (__mmask16) -1,
   3952                 _MM_FROUND_CUR_DIRECTION);
   3953 }
   3954 
   3955 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3956 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
   3957 {
   3958   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
   3959                  (__v16sf) __W,
   3960                  (__mmask16) __U,
   3961                  _MM_FROUND_CUR_DIRECTION);
   3962 }
   3963 
   3964 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3965 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
   3966 {
   3967   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
   3968                  (__v16sf) _mm512_setzero_ps (),
   3969                  (__mmask16) __U,
   3970                  _MM_FROUND_CUR_DIRECTION);
   3971 }
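
         /* Illustrative usage sketch (not part of the original header): a float ->
          * half -> float round trip.  The conversion to half takes an imm8 rounding
          * selector; _MM_FROUND_CUR_DIRECTION defers to MXCSR.  The helper name is
          * hypothetical.
          *
          *   #include <immintrin.h>
          *
          *   static inline __m512 round_trip_f16(__m512 v) {
          *     __m256i half = _mm512_cvtps_ph(v, _MM_FROUND_CUR_DIRECTION);
          *     return _mm512_cvtph_ps(half);
          *   }
          */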
   3972 
   3973 #define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
   3974   (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
   3975                                             (__v8si)_mm256_setzero_si256(), \
   3976                                             (__mmask8)-1, (int)(R)); })
   3977 
   3978 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \
   3979   (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
   3980                                             (__v8si)(__m256i)(W), \
   3981                                             (__mmask8)(U), (int)(R)); })
   3982 
   3983 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \
   3984   (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
   3985                                             (__v8si)_mm256_setzero_si256(), \
   3986                                             (__mmask8)(U), (int)(R)); })
   3987 
   3988 static __inline __m256i __DEFAULT_FN_ATTRS
   3989 _mm512_cvttpd_epi32(__m512d __a)
   3990 {
   3991   return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
   3992                                                    (__v8si)_mm256_setzero_si256(),
   3993                                                    (__mmask8) -1,
   3994                                                     _MM_FROUND_CUR_DIRECTION);
   3995 }
   3996 
   3997 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3998 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
   3999 {
   4000   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
   4001                   (__v8si) __W,
   4002                   (__mmask8) __U,
   4003                   _MM_FROUND_CUR_DIRECTION);
   4004 }
   4005 
   4006 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4007 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
   4008 {
   4009   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
   4010                   (__v8si) _mm256_setzero_si256 (),
   4011                   (__mmask8) __U,
   4012                   _MM_FROUND_CUR_DIRECTION);
   4013 }
   4014 
   4015 #define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
   4016   (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
   4017                                             (__v16si)_mm512_setzero_si512(), \
   4018                                             (__mmask16)-1, (int)(R)); })
   4019 
   4020 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \
   4021   (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
   4022                                             (__v16si)(__m512i)(W), \
   4023                                             (__mmask16)(U), (int)(R)); })
   4024 
   4025 #define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \
   4026   (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
   4027                                             (__v16si)_mm512_setzero_si512(), \
   4028                                             (__mmask16)(U), (int)(R)); })
   4029 
   4030 static __inline __m512i __DEFAULT_FN_ATTRS
   4031 _mm512_cvttps_epi32(__m512 __a)
   4032 {
   4033   return (__m512i)
   4034     __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
   4035                                      (__v16si) _mm512_setzero_si512 (),
   4036                                      (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
   4037 }
   4038 
   4039 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4040 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
   4041 {
   4042   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
   4043                   (__v16si) __W,
   4044                   (__mmask16) __U,
   4045                   _MM_FROUND_CUR_DIRECTION);
   4046 }
   4047 
   4048 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4049 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
   4050 {
   4051   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
   4052                   (__v16si) _mm512_setzero_si512 (),
   4053                   (__mmask16) __U,
   4054                   _MM_FROUND_CUR_DIRECTION);
   4055 }
   4056 
   4057 #define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
   4058   (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
   4059                                            (__v16si)_mm512_setzero_si512(), \
   4060                                            (__mmask16)-1, (int)(R)); })
   4061 
   4062 #define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \
   4063   (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
   4064                                            (__v16si)(__m512i)(W), \
   4065                                            (__mmask16)(U), (int)(R)); })
   4066 
   4067 #define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \
   4068   (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
   4069                                            (__v16si)_mm512_setzero_si512(), \
   4070                                            (__mmask16)(U), (int)(R)); })
   4071 
   4072 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4073 _mm512_cvtps_epi32 (__m512 __A)
   4074 {
   4075   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
   4076                  (__v16si) _mm512_undefined_epi32 (),
   4077                  (__mmask16) -1,
   4078                  _MM_FROUND_CUR_DIRECTION);
   4079 }
   4080 
   4081 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4082 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
   4083 {
   4084   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
   4085                  (__v16si) __W,
   4086                  (__mmask16) __U,
   4087                  _MM_FROUND_CUR_DIRECTION);
   4088 }
   4089 
   4090 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4091 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
   4092 {
   4093   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
   4094                  (__v16si)
   4095                  _mm512_setzero_si512 (),
   4096                  (__mmask16) __U,
   4097                  _MM_FROUND_CUR_DIRECTION);
   4098 }
   4099 
   4100 #define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
   4101   (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
   4102                                            (__v8si)_mm256_setzero_si256(), \
   4103                                            (__mmask8)-1, (int)(R)); })
   4104 
   4105 #define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \
   4106   (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
   4107                                            (__v8si)(__m256i)(W), \
   4108                                            (__mmask8)(U), (int)(R)); })
   4109 
   4110 #define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \
   4111   (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
   4112                                            (__v8si)_mm256_setzero_si256(), \
   4113                                            (__mmask8)(U), (int)(R)); })
   4114 
   4115 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4116 _mm512_cvtpd_epi32 (__m512d __A)
   4117 {
   4118   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
   4119                  (__v8si)
   4120                  _mm256_undefined_si256 (),
   4121                  (__mmask8) -1,
   4122                  _MM_FROUND_CUR_DIRECTION);
   4123 }
   4124 
   4125 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4126 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
   4127 {
   4128   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
   4129                  (__v8si) __W,
   4130                  (__mmask8) __U,
   4131                  _MM_FROUND_CUR_DIRECTION);
   4132 }
   4133 
   4134 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4135 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
   4136 {
   4137   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
   4138                  (__v8si)
   4139                  _mm256_setzero_si256 (),
   4140                  (__mmask8) __U,
   4141                  _MM_FROUND_CUR_DIRECTION);
   4142 }
   4143 
   4144 #define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
   4145   (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
   4146                                             (__v16si)_mm512_setzero_si512(), \
   4147                                             (__mmask16)-1, (int)(R)); })
   4148 
   4149 #define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \
   4150   (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
   4151                                             (__v16si)(__m512i)(W), \
   4152                                             (__mmask16)(U), (int)(R)); })
   4153 
   4154 #define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \
   4155   (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
   4156                                             (__v16si)_mm512_setzero_si512(), \
   4157                                             (__mmask16)(U), (int)(R)); })
   4158 
   4159 static __inline__ __m512i __DEFAULT_FN_ATTRS
    4160 _mm512_cvtps_epu32 (__m512 __A)
    4161 {
    4162   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
    4163                   (__v16si)
    4164                   _mm512_undefined_epi32 (),
    4165                   (__mmask16) -1,
    4166                   _MM_FROUND_CUR_DIRECTION);
   4167 }
   4168 
   4169 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4170 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
   4171 {
   4172   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
   4173                   (__v16si) __W,
   4174                   (__mmask16) __U,
   4175                   _MM_FROUND_CUR_DIRECTION);
   4176 }
   4177 
   4178 static __inline__ __m512i __DEFAULT_FN_ATTRS
    4179 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
   4180 {
   4181   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
   4182                   (__v16si)
   4183                   _mm512_setzero_si512 (),
    4184                   (__mmask16) __U,
   4185                   _MM_FROUND_CUR_DIRECTION);
   4186 }
   4187 
   4188 #define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
   4189   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
   4190                                             (__v8si)_mm256_setzero_si256(), \
   4191                                             (__mmask8)-1, (int)(R)); })
   4192 
   4193 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \
   4194   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
    4195                                             (__v8si)(__m256i)(W), \
   4196                                             (__mmask8)(U), (int)(R)); })
   4197 
   4198 #define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \
   4199   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
   4200                                             (__v8si)_mm256_setzero_si256(), \
   4201                                             (__mmask8)(U), (int)(R)); })
   4202 
   4203 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4204 _mm512_cvtpd_epu32 (__m512d __A)
   4205 {
   4206   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
   4207                   (__v8si)
   4208                   _mm256_undefined_si256 (),
   4209                   (__mmask8) -1,
   4210                   _MM_FROUND_CUR_DIRECTION);
   4211 }
   4212 
   4213 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4214 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
   4215 {
   4216   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
   4217                   (__v8si) __W,
   4218                   (__mmask8) __U,
   4219                   _MM_FROUND_CUR_DIRECTION);
   4220 }
   4221 
   4222 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4223 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
   4224 {
   4225   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
   4226                   (__v8si)
   4227                   _mm256_setzero_si256 (),
   4228                   (__mmask8) __U,
   4229                   _MM_FROUND_CUR_DIRECTION);
   4230 }
   4231 
   4232 static __inline__ double __DEFAULT_FN_ATTRS
   4233 _mm512_cvtsd_f64(__m512d __a)
   4234 {
   4235   return __a[0];
   4236 }
   4237 
   4238 static __inline__ float __DEFAULT_FN_ATTRS
   4239 _mm512_cvtss_f32(__m512 __a)
   4240 {
   4241   return __a[0];
   4242 }
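
         /* Illustrative usage sketch (not part of the original header): reading the
          * lowest lane back as a scalar, e.g. after a reduction.  The helper name is
          * hypothetical.
          *
          *   #include <immintrin.h>
          *
          *   static inline double first_lane(__m512d v) {
          *     return _mm512_cvtsd_f64(v);  // element 0
          *   }
          */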
   4243 
   4244 /* Unpack and Interleave */
   4245 
   4246 static __inline __m512d __DEFAULT_FN_ATTRS
   4247 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
   4248 {
   4249   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
   4250                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
   4251 }
   4252 
   4253 static __inline__ __m512d __DEFAULT_FN_ATTRS
   4254 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   4255 {
   4256   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   4257                                            (__v8df)_mm512_unpackhi_pd(__A, __B),
   4258                                            (__v8df)__W);
   4259 }
   4260 
   4261 static __inline__ __m512d __DEFAULT_FN_ATTRS
   4262 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
   4263 {
   4264   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   4265                                            (__v8df)_mm512_unpackhi_pd(__A, __B),
   4266                                            (__v8df)_mm512_setzero_pd());
   4267 }
   4268 
   4269 static __inline __m512d __DEFAULT_FN_ATTRS
   4270 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
   4271 {
   4272   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
   4273                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
   4274 }
   4275 
   4276 static __inline__ __m512d __DEFAULT_FN_ATTRS
   4277 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   4278 {
   4279   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   4280                                            (__v8df)_mm512_unpacklo_pd(__A, __B),
   4281                                            (__v8df)__W);
   4282 }
   4283 
   4284 static __inline__ __m512d __DEFAULT_FN_ATTRS
   4285 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
   4286 {
   4287   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   4288                                            (__v8df)_mm512_unpacklo_pd(__A, __B),
   4289                                            (__v8df)_mm512_setzero_pd());
   4290 }
   4291 
   4292 static __inline __m512 __DEFAULT_FN_ATTRS
   4293 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
   4294 {
   4295   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
   4296                                          2,    18,    3,    19,
   4297                                          2+4,  18+4,  3+4,  19+4,
   4298                                          2+8,  18+8,  3+8,  19+8,
   4299                                          2+12, 18+12, 3+12, 19+12);
   4300 }
   4301 
   4302 static __inline__ __m512 __DEFAULT_FN_ATTRS
   4303 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   4304 {
   4305   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
   4306                                           (__v16sf)_mm512_unpackhi_ps(__A, __B),
   4307                                           (__v16sf)__W);
   4308 }
   4309 
   4310 static __inline__ __m512 __DEFAULT_FN_ATTRS
   4311 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
   4312 {
   4313   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
   4314                                           (__v16sf)_mm512_unpackhi_ps(__A, __B),
   4315                                           (__v16sf)_mm512_setzero_ps());
   4316 }
   4317 
   4318 static __inline __m512 __DEFAULT_FN_ATTRS
   4319 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
   4320 {
   4321   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
   4322                                          0,    16,    1,    17,
   4323                                          0+4,  16+4,  1+4,  17+4,
   4324                                          0+8,  16+8,  1+8,  17+8,
   4325                                          0+12, 16+12, 1+12, 17+12);
   4326 }
   4327 
   4328 static __inline__ __m512 __DEFAULT_FN_ATTRS
   4329 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   4330 {
   4331   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
   4332                                           (__v16sf)_mm512_unpacklo_ps(__A, __B),
   4333                                           (__v16sf)__W);
   4334 }
   4335 
   4336 static __inline__ __m512 __DEFAULT_FN_ATTRS
   4337 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
   4338 {
   4339   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
   4340                                           (__v16sf)_mm512_unpacklo_ps(__A, __B),
   4341                                           (__v16sf)_mm512_setzero_ps());
   4342 }
   4343 
   4344 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4345 _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
   4346 {
   4347   return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
   4348                                           2,    18,    3,    19,
   4349                                           2+4,  18+4,  3+4,  19+4,
   4350                                           2+8,  18+8,  3+8,  19+8,
   4351                                           2+12, 18+12, 3+12, 19+12);
   4352 }
   4353 
   4354 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4355 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   4356 {
   4357   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
   4358                                        (__v16si)_mm512_unpackhi_epi32(__A, __B),
   4359                                        (__v16si)__W);
   4360 }
   4361 
   4362 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4363 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
   4364 {
   4365   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
   4366                                        (__v16si)_mm512_unpackhi_epi32(__A, __B),
   4367                                        (__v16si)_mm512_setzero_si512());
   4368 }
   4369 
   4370 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4371 _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
   4372 {
   4373   return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
   4374                                           0,    16,    1,    17,
   4375                                           0+4,  16+4,  1+4,  17+4,
   4376                                           0+8,  16+8,  1+8,  17+8,
   4377                                           0+12, 16+12, 1+12, 17+12);
   4378 }
   4379 
   4380 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4381 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   4382 {
   4383   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
   4384                                        (__v16si)_mm512_unpacklo_epi32(__A, __B),
   4385                                        (__v16si)__W);
   4386 }
   4387 
   4388 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4389 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
   4390 {
   4391   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
   4392                                        (__v16si)_mm512_unpacklo_epi32(__A, __B),
   4393                                        (__v16si)_mm512_setzero_si512());
   4394 }
   4395 
   4396 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4397 _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
   4398 {
   4399   return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
   4400                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
   4401 }
   4402 
   4403 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4404 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   4405 {
   4406   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
   4407                                         (__v8di)_mm512_unpackhi_epi64(__A, __B),
   4408                                         (__v8di)__W);
   4409 }
   4410 
   4411 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4412 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
   4413 {
   4414   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
   4415                                         (__v8di)_mm512_unpackhi_epi64(__A, __B),
   4416                                         (__v8di)_mm512_setzero_si512());
   4417 }
   4418 
   4419 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4420 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
   4421 {
   4422   return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
   4423                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
   4424 }
   4425 
   4426 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4427 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   4428 {
   4429   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
   4430                                         (__v8di)_mm512_unpacklo_epi64(__A, __B),
   4431                                         (__v8di)__W);
   4432 }
   4433 
   4434 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4435 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
   4436 {
   4437   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
   4438                                         (__v8di)_mm512_unpacklo_epi64(__A, __B),
   4439                                         (__v8di)_mm512_setzero_si512());
   4440 }
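
         /* Illustrative usage sketch (not part of the original header): unpacklo/hi
          * interleave the two sources within each 128-bit lane, the usual building
          * block for in-register transposes.  The helper name is hypothetical.
          *
          *   #include <immintrin.h>
          *
          *   static inline void interleave_pd(__m512d a, __m512d b,
          *                                    __m512d *lo, __m512d *hi) {
          *     *lo = _mm512_unpacklo_pd(a, b);  // a0,b0, a2,b2, a4,b4, a6,b6
          *     *hi = _mm512_unpackhi_pd(a, b);  // a1,b1, a3,b3, a5,b5, a7,b7
          *   }
          */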
   4441 
   4442 /* Bit Test */
   4443 
   4444 static __inline __mmask16 __DEFAULT_FN_ATTRS
   4445 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
   4446 {
   4447   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
   4448             (__v16si) __B,
   4449             (__mmask16) -1);
   4450 }
   4451 
   4452 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4453 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
   4454 {
   4455   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
   4456                  (__v16si) __B, __U);
   4457 }
   4458 
   4459 static __inline __mmask8 __DEFAULT_FN_ATTRS
   4460 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
   4461 {
   4462   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
   4463                  (__v8di) __B,
   4464                  (__mmask8) -1);
   4465 }
   4466 
   4467 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4468 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
   4469 {
   4470   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
   4471 }
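
         /* Illustrative usage sketch (not part of the original header): vptestm sets
          * a mask bit where (__A & __B) is non-zero, so testing a vector against
          * itself flags its non-zero lanes.  The helper name is hypothetical.
          *
          *   #include <immintrin.h>
          *
          *   static inline __mmask16 nonzero_lanes(__m512i v) {
          *     return _mm512_test_epi32_mask(v, v);
          *   }
          */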
   4472 
   4473 
   4474 /* SIMD load ops */
   4475 
   4476 static __inline __m512i __DEFAULT_FN_ATTRS
   4477 _mm512_loadu_si512 (void const *__P)
   4478 {
   4479   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
   4480                   (__v16si)
   4481                   _mm512_setzero_si512 (),
   4482                   (__mmask16) -1);
   4483 }
   4484 
   4485 static __inline __m512i __DEFAULT_FN_ATTRS
   4486 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
   4487 {
   4488   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
   4489                   (__v16si) __W,
   4490                   (__mmask16) __U);
   4491 }
   4492 
   4493 
   4494 static __inline __m512i __DEFAULT_FN_ATTRS
   4495 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
   4496 {
   4497   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
   4498                                                      (__v16si)
   4499                                                      _mm512_setzero_si512 (),
   4500                                                      (__mmask16) __U);
   4501 }
   4502 
   4503 static __inline __m512i __DEFAULT_FN_ATTRS
   4504 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
   4505 {
   4506   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
   4507                   (__v8di) __W,
   4508                   (__mmask8) __U);
   4509 }
   4510 
   4511 static __inline __m512i __DEFAULT_FN_ATTRS
   4512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
   4513 {
   4514   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
   4515                                                      (__v8di)
   4516                                                      _mm512_setzero_si512 (),
   4517                                                      (__mmask8) __U);
   4518 }
   4519 
   4520 static __inline __m512 __DEFAULT_FN_ATTRS
   4521 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
   4522 {
   4523   return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
   4524                    (__v16sf) __W,
   4525                    (__mmask16) __U);
   4526 }
   4527 
   4528 static __inline __m512 __DEFAULT_FN_ATTRS
   4529 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
   4530 {
   4531   return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
   4532                                                   (__v16sf)
   4533                                                   _mm512_setzero_ps (),
   4534                                                   (__mmask16) __U);
   4535 }
   4536 
   4537 static __inline __m512d __DEFAULT_FN_ATTRS
   4538 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
   4539 {
   4540   return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
   4541                 (__v8df) __W,
   4542                 (__mmask8) __U);
   4543 }
   4544 
   4545 static __inline __m512d __DEFAULT_FN_ATTRS
   4546 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
   4547 {
   4548   return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
   4549                                                    (__v8df)
   4550                                                    _mm512_setzero_pd (),
   4551                                                    (__mmask8) __U);
   4552 }
   4553 
   4554 static __inline __m512d __DEFAULT_FN_ATTRS
   4555 _mm512_loadu_pd(void const *__p)
   4556 {
   4557   struct __loadu_pd {
   4558     __m512d __v;
   4559   } __attribute__((__packed__, __may_alias__));
   4560   return ((struct __loadu_pd*)__p)->__v;
   4561 }
   4562 
   4563 static __inline __m512 __DEFAULT_FN_ATTRS
   4564 _mm512_loadu_ps(void const *__p)
   4565 {
   4566   struct __loadu_ps {
   4567     __m512 __v;
   4568   } __attribute__((__packed__, __may_alias__));
   4569   return ((struct __loadu_ps*)__p)->__v;
   4570 }
   4571 
   4572 static __inline __m512 __DEFAULT_FN_ATTRS
   4573 _mm512_load_ps(void const *__p)
   4574 {
   4575   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
   4576                                                   (__v16sf)
   4577                                                   _mm512_setzero_ps (),
   4578                                                   (__mmask16) -1);
   4579 }
   4580 
   4581 static __inline __m512 __DEFAULT_FN_ATTRS
   4582 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
   4583 {
   4584   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
   4585                    (__v16sf) __W,
   4586                    (__mmask16) __U);
   4587 }
   4588 
   4589 static __inline __m512 __DEFAULT_FN_ATTRS
   4590 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
   4591 {
   4592   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
   4593                                                   (__v16sf)
   4594                                                   _mm512_setzero_ps (),
   4595                                                   (__mmask16) __U);
   4596 }
   4597 
   4598 static __inline __m512d __DEFAULT_FN_ATTRS
   4599 _mm512_load_pd(void const *__p)
   4600 {
   4601   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
   4602                                                    (__v8df)
   4603                                                    _mm512_setzero_pd (),
   4604                                                    (__mmask8) -1);
   4605 }
   4606 
   4607 static __inline __m512d __DEFAULT_FN_ATTRS
   4608 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
   4609 {
   4610   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
   4611                           (__v8df) __W,
   4612                           (__mmask8) __U);
   4613 }
   4614 
   4615 static __inline __m512d __DEFAULT_FN_ATTRS
   4616 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
   4617 {
   4618   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
   4619                                                    (__v8df)
   4620                                                    _mm512_setzero_pd (),
   4621                                                    (__mmask8) __U);
   4622 }
   4623 
   4624 static __inline __m512i __DEFAULT_FN_ATTRS
   4625 _mm512_load_si512 (void const *__P)
   4626 {
   4627   return *(__m512i *) __P;
   4628 }
   4629 
   4630 static __inline __m512i __DEFAULT_FN_ATTRS
   4631 _mm512_load_epi32 (void const *__P)
   4632 {
   4633   return *(__m512i *) __P;
   4634 }
   4635 
   4636 static __inline __m512i __DEFAULT_FN_ATTRS
   4637 _mm512_load_epi64 (void const *__P)
   4638 {
   4639   return *(__m512i *) __P;
   4640 }
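
         /* Illustrative usage sketch (not part of the original header): masked loads
          * suppress faults on masked-out elements, so they can read a short tail of
          * an array without running past its end.  The helper name is hypothetical.
          *
          *   #include <immintrin.h>
          *
          *   static inline __m512 load_tail(const float *p, unsigned n) {  // n < 16
          *     __mmask16 k = (__mmask16)((1u << n) - 1u);
          *     return _mm512_maskz_loadu_ps(k, p);  // lanes >= n are zeroed, not read
          *   }
          */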
   4641 
   4642 /* SIMD store ops */
   4643 
   4644 static __inline void __DEFAULT_FN_ATTRS
   4645 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
   4646 {
   4647   __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
   4648                                      (__mmask8) __U);
   4649 }
   4650 
   4651 static __inline void __DEFAULT_FN_ATTRS
   4652 _mm512_storeu_si512 (void *__P, __m512i __A)
   4653 {
   4654   __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
   4655             (__mmask16) -1);
   4656 }
   4657 
   4658 static __inline void __DEFAULT_FN_ATTRS
   4659 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
   4660 {
   4661   __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
   4662                                      (__mmask16) __U);
   4663 }
   4664 
   4665 static __inline void __DEFAULT_FN_ATTRS
   4666 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
   4667 {
   4668   __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
   4669 }
   4670 
   4671 static __inline void __DEFAULT_FN_ATTRS
   4672 _mm512_storeu_pd(void *__P, __m512d __A)
   4673 {
   4674   __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1);
   4675 }
   4676 
   4677 static __inline void __DEFAULT_FN_ATTRS
   4678 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
   4679 {
   4680   __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
   4681                                    (__mmask16) __U);
   4682 }
   4683 
   4684 static __inline void __DEFAULT_FN_ATTRS
   4685 _mm512_storeu_ps(void *__P, __m512 __A)
   4686 {
   4687   __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1);
   4688 }
   4689 
   4690 static __inline void __DEFAULT_FN_ATTRS
   4691 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
   4692 {
   4693   __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
   4694 }
   4695 
   4696 static __inline void __DEFAULT_FN_ATTRS
   4697 _mm512_store_pd(void *__P, __m512d __A)
   4698 {
   4699   *(__m512d*)__P = __A;
   4700 }
   4701 
   4702 static __inline void __DEFAULT_FN_ATTRS
   4703 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
   4704 {
   4705   __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
   4706                                    (__mmask16) __U);
   4707 }
   4708 
   4709 static __inline void __DEFAULT_FN_ATTRS
   4710 _mm512_store_ps(void *__P, __m512 __A)
   4711 {
   4712   *(__m512*)__P = __A;
   4713 }
   4714 
   4715 static __inline void __DEFAULT_FN_ATTRS
   4716 _mm512_store_si512 (void *__P, __m512i __A)
   4717 {
   4718   *(__m512i *) __P = __A;
   4719 }
   4720 
   4721 static __inline void __DEFAULT_FN_ATTRS
   4722 _mm512_store_epi32 (void *__P, __m512i __A)
   4723 {
   4724   *(__m512i *) __P = __A;
   4725 }
   4726 
   4727 static __inline void __DEFAULT_FN_ATTRS
   4728 _mm512_store_epi64 (void *__P, __m512i __A)
   4729 {
   4730   *(__m512i *) __P = __A;
   4731 }
   4732 
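/* Usage sketch (illustrative only): a masked store writes only the selected
   lanes and leaves the other memory locations untouched, so it can handle a
   loop remainder without a scalar tail.  `dst`, `vec`, and `n` below are
   hypothetical user-side names:

     __mmask16 tail = (__mmask16)((1u << n) - 1);   // n <= 16 ints remaining
     _mm512_mask_storeu_epi32(dst, tail, vec);
*/
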
   4733 /* Mask ops */
   4734 
   4735 static __inline __mmask16 __DEFAULT_FN_ATTRS
   4736 _mm512_knot(__mmask16 __M)
   4737 {
   4738   return __builtin_ia32_knothi(__M);
   4739 }
   4740 
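/* Usage sketch (illustrative only): _mm512_knot complements all 16 mask bits,
   which is handy for selecting the elements a comparison (declared below) did
   NOT match:

     __mmask16 eq  = _mm512_cmpeq_epi32_mask(a, b);
     __mmask16 neq = _mm512_knot(eq);      // same lanes as cmpneq on a, b
*/
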
   4741 /* Integer compare */
   4742 
   4743 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4744 _mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
   4745   return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
   4746                                                    (__mmask16)-1);
   4747 }
   4748 
   4749 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4750 _mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4751   return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
   4752                                                    __u);
   4753 }
   4754 
   4755 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4756 _mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
   4757   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
   4758                                                  (__mmask16)-1);
   4759 }
   4760 
   4761 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4762 _mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4763   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
   4764                                                  __u);
   4765 }
   4766 
   4767 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4768 _mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4769   return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
   4770                                                   __u);
   4771 }
   4772 
   4773 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4774 _mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
   4775   return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
   4776                                                   (__mmask8)-1);
   4777 }
   4778 
   4779 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4780 _mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
   4781   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
   4782                                                 (__mmask8)-1);
   4783 }
   4784 
   4785 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4786 _mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4787   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
   4788                                                 __u);
   4789 }
   4790 
   4791 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4792 _mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
   4793   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
   4794                                                 (__mmask16)-1);
   4795 }
   4796 
   4797 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4798 _mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4799   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
   4800                                                 __u);
   4801 }
   4802 
   4803 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4804 _mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
   4805   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
   4806                                                  (__mmask16)-1);
   4807 }
   4808 
   4809 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4810 _mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4811   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
   4812                                                  __u);
   4813 }
   4814 
   4815 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4816 _mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
   4817   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
   4818                                                (__mmask8)-1);
   4819 }
   4820 
   4821 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4822 _mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4823   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
   4824                                                __u);
   4825 }
   4826 
   4827 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4828 _mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
   4829   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
   4830                                                 (__mmask8)-1);
   4831 }
   4832 
   4833 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4834 _mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4835   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
   4836                                                 __u);
   4837 }
   4838 
   4839 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4840 _mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
   4841   return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
   4842                                                    (__mmask16)-1);
   4843 }
   4844 
   4845 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4846 _mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4847   return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
   4848                                                    __u);
   4849 }
   4850 
   4851 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4852 _mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
   4853   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
   4854                                                  (__mmask16)-1);
   4855 }
   4856 
   4857 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4858 _mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4859   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
   4860                                                  __u);
   4861 }
   4862 
   4863 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4864 _mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4865   return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
   4866                                                   __u);
   4867 }
   4868 
   4869 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4870 _mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
   4871   return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
   4872                                                   (__mmask8)-1);
   4873 }
   4874 
   4875 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4876 _mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
   4877   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
   4878                                                 (__mmask8)-1);
   4879 }
   4880 
   4881 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4882 _mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4883   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
   4884                                                 __u);
   4885 }
   4886 
   4887 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4888 _mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
   4889   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
   4890                                                 (__mmask16)-1);
   4891 }
   4892 
   4893 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4894 _mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4895   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
   4896                                                 __u);
   4897 }
   4898 
   4899 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4900 _mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
   4901   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
   4902                                                  (__mmask16)-1);
   4903 }
   4904 
   4905 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4906 _mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4907   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
   4908                                                  __u);
   4909 }
   4910 
   4911 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4912 _mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
   4913   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
   4914                                                (__mmask8)-1);
   4915 }
   4916 
   4917 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4918 _mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4919   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
   4920                                                __u);
   4921 }
   4922 
   4923 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4924 _mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
   4925   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
   4926                                                 (__mmask8)-1);
   4927 }
   4928 
   4929 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4930 _mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4931   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
   4932                                                 __u);
   4933 }
   4934 
   4935 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4936 _mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
   4937   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
   4938                                                 (__mmask16)-1);
   4939 }
   4940 
   4941 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4942 _mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4943   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
   4944                                                 __u);
   4945 }
   4946 
   4947 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4948 _mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
   4949   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
   4950                                                  (__mmask16)-1);
   4951 }
   4952 
   4953 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4954 _mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4955   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
   4956                                                  __u);
   4957 }
   4958 
   4959 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4960 _mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
   4961   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
   4962                                                (__mmask8)-1);
   4963 }
   4964 
   4965 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4966 _mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4967   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
   4968                                                __u);
   4969 }
   4970 
   4971 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4972 _mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
   4973   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
   4974                                                 (__mmask8)-1);
   4975 }
   4976 
   4977 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4978 _mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4979   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
   4980                                                 __u);
   4981 }
   4982 
   4983 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4984 _mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
   4985   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
   4986                                                 (__mmask16)-1);
   4987 }
   4988 
   4989 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4990 _mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4991   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
   4992                                                 __u);
   4993 }
   4994 
   4995 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4996 _mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
   4997   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
   4998                                                  (__mmask16)-1);
   4999 }
   5000 
   5001 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   5002 _mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   5003   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
   5004                                                  __u);
   5005 }
   5006 
   5007 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   5008 _mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
   5009   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
   5010                                                (__mmask8)-1);
   5011 }
   5012 
   5013 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   5014 _mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   5015   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
   5016                                                __u);
   5017 }
   5018 
   5019 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   5020 _mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
   5021   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
   5022                                                 (__mmask8)-1);
   5023 }
   5024 
   5025 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   5026 _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   5027   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
   5028                                                 __u);
   5029 }
   5030 
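/* Usage sketch (illustrative only): the compare intrinsics above return a bit
   mask rather than a vector, so counting matching lanes is just a popcount on
   the mask.  `v` and `limit` are hypothetical user vectors:

     __mmask16 m     = _mm512_cmplt_epu32_mask(v, limit);  // v[i] < limit[i]
     int       below = __builtin_popcount((unsigned)m);    // matching lanes
*/
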
   5031 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5032 _mm512_cvtepi8_epi32(__m128i __A)
   5033 {
   5034   /* This function always performs a signed extension, but __v16qi is a vector
   5035      of plain char, whose signedness is target-dependent, so use __v16qs. */
   5036   return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
   5037 }
   5038 
   5039 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5040 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
   5041 {
   5042   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   5043                                              (__v16si)_mm512_cvtepi8_epi32(__A),
   5044                                              (__v16si)__W);
   5045 }
   5046 
   5047 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5048 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
   5049 {
   5050   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   5051                                              (__v16si)_mm512_cvtepi8_epi32(__A),
   5052                                              (__v16si)_mm512_setzero_si512());
   5053 }
   5054 
   5055 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5056 _mm512_cvtepi8_epi64(__m128i __A)
   5057 {
   5058   /* This function always performs a signed extension, but __v16qi is a vector
   5059      of plain char, whose signedness is target-dependent, so use __v16qs. */
   5060   return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
   5061 }
   5062 
   5063 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5064 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
   5065 {
   5066   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5067                                              (__v8di)_mm512_cvtepi8_epi64(__A),
   5068                                              (__v8di)__W);
   5069 }
   5070 
   5071 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5072 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
   5073 {
   5074   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5075                                              (__v8di)_mm512_cvtepi8_epi64(__A),
   5076                                              (__v8di)_mm512_setzero_si512 ());
   5077 }
   5078 
   5079 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5080 _mm512_cvtepi32_epi64(__m256i __X)
   5081 {
   5082   return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
   5083 }
   5084 
   5085 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5086 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
   5087 {
   5088   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5089                                              (__v8di)_mm512_cvtepi32_epi64(__X),
   5090                                              (__v8di)__W);
   5091 }
   5092 
   5093 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5094 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
   5095 {
   5096   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5097                                              (__v8di)_mm512_cvtepi32_epi64(__X),
   5098                                              (__v8di)_mm512_setzero_si512());
   5099 }
   5100 
   5101 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5102 _mm512_cvtepi16_epi32(__m256i __A)
   5103 {
   5104   return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
   5105 }
   5106 
   5107 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5108 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
   5109 {
   5110   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   5111                                             (__v16si)_mm512_cvtepi16_epi32(__A),
   5112                                             (__v16si)__W);
   5113 }
   5114 
   5115 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5116 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
   5117 {
   5118   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   5119                                             (__v16si)_mm512_cvtepi16_epi32(__A),
   5120                                             (__v16si)_mm512_setzero_si512 ());
   5121 }
   5122 
   5123 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5124 _mm512_cvtepi16_epi64(__m128i __A)
   5125 {
   5126   return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
   5127 }
   5128 
   5129 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5130 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
   5131 {
   5132   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5133                                              (__v8di)_mm512_cvtepi16_epi64(__A),
   5134                                              (__v8di)__W);
   5135 }
   5136 
   5137 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5138 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
   5139 {
   5140   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5141                                              (__v8di)_mm512_cvtepi16_epi64(__A),
   5142                                              (__v8di)_mm512_setzero_si512());
   5143 }
   5144 
   5145 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5146 _mm512_cvtepu8_epi32(__m128i __A)
   5147 {
   5148   return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
   5149 }
   5150 
   5151 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5152 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
   5153 {
   5154   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   5155                                              (__v16si)_mm512_cvtepu8_epi32(__A),
   5156                                              (__v16si)__W);
   5157 }
   5158 
   5159 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5160 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
   5161 {
   5162   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   5163                                              (__v16si)_mm512_cvtepu8_epi32(__A),
   5164                                              (__v16si)_mm512_setzero_si512());
   5165 }
   5166 
   5167 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5168 _mm512_cvtepu8_epi64(__m128i __A)
   5169 {
   5170   return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
   5171 }
   5172 
   5173 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5174 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
   5175 {
   5176   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5177                                              (__v8di)_mm512_cvtepu8_epi64(__A),
   5178                                              (__v8di)__W);
   5179 }
   5180 
   5181 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5182 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
   5183 {
   5184   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5185                                              (__v8di)_mm512_cvtepu8_epi64(__A),
   5186                                              (__v8di)_mm512_setzero_si512());
   5187 }
   5188 
   5189 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5190 _mm512_cvtepu32_epi64(__m256i __X)
   5191 {
   5192   return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
   5193 }
   5194 
   5195 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5196 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
   5197 {
   5198   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5199                                              (__v8di)_mm512_cvtepu32_epi64(__X),
   5200                                              (__v8di)__W);
   5201 }
   5202 
   5203 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5204 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
   5205 {
   5206   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5207                                              (__v8di)_mm512_cvtepu32_epi64(__X),
   5208                                              (__v8di)_mm512_setzero_si512());
   5209 }
   5210 
   5211 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5212 _mm512_cvtepu16_epi32(__m256i __A)
   5213 {
   5214   return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
   5215 }
   5216 
   5217 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5218 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
   5219 {
   5220   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   5221                                             (__v16si)_mm512_cvtepu16_epi32(__A),
   5222                                             (__v16si)__W);
   5223 }
   5224 
   5225 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5226 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
   5227 {
   5228   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   5229                                             (__v16si)_mm512_cvtepu16_epi32(__A),
   5230                                             (__v16si)_mm512_setzero_si512());
   5231 }
   5232 
   5233 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5234 _mm512_cvtepu16_epi64(__m128i __A)
   5235 {
   5236   return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
   5237 }
   5238 
   5239 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5240 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
   5241 {
   5242   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5243                                              (__v8di)_mm512_cvtepu16_epi64(__A),
   5244                                              (__v8di)__W);
   5245 }
   5246 
   5247 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5248 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
   5249 {
   5250   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5251                                              (__v8di)_mm512_cvtepu16_epi64(__A),
   5252                                              (__v8di)_mm512_setzero_si512());
   5253 }
   5254 
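/* Usage sketch (illustrative only): the sign-extending (cvtepi) and
   zero-extending (cvtepu) conversions above widen the low elements of a
   narrower vector:

     __m128i bytes  = _mm_set1_epi8(-1);
     __m512i as_i32 = _mm512_cvtepi8_epi32(bytes);   // every lane is -1
     __m512i as_u32 = _mm512_cvtepu8_epi32(bytes);   // every lane is 255
*/
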
   5255 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5256 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
   5257 {
   5258   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
   5259               (__v16si) __B,
   5260               (__v16si)
   5261               _mm512_setzero_si512 (),
   5262               (__mmask16) -1);
   5263 }
   5264 
   5265 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5266 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   5267 {
   5268   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
   5269               (__v16si) __B,
   5270               (__v16si) __W,
   5271               (__mmask16) __U);
   5272 }
   5273 
   5274 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5275 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
   5276 {
   5277   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
   5278               (__v16si) __B,
   5279               (__v16si)
   5280               _mm512_setzero_si512 (),
   5281               (__mmask16) __U);
   5282 }
   5283 
   5284 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5285 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
   5286 {
   5287   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
   5288               (__v8di) __B,
   5289               (__v8di)
   5290               _mm512_setzero_si512 (),
   5291               (__mmask8) -1);
   5292 }
   5293 
   5294 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5295 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   5296 {
   5297   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
   5298               (__v8di) __B,
   5299               (__v8di) __W,
   5300               (__mmask8) __U);
   5301 }
   5302 
   5303 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5304 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
   5305 {
   5306   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
   5307               (__v8di) __B,
   5308               (__v8di)
   5309               _mm512_setzero_si512 (),
   5310               (__mmask8) __U);
   5311 }
   5312 
   5313 
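/* Usage sketch (illustrative only): rorv/rolv rotate each lane right/left by a
   per-lane count taken from the second operand (counts are effectively reduced
   modulo the lane width):

     __m512i counts  = _mm512_set1_epi32(8);
     __m512i rotated = _mm512_rorv_epi32(x, counts);  // rotate each dword by 8
*/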
   5314 
   5315 #define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
   5316   (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
   5317                                          (__v16si)(__m512i)(b), (int)(p), \
   5318                                          (__mmask16)-1); })
   5319 
   5320 #define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
   5321   (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
   5322                                           (__v16si)(__m512i)(b), (int)(p), \
   5323                                           (__mmask16)-1); })
   5324 
   5325 #define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
   5326   (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
   5327                                         (__v8di)(__m512i)(b), (int)(p), \
   5328                                         (__mmask8)-1); })
   5329 
   5330 #define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
   5331   (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
   5332                                          (__v8di)(__m512i)(b), (int)(p), \
   5333                                          (__mmask8)-1); })
   5334 
   5335 #define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
   5336   (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
   5337                                          (__v16si)(__m512i)(b), (int)(p), \
   5338                                          (__mmask16)(m)); })
   5339 
   5340 #define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
   5341   (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
   5342                                           (__v16si)(__m512i)(b), (int)(p), \
   5343                                           (__mmask16)(m)); })
   5344 
   5345 #define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
   5346   (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
   5347                                         (__v8di)(__m512i)(b), (int)(p), \
   5348                                         (__mmask8)(m)); })
   5349 
   5350 #define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
   5351   (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
   5352                                          (__v8di)(__m512i)(b), (int)(p), \
   5353                                          (__mmask8)(m)); })
   5354 
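/* Usage sketch (illustrative only): the _mm512_cmp_*_mask macros take one of
   the _MM_CMPINT_* predicates as the immediate, so a single entry point covers
   every ordering:

     __mmask16 ge = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_GE);
     // same result as _mm512_cmpge_epi32_mask(a, b)
*/
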
   5355 #define _mm512_rol_epi32(a, b) __extension__ ({ \
   5356   (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
   5357                                         (__v16si)_mm512_setzero_si512(), \
   5358                                         (__mmask16)-1); })
   5359 
   5360 #define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \
   5361   (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
   5362                                         (__v16si)(__m512i)(W), \
   5363                                         (__mmask16)(U)); })
   5364 
   5365 #define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \
   5366   (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
   5367                                         (__v16si)_mm512_setzero_si512(), \
   5368                                         (__mmask16)(U)); })
   5369 
   5370 #define _mm512_rol_epi64(a, b) __extension__ ({ \
   5371   (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
   5372                                         (__v8di)_mm512_setzero_si512(), \
   5373                                         (__mmask8)-1); })
   5374 
   5375 #define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \
   5376   (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
   5377                                         (__v8di)(__m512i)(W), (__mmask8)(U)); })
   5378 
   5379 #define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \
   5380   (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
   5381                                         (__v8di)_mm512_setzero_si512(), \
   5382                                         (__mmask8)(U)); })

   5383 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5384 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
   5385 {
   5386   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
   5387               (__v16si) __B,
   5388               (__v16si)
   5389               _mm512_setzero_si512 (),
   5390               (__mmask16) -1);
   5391 }
   5392 
   5393 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5394 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   5395 {
   5396   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
   5397               (__v16si) __B,
   5398               (__v16si) __W,
   5399               (__mmask16) __U);
   5400 }
   5401 
   5402 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5403 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
   5404 {
   5405   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
   5406               (__v16si) __B,
   5407               (__v16si)
   5408               _mm512_setzero_si512 (),
   5409               (__mmask16) __U);
   5410 }
   5411 
   5412 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5413 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
   5414 {
   5415   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
   5416               (__v8di) __B,
   5417               (__v8di)
   5418               _mm512_setzero_si512 (),
   5419               (__mmask8) -1);
   5420 }
   5421 
   5422 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5423 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   5424 {
   5425   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
   5426               (__v8di) __B,
   5427               (__v8di) __W,
   5428               (__mmask8) __U);
   5429 }
   5430 
   5431 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5432 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
   5433 {
   5434   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
   5435               (__v8di) __B,
   5436               (__v8di)
   5437               _mm512_setzero_si512 (),
   5438               (__mmask8) __U);
   5439 }
   5440 
   5441 #define _mm512_ror_epi32(A, B) __extension__ ({ \
   5442   (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
   5443                                         (__v16si)_mm512_setzero_si512(), \
   5444                                         (__mmask16)-1); })
   5445 
   5446 #define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \
   5447   (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
   5448                                         (__v16si)(__m512i)(W), \
   5449                                         (__mmask16)(U)); })
   5450 
   5451 #define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \
   5452   (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
   5453                                         (__v16si)_mm512_setzero_si512(), \
   5454                                         (__mmask16)(U)); })
   5455 
   5456 #define _mm512_ror_epi64(A, B) __extension__ ({ \
   5457   (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
   5458                                         (__v8di)_mm512_setzero_si512(), \
   5459                                         (__mmask8)-1); })
   5460 
   5461 #define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \
   5462   (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
   5463                                         (__v8di)(__m512i)(W), (__mmask8)(U)); })
   5464 
   5465 #define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \
   5466   (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
   5467                                         (__v8di)_mm512_setzero_si512(), \
   5468                                         (__mmask8)(U)); })
   5469 
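/* Usage sketch (illustrative only): rol/ror take an immediate rotate count and
   are macros because the underlying builtins require a compile-time constant.
   Rotating left by N is the same as rotating right by (lane width - N):

     __m512i r1 = _mm512_rol_epi32(x, 8);
     __m512i r2 = _mm512_ror_epi32(x, 24);   // identical result
*/
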
   5470 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5471 _mm512_slli_epi32(__m512i __A, int __B)
   5472 {
   5473   return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
   5474 }
   5475 
   5476 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5477 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
   5478 {
   5479   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   5480                                          (__v16si)_mm512_slli_epi32(__A, __B),
   5481                                          (__v16si)__W);
   5482 }
   5483 
   5484 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5485 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
   5486   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   5487                                          (__v16si)_mm512_slli_epi32(__A, __B),
   5488                                          (__v16si)_mm512_setzero_si512());
   5489 }
   5490 
   5491 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5492 _mm512_slli_epi64(__m512i __A, int __B)
   5493 {
   5494   return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
   5495 }
   5496 
   5497 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5498 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
   5499 {
   5500   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5501                                           (__v8di)_mm512_slli_epi64(__A, __B),
   5502                                           (__v8di)__W);
   5503 }
   5504 
   5505 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5506 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
   5507 {
   5508   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5509                                           (__v8di)_mm512_slli_epi64(__A, __B),
   5510                                           (__v8di)_mm512_setzero_si512());
   5511 }
   5512 
   5513 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5514 _mm512_srli_epi32(__m512i __A, int __B)
   5515 {
   5516   return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
   5517 }
   5518 
   5519 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5520 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
   5521 {
   5522   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   5523                                          (__v16si)_mm512_srli_epi32(__A, __B),
   5524                                          (__v16si)__W);
   5525 }
   5526 
   5527 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5528 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
   5529   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   5530                                          (__v16si)_mm512_srli_epi32(__A, __B),
   5531                                          (__v16si)_mm512_setzero_si512());
   5532 }
   5533 
   5534 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5535 _mm512_srli_epi64(__m512i __A, int __B)
   5536 {
   5537   return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
   5538 }
   5539 
   5540 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5541 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
   5542 {
   5543   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5544                                           (__v8di)_mm512_srli_epi64(__A, __B),
   5545                                           (__v8di)__W);
   5546 }
   5547 
   5548 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5549 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
   5550 {
   5551   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   5552                                           (__v8di)_mm512_srli_epi64(__A, __B),
   5553                                           (__v8di)_mm512_setzero_si512());
   5554 }
   5555 
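/* Usage sketch (illustrative only): slli/srli shift every lane by the same
   immediate count; counts greater than or equal to the lane width yield zero.
   For example, isolating the top byte of each dword:

     __m512i top = _mm512_srli_epi32(x, 24);
*/
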
   5556 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5557 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
   5558 {
   5559   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
   5560               (__v16si) __W,
   5561               (__mmask16) __U);
   5562 }
   5563 
   5564 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5565 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
   5566 {
   5567   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
   5568               (__v16si)
   5569               _mm512_setzero_si512 (),
   5570               (__mmask16) __U);
   5571 }
   5572 
   5573 static __inline__ void __DEFAULT_FN_ATTRS
   5574 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
   5575 {
   5576   __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
   5577           (__mmask16) __U);
   5578 }
   5579 
   5580 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5581 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
   5582 {
   5583   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
   5584                  (__v16si) __A,
   5585                  (__v16si) __W);
   5586 }
   5587 
   5588 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5589 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
   5590 {
   5591   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
   5592                  (__v16si) __A,
   5593                  (__v16si) _mm512_setzero_si512 ());
   5594 }
   5595 
   5596 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5597 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
   5598 {
   5599   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
   5600                  (__v8di) __A,
   5601                  (__v8di) __W);
   5602 }
   5603 
   5604 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5605 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
   5606 {
   5607   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
   5608                  (__v8di) __A,
   5609                  (__v8di) _mm512_setzero_si512 ());
   5610 }
   5611 
   5612 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5613 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
   5614 {
   5615   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
   5616               (__v8di) __W,
   5617               (__mmask8) __U);
   5618 }
   5619 
   5620 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5621 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
   5622 {
   5623   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
   5624               (__v8di)
   5625               _mm512_setzero_si512 (),
   5626               (__mmask8) __U);
   5627 }
   5628 
   5629 static __inline__ void __DEFAULT_FN_ATTRS
   5630 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
   5631 {
   5632   __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
   5633           (__mmask8) __U);
   5634 }
   5635 
   5636 static __inline__ __m512d __DEFAULT_FN_ATTRS
   5637 _mm512_movedup_pd (__m512d __A)
   5638 {
   5639   return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
   5640                                           0, 0, 2, 2, 4, 4, 6, 6);
   5641 }
   5642 
   5643 static __inline__ __m512d __DEFAULT_FN_ATTRS
   5644 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
   5645 {
   5646   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
   5647                                               (__v8df)_mm512_movedup_pd(__A),
   5648                                               (__v8df)__W);
   5649 }
   5650 
   5651 static __inline__ __m512d __DEFAULT_FN_ATTRS
   5652 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
   5653 {
   5654   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
   5655                                               (__v8df)_mm512_movedup_pd(__A),
   5656                                               (__v8df)_mm512_setzero_pd());
   5657 }
   5658 
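/* Usage sketch (illustrative only): mask_mov is effectively a blend driven by
   a mask register, which is how most masked forms above are modelled:

     __m512i blended = _mm512_mask_mov_epi32(fallback, m, value);
     // lane i = (bit i of m) ? value[i] : fallback[i]
*/
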
   5659 #define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \
   5660   (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
   5661                                              (__v8df)(__m512d)(B), \
   5662                                              (__v8di)(__m512i)(C), (int)(imm), \
   5663                                              (__mmask8)-1, (int)(R)); })
   5664 
   5665 #define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \
   5666   (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
   5667                                              (__v8df)(__m512d)(B), \
   5668                                              (__v8di)(__m512i)(C), (int)(imm), \
   5669                                              (__mmask8)(U), (int)(R)); })
   5670 
   5671 #define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \
   5672   (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
   5673                                              (__v8df)(__m512d)(B), \
   5674                                              (__v8di)(__m512i)(C), (int)(imm), \
   5675                                              (__mmask8)-1, \
   5676                                              _MM_FROUND_CUR_DIRECTION); })
   5677 
   5678 #define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
   5679   (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
   5680                                              (__v8df)(__m512d)(B), \
   5681                                              (__v8di)(__m512i)(C), (int)(imm), \
   5682                                              (__mmask8)(U), \
   5683                                              _MM_FROUND_CUR_DIRECTION); })
   5684 
   5685 #define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \
   5686   (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
   5687                                               (__v8df)(__m512d)(B), \
   5688                                               (__v8di)(__m512i)(C), \
   5689                                               (int)(imm), (__mmask8)(U), \
   5690                                               (int)(R)); })
   5691 
   5692 #define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
   5693   (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
   5694                                               (__v8df)(__m512d)(B), \
   5695                                               (__v8di)(__m512i)(C), \
   5696                                               (int)(imm), (__mmask8)(U), \
   5697                                               _MM_FROUND_CUR_DIRECTION); })
   5698 
   5699 #define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \
   5700   (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
   5701                                             (__v16sf)(__m512)(B), \
   5702                                             (__v16si)(__m512i)(C), (int)(imm), \
   5703                                             (__mmask16)-1, (int)(R)); })
   5704 
   5705 #define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \
   5706   (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
   5707                                             (__v16sf)(__m512)(B), \
   5708                                             (__v16si)(__m512i)(C), (int)(imm), \
   5709                                             (__mmask16)(U), (int)(R)); })
   5710 
   5711 #define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \
   5712   (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
   5713                                             (__v16sf)(__m512)(B), \
   5714                                             (__v16si)(__m512i)(C), (int)(imm), \
   5715                                             (__mmask16)-1, \
   5716                                             _MM_FROUND_CUR_DIRECTION); })
   5717 
   5718 #define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
   5719   (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
   5720                                             (__v16sf)(__m512)(B), \
   5721                                             (__v16si)(__m512i)(C), (int)(imm), \
   5722                                             (__mmask16)(U), \
   5723                                             _MM_FROUND_CUR_DIRECTION); })
   5724 
   5725 #define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
   5726   (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
   5727                                              (__v16sf)(__m512)(B), \
   5728                                              (__v16si)(__m512i)(C), \
   5729                                              (int)(imm), (__mmask16)(U), \
   5730                                              (int)(R)); })
   5731 
   5732 #define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
   5733   (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
   5734                                              (__v16sf)(__m512)(B), \
   5735                                              (__v16si)(__m512i)(C), \
   5736                                              (int)(imm), (__mmask16)(U), \
   5737                                              _MM_FROUND_CUR_DIRECTION); })
   5738 
   5739 #define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \
   5740   (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
   5741                                           (__v2df)(__m128d)(B), \
   5742                                           (__v2di)(__m128i)(C), (int)(imm), \
   5743                                           (__mmask8)-1, (int)(R)); })
   5744 
   5745 #define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \
   5746   (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
   5747                                           (__v2df)(__m128d)(B), \
   5748                                           (__v2di)(__m128i)(C), (int)(imm), \
   5749                                           (__mmask8)(U), (int)(R)); })
   5750 
   5751 #define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \
   5752   (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
   5753                                           (__v2df)(__m128d)(B), \
   5754                                           (__v2di)(__m128i)(C), (int)(imm), \
   5755                                           (__mmask8)-1, \
   5756                                           _MM_FROUND_CUR_DIRECTION); })
   5757 
   5758 #define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \
   5759   (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
   5760                                           (__v2df)(__m128d)(B), \
   5761                                           (__v2di)(__m128i)(C), (int)(imm), \
   5762                                           (__mmask8)(U), \
   5763                                           _MM_FROUND_CUR_DIRECTION); })
   5764 
   5765 #define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \
   5766   (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
   5767                                            (__v2df)(__m128d)(B), \
   5768                                            (__v2di)(__m128i)(C), (int)(imm), \
   5769                                            (__mmask8)(U), (int)(R)); })
   5770 
   5771 #define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \
   5772   (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
   5773                                            (__v2df)(__m128d)(B), \
   5774                                            (__v2di)(__m128i)(C), (int)(imm), \
   5775                                            (__mmask8)(U), \
   5776                                            _MM_FROUND_CUR_DIRECTION); })
   5777 
   5778 #define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \
   5779   (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
   5780                                          (__v4sf)(__m128)(B), \
   5781                                          (__v4si)(__m128i)(C), (int)(imm), \
   5782                                          (__mmask8)-1, (int)(R)); })
   5783 
   5784 #define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \
   5785   (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
   5786                                          (__v4sf)(__m128)(B), \
   5787                                          (__v4si)(__m128i)(C), (int)(imm), \
   5788                                          (__mmask8)(U), (int)(R)); })
   5789 
   5790 #define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \
   5791   (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
   5792                                          (__v4sf)(__m128)(B), \
   5793                                          (__v4si)(__m128i)(C), (int)(imm), \
   5794                                          (__mmask8)-1, \
   5795                                          _MM_FROUND_CUR_DIRECTION); })
   5796 
   5797 #define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \
   5798   (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
   5799                                          (__v4sf)(__m128)(B), \
   5800                                          (__v4si)(__m128i)(C), (int)(imm), \
   5801                                          (__mmask8)(U), \
   5802                                          _MM_FROUND_CUR_DIRECTION); })
   5803 
   5804 #define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \
   5805   (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
   5806                                           (__v4sf)(__m128)(B), \
   5807                                           (__v4si)(__m128i)(C), (int)(imm), \
   5808                                           (__mmask8)(U), (int)(R)); })
   5809 
   5810 #define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \
   5811   (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
   5812                                           (__v4sf)(__m128)(B), \
   5813                                           (__v4si)(__m128i)(C), (int)(imm), \
   5814                                           (__mmask8)(U), \
   5815                                           _MM_FROUND_CUR_DIRECTION); })
   5816 
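/* Usage sketch (illustrative only, simplified): the fixupimm family classifies
   each element of the second source (zero, NaN, infinity, ...) and replaces it
   according to a per-lane lookup table supplied in the integer operand; the
   8-bit immediate selects which conditions report floating-point exceptions.
   See the Intel SDM entry for VFIXUPIMMPD/PS for the table encoding.  Call
   shape only:

     __m512d fixed = _mm512_fixupimm_pd(a, b, table, 0);
*/
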
   5817 #define _mm_getexp_round_sd(A, B, R) __extension__ ({ \
   5818   (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
   5819                                                  (__v2df)(__m128d)(B), \
   5820                                                  (__v2df)_mm_setzero_pd(), \
   5821                                                  (__mmask8)-1, (int)(R)); })
   5822 
   5823 
   5824 static __inline__ __m128d __DEFAULT_FN_ATTRS
   5825 _mm_getexp_sd (__m128d __A, __m128d __B)
   5826 {
   5827   return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
   5828                  (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
   5829 }
   5830 
   5831 static __inline__ __m128d __DEFAULT_FN_ATTRS
   5832 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   5833 {
   5834  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
   5835           (__v2df) __B,
   5836           (__v2df) __W,
   5837           (__mmask8) __U,
   5838           _MM_FROUND_CUR_DIRECTION);
   5839 }
   5840 
   5841 #define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({\
   5842   (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
   5843                                                  (__v2df)(__m128d)(B), \
   5844                                                  (__v2df)(__m128d)(W), \
   5845                                                  (__mmask8)(U), (int)(R)); })
   5846 
   5847 static __inline__ __m128d __DEFAULT_FN_ATTRS
   5848 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
   5849 {
   5850  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
   5851           (__v2df) __B,
   5852           (__v2df) _mm_setzero_pd (),
   5853           (__mmask8) __U,
   5854           _MM_FROUND_CUR_DIRECTION);
   5855 }
   5856 
   5857 #define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({\
   5858   (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
   5859                                                  (__v2df)(__m128d)(B), \
   5860                                                  (__v2df)_mm_setzero_pd(), \
   5861                                                  (__mmask8)(U), (int)(R)); })
   5862 
   5863 #define _mm_getexp_round_ss(A, B, R) __extension__ ({ \
   5864   (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
   5865                                                 (__v4sf)(__m128)(B), \
   5866                                                 (__v4sf)_mm_setzero_ps(), \
   5867                                                 (__mmask8)-1, (int)(R)); })
   5868 
   5869 static __inline__ __m128 __DEFAULT_FN_ATTRS
   5870 _mm_getexp_ss (__m128 __A, __m128 __B)
   5871 {
   5872   return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
   5873                 (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
   5874 }
   5875 
   5876 static __inline__ __m128 __DEFAULT_FN_ATTRS
   5877 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   5878 {
   5879  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
   5880           (__v4sf) __B,
   5881           (__v4sf) __W,
   5882           (__mmask8) __U,
   5883           _MM_FROUND_CUR_DIRECTION);
   5884 }
   5885 
   5886 #define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({\
   5887   (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
   5888                                                 (__v4sf)(__m128)(B), \
   5889                                                 (__v4sf)(__m128)(W), \
   5890                                                 (__mmask8)(U), (int)(R)); })
   5891 
   5892 static __inline__ __m128 __DEFAULT_FN_ATTRS
   5893 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
   5894 {
   5895  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
   5896           (__v4sf) __B,
   5897           (__v4sf) _mm_setzero_ps (),
   5898           (__mmask8) __U,
   5899           _MM_FROUND_CUR_DIRECTION);
   5900 }
   5901 
   5902 #define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({\
   5903   (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
   5904                                                 (__v4sf)(__m128)(B), \
   5905                                                 (__v4sf)_mm_setzero_ps(), \
   5906                                                 (__mmask8)(U), (int)(R)); })
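
        /* A minimal usage sketch (not part of the Intel API surface): the getexp
         * family returns floor(log2(|x|)) of the low source element as a
         * floating-point value, e.g.
         *
         *   __m128d e = _mm_getexp_sd(a, _mm_set_sd(8.0));   // low lane -> 3.0
         *
         * where _mm_set_sd() comes from <emmintrin.h>, reachable via <immintrin.h>.
         */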
   5907 
   5908 #define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \
   5909   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   5910                                                (__v2df)(__m128d)(B), \
   5911                                                (int)(((D)<<2) | (C)), \
   5912                                                (__v2df)_mm_setzero_pd(), \
   5913                                                (__mmask8)-1, (int)(R)); })
   5914 
   5915 #define _mm_getmant_sd(A, B, C, D)  __extension__ ({ \
   5916   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   5917                                                (__v2df)(__m128d)(B), \
   5918                                                (int)(((D)<<2) | (C)), \
   5919                                                (__v2df)_mm_setzero_pd(), \
   5920                                                (__mmask8)-1, \
   5921                                                _MM_FROUND_CUR_DIRECTION); })
   5922 
   5923 #define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({\
   5924   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   5925                                                (__v2df)(__m128d)(B), \
   5926                                                (int)(((D)<<2) | (C)), \
   5927                                                (__v2df)(__m128d)(W), \
   5928                                                (__mmask8)(U), \
   5929                                                _MM_FROUND_CUR_DIRECTION); })
   5930 
   5931 #define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) __extension__ ({\
   5932   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   5933                                                (__v2df)(__m128d)(B), \
   5934                                                (int)(((D)<<2) | (C)), \
   5935                                                (__v2df)(__m128d)(W), \
   5936                                                (__mmask8)(U), (int)(R)); })
   5937 
   5938 #define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({\
   5939   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   5940                                                (__v2df)(__m128d)(B), \
   5941                                                (int)(((D)<<2) | (C)), \
   5942                                                (__v2df)_mm_setzero_pd(), \
   5943                                                (__mmask8)(U), \
   5944                                                _MM_FROUND_CUR_DIRECTION); })
   5945 
   5946 #define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({\
   5947   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   5948                                                (__v2df)(__m128d)(B), \
   5949                                                (int)(((D)<<2) | (C)), \
   5950                                                (__v2df)_mm_setzero_pd(), \
   5951                                                (__mmask8)(U), (int)(R)); })
   5952 
   5953 #define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \
   5954   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   5955                                               (__v4sf)(__m128)(B), \
   5956                                               (int)(((D)<<2) | (C)), \
   5957                                               (__v4sf)_mm_setzero_ps(), \
   5958                                               (__mmask8)-1, (int)(R)); })
   5959 
   5960 #define _mm_getmant_ss(A, B, C, D) __extension__ ({ \
   5961   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   5962                                               (__v4sf)(__m128)(B), \
   5963                                               (int)(((D)<<2) | (C)), \
   5964                                               (__v4sf)_mm_setzero_ps(), \
   5965                                               (__mmask8)-1, \
   5966                                               _MM_FROUND_CUR_DIRECTION); })
   5967 
   5968 #define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({\
   5969   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   5970                                               (__v4sf)(__m128)(B), \
   5971                                               (int)(((D)<<2) | (C)), \
   5972                                               (__v4sf)(__m128)(W), \
   5973                                               (__mmask8)(U), \
   5974                                               _MM_FROUND_CUR_DIRECTION); })
   5975 
   5976 #define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) __extension__ ({\
   5977   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   5978                                               (__v4sf)(__m128)(B), \
   5979                                               (int)(((D)<<2) | (C)), \
   5980                                               (__v4sf)(__m128)(W), \
   5981                                               (__mmask8)(U), (int)(R)); })
   5982 
   5983 #define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\
   5984   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   5985                                               (__v4sf)(__m128)(B), \
   5986                                               (int)(((D)<<2) | (C)), \
   5987                                               (__v4sf)_mm_setzero_ps(), \
   5988                                               (__mmask8)(U), \
   5989                                               _MM_FROUND_CUR_DIRECTION); })
   5990 
   5991 #define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({\
   5992   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   5993                                               (__v4sf)(__m128)(B), \
   5994                                               (int)(((D)<<2) | (C)), \
   5995                                               (__v4sf)_mm_setzero_ps(), \
   5996                                               (__mmask8)(U), (int)(R)); })
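
        /* A minimal usage sketch: getmant extracts the normalized mantissa of the
         * low element; C picks the normalization interval and D the sign treatment,
         * packed as (D << 2) | C for the builtin.  Assuming the _MM_MANT_NORM_* and
         * _MM_MANT_SIGN_* enumerators defined earlier in this header:
         *
         *   __m128d m = _mm_getmant_sd(a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
         */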
   5997 
   5998 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   5999 _mm512_kmov (__mmask16 __A)
   6000 {
   6001   return  __A;
   6002 }
   6003 
   6004 #define _mm_comi_round_sd(A, B, P, R) __extension__ ({\
   6005   (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
   6006                               (int)(P), (int)(R)); })
   6007 
   6008 #define _mm_comi_round_ss(A, B, P, R) __extension__ ({\
   6009   (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
   6010                               (int)(P), (int)(R)); })
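
        /* A minimal usage sketch: the comi_round macros compare the low elements
         * under predicate P and return 0 or 1; R only controls exception
         * suppression (SAE).  Assuming _CMP_LT_OS from <avxintrin.h> and
         * _MM_FROUND_NO_EXC from <smmintrin.h>, both reachable via <immintrin.h>:
         *
         *   int lt = _mm_comi_round_ss(a, b, _CMP_LT_OS, _MM_FROUND_NO_EXC);
         */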
   6011 
   6012 #ifdef __x86_64__
   6013 #define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
   6014   (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
   6015 #endif
   6016 
   6017 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6018 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
   6019          __mmask16 __U, __m512i __B)
   6020 {
   6021   return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
   6022                    (__v16si) __I
   6023                    /* idx */ ,
   6024                    (__v16si) __B,
   6025                    (__mmask16) __U);
   6026 }
   6027 
   6028 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6029 _mm512_sll_epi32(__m512i __A, __m128i __B)
   6030 {
   6031   return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
   6032 }
   6033 
   6034 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6035 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
   6036 {
   6037   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   6038                                           (__v16si)_mm512_sll_epi32(__A, __B),
   6039                                           (__v16si)__W);
   6040 }
   6041 
   6042 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6043 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
   6044 {
   6045   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   6046                                           (__v16si)_mm512_sll_epi32(__A, __B),
   6047                                           (__v16si)_mm512_setzero_si512());
   6048 }
   6049 
   6050 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6051 _mm512_sll_epi64(__m512i __A, __m128i __B)
   6052 {
   6053   return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
   6054 }
   6055 
   6056 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6057 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
   6058 {
   6059   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   6060                                              (__v8di)_mm512_sll_epi64(__A, __B),
   6061                                              (__v8di)__W);
   6062 }
   6063 
   6064 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6065 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
   6066 {
   6067   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   6068                                            (__v8di)_mm512_sll_epi64(__A, __B),
   6069                                            (__v8di)_mm512_setzero_si512());
   6070 }
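
        /* _mm512_sll_epi32/_mm512_sll_epi64 shift every element left by the single
         * count held in the low 64 bits of __B (counts >= the element width give
         * zero); the sllv/srlv/srav forms further below take a per-element count
         * vector instead.  The mask_/maskz_ wrappers follow the usual pattern of
         * this header: compute the full result, then merge with __W or with zero
         * through the select builtins.  A minimal sketch:
         *
         *   __m512i r = _mm512_maskz_sll_epi32(0x00FF, v, _mm_cvtsi32_si128(4));
         *
         * keeps only the low eight lanes (_mm_cvtsi32_si128 is from <emmintrin.h>).
         */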
   6071 
   6072 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6073 _mm512_sllv_epi32(__m512i __X, __m512i __Y)
   6074 {
   6075   return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
   6076 }
   6077 
   6078 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6079 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
   6080 {
   6081   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   6082                                            (__v16si)_mm512_sllv_epi32(__X, __Y),
   6083                                            (__v16si)__W);
   6084 }
   6085 
   6086 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6087 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
   6088 {
   6089   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   6090                                            (__v16si)_mm512_sllv_epi32(__X, __Y),
   6091                                            (__v16si)_mm512_setzero_si512());
   6092 }
   6093 
   6094 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6095 _mm512_sllv_epi64(__m512i __X, __m512i __Y)
   6096 {
   6097   return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
   6098 }
   6099 
   6100 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6101 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
   6102 {
   6103   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   6104                                             (__v8di)_mm512_sllv_epi64(__X, __Y),
   6105                                             (__v8di)__W);
   6106 }
   6107 
   6108 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6109 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
   6110 {
   6111   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   6112                                             (__v8di)_mm512_sllv_epi64(__X, __Y),
   6113                                             (__v8di)_mm512_setzero_si512());
   6114 }
   6115 
   6116 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6117 _mm512_sra_epi32(__m512i __A, __m128i __B)
   6118 {
   6119   return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
   6120 }
   6121 
   6122 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6123 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
   6124 {
   6125   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   6126                                           (__v16si)_mm512_sra_epi32(__A, __B),
   6127                                           (__v16si)__W);
   6128 }
   6129 
   6130 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6131 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
   6132 {
   6133   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   6134                                           (__v16si)_mm512_sra_epi32(__A, __B),
   6135                                           (__v16si)_mm512_setzero_si512());
   6136 }
   6137 
   6138 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6139 _mm512_sra_epi64(__m512i __A, __m128i __B)
   6140 {
   6141   return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
   6142 }
   6143 
   6144 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6145 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
   6146 {
   6147   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   6148                                            (__v8di)_mm512_sra_epi64(__A, __B),
   6149                                            (__v8di)__W);
   6150 }
   6151 
   6152 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6153 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
   6154 {
   6155   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   6156                                            (__v8di)_mm512_sra_epi64(__A, __B),
   6157                                            (__v8di)_mm512_setzero_si512());
   6158 }
   6159 
   6160 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6161 _mm512_srav_epi32(__m512i __X, __m512i __Y)
   6162 {
   6163   return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
   6164 }
   6165 
   6166 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6167 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
   6168 {
   6169   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   6170                                            (__v16si)_mm512_srav_epi32(__X, __Y),
   6171                                            (__v16si)__W);
   6172 }
   6173 
   6174 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6175 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
   6176 {
   6177   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   6178                                            (__v16si)_mm512_srav_epi32(__X, __Y),
   6179                                            (__v16si)_mm512_setzero_si512());
   6180 }
   6181 
   6182 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6183 _mm512_srav_epi64(__m512i __X, __m512i __Y)
   6184 {
   6185   return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
   6186 }
   6187 
   6188 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6189 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
   6190 {
   6191   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   6192                                             (__v8di)_mm512_srav_epi64(__X, __Y),
   6193                                             (__v8di)__W);
   6194 }
   6195 
   6196 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6197 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
   6198 {
   6199   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   6200                                             (__v8di)_mm512_srav_epi64(__X, __Y),
   6201                                             (__v8di)_mm512_setzero_si512());
   6202 }
   6203 
   6204 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6205 _mm512_srl_epi32(__m512i __A, __m128i __B)
   6206 {
   6207   return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
   6208 }
   6209 
   6210 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6211 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
   6212 {
   6213   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   6214                                           (__v16si)_mm512_srl_epi32(__A, __B),
   6215                                           (__v16si)__W);
   6216 }
   6217 
   6218 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6219 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
   6220 {
   6221   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   6222                                           (__v16si)_mm512_srl_epi32(__A, __B),
   6223                                           (__v16si)_mm512_setzero_si512());
   6224 }
   6225 
   6226 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6227 _mm512_srl_epi64(__m512i __A, __m128i __B)
   6228 {
   6229   return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
   6230 }
   6231 
   6232 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6233 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
   6234 {
   6235   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   6236                                            (__v8di)_mm512_srl_epi64(__A, __B),
   6237                                            (__v8di)__W);
   6238 }
   6239 
   6240 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6241 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
   6242 {
   6243   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   6244                                            (__v8di)_mm512_srl_epi64(__A, __B),
   6245                                            (__v8di)_mm512_setzero_si512());
   6246 }
   6247 
   6248 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6249 _mm512_srlv_epi32(__m512i __X, __m512i __Y)
   6250 {
   6251   return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
   6252 }
   6253 
   6254 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6255 _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
   6256 {
   6257   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   6258                                            (__v16si)_mm512_srlv_epi32(__X, __Y),
   6259                                            (__v16si)__W);
   6260 }
   6261 
   6262 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6263 _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
   6264 {
   6265   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   6266                                            (__v16si)_mm512_srlv_epi32(__X, __Y),
   6267                                            (__v16si)_mm512_setzero_si512());
   6268 }
   6269 
   6270 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6271 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
   6272 {
   6273   return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
   6274 }
   6275 
   6276 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6277 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
   6278 {
   6279   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   6280                                             (__v8di)_mm512_srlv_epi64(__X, __Y),
   6281                                             (__v8di)__W);
   6282 }
   6283 
   6284 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6285 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
   6286 {
   6287   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
   6288                                             (__v8di)_mm512_srlv_epi64(__X, __Y),
   6289                                             (__v8di)_mm512_setzero_si512());
   6290 }
   6291 
   6292 #define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
   6293   (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
   6294                                             (__v16si)(__m512i)(B), \
   6295                                             (__v16si)(__m512i)(C), (int)(imm), \
   6296                                             (__mmask16)-1); })
   6297 
   6298 #define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
   6299   (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
   6300                                             (__v16si)(__m512i)(B), \
   6301                                             (__v16si)(__m512i)(C), (int)(imm), \
   6302                                             (__mmask16)(U)); })
   6303 
   6304 #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
   6305   (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
   6306                                              (__v16si)(__m512i)(B), \
   6307                                              (__v16si)(__m512i)(C), \
   6308                                              (int)(imm), (__mmask16)(U)); })
   6309 
   6310 #define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
   6311   (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
   6312                                             (__v8di)(__m512i)(B), \
   6313                                             (__v8di)(__m512i)(C), (int)(imm), \
   6314                                             (__mmask8)-1); })
   6315 
   6316 #define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
   6317   (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
   6318                                             (__v8di)(__m512i)(B), \
   6319                                             (__v8di)(__m512i)(C), (int)(imm), \
   6320                                             (__mmask8)(U)); })
   6321 
   6322 #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
   6323   (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
   6324                                              (__v8di)(__m512i)(B), \
   6325                                              (__v8di)(__m512i)(C), (int)(imm), \
   6326                                              (__mmask8)(U)); })
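
        /* The ternarylogic immediate is an 8-entry truth table indexed by the bit
         * triple taken from (A, B, C); 0x96 encodes a ^ b ^ c and 0xE8 the bitwise
         * majority function.  An illustrative helper (not an Intel intrinsic):
         */
        static __inline__ __m512i __DEFAULT_FN_ATTRS
        _mm512_bitwise_majority_example(__m512i __A, __m512i __B, __m512i __C)
        {
          return _mm512_ternarylogic_epi32(__A, __B, __C, 0xE8);
        }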
   6327 
   6328 #ifdef __x86_64__
   6329 #define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \
   6330   (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
   6331 #endif
   6332 
   6333 #define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \
   6334   (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })
   6335 
   6336 #define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \
   6337   (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })
   6338 
   6339 #define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \
   6340   (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)); })
   6341 
   6342 static __inline__ unsigned __DEFAULT_FN_ATTRS
   6343 _mm_cvtsd_u32 (__m128d __A)
   6344 {
   6345   return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
   6346              _MM_FROUND_CUR_DIRECTION);
   6347 }
   6348 
   6349 #ifdef __x86_64__
   6350 #define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \
   6351   (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
   6352                                                   (int)(R)); })
   6353 
   6354 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
   6355 _mm_cvtsd_u64 (__m128d __A)
   6356 {
   6357   return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
   6358                  __A,
   6359                  _MM_FROUND_CUR_DIRECTION);
   6360 }
   6361 #endif
   6362 
   6363 #define _mm_cvt_roundss_si32(A, R) __extension__ ({ \
   6364   (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })
   6365 
   6366 #define _mm_cvt_roundss_i32(A, R) __extension__ ({ \
   6367   (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })
   6368 
   6369 #ifdef __x86_64__
   6370 #define _mm_cvt_roundss_si64(A, R) __extension__ ({ \
   6371   (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
   6372 
   6373 #define _mm_cvt_roundss_i64(A, R) __extension__ ({ \
   6374   (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
   6375 #endif
   6376 
   6377 #define _mm_cvt_roundss_u32(A, R) __extension__ ({ \
   6378   (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); })
   6379 
   6380 static __inline__ unsigned __DEFAULT_FN_ATTRS
   6381 _mm_cvtss_u32 (__m128 __A)
   6382 {
   6383   return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
   6384              _MM_FROUND_CUR_DIRECTION);
   6385 }
   6386 
   6387 #ifdef __x86_64__
   6388 #define _mm_cvt_roundss_u64(A, R) __extension__ ({ \
   6389   (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
   6390                                                   (int)(R)); })
   6391 
   6392 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
   6393 _mm_cvtss_u64 (__m128 __A)
   6394 {
   6395   return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
   6396                  __A,
   6397                  _MM_FROUND_CUR_DIRECTION);
   6398 }
   6399 #endif
   6400 
   6401 #define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \
   6402   (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })
   6403 
   6404 #define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \
   6405   (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })
   6406 
   6407 static __inline__ int __DEFAULT_FN_ATTRS
   6408 _mm_cvttsd_i32 (__m128d __A)
   6409 {
   6410   return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
   6411               _MM_FROUND_CUR_DIRECTION);
   6412 }
   6413 
   6414 #ifdef __x86_64__
   6415 #define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \
   6416   (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
   6417 
   6418 #define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \
   6419   (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
   6420 
   6421 static __inline__ long long __DEFAULT_FN_ATTRS
   6422 _mm_cvttsd_i64 (__m128d __A)
   6423 {
   6424   return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
   6425               _MM_FROUND_CUR_DIRECTION);
   6426 }
   6427 #endif
   6428 
   6429 #define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \
   6430   (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); })
   6431 
   6432 static __inline__ unsigned __DEFAULT_FN_ATTRS
   6433 _mm_cvttsd_u32 (__m128d __A)
   6434 {
   6435   return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
   6436               _MM_FROUND_CUR_DIRECTION);
   6437 }
   6438 
   6439 #ifdef __x86_64__
   6440 #define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \
   6441   (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
   6442                                                    (int)(R)); })
   6443 
   6444 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
   6445 _mm_cvttsd_u64 (__m128d __A)
   6446 {
   6447   return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
   6448                   __A,
   6449                   _MM_FROUND_CUR_DIRECTION);
   6450 }
   6451 #endif
   6452 
   6453 #define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \
   6454   (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })
   6455 
   6456 #define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \
   6457   (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })
   6458 
   6459 static __inline__ int __DEFAULT_FN_ATTRS
   6460 _mm_cvttss_i32 (__m128 __A)
   6461 {
   6462   return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
   6463               _MM_FROUND_CUR_DIRECTION);
   6464 }
   6465 
   6466 #ifdef __x86_64__
   6467 #define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \
   6468   (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
   6469 
   6470 #define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \
   6471   (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
   6472 
   6473 static __inline__ long long __DEFAULT_FN_ATTRS
   6474 _mm_cvttss_i64 (__m128 __A)
   6475 {
   6476   return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
   6477               _MM_FROUND_CUR_DIRECTION);
   6478 }
   6479 #endif
   6480 
   6481 #define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \
   6482   (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); })
   6483 
   6484 static __inline__ unsigned __DEFAULT_FN_ATTRS
   6485 _mm_cvttss_u32 (__m128 __A)
   6486 {
   6487   return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
   6488               _MM_FROUND_CUR_DIRECTION);
   6489 }
   6490 
   6491 #ifdef __x86_64__
   6492 #define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \
   6493   (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
   6494                                                    (int)(R)); })
   6495 
   6496 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
   6497 _mm_cvttss_u64 (__m128 __A)
   6498 {
   6499   return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
   6500                   __A,
   6501                   _MM_FROUND_CUR_DIRECTION);
   6502 }
   6503 #endif
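
        /* The _mm_cvt_round* forms above convert with an explicit rounding mode,
         * while the _mm_cvtt_round* / _mm_cvtt* forms always truncate toward zero
         * and use R only to suppress exceptions.  A minimal sketch of an explicitly
         * rounded conversion (assuming _MM_FROUND_NO_EXC from <smmintrin.h>):
         *
         *   int i = _mm_cvt_roundss_i32(x, _MM_FROUND_TO_NEAREST_INT |
         *                                  _MM_FROUND_NO_EXC);
         */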
   6504 
   6505 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6506 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
   6507             __m512d __B)
   6508 {
   6509   return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
   6510               (__v8di) __I
   6511               /* idx */ ,
   6512               (__v8df) __B,
   6513               (__mmask8) __U);
   6514 }
   6515 
   6516 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6517 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
   6518             __m512 __B)
   6519 {
   6520   return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
   6521                    (__v16si) __I
   6522                    /* idx */ ,
   6523                    (__v16sf) __B,
   6524                    (__mmask16) __U);
   6525 }
   6526 
   6527 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6528 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
   6529          __mmask8 __U, __m512i __B)
   6530 {
   6531   return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
   6532                    (__v8di) __I
   6533                    /* idx */ ,
   6534                    (__v8di) __B,
   6535                    (__mmask8) __U);
   6536 }
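
        /* The mask2_ variants above differ from the plain permutex2var forms below
         * only in their merge source: lanes whose mask bit is clear keep the
         * corresponding element of the index operand __I rather than of __A. */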
   6537 
   6538 #define _mm512_permute_pd(X, C) __extension__ ({ \
   6539   (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
   6540                                    (__v8df)_mm512_undefined_pd(), \
   6541                                    0 + (((C) >> 0) & 0x1), \
   6542                                    0 + (((C) >> 1) & 0x1), \
   6543                                    2 + (((C) >> 2) & 0x1), \
   6544                                    2 + (((C) >> 3) & 0x1), \
   6545                                    4 + (((C) >> 4) & 0x1), \
   6546                                    4 + (((C) >> 5) & 0x1), \
   6547                                    6 + (((C) >> 6) & 0x1), \
   6548                                    6 + (((C) >> 7) & 0x1)); })
   6549 
   6550 #define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \
   6551   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
   6552                                        (__v8df)_mm512_permute_pd((X), (C)), \
   6553                                        (__v8df)(__m512d)(W)); })
   6554 
   6555 #define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \
   6556   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
   6557                                        (__v8df)_mm512_permute_pd((X), (C)), \
   6558                                        (__v8df)_mm512_setzero_pd()); })
   6559 
   6560 #define _mm512_permute_ps(X, C) __extension__ ({ \
   6561   (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \
   6562                                   (__v16sf)_mm512_undefined_ps(), \
   6563                                    0  + (((C) >> 0) & 0x3), \
   6564                                    0  + (((C) >> 2) & 0x3), \
   6565                                    0  + (((C) >> 4) & 0x3), \
   6566                                    0  + (((C) >> 6) & 0x3), \
   6567                                    4  + (((C) >> 0) & 0x3), \
   6568                                    4  + (((C) >> 2) & 0x3), \
   6569                                    4  + (((C) >> 4) & 0x3), \
   6570                                    4  + (((C) >> 6) & 0x3), \
   6571                                    8  + (((C) >> 0) & 0x3), \
   6572                                    8  + (((C) >> 2) & 0x3), \
   6573                                    8  + (((C) >> 4) & 0x3), \
   6574                                    8  + (((C) >> 6) & 0x3), \
   6575                                    12 + (((C) >> 0) & 0x3), \
   6576                                    12 + (((C) >> 2) & 0x3), \
   6577                                    12 + (((C) >> 4) & 0x3), \
   6578                                    12 + (((C) >> 6) & 0x3)); })
   6579 
   6580 #define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \
   6581   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
   6582                                       (__v16sf)_mm512_permute_ps((X), (C)), \
   6583                                       (__v16sf)(__m512)(W)); })
   6584 
   6585 #define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \
   6586   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
   6587                                       (__v16sf)_mm512_permute_ps((X), (C)), \
   6588                                       (__v16sf)_mm512_setzero_ps()); })
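
        /* The immediate permutes above are the 512-bit vpermilpd/vpermilps forms:
         * the control byte is applied independently inside every 128-bit lane (one
         * selector bit per double, two bits per float).  A minimal sketch:
         *
         *   __m512d swapped = _mm512_permute_pd(v, 0x55);  // swap each pair of
         *                                                  // doubles per lane
         */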
   6589 
   6590 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6591 _mm512_permutevar_pd(__m512d __A, __m512i __C)
   6592 {
   6593   return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
   6594 }
   6595 
   6596 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6597 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
   6598 {
   6599   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
   6600                                          (__v8df)_mm512_permutevar_pd(__A, __C),
   6601                                          (__v8df)__W);
   6602 }
   6603 
   6604 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6605 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
   6606 {
   6607   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
   6608                                          (__v8df)_mm512_permutevar_pd(__A, __C),
   6609                                          (__v8df)_mm512_setzero_pd());
   6610 }
   6611 
   6612 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6613 _mm512_permutevar_ps(__m512 __A, __m512i __C)
   6614 {
   6615   return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
   6616 }
   6617 
   6618 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6619 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
   6620 {
   6621   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
   6622                                         (__v16sf)_mm512_permutevar_ps(__A, __C),
   6623                                         (__v16sf)__W);
   6624 }
   6625 
   6626 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6627 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
   6628 {
   6629   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
   6630                                         (__v16sf)_mm512_permutevar_ps(__A, __C),
   6631                                         (__v16sf)_mm512_setzero_ps());
   6632 }
   6633 
   6634 static __inline __m512d __DEFAULT_FN_ATTRS
   6635 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
   6636 {
   6637   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
   6638                     /* idx */ ,
   6639                     (__v8df) __A,
   6640                     (__v8df) __B,
   6641                     (__mmask8) -1);
   6642 }
   6643 
   6644 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6645 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
   6646 {
   6647   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
   6648                     /* idx */ ,
   6649                     (__v8df) __A,
   6650                     (__v8df) __B,
   6651                     (__mmask8) __U);
   6652 }
   6653 
   6654 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6655 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
   6656             __m512d __B)
   6657 {
   6658   return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
   6659                                                          /* idx */ ,
   6660                                                          (__v8df) __A,
   6661                                                          (__v8df) __B,
   6662                                                          (__mmask8) __U);
   6663 }
   6664 
   6665 static __inline __m512 __DEFAULT_FN_ATTRS
   6666 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
   6667 {
   6668   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
   6669                                                          /* idx */ ,
   6670                                                          (__v16sf) __A,
   6671                                                          (__v16sf) __B,
   6672                                                          (__mmask16) -1);
   6673 }
   6674 
   6675 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6676 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
   6677 {
   6678   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
   6679                                                          /* idx */ ,
   6680                                                          (__v16sf) __A,
   6681                                                          (__v16sf) __B,
   6682                                                          (__mmask16) __U);
   6683 }
   6684 
   6685 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6686 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
   6687             __m512 __B)
   6688 {
   6689   return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
   6690                                                         /* idx */ ,
   6691                                                         (__v16sf) __A,
   6692                                                         (__v16sf) __B,
   6693                                                         (__mmask16) __U);
   6694 }
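
        /* permutex2var selects every result element from the concatenation of the
         * two data operands: the low index bits pick the element and the next bit
         * picks between __A and __B, giving a full two-source, cross-lane shuffle:
         *
         *   __m512d mixed = _mm512_permutex2var_pd(a, idx, b);
         */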
   6695 
   6696 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   6697 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
   6698 {
   6699   return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
   6700              (__v16si) __B,
   6701              (__mmask16) -1);
   6702 }
   6703 
   6704 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   6705 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
   6706 {
   6707   return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
   6708              (__v16si) __B, __U);
   6709 }
   6710 
   6711 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6712 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
   6713 {
   6714   return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
   6715             (__v8di) __B,
   6716             (__mmask8) -1);
   6717 }
   6718 
   6719 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6720 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
   6721 {
   6722   return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
   6723             (__v8di) __B, __U);
   6724 }
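
        /* testn sets a mask bit when the bitwise AND of the corresponding elements
         * is all zeroes, so testing against a one-bit constant reports lanes with
         * that bit clear.  A minimal sketch:
         *
         *   __mmask16 bit0_clear = _mm512_testn_epi32_mask(v, _mm512_set1_epi32(1));
         */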
   6725 
   6726 #define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \
   6727   (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
   6728                                              (__v8si)_mm256_undefined_si256(), \
   6729                                              (__mmask8)-1, (int)(R)); })
   6730 
   6731 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \
   6732   (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
   6733                                              (__v8si)(__m256i)(W), \
   6734                                              (__mmask8)(U), (int)(R)); })
   6735 
   6736 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \
   6737   (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
   6738                                              (__v8si)_mm256_setzero_si256(), \
   6739                                              (__mmask8)(U), (int)(R)); })
   6740 
   6741 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6742 _mm512_cvttpd_epu32 (__m512d __A)
   6743 {
   6744   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
   6745                   (__v8si)
   6746                   _mm256_undefined_si256 (),
   6747                   (__mmask8) -1,
   6748                   _MM_FROUND_CUR_DIRECTION);
   6749 }
   6750 
   6751 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6752 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
   6753 {
   6754   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
   6755                   (__v8si) __W,
   6756                   (__mmask8) __U,
   6757                   _MM_FROUND_CUR_DIRECTION);
   6758 }
   6759 
   6760 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6761 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
   6762 {
   6763   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
   6764                   (__v8si)
   6765                   _mm256_setzero_si256 (),
   6766                   (__mmask8) __U,
   6767                   _MM_FROUND_CUR_DIRECTION);
   6768 }
   6769 
   6770 #define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \
   6771   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
   6772                                                 (__v2df)(__m128d)(B), \
   6773                                                 (__v2df)_mm_setzero_pd(), \
   6774                                                 (__mmask8)-1, (int)(imm), \
   6775                                                 (int)(R)); })
   6776 
   6777 #define _mm_roundscale_sd(A, B, imm) __extension__ ({ \
   6778   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
   6779                                                 (__v2df)(__m128d)(B), \
   6780                                                 (__v2df)_mm_setzero_pd(), \
   6781                                                 (__mmask8)-1, (int)(imm), \
   6782                                                 _MM_FROUND_CUR_DIRECTION); })
   6783 
   6784 #define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \
   6785   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
   6786                                                 (__v2df)(__m128d)(B), \
   6787                                                 (__v2df)(__m128d)(W), \
   6788                                                 (__mmask8)(U), (int)(imm), \
   6789                                                 _MM_FROUND_CUR_DIRECTION); })
   6790 
   6791 #define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \
   6792   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
   6793                                                 (__v2df)(__m128d)(B), \
   6794                                                 (__v2df)(__m128d)(W), \
   6795                                                 (__mmask8)(U), (int)(I), \
   6796                                                 (int)(R)); })
   6797 
   6798 #define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \
   6799   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
   6800                                                 (__v2df)(__m128d)(B), \
   6801                                                 (__v2df)_mm_setzero_pd(), \
   6802                                                 (__mmask8)(U), (int)(I), \
   6803                                                 _MM_FROUND_CUR_DIRECTION); })
   6804 
   6805 #define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \
   6806   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
   6807                                                 (__v2df)(__m128d)(B), \
   6808                                                 (__v2df)_mm_setzero_pd(), \
   6809                                                 (__mmask8)(U), (int)(I), \
   6810                                                 (int)(R)); })
   6811 
   6812 #define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \
   6813   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
   6814                                                (__v4sf)(__m128)(B), \
   6815                                                (__v4sf)_mm_setzero_ps(), \
   6816                                                (__mmask8)-1, (int)(imm), \
   6817                                                (int)(R)); })
   6818 
   6819 #define _mm_roundscale_ss(A, B, imm) __extension__ ({ \
   6820   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
   6821                                                (__v4sf)(__m128)(B), \
   6822                                                (__v4sf)_mm_setzero_ps(), \
   6823                                                (__mmask8)-1, (int)(imm), \
   6824                                                _MM_FROUND_CUR_DIRECTION); })
   6825 
   6826 #define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \
   6827   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
   6828                                                (__v4sf)(__m128)(B), \
   6829                                                (__v4sf)(__m128)(W), \
   6830                                                (__mmask8)(U), (int)(I), \
   6831                                                _MM_FROUND_CUR_DIRECTION); })
   6832 
   6833 #define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \
   6834   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
   6835                                                (__v4sf)(__m128)(B), \
   6836                                                (__v4sf)(__m128)(W), \
   6837                                                (__mmask8)(U), (int)(I), \
   6838                                                (int)(R)); })
   6839 
   6840 #define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \
   6841   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
   6842                                                (__v4sf)(__m128)(B), \
   6843                                                (__v4sf)_mm_setzero_ps(), \
   6844                                                (__mmask8)(U), (int)(I), \
   6845                                                _MM_FROUND_CUR_DIRECTION); })
   6846 
   6847 #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \
   6848   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
   6849                                                (__v4sf)(__m128)(B), \
   6850                                                (__v4sf)_mm_setzero_ps(), \
   6851                                                (__mmask8)(U), (int)(I), \
   6852                                                (int)(R)); })
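
        /* The roundscale immediate packs the number of kept fraction bits M in
         * imm[7:4] and the rounding mode in imm[1:0]; the result is
         * 2^-M * round(2^M * x).  A minimal sketch rounding the low double of b to
         * the nearest multiple of 1/4:
         *
         *   __m128d q = _mm_roundscale_sd(a, b, (2 << 4) | _MM_FROUND_TO_NEAREST_INT);
         */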
   6853 
   6854 #define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \
   6855   (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
   6856                                            (__v8df)(__m512d)(B), \
   6857                                            (__v8df)_mm512_undefined_pd(), \
   6858                                            (__mmask8)-1, (int)(R)); })
   6859 
   6860 #define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \
   6861   (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
   6862                                            (__v8df)(__m512d)(B), \
   6863                                            (__v8df)(__m512d)(W), \
   6864                                            (__mmask8)(U), (int)(R)); })
   6865 
   6866 #define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \
   6867   (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
   6868                                            (__v8df)(__m512d)(B), \
   6869                                            (__v8df)_mm512_setzero_pd(), \
   6870                                            (__mmask8)(U), (int)(R)); })
   6871 
   6872 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6873 _mm512_scalef_pd (__m512d __A, __m512d __B)
   6874 {
   6875   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
   6876                 (__v8df) __B,
   6877                 (__v8df)
   6878                 _mm512_undefined_pd (),
   6879                 (__mmask8) -1,
   6880                 _MM_FROUND_CUR_DIRECTION);
   6881 }
   6882 
   6883 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6884 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   6885 {
   6886   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
   6887                 (__v8df) __B,
   6888                 (__v8df) __W,
   6889                 (__mmask8) __U,
   6890                 _MM_FROUND_CUR_DIRECTION);
   6891 }
   6892 
   6893 static __inline__ __m512d __DEFAULT_FN_ATTRS
   6894 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
   6895 {
   6896   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
   6897                 (__v8df) __B,
   6898                 (__v8df)
   6899                 _mm512_setzero_pd (),
   6900                 (__mmask8) __U,
   6901                 _MM_FROUND_CUR_DIRECTION);
   6902 }
   6903 
   6904 #define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \
   6905   (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
   6906                                           (__v16sf)(__m512)(B), \
   6907                                           (__v16sf)_mm512_undefined_ps(), \
   6908                                           (__mmask16)-1, (int)(R)); })
   6909 
   6910 #define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \
   6911   (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
   6912                                           (__v16sf)(__m512)(B), \
   6913                                           (__v16sf)(__m512)(W), \
   6914                                           (__mmask16)(U), (int)(R)); })
   6915 
   6916 #define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \
   6917   (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
   6918                                           (__v16sf)(__m512)(B), \
   6919                                           (__v16sf)_mm512_setzero_ps(), \
   6920                                           (__mmask16)(U), (int)(R)); })
   6921 
   6922 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6923 _mm512_scalef_ps (__m512 __A, __m512 __B)
   6924 {
   6925   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
   6926                (__v16sf) __B,
   6927                (__v16sf)
   6928                _mm512_undefined_ps (),
   6929                (__mmask16) -1,
   6930                _MM_FROUND_CUR_DIRECTION);
   6931 }
   6932 
   6933 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6934 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   6935 {
   6936   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
   6937                (__v16sf) __B,
   6938                (__v16sf) __W,
   6939                (__mmask16) __U,
   6940                _MM_FROUND_CUR_DIRECTION);
   6941 }
   6942 
   6943 static __inline__ __m512 __DEFAULT_FN_ATTRS
   6944 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
   6945 {
   6946   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
   6947                (__v16sf) __B,
   6948                (__v16sf)
   6949                _mm512_setzero_ps (),
   6950                (__mmask16) __U,
   6951                _MM_FROUND_CUR_DIRECTION);
   6952 }
   6953 
   6954 #define _mm_scalef_round_sd(A, B, R) __extension__ ({ \
   6955   (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
   6956                                               (__v2df)(__m128d)(B), \
   6957                                               (__v2df)_mm_setzero_pd(), \
   6958                                               (__mmask8)-1, (int)(R)); })
   6959 
   6960 static __inline__ __m128d __DEFAULT_FN_ATTRS
   6961 _mm_scalef_sd (__m128d __A, __m128d __B)
   6962 {
   6963   return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
   6964               (__v2df)( __B), (__v2df) _mm_setzero_pd(),
   6965               (__mmask8) -1,
   6966               _MM_FROUND_CUR_DIRECTION);
   6967 }
   6968 
   6969 static __inline__ __m128d __DEFAULT_FN_ATTRS
   6970 _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   6971 {
   6972  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
   6973                  (__v2df) __B,
   6974                 (__v2df) __W,
   6975                 (__mmask8) __U,
   6976                 _MM_FROUND_CUR_DIRECTION);
   6977 }
   6978 
   6979 #define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \
   6980   (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
   6981                                               (__v2df)(__m128d)(B), \
   6982                                               (__v2df)(__m128d)(W), \
   6983                                               (__mmask8)(U), (int)(R)); })
   6984 
   6985 static __inline__ __m128d __DEFAULT_FN_ATTRS
   6986 _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
   6987 {
   6988  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
   6989                  (__v2df) __B,
   6990                 (__v2df) _mm_setzero_pd (),
   6991                 (__mmask8) __U,
   6992                 _MM_FROUND_CUR_DIRECTION);
   6993 }
   6994 
   6995 #define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \
   6996   (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
   6997                                               (__v2df)(__m128d)(B), \
   6998                                               (__v2df)_mm_setzero_pd(), \
   6999                                               (__mmask8)(U), (int)(R)); })
   7000 
   7001 #define _mm_scalef_round_ss(A, B, R) __extension__ ({ \
   7002   (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
   7003                                              (__v4sf)(__m128)(B), \
   7004                                              (__v4sf)_mm_setzero_ps(), \
   7005                                              (__mmask8)-1, (int)(R)); })
   7006 
   7007 static __inline__ __m128 __DEFAULT_FN_ATTRS
   7008 _mm_scalef_ss (__m128 __A, __m128 __B)
   7009 {
   7010   return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
7011              (__v4sf) __B, (__v4sf) _mm_setzero_ps(),
   7012              (__mmask8) -1,
   7013              _MM_FROUND_CUR_DIRECTION);
   7014 }
   7015 
   7016 static __inline__ __m128 __DEFAULT_FN_ATTRS
   7017 _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   7018 {
7019   return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
7020                 (__v4sf) __B,
7021                 (__v4sf) __W,
7022                 (__mmask8) __U,
7023                 _MM_FROUND_CUR_DIRECTION);
   7024 }
   7025 
   7026 #define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \
   7027   (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
   7028                                              (__v4sf)(__m128)(B), \
   7029                                              (__v4sf)(__m128)(W), \
   7030                                              (__mmask8)(U), (int)(R)); })
   7031 
   7032 static __inline__ __m128 __DEFAULT_FN_ATTRS
   7033 _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
   7034 {
7035   return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
7036                 (__v4sf) __B,
7037                 (__v4sf) _mm_setzero_ps (),
7038                 (__mmask8) __U,
7039                 _MM_FROUND_CUR_DIRECTION);
   7040 }
   7041 
   7042 #define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \
   7043   (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
   7044                                              (__v4sf)(__m128)(B), \
   7045                                              (__v4sf)_mm_setzero_ps(), \
   7046                                              (__mmask8)(U), \
7047                                              (int)(R)); })
   7048 
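/* Arithmetic right shifts by an immediate count.  Each element is shifted
 * right by __B bits with the sign bit replicated into the vacated positions;
 * the mask/maskz variants then blend the shifted result with __W or zero,
 * one mask bit per element.  Illustrative use (v is a placeholder):
 *   __m512i h = _mm512_srai_epi32(v, 1);  // signed divide by 2, rounding toward -inf
 */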
   7049 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7050 _mm512_srai_epi32(__m512i __A, int __B)
   7051 {
   7052   return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
   7053 }
   7054 
   7055 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7056 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
   7057 {
   7058   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
   7059                                          (__v16si)_mm512_srai_epi32(__A, __B), \
   7060                                          (__v16si)__W);
   7061 }
   7062 
   7063 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7064 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) {
   7065   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
   7066                                          (__v16si)_mm512_srai_epi32(__A, __B), \
   7067                                          (__v16si)_mm512_setzero_si512());
   7068 }
   7069 
   7070 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7071 _mm512_srai_epi64(__m512i __A, int __B)
   7072 {
   7073   return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
   7074 }
   7075 
   7076 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7077 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
   7078 {
   7079   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
   7080                                           (__v8di)_mm512_srai_epi64(__A, __B), \
   7081                                           (__v8di)__W);
   7082 }
   7083 
   7084 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7085 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
   7086 {
   7087   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
   7088                                           (__v8di)_mm512_srai_epi64(__A, __B), \
   7089                                           (__v8di)_mm512_setzero_si512());
   7090 }
   7091 
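/* 128-bit-lane shuffles.  The 8-bit immediate is read as four 2-bit fields:
 * the two lower lanes of the result are chosen from __A and the two upper
 * lanes from __B (the f32x4/i32x4 and f64x2/i64x2 forms differ only in
 * element type).  For example, imm == 0x44 places the low 256 bits of __A in
 * the lower half of the result and the low 256 bits of __B in the upper half.
 */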
   7092 #define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
   7093   (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
   7094                                          (__v16sf)(__m512)(B), (int)(imm), \
   7095                                          (__v16sf)_mm512_undefined_ps(), \
   7096                                          (__mmask16)-1); })
   7097 
   7098 #define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
   7099   (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
   7100                                          (__v16sf)(__m512)(B), (int)(imm), \
   7101                                          (__v16sf)(__m512)(W), \
   7102                                          (__mmask16)(U)); })
   7103 
   7104 #define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
   7105   (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
   7106                                          (__v16sf)(__m512)(B), (int)(imm), \
   7107                                          (__v16sf)_mm512_setzero_ps(), \
   7108                                          (__mmask16)(U)); })
   7109 
   7110 #define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
   7111   (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
   7112                                           (__v8df)(__m512d)(B), (int)(imm), \
   7113                                           (__v8df)_mm512_undefined_pd(), \
   7114                                           (__mmask8)-1); })
   7115 
   7116 #define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
   7117   (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
   7118                                           (__v8df)(__m512d)(B), (int)(imm), \
   7119                                           (__v8df)(__m512d)(W), \
   7120                                           (__mmask8)(U)); })
   7121 
   7122 #define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
   7123   (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
   7124                                           (__v8df)(__m512d)(B), (int)(imm), \
   7125                                           (__v8df)_mm512_setzero_pd(), \
   7126                                           (__mmask8)(U)); })
   7127 
   7128 #define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
   7129   (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
   7130                                           (__v16si)(__m512i)(B), (int)(imm), \
   7131                                           (__v16si)_mm512_setzero_si512(), \
   7132                                           (__mmask16)-1); })
   7133 
   7134 #define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
   7135   (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
   7136                                           (__v16si)(__m512i)(B), (int)(imm), \
   7137                                           (__v16si)(__m512i)(W), \
   7138                                           (__mmask16)(U)); })
   7139 
   7140 #define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
   7141   (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
   7142                                           (__v16si)(__m512i)(B), (int)(imm), \
   7143                                           (__v16si)_mm512_setzero_si512(), \
   7144                                           (__mmask16)(U)); })
   7145 
   7146 #define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
   7147   (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
   7148                                           (__v8di)(__m512i)(B), (int)(imm), \
   7149                                           (__v8di)_mm512_setzero_si512(), \
   7150                                           (__mmask8)-1); })
   7151 
   7152 #define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
   7153   (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
   7154                                           (__v8di)(__m512i)(B), (int)(imm), \
   7155                                           (__v8di)(__m512i)(W), \
   7156                                           (__mmask8)(U)); })
   7157 
   7158 #define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
   7159   (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
   7160                                           (__v8di)(__m512i)(B), (int)(imm), \
   7161                                           (__v8di)_mm512_setzero_si512(), \
   7162                                           (__mmask8)(U)); })
   7163 
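/* SHUFPD/SHUFPS-style shuffles, written with __builtin_shufflevector so the
 * compiler sees the exact element mapping.  The classic control-byte
 * semantics apply within each 128-bit lane: for _mm512_shuffle_pd, even
 * result elements come from __A and odd ones from __B, one control bit each;
 * for _mm512_shuffle_ps, the same 8-bit control selects two elements from
 * __A and two from __B in every lane.
 */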
   7164 #define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
   7165   (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
   7166                                    (__v8df)(__m512d)(B), \
   7167                                    0  + (((M) >> 0) & 0x1), \
   7168                                    8  + (((M) >> 1) & 0x1), \
   7169                                    2  + (((M) >> 2) & 0x1), \
   7170                                    10 + (((M) >> 3) & 0x1), \
   7171                                    4  + (((M) >> 4) & 0x1), \
   7172                                    12 + (((M) >> 5) & 0x1), \
   7173                                    6  + (((M) >> 6) & 0x1), \
   7174                                    14 + (((M) >> 7) & 0x1)); })
   7175 
   7176 #define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
   7177   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
   7178                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
   7179                                        (__v8df)(__m512d)(W)); })
   7180 
   7181 #define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
   7182   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
   7183                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
   7184                                        (__v8df)_mm512_setzero_pd()); })
   7185 
   7186 #define _mm512_shuffle_ps(A, B, M) __extension__ ({ \
7187   (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
   7188                                    (__v16sf)(__m512)(B), \
   7189                                    0  + (((M) >> 0) & 0x3), \
   7190                                    0  + (((M) >> 2) & 0x3), \
   7191                                    16 + (((M) >> 4) & 0x3), \
   7192                                    16 + (((M) >> 6) & 0x3), \
   7193                                    4  + (((M) >> 0) & 0x3), \
   7194                                    4  + (((M) >> 2) & 0x3), \
   7195                                    20 + (((M) >> 4) & 0x3), \
   7196                                    20 + (((M) >> 6) & 0x3), \
   7197                                    8  + (((M) >> 0) & 0x3), \
   7198                                    8  + (((M) >> 2) & 0x3), \
   7199                                    24 + (((M) >> 4) & 0x3), \
   7200                                    24 + (((M) >> 6) & 0x3), \
   7201                                    12 + (((M) >> 0) & 0x3), \
   7202                                    12 + (((M) >> 2) & 0x3), \
   7203                                    28 + (((M) >> 4) & 0x3), \
   7204                                    28 + (((M) >> 6) & 0x3)); })
   7205 
   7206 #define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
   7207   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
   7208                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
   7209                                       (__v16sf)(__m512)(W)); })
   7210 
   7211 #define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
   7212   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
   7213                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
   7214                                       (__v16sf)_mm512_setzero_ps()); })
   7215 
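/* Scalar masked square roots.  The low element of the result is the square
 * root of the low element of __B, written only where the mask bit is set
 * (otherwise taken from __W or zeroed); the upper element is copied from
 * __A.  The *_round_* macros additionally take a rounding-mode immediate.
 */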
   7216 #define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
   7217   (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
   7218                                             (__v2df)(__m128d)(B), \
   7219                                             (__v2df)_mm_setzero_pd(), \
   7220                                             (__mmask8)-1, (int)(R)); })
   7221 
   7222 static __inline__ __m128d __DEFAULT_FN_ATTRS
   7223 _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   7224 {
7225   return (__m128d) __builtin_ia32_sqrtsd_round_mask ((__v2df) __A,
7226                 (__v2df) __B,
7227                 (__v2df) __W,
7228                 (__mmask8) __U,
7229                 _MM_FROUND_CUR_DIRECTION);
   7230 }
   7231 
   7232 #define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \
   7233   (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
   7234                                             (__v2df)(__m128d)(B), \
   7235                                             (__v2df)(__m128d)(W), \
   7236                                             (__mmask8)(U), (int)(R)); })
   7237 
   7238 static __inline__ __m128d __DEFAULT_FN_ATTRS
   7239 _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
   7240 {
7241   return (__m128d) __builtin_ia32_sqrtsd_round_mask ((__v2df) __A,
7242                 (__v2df) __B,
7243                 (__v2df) _mm_setzero_pd (),
7244                 (__mmask8) __U,
7245                 _MM_FROUND_CUR_DIRECTION);
   7246 }
   7247 
   7248 #define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \
   7249   (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
   7250                                             (__v2df)(__m128d)(B), \
   7251                                             (__v2df)_mm_setzero_pd(), \
   7252                                             (__mmask8)(U), (int)(R)); })
   7253 
   7254 #define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \
   7255   (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
   7256                                            (__v4sf)(__m128)(B), \
   7257                                            (__v4sf)_mm_setzero_ps(), \
   7258                                            (__mmask8)-1, (int)(R)); })
   7259 
   7260 static __inline__ __m128 __DEFAULT_FN_ATTRS
   7261 _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   7262 {
7263   return (__m128) __builtin_ia32_sqrtss_round_mask ((__v4sf) __A,
7264                 (__v4sf) __B,
7265                 (__v4sf) __W,
7266                 (__mmask8) __U,
7267                 _MM_FROUND_CUR_DIRECTION);
   7268 }
   7269 
   7270 #define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \
   7271   (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
   7272                                            (__v4sf)(__m128)(B), \
   7273                                            (__v4sf)(__m128)(W), (__mmask8)(U), \
   7274                                            (int)(R)); })
   7275 
   7276 static __inline__ __m128 __DEFAULT_FN_ATTRS
   7277 _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
   7278 {
7279   return (__m128) __builtin_ia32_sqrtss_round_mask ((__v4sf) __A,
7280                 (__v4sf) __B,
7281                 (__v4sf) _mm_setzero_ps (),
7282                 (__mmask8) __U,
7283                 _MM_FROUND_CUR_DIRECTION);
   7284 }
   7285 
   7286 #define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \
   7287   (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
   7288                                            (__v4sf)(__m128)(B), \
   7289                                            (__v4sf)_mm_setzero_ps(), \
   7290                                            (__mmask8)(U), (int)(R)); })
   7291 
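/* Sub-vector broadcasts: _mm512_broadcast_f32x4/i32x4 replicate a 128-bit
 * source four times across the 512-bit result, and the f64x4/i64x4 forms
 * replicate a 256-bit source twice.  The masked variants blend with __O or
 * zero per element.  Illustrative use (v128 is a placeholder):
 *   __m512 v = _mm512_broadcast_f32x4(v128);   // {v128, v128, v128, v128}
 */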
   7292 static __inline__ __m512 __DEFAULT_FN_ATTRS
   7293 _mm512_broadcast_f32x4(__m128 __A)
   7294 {
   7295   return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
   7296                                          0, 1, 2, 3, 0, 1, 2, 3,
   7297                                          0, 1, 2, 3, 0, 1, 2, 3);
   7298 }
   7299 
   7300 static __inline__ __m512 __DEFAULT_FN_ATTRS
   7301 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
   7302 {
   7303   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
   7304                                            (__v16sf)_mm512_broadcast_f32x4(__A),
   7305                                            (__v16sf)__O);
   7306 }
   7307 
   7308 static __inline__ __m512 __DEFAULT_FN_ATTRS
   7309 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
   7310 {
   7311   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
   7312                                            (__v16sf)_mm512_broadcast_f32x4(__A),
   7313                                            (__v16sf)_mm512_setzero_ps());
   7314 }
   7315 
   7316 static __inline__ __m512d __DEFAULT_FN_ATTRS
   7317 _mm512_broadcast_f64x4(__m256d __A)
   7318 {
   7319   return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
   7320                                           0, 1, 2, 3, 0, 1, 2, 3);
   7321 }
   7322 
   7323 static __inline__ __m512d __DEFAULT_FN_ATTRS
   7324 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
   7325 {
   7326   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
   7327                                             (__v8df)_mm512_broadcast_f64x4(__A),
   7328                                             (__v8df)__O);
   7329 }
   7330 
   7331 static __inline__ __m512d __DEFAULT_FN_ATTRS
   7332 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
   7333 {
   7334   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
   7335                                             (__v8df)_mm512_broadcast_f64x4(__A),
   7336                                             (__v8df)_mm512_setzero_pd());
   7337 }
   7338 
   7339 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7340 _mm512_broadcast_i32x4(__m128i __A)
   7341 {
   7342   return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
   7343                                           0, 1, 2, 3, 0, 1, 2, 3,
   7344                                           0, 1, 2, 3, 0, 1, 2, 3);
   7345 }
   7346 
   7347 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7348 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
   7349 {
   7350   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
   7351                                            (__v16si)_mm512_broadcast_i32x4(__A),
   7352                                            (__v16si)__O);
   7353 }
   7354 
   7355 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7356 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
   7357 {
   7358   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
   7359                                            (__v16si)_mm512_broadcast_i32x4(__A),
   7360                                            (__v16si)_mm512_setzero_si512());
   7361 }
   7362 
   7363 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7364 _mm512_broadcast_i64x4(__m256i __A)
   7365 {
   7366   return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
   7367                                           0, 1, 2, 3, 0, 1, 2, 3);
   7368 }
   7369 
   7370 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7371 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
   7372 {
   7373   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
   7374                                             (__v8di)_mm512_broadcast_i64x4(__A),
   7375                                             (__v8di)__O);
   7376 }
   7377 
   7378 static __inline__ __m512i __DEFAULT_FN_ATTRS
   7379 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
   7380 {
   7381   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
   7382                                             (__v8di)_mm512_broadcast_i64x4(__A),
   7383                                             (__v8di)_mm512_setzero_si512());
   7384 }
   7385 
   7386 static __inline__ __m512d __DEFAULT_FN_ATTRS
   7387 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
   7388 {
   7389   return (__m512d)__builtin_ia32_selectpd_512(__M,
   7390                                               (__v8df) _mm512_broadcastsd_pd(__A),
   7391                                               (__v8df) __O);
   7392 }
   7393 
   7394 static __inline__ __m512d __DEFAULT_FN_ATTRS
   7395 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
   7396 {
   7397   return (__m512d)__builtin_ia32_selectpd_512(__M,
   7398                                               (__v8df) _mm512_broadcastsd_pd(__A),
   7399                                               (__v8df) _mm512_setzero_pd());
   7400 }
   7401 
   7402 static __inline__ __m512 __DEFAULT_FN_ATTRS
   7403 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
   7404 {
   7405   return (__m512)__builtin_ia32_selectps_512(__M,
   7406                                              (__v16sf) _mm512_broadcastss_ps(__A),
   7407                                              (__v16sf) __O);
   7408 }
   7409 
   7410 static __inline__ __m512 __DEFAULT_FN_ATTRS
   7411 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
   7412 {
   7413   return (__m512)__builtin_ia32_selectps_512(__M,
   7414                                              (__v16sf) _mm512_broadcastss_ps(__A),
   7415                                              (__v16sf) _mm512_setzero_ps());
   7416 }
   7417 
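/* Narrowing integer conversions (VPMOV*): the cvtsepi* forms narrow with
 * signed saturation, the cvtusepi* forms with unsigned saturation, and the
 * cvtepi* forms by simple truncation.  The *_storeu_* variants write the
 * narrowed elements directly to (possibly unaligned) memory, storing only
 * the elements whose mask bit is set.
 */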
   7418 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7419 _mm512_cvtsepi32_epi8 (__m512i __A)
   7420 {
   7421   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
   7422                (__v16qi) _mm_undefined_si128 (),
   7423                (__mmask16) -1);
   7424 }
   7425 
   7426 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7427 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
   7428 {
   7429   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
   7430                (__v16qi) __O, __M);
   7431 }
   7432 
   7433 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7434 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
   7435 {
   7436   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
   7437                (__v16qi) _mm_setzero_si128 (),
   7438                __M);
   7439 }
   7440 
   7441 static __inline__ void __DEFAULT_FN_ATTRS
   7442 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
   7443 {
   7444   __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
   7445 }
   7446 
   7447 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7448 _mm512_cvtsepi32_epi16 (__m512i __A)
   7449 {
   7450   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
   7451                (__v16hi) _mm256_undefined_si256 (),
   7452                (__mmask16) -1);
   7453 }
   7454 
   7455 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7456 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
   7457 {
   7458   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
   7459                (__v16hi) __O, __M);
   7460 }
   7461 
   7462 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7463 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
   7464 {
   7465   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
   7466                (__v16hi) _mm256_setzero_si256 (),
   7467                __M);
   7468 }
   7469 
   7470 static __inline__ void __DEFAULT_FN_ATTRS
   7471 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
   7472 {
   7473   __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
   7474 }
   7475 
   7476 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7477 _mm512_cvtsepi64_epi8 (__m512i __A)
   7478 {
   7479   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
   7480                (__v16qi) _mm_undefined_si128 (),
   7481                (__mmask8) -1);
   7482 }
   7483 
   7484 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7485 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
   7486 {
   7487   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
   7488                (__v16qi) __O, __M);
   7489 }
   7490 
   7491 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7492 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
   7493 {
   7494   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
   7495                (__v16qi) _mm_setzero_si128 (),
   7496                __M);
   7497 }
   7498 
   7499 static __inline__ void __DEFAULT_FN_ATTRS
   7500 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
   7501 {
   7502   __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
   7503 }
   7504 
   7505 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7506 _mm512_cvtsepi64_epi32 (__m512i __A)
   7507 {
   7508   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
   7509                (__v8si) _mm256_undefined_si256 (),
   7510                (__mmask8) -1);
   7511 }
   7512 
   7513 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7514 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
   7515 {
   7516   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
   7517                (__v8si) __O, __M);
   7518 }
   7519 
   7520 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7521 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
   7522 {
   7523   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
   7524                (__v8si) _mm256_setzero_si256 (),
   7525                __M);
   7526 }
   7527 
   7528 static __inline__ void __DEFAULT_FN_ATTRS
   7529 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
   7530 {
   7531   __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
   7532 }
   7533 
   7534 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7535 _mm512_cvtsepi64_epi16 (__m512i __A)
   7536 {
   7537   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
   7538                (__v8hi) _mm_undefined_si128 (),
   7539                (__mmask8) -1);
   7540 }
   7541 
   7542 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7543 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
   7544 {
   7545   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
   7546                (__v8hi) __O, __M);
   7547 }
   7548 
   7549 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7550 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
   7551 {
   7552   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
   7553                (__v8hi) _mm_setzero_si128 (),
   7554                __M);
   7555 }
   7556 
   7557 static __inline__ void __DEFAULT_FN_ATTRS
   7558 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
   7559 {
   7560   __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
   7561 }
   7562 
   7563 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7564 _mm512_cvtusepi32_epi8 (__m512i __A)
   7565 {
   7566   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
   7567                 (__v16qi) _mm_undefined_si128 (),
   7568                 (__mmask16) -1);
   7569 }
   7570 
   7571 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7572 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
   7573 {
   7574   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
   7575                 (__v16qi) __O,
   7576                 __M);
   7577 }
   7578 
   7579 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7580 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
   7581 {
   7582   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
   7583                 (__v16qi) _mm_setzero_si128 (),
   7584                 __M);
   7585 }
   7586 
   7587 static __inline__ void __DEFAULT_FN_ATTRS
   7588 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
   7589 {
   7590   __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
   7591 }
   7592 
   7593 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7594 _mm512_cvtusepi32_epi16 (__m512i __A)
   7595 {
   7596   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
   7597                 (__v16hi) _mm256_undefined_si256 (),
   7598                 (__mmask16) -1);
   7599 }
   7600 
   7601 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7602 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
   7603 {
   7604   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
   7605                 (__v16hi) __O,
   7606                 __M);
   7607 }
   7608 
   7609 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7610 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
   7611 {
   7612   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
   7613                 (__v16hi) _mm256_setzero_si256 (),
   7614                 __M);
   7615 }
   7616 
   7617 static __inline__ void __DEFAULT_FN_ATTRS
   7618 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
   7619 {
   7620   __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
   7621 }
   7622 
   7623 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7624 _mm512_cvtusepi64_epi8 (__m512i __A)
   7625 {
   7626   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
   7627                 (__v16qi) _mm_undefined_si128 (),
   7628                 (__mmask8) -1);
   7629 }
   7630 
   7631 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7632 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
   7633 {
   7634   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
   7635                 (__v16qi) __O,
   7636                 __M);
   7637 }
   7638 
   7639 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7640 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
   7641 {
   7642   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
   7643                 (__v16qi) _mm_setzero_si128 (),
   7644                 __M);
   7645 }
   7646 
   7647 static __inline__ void __DEFAULT_FN_ATTRS
   7648 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
   7649 {
   7650   __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
   7651 }
   7652 
   7653 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7654 _mm512_cvtusepi64_epi32 (__m512i __A)
   7655 {
   7656   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
   7657                 (__v8si) _mm256_undefined_si256 (),
   7658                 (__mmask8) -1);
   7659 }
   7660 
   7661 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7662 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
   7663 {
   7664   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
   7665                 (__v8si) __O, __M);
   7666 }
   7667 
   7668 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7669 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
   7670 {
   7671   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
   7672                 (__v8si) _mm256_setzero_si256 (),
   7673                 __M);
   7674 }
   7675 
   7676 static __inline__ void __DEFAULT_FN_ATTRS
   7677 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
   7678 {
   7679   __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
   7680 }
   7681 
   7682 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7683 _mm512_cvtusepi64_epi16 (__m512i __A)
   7684 {
   7685   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
   7686                 (__v8hi) _mm_undefined_si128 (),
   7687                 (__mmask8) -1);
   7688 }
   7689 
   7690 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7691 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
   7692 {
   7693   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
   7694                 (__v8hi) __O, __M);
   7695 }
   7696 
   7697 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7698 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
   7699 {
   7700   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
   7701                 (__v8hi) _mm_setzero_si128 (),
   7702                 __M);
   7703 }
   7704 
   7705 static __inline__ void __DEFAULT_FN_ATTRS
   7706 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
   7707 {
   7708   __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
   7709 }
   7710 
   7711 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7712 _mm512_cvtepi32_epi8 (__m512i __A)
   7713 {
   7714   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
   7715               (__v16qi) _mm_undefined_si128 (),
   7716               (__mmask16) -1);
   7717 }
   7718 
   7719 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7720 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
   7721 {
   7722   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
   7723               (__v16qi) __O, __M);
   7724 }
   7725 
   7726 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7727 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
   7728 {
   7729   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
   7730               (__v16qi) _mm_setzero_si128 (),
   7731               __M);
   7732 }
   7733 
   7734 static __inline__ void __DEFAULT_FN_ATTRS
   7735 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
   7736 {
   7737   __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
   7738 }
   7739 
   7740 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7741 _mm512_cvtepi32_epi16 (__m512i __A)
   7742 {
   7743   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
   7744               (__v16hi) _mm256_undefined_si256 (),
   7745               (__mmask16) -1);
   7746 }
   7747 
   7748 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7749 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
   7750 {
   7751   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
   7752               (__v16hi) __O, __M);
   7753 }
   7754 
   7755 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7756 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
   7757 {
   7758   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
   7759               (__v16hi) _mm256_setzero_si256 (),
   7760               __M);
   7761 }
   7762 
   7763 static __inline__ void __DEFAULT_FN_ATTRS
   7764 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
   7765 {
   7766   __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
   7767 }
   7768 
   7769 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7770 _mm512_cvtepi64_epi8 (__m512i __A)
   7771 {
   7772   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
   7773               (__v16qi) _mm_undefined_si128 (),
   7774               (__mmask8) -1);
   7775 }
   7776 
   7777 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7778 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
   7779 {
   7780   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
   7781               (__v16qi) __O, __M);
   7782 }
   7783 
   7784 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7785 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
   7786 {
   7787   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
   7788               (__v16qi) _mm_setzero_si128 (),
   7789               __M);
   7790 }
   7791 
   7792 static __inline__ void __DEFAULT_FN_ATTRS
   7793 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
   7794 {
   7795   __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
   7796 }
   7797 
   7798 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7799 _mm512_cvtepi64_epi32 (__m512i __A)
   7800 {
   7801   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
   7802               (__v8si) _mm256_undefined_si256 (),
   7803               (__mmask8) -1);
   7804 }
   7805 
   7806 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7807 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
   7808 {
   7809   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
   7810               (__v8si) __O, __M);
   7811 }
   7812 
   7813 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7814 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
   7815 {
   7816   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
   7817               (__v8si) _mm256_setzero_si256 (),
   7818               __M);
   7819 }
   7820 
   7821 static __inline__ void __DEFAULT_FN_ATTRS
   7822 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
   7823 {
   7824   __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
   7825 }
   7826 
   7827 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7828 _mm512_cvtepi64_epi16 (__m512i __A)
   7829 {
   7830   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
   7831               (__v8hi) _mm_undefined_si128 (),
   7832               (__mmask8) -1);
   7833 }
   7834 
   7835 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7836 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
   7837 {
   7838   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
   7839               (__v8hi) __O, __M);
   7840 }
   7841 
   7842 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7843 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
   7844 {
   7845   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
   7846               (__v8hi) _mm_setzero_si128 (),
   7847               __M);
   7848 }
   7849 
   7850 static __inline__ void __DEFAULT_FN_ATTRS
   7851 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
   7852 {
   7853   __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
   7854 }
   7855 
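/* 128-bit/256-bit field extraction by immediate, implemented as compile-time
 * shuffles: extracti32x4 selects one of the four 128-bit fields of __A with
 * imm[1:0], extracti64x4 selects one of the two 256-bit halves with imm[0].
 */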
   7856 #define _mm512_extracti32x4_epi32(A, imm) __extension__ ({            \
   7857   (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A),             \
   7858                                    (__v16si)_mm512_undefined_epi32(), \
   7859                                    0 + ((imm) & 0x3) * 4,             \
   7860                                    1 + ((imm) & 0x3) * 4,             \
   7861                                    2 + ((imm) & 0x3) * 4,             \
   7862                                    3 + ((imm) & 0x3) * 4); })
   7863 
   7864 #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
   7865   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
   7866                                 (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
   7867                                 (__v4si)(W)); })
   7868 
   7869 #define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
   7870   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
   7871                                 (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
   7872                                 (__v4si)_mm_setzero_si128()); })
   7873 
   7874 #define _mm512_extracti64x4_epi64(A, imm) __extension__ ({           \
   7875   (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A),             \
   7876                                    (__v8di)_mm512_undefined_epi32(), \
   7877                                    ((imm) & 1) ? 4 : 0,              \
   7878                                    ((imm) & 1) ? 5 : 1,              \
   7879                                    ((imm) & 1) ? 6 : 2,              \
   7880                                    ((imm) & 1) ? 7 : 3); })
   7881 
   7882 #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \
   7883   (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
   7884                                 (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
   7885                                 (__v4di)(W)); })
   7886 
   7887 #define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \
   7888   (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
   7889                                 (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
   7890                                 (__v4di)_mm256_setzero_si256()); })
   7891 
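/* Field insertion: __B is first widened to 512 bits with a cast intrinsic,
 * and the shuffle then replaces the 256-bit half (insertf64x4/inserti64x4,
 * imm[0]) or 128-bit quarter (insertf32x4/inserti32x4, imm[1:0]) of __A that
 * the immediate selects.
 */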
   7892 #define _mm512_insertf64x4(A, B, imm) __extension__ ({ \
   7893   (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
   7894                                  (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \
   7895                                  ((imm) & 0x1) ?  0 :  8, \
   7896                                  ((imm) & 0x1) ?  1 :  9, \
   7897                                  ((imm) & 0x1) ?  2 : 10, \
   7898                                  ((imm) & 0x1) ?  3 : 11, \
   7899                                  ((imm) & 0x1) ?  8 :  4, \
   7900                                  ((imm) & 0x1) ?  9 :  5, \
   7901                                  ((imm) & 0x1) ? 10 :  6, \
   7902                                  ((imm) & 0x1) ? 11 :  7); })
   7903 
   7904 #define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \
   7905   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
   7906                                   (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
   7907                                   (__v8df)(W)); })
   7908 
   7909 #define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \
   7910   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
   7911                                   (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
   7912                                   (__v8df)_mm512_setzero_pd()); })
   7913 
   7914 #define _mm512_inserti64x4(A, B, imm) __extension__ ({ \
   7915   (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
   7916                                  (__v8di)_mm512_castsi256_si512((__m256i)(B)), \
   7917                                  ((imm) & 0x1) ?  0 :  8, \
   7918                                  ((imm) & 0x1) ?  1 :  9, \
   7919                                  ((imm) & 0x1) ?  2 : 10, \
   7920                                  ((imm) & 0x1) ?  3 : 11, \
   7921                                  ((imm) & 0x1) ?  8 :  4, \
   7922                                  ((imm) & 0x1) ?  9 :  5, \
   7923                                  ((imm) & 0x1) ? 10 :  6, \
   7924                                  ((imm) & 0x1) ? 11 :  7); })
   7925 
   7926 #define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \
   7927   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
   7928                                   (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
   7929                                   (__v8di)(W)); })
   7930 
   7931 #define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \
   7932   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
   7933                                   (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
   7934                                   (__v8di)_mm512_setzero_si512()); })
   7935 
   7936 #define _mm512_insertf32x4(A, B, imm) __extension__ ({ \
   7937   (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
   7938                                   (__v16sf)_mm512_castps128_ps512((__m128)(B)),\
   7939                                   (((imm) & 0x3) == 0) ? 16 :  0, \
   7940                                   (((imm) & 0x3) == 0) ? 17 :  1, \
   7941                                   (((imm) & 0x3) == 0) ? 18 :  2, \
   7942                                   (((imm) & 0x3) == 0) ? 19 :  3, \
   7943                                   (((imm) & 0x3) == 1) ? 16 :  4, \
   7944                                   (((imm) & 0x3) == 1) ? 17 :  5, \
   7945                                   (((imm) & 0x3) == 1) ? 18 :  6, \
   7946                                   (((imm) & 0x3) == 1) ? 19 :  7, \
   7947                                   (((imm) & 0x3) == 2) ? 16 :  8, \
   7948                                   (((imm) & 0x3) == 2) ? 17 :  9, \
   7949                                   (((imm) & 0x3) == 2) ? 18 : 10, \
   7950                                   (((imm) & 0x3) == 2) ? 19 : 11, \
   7951                                   (((imm) & 0x3) == 3) ? 16 : 12, \
   7952                                   (((imm) & 0x3) == 3) ? 17 : 13, \
   7953                                   (((imm) & 0x3) == 3) ? 18 : 14, \
   7954                                   (((imm) & 0x3) == 3) ? 19 : 15); })
   7955 
   7956 #define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
   7957   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
   7958                                  (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
   7959                                  (__v16sf)(W)); })
   7960 
   7961 #define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
   7962   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
   7963                                  (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
   7964                                  (__v16sf)_mm512_setzero_ps()); })
   7965 
   7966 #define _mm512_inserti32x4(A, B, imm) __extension__ ({ \
   7967   (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
   7968                                  (__v16si)_mm512_castsi128_si512((__m128i)(B)),\
   7969                                  (((imm) & 0x3) == 0) ? 16 :  0, \
   7970                                  (((imm) & 0x3) == 0) ? 17 :  1, \
   7971                                  (((imm) & 0x3) == 0) ? 18 :  2, \
   7972                                  (((imm) & 0x3) == 0) ? 19 :  3, \
   7973                                  (((imm) & 0x3) == 1) ? 16 :  4, \
   7974                                  (((imm) & 0x3) == 1) ? 17 :  5, \
   7975                                  (((imm) & 0x3) == 1) ? 18 :  6, \
   7976                                  (((imm) & 0x3) == 1) ? 19 :  7, \
   7977                                  (((imm) & 0x3) == 2) ? 16 :  8, \
   7978                                  (((imm) & 0x3) == 2) ? 17 :  9, \
   7979                                  (((imm) & 0x3) == 2) ? 18 : 10, \
   7980                                  (((imm) & 0x3) == 2) ? 19 : 11, \
   7981                                  (((imm) & 0x3) == 3) ? 16 : 12, \
   7982                                  (((imm) & 0x3) == 3) ? 17 : 13, \
   7983                                  (((imm) & 0x3) == 3) ? 18 : 14, \
   7984                                  (((imm) & 0x3) == 3) ? 19 : 15); })
   7985 
   7986 #define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
   7987   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
   7988                                  (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
   7989                                  (__v16si)(W)); })
   7990 
   7991 #define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
   7992   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
   7993                                  (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
   7994                                  (__v16si)_mm512_setzero_si512()); })
   7995 
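/* GETMANT: extracts the normalized mantissa of each element.  B selects the
 * normalization interval (_MM_MANT_NORM_*) and C the sign control
 * (_MM_MANT_SIGN_*); the builtin takes the two packed as ((C << 2) | B).
 */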
   7996 #define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \
   7997   (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
   7998                                             (int)(((C)<<2) | (B)), \
   7999                                             (__v8df)_mm512_undefined_pd(), \
   8000                                             (__mmask8)-1, (int)(R)); })
   8001 
   8002 #define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \
   8003   (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
   8004                                             (int)(((C)<<2) | (B)), \
   8005                                             (__v8df)(__m512d)(W), \
   8006                                             (__mmask8)(U), (int)(R)); })
   8007 
   8008 #define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \
   8009   (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
   8010                                             (int)(((C)<<2) | (B)), \
   8011                                             (__v8df)_mm512_setzero_pd(), \
   8012                                             (__mmask8)(U), (int)(R)); })
   8013 
   8014 #define _mm512_getmant_pd(A, B, C) __extension__ ({ \
   8015   (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
   8016                                             (int)(((C)<<2) | (B)), \
   8017                                             (__v8df)_mm512_setzero_pd(), \
   8018                                             (__mmask8)-1, \
   8019                                             _MM_FROUND_CUR_DIRECTION); })
   8020 
   8021 #define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
   8022   (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
   8023                                             (int)(((C)<<2) | (B)), \
   8024                                             (__v8df)(__m512d)(W), \
   8025                                             (__mmask8)(U), \
   8026                                             _MM_FROUND_CUR_DIRECTION); })
   8027 
   8028 #define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
   8029   (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
   8030                                             (int)(((C)<<2) | (B)), \
   8031                                             (__v8df)_mm512_setzero_pd(), \
   8032                                             (__mmask8)(U), \
   8033                                             _MM_FROUND_CUR_DIRECTION); })
   8034 
   8035 #define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \
   8036   (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
   8037                                            (int)(((C)<<2) | (B)), \
   8038                                            (__v16sf)_mm512_undefined_ps(), \
   8039                                            (__mmask16)-1, (int)(R)); })
   8040 
   8041 #define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \
   8042   (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
   8043                                            (int)(((C)<<2) | (B)), \
   8044                                            (__v16sf)(__m512)(W), \
   8045                                            (__mmask16)(U), (int)(R)); })
   8046 
   8047 #define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \
   8048   (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
   8049                                            (int)(((C)<<2) | (B)), \
   8050                                            (__v16sf)_mm512_setzero_ps(), \
   8051                                            (__mmask16)(U), (int)(R)); })
   8052 
   8053 #define _mm512_getmant_ps(A, B, C) __extension__ ({ \
   8054   (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
   8055                                            (int)(((C)<<2)|(B)), \
   8056                                            (__v16sf)_mm512_undefined_ps(), \
   8057                                            (__mmask16)-1, \
   8058                                            _MM_FROUND_CUR_DIRECTION); })
   8059 
   8060 #define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
   8061   (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
   8062                                            (int)(((C)<<2)|(B)), \
   8063                                            (__v16sf)(__m512)(W), \
   8064                                            (__mmask16)(U), \
   8065                                            _MM_FROUND_CUR_DIRECTION); })
   8066 
   8067 #define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
   8068   (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
   8069                                            (int)(((C)<<2)|(B)), \
   8070                                            (__v16sf)_mm512_setzero_ps(), \
   8071                                            (__mmask16)(U), \
   8072                                            _MM_FROUND_CUR_DIRECTION); })
   8073 
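/* GETEXP: for each element the result is floor(log2(|x|)) converted to
 * floating point, i.e. the unbiased exponent of the source value.
 */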
   8074 #define _mm512_getexp_round_pd(A, R) __extension__ ({ \
   8075   (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
   8076                                            (__v8df)_mm512_undefined_pd(), \
   8077                                            (__mmask8)-1, (int)(R)); })
   8078 
   8079 #define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \
   8080   (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
   8081                                            (__v8df)(__m512d)(W), \
   8082                                            (__mmask8)(U), (int)(R)); })
   8083 
   8084 #define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \
   8085   (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
   8086                                            (__v8df)_mm512_setzero_pd(), \
   8087                                            (__mmask8)(U), (int)(R)); })
   8088 
   8089 static __inline__ __m512d __DEFAULT_FN_ATTRS
   8090 _mm512_getexp_pd (__m512d __A)
   8091 {
   8092   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
   8093                 (__v8df) _mm512_undefined_pd (),
   8094                 (__mmask8) -1,
   8095                 _MM_FROUND_CUR_DIRECTION);
   8096 }
   8097 
   8098 static __inline__ __m512d __DEFAULT_FN_ATTRS
   8099 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
   8100 {
   8101   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
   8102                 (__v8df) __W,
   8103                 (__mmask8) __U,
   8104                 _MM_FROUND_CUR_DIRECTION);
   8105 }
   8106 
   8107 static __inline__ __m512d __DEFAULT_FN_ATTRS
   8108 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
   8109 {
   8110   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
   8111                 (__v8df) _mm512_setzero_pd (),
   8112                 (__mmask8) __U,
   8113                 _MM_FROUND_CUR_DIRECTION);
   8114 }
   8115 
   8116 #define _mm512_getexp_round_ps(A, R) __extension__ ({ \
   8117   (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
   8118                                           (__v16sf)_mm512_undefined_ps(), \
   8119                                           (__mmask16)-1, (int)(R)); })
   8120 
   8121 #define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \
   8122   (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
   8123                                           (__v16sf)(__m512)(W), \
   8124                                           (__mmask16)(U), (int)(R)); })
   8125 
   8126 #define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \
   8127   (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
   8128                                           (__v16sf)_mm512_setzero_ps(), \
   8129                                           (__mmask16)(U), (int)(R)); })
   8130 
   8131 static __inline__ __m512 __DEFAULT_FN_ATTRS
   8132 _mm512_getexp_ps (__m512 __A)
   8133 {
   8134   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
   8135                (__v16sf) _mm512_undefined_ps (),
   8136                (__mmask16) -1,
   8137                _MM_FROUND_CUR_DIRECTION);
   8138 }
   8139 
   8140 static __inline__ __m512 __DEFAULT_FN_ATTRS
   8141 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
   8142 {
   8143   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
   8144                (__v16sf) __W,
   8145                (__mmask16) __U,
   8146                _MM_FROUND_CUR_DIRECTION);
   8147 }
   8148 
   8149 static __inline__ __m512 __DEFAULT_FN_ATTRS
   8150 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
   8151 {
   8152   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
   8153                (__v16sf) _mm512_setzero_ps (),
   8154                (__mmask16) __U,
   8155                _MM_FROUND_CUR_DIRECTION);
   8156 }
   8157 
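/* Gathers: load elements from addr + index[i]*scale, where scale must be 1,
 * 2, 4 or 8.  The i64 forms take 64-bit indices, the i32 forms 32-bit
 * indices; masked forms load only the lanes whose mask bit is set and keep
 * v1_old elsewhere.  A minimal usage sketch (hypothetical vindex and base):
 *   __m512d g = _mm512_i64gather_pd(vindex, base, sizeof(double));
 */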
   8158 #define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \
   8159   (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
   8160                                        (float const *)(addr), \
   8161                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
   8162                                        (int)(scale)); })
   8163 
   8164 #define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__({\
   8165   (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
   8166                                        (float const *)(addr), \
   8167                                        (__v8di)(__m512i)(index), \
   8168                                        (__mmask8)(mask), (int)(scale)); })
   8169 
   8170 #define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\
    8171   (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
   8172                                         (int const *)(addr), \
   8173                                         (__v8di)(__m512i)(index), \
   8174                                         (__mmask8)-1, (int)(scale)); })
   8175 
   8176 #define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
   8177   (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
   8178                                         (int const *)(addr), \
   8179                                         (__v8di)(__m512i)(index), \
   8180                                         (__mmask8)(mask), (int)(scale)); })
   8181 
   8182 #define _mm512_i64gather_pd(index, addr, scale) __extension__ ({\
   8183   (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
   8184                                        (double const *)(addr), \
   8185                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
   8186                                        (int)(scale)); })
   8187 
   8188 #define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
   8189   (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
   8190                                        (double const *)(addr), \
   8191                                        (__v8di)(__m512i)(index), \
   8192                                        (__mmask8)(mask), (int)(scale)); })
   8193 
   8194 #define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\
    8195   (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
   8196                                        (long long const *)(addr), \
   8197                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
   8198                                        (int)(scale)); })
   8199 
   8200 #define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
   8201   (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
   8202                                        (long long const *)(addr), \
   8203                                        (__v8di)(__m512i)(index), \
   8204                                        (__mmask8)(mask), (int)(scale)); })
   8205 
   8206 #define _mm512_i32gather_ps(index, addr, scale) __extension__ ({\
   8207   (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
   8208                                        (float const *)(addr), \
   8209                                        (__v16sf)(__m512)(index), \
   8210                                        (__mmask16)-1, (int)(scale)); })
   8211 
   8212 #define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
   8213   (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
   8214                                        (float const *)(addr), \
   8215                                        (__v16sf)(__m512)(index), \
   8216                                        (__mmask16)(mask), (int)(scale)); })
   8217 
   8218 #define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({\
   8219   (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
   8220                                         (int const *)(addr), \
   8221                                         (__v16si)(__m512i)(index), \
   8222                                         (__mmask16)-1, (int)(scale)); })
   8223 
   8224 #define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
   8225   (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
   8226                                         (int const *)(addr), \
   8227                                         (__v16si)(__m512i)(index), \
   8228                                         (__mmask16)(mask), (int)(scale)); })
   8229 
   8230 #define _mm512_i32gather_pd(index, addr, scale) __extension__ ({\
   8231   (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
   8232                                        (double const *)(addr), \
   8233                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
   8234                                        (int)(scale)); })
   8235 
   8236 #define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
   8237   (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
   8238                                        (double const *)(addr), \
   8239                                        (__v8si)(__m256i)(index), \
   8240                                        (__mmask8)(mask), (int)(scale)); })
   8241 
   8242 #define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({\
   8243   (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
   8244                                        (long long const *)(addr), \
   8245                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
   8246                                        (int)(scale)); })
   8247 
   8248 #define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
   8249   (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
   8250                                        (long long const *)(addr), \
   8251                                        (__v8si)(__m256i)(index), \
   8252                                        (__mmask8)(mask), (int)(scale)); })
   8253 
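/* Scatters: the store-side counterparts of the gathers above; element v1[i]
 * is written to addr + index[i]*scale, and masked forms store only the lanes
 * whose mask bit is set.  A minimal usage sketch (hypothetical base, vindex
 * and v):
 *   _mm512_i64scatter_pd(base, vindex, v, sizeof(double));
 */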
   8254 #define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({\
   8255   __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \
   8256                                 (__v8di)(__m512i)(index), \
   8257                                 (__v8sf)(__m256)(v1), (int)(scale)); })
   8258 
   8259 #define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
   8260   __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \
   8261                                 (__v8di)(__m512i)(index), \
   8262                                 (__v8sf)(__m256)(v1), (int)(scale)); })
   8263 
   8264 #define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({\
   8265   __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \
   8266                                 (__v8di)(__m512i)(index), \
   8267                                 (__v8si)(__m256i)(v1), (int)(scale)); })
   8268 
   8269 #define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
   8270   __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \
   8271                                 (__v8di)(__m512i)(index), \
   8272                                 (__v8si)(__m256i)(v1), (int)(scale)); })
   8273 
   8274 #define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({\
   8275   __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \
   8276                                (__v8di)(__m512i)(index), \
   8277                                (__v8df)(__m512d)(v1), (int)(scale)); })
   8278 
   8279 #define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
   8280   __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \
   8281                                (__v8di)(__m512i)(index), \
   8282                                (__v8df)(__m512d)(v1), (int)(scale)); })
   8283 
   8284 #define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\
   8285   __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \
   8286                                (__v8di)(__m512i)(index), \
   8287                                (__v8di)(__m512i)(v1), (int)(scale)); })
   8288 
   8289 #define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
   8290   __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \
   8291                                (__v8di)(__m512i)(index), \
   8292                                (__v8di)(__m512i)(v1), (int)(scale)); })
   8293 
   8294 #define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({\
   8295   __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \
   8296                                 (__v16si)(__m512i)(index), \
   8297                                 (__v16sf)(__m512)(v1), (int)(scale)); })
   8298 
   8299 #define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
   8300   __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \
   8301                                 (__v16si)(__m512i)(index), \
   8302                                 (__v16sf)(__m512)(v1), (int)(scale)); })
   8303 
   8304 #define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\
   8305   __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \
   8306                                 (__v16si)(__m512i)(index), \
   8307                                 (__v16si)(__m512i)(v1), (int)(scale)); })
   8308 
   8309 #define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
   8310   __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \
   8311                                 (__v16si)(__m512i)(index), \
   8312                                 (__v16si)(__m512i)(v1), (int)(scale)); })
   8313 
   8314 #define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({\
   8315   __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \
   8316                                (__v8si)(__m256i)(index), \
   8317                                (__v8df)(__m512d)(v1), (int)(scale)); })
   8318 
   8319 #define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
   8320   __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \
   8321                                (__v8si)(__m256i)(index), \
   8322                                (__v8df)(__m512d)(v1), (int)(scale)); })
   8323 
   8324 #define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\
   8325   __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \
   8326                                (__v8si)(__m256i)(index), \
   8327                                (__v8di)(__m512i)(v1), (int)(scale)); })
   8328 
   8329 #define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
   8330   __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \
   8331                                (__v8si)(__m256i)(index), \
   8332                                (__v8di)(__m512i)(v1), (int)(scale)); })
   8333 
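/* Masked scalar FMA (ss/sd): the intrinsics below operate on element 0 only
 * and pass the upper elements of the first vector operand through unchanged.
 * The fmsub/fnmadd/fnmsub variants are built by negating the corresponding
 * operand of the vfmadd builtin, and the _round_ macros expose the
 * rounding-control argument R.  A minimal usage sketch (hypothetical a, b, c):
 *   __m128 r = _mm_maskz_fmadd_ss(0x1, a, b, c);  // r[0] = a[0]*b[0] + c[0]
 */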
   8334 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8335 _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   8336 {
   8337  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
   8338           (__v4sf) __A,
   8339           (__v4sf) __B,
   8340           (__mmask8) __U,
   8341           _MM_FROUND_CUR_DIRECTION);
   8342 }
   8343 
   8344 #define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\
   8345   (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
   8346                                         (__v4sf)(__m128)(A), \
   8347                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
   8348                                         (int)(R)); })
   8349 
   8350 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8351 _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   8352 {
   8353  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
   8354           (__v4sf) __B,
   8355           (__v4sf) __C,
   8356           (__mmask8) __U,
   8357           _MM_FROUND_CUR_DIRECTION);
   8358 }
   8359 
   8360 #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\
   8361   (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
   8362                                          (__v4sf)(__m128)(B), \
   8363                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
    8364                                          (int)(R)); })
   8365 
   8366 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8367 _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
   8368 {
   8369  return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
   8370           (__v4sf) __X,
   8371           (__v4sf) __Y,
   8372           (__mmask8) __U,
   8373           _MM_FROUND_CUR_DIRECTION);
   8374 }
   8375 
   8376 #define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\
   8377   (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
   8378                                          (__v4sf)(__m128)(X), \
   8379                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
   8380                                          (int)(R)); })
   8381 
   8382 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8383 _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   8384 {
   8385  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
   8386           (__v4sf) __A,
   8387           -(__v4sf) __B,
   8388           (__mmask8) __U,
   8389           _MM_FROUND_CUR_DIRECTION);
   8390 }
   8391 
   8392 #define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\
   8393   (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
   8394                                         (__v4sf)(__m128)(A), \
    8395                                         -(__v4sf)(__m128)(B), (__mmask8)(U), \
   8396                                         (int)(R)); })
   8397 
   8398 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8399 _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   8400 {
   8401  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
   8402           (__v4sf) __B,
   8403           -(__v4sf) __C,
   8404           (__mmask8) __U,
   8405           _MM_FROUND_CUR_DIRECTION);
   8406 }
   8407 
   8408 #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\
   8409   (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
   8410                                          (__v4sf)(__m128)(B), \
   8411                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
   8412                                          (int)(R)); })
   8413 
   8414 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8415 _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
   8416 {
   8417  return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
   8418           (__v4sf) __X,
   8419           (__v4sf) __Y,
   8420           (__mmask8) __U,
   8421           _MM_FROUND_CUR_DIRECTION);
   8422 }
   8423 
   8424 #define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\
   8425   (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
   8426                                          (__v4sf)(__m128)(X), \
   8427                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
   8428                                          (int)(R)); })
   8429 
   8430 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8431 _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   8432 {
   8433  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
   8434           -(__v4sf) __A,
   8435           (__v4sf) __B,
   8436           (__mmask8) __U,
   8437           _MM_FROUND_CUR_DIRECTION);
   8438 }
   8439 
   8440 #define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\
   8441   (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
   8442                                         -(__v4sf)(__m128)(A), \
   8443                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
   8444                                         (int)(R)); })
   8445 
   8446 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8447 _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   8448 {
   8449  return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
   8450           (__v4sf) __B,
   8451           (__v4sf) __C,
   8452           (__mmask8) __U,
   8453           _MM_FROUND_CUR_DIRECTION);
   8454 }
   8455 
   8456 #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\
   8457   (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
   8458                                          (__v4sf)(__m128)(B), \
   8459                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
   8460                                          (int)(R)); })
   8461 
   8462 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8463 _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
   8464 {
   8465  return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
   8466           (__v4sf) __X,
   8467           (__v4sf) __Y,
   8468           (__mmask8) __U,
   8469           _MM_FROUND_CUR_DIRECTION);
   8470 }
   8471 
   8472 #define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\
   8473   (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \
   8474                                          (__v4sf)(__m128)(X), \
   8475                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
   8476                                          (int)(R)); })
   8477 
   8478 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8479 _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   8480 {
   8481  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
   8482           -(__v4sf) __A,
   8483           -(__v4sf) __B,
   8484           (__mmask8) __U,
   8485           _MM_FROUND_CUR_DIRECTION);
   8486 }
   8487 
   8488 #define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\
   8489   (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
   8490                                         -(__v4sf)(__m128)(A), \
   8491                                         -(__v4sf)(__m128)(B), (__mmask8)(U), \
   8492                                         (int)(R)); })
   8493 
   8494 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8495 _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   8496 {
   8497  return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
   8498           (__v4sf) __B,
   8499           -(__v4sf) __C,
   8500           (__mmask8) __U,
   8501           _MM_FROUND_CUR_DIRECTION);
   8502 }
   8503 
   8504 #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\
   8505   (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
   8506                                          (__v4sf)(__m128)(B), \
   8507                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
    8508                                          (int)(R)); })
   8509 
   8510 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8511 _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
   8512 {
   8513  return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W,
   8514           (__v4sf) __X,
   8515           (__v4sf) __Y,
   8516           (__mmask8) __U,
   8517           _MM_FROUND_CUR_DIRECTION);
   8518 }
   8519 
   8520 #define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\
   8521   (__m128)__builtin_ia32_vfnmsubss3_mask3((__v4sf)(__m128)(W), \
   8522                                          (__v4sf)(__m128)(X), \
   8523                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
   8524                                          (int)(R)); })
   8525 
   8526 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8527 _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   8528 {
   8529  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
   8530           (__v2df) __A,
   8531           (__v2df) __B,
   8532           (__mmask8) __U,
   8533           _MM_FROUND_CUR_DIRECTION);
   8534 }
   8535 
   8536 #define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\
   8537   (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
   8538                                          (__v2df)(__m128d)(A), \
   8539                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
   8540                                          (int)(R)); })
   8541 
   8542 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8543 _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
   8544 {
   8545  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
   8546           (__v2df) __B,
   8547           (__v2df) __C,
   8548           (__mmask8) __U,
   8549           _MM_FROUND_CUR_DIRECTION);
   8550 }
   8551 
   8552 #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\
   8553   (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
   8554                                           (__v2df)(__m128d)(B), \
   8555                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
    8556                                           (int)(R)); })
   8557 
   8558 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8559 _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
   8560 {
   8561  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
   8562           (__v2df) __X,
   8563           (__v2df) __Y,
   8564           (__mmask8) __U,
   8565           _MM_FROUND_CUR_DIRECTION);
   8566 }
   8567 
   8568 #define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\
   8569   (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
   8570                                           (__v2df)(__m128d)(X), \
   8571                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
   8572                                           (int)(R)); })
   8573 
   8574 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8575 _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   8576 {
   8577  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
   8578           (__v2df) __A,
   8579           -(__v2df) __B,
   8580           (__mmask8) __U,
   8581           _MM_FROUND_CUR_DIRECTION);
   8582 }
   8583 
   8584 #define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\
   8585   (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
   8586                                          (__v2df)(__m128d)(A), \
   8587                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
   8588                                          (int)(R)); })
   8589 
   8590 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8591 _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
   8592 {
   8593  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
   8594           (__v2df) __B,
   8595           -(__v2df) __C,
   8596           (__mmask8) __U,
   8597           _MM_FROUND_CUR_DIRECTION);
   8598 }
   8599 
   8600 #define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\
   8601   (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
   8602                                           (__v2df)(__m128d)(B), \
   8603                                           -(__v2df)(__m128d)(C), \
   8604                                           (__mmask8)(U), (int)(R)); })
   8605 
   8606 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8607 _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
   8608 {
   8609  return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
   8610           (__v2df) __X,
   8611           (__v2df) __Y,
   8612           (__mmask8) __U,
   8613           _MM_FROUND_CUR_DIRECTION);
   8614 }
   8615 
   8616 #define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\
   8617   (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
   8618                                           (__v2df)(__m128d)(X), \
   8619                                           (__v2df)(__m128d)(Y), \
   8620                                           (__mmask8)(U), (int)(R)); })
   8621 
   8622 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8623 _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   8624 {
   8625  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
   8626           -(__v2df) __A,
   8627           (__v2df) __B,
   8628           (__mmask8) __U,
   8629           _MM_FROUND_CUR_DIRECTION);
   8630 }
   8631 
   8632 #define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\
   8633   (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
   8634                                          -(__v2df)(__m128d)(A), \
   8635                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
   8636                                          (int)(R)); })
   8637 
   8638 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8639 _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
   8640 {
   8641  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
   8642           (__v2df) __B,
   8643           (__v2df) __C,
   8644           (__mmask8) __U,
   8645           _MM_FROUND_CUR_DIRECTION);
   8646 }
   8647 
   8648 #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\
   8649   (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
   8650                                           (__v2df)(__m128d)(B), \
   8651                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
   8652                                           (int)(R)); })
   8653 
   8654 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8655 _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
   8656 {
   8657  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
   8658           (__v2df) __X,
   8659           (__v2df) __Y,
   8660           (__mmask8) __U,
   8661           _MM_FROUND_CUR_DIRECTION);
   8662 }
   8663 
   8664 #define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\
   8665   (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \
   8666                                           (__v2df)(__m128d)(X), \
   8667                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
   8668                                           (int)(R)); })
   8669 
   8670 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8671 _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   8672 {
   8673  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
   8674           -(__v2df) __A,
   8675           -(__v2df) __B,
   8676           (__mmask8) __U,
   8677           _MM_FROUND_CUR_DIRECTION);
   8678 }
   8679 
   8680 #define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\
   8681   (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
   8682                                          -(__v2df)(__m128d)(A), \
   8683                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
   8684                                          (int)(R)); })
   8685 
   8686 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8687 _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
   8688 {
   8689  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
   8690           (__v2df) __B,
   8691           -(__v2df) __C,
   8692           (__mmask8) __U,
   8693           _MM_FROUND_CUR_DIRECTION);
   8694 }
   8695 
   8696 #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\
   8697   (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
   8698                                           (__v2df)(__m128d)(B), \
   8699                                           -(__v2df)(__m128d)(C), \
    8700                                           (__mmask8)(U), \
    8701                                           (int)(R)); })
   8702 
   8703 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8704 _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
   8705 {
    8706  return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) __W,
    8707           (__v2df) __X,
    8708           (__v2df) __Y,
   8709           (__mmask8) __U,
   8710           _MM_FROUND_CUR_DIRECTION);
   8711 }
   8712 
   8713 #define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\
   8714   (__m128d)__builtin_ia32_vfnmsubsd3_mask3((__v2df)(__m128d)(W), \
   8715                                           (__v2df)(__m128d)(X), \
   8716                                           (__v2df)(__m128d)(Y), \
   8717                                           (__mmask8)(U), (int)(R)); })
   8718 
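/* Permutes: _mm512_permutex_* shuffle 64-bit elements within each 256-bit
 * half using the 2-bit fields of the immediate C (the same pattern is applied
 * to both halves), while _mm512_permutexvar_* permute across the full vector
 * using per-element indices from __X.  A minimal usage sketch (hypothetical v):
 *   __m512d r = _mm512_permutex_pd(v, 0x1B);  // reverse within each half
 */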
   8719 #define _mm512_permutex_pd(X, C) __extension__ ({ \
   8720   (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
   8721                                    (__v8df)_mm512_undefined_pd(), \
   8722                                    0 + (((C) >> 0) & 0x3), \
   8723                                    0 + (((C) >> 2) & 0x3), \
   8724                                    0 + (((C) >> 4) & 0x3), \
   8725                                    0 + (((C) >> 6) & 0x3), \
   8726                                    4 + (((C) >> 0) & 0x3), \
   8727                                    4 + (((C) >> 2) & 0x3), \
   8728                                    4 + (((C) >> 4) & 0x3), \
   8729                                    4 + (((C) >> 6) & 0x3)); })
   8730 
   8731 #define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \
   8732   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
   8733                                        (__v8df)_mm512_permutex_pd((X), (C)), \
   8734                                        (__v8df)(__m512d)(W)); })
   8735 
   8736 #define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \
   8737   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
   8738                                        (__v8df)_mm512_permutex_pd((X), (C)), \
   8739                                        (__v8df)_mm512_setzero_pd()); })
   8740 
   8741 #define _mm512_permutex_epi64(X, C) __extension__ ({ \
   8742   (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \
   8743                                    (__v8di)_mm512_undefined_epi32(), \
   8744                                    0 + (((C) >> 0) & 0x3), \
   8745                                    0 + (((C) >> 2) & 0x3), \
   8746                                    0 + (((C) >> 4) & 0x3), \
   8747                                    0 + (((C) >> 6) & 0x3), \
   8748                                    4 + (((C) >> 0) & 0x3), \
   8749                                    4 + (((C) >> 2) & 0x3), \
   8750                                    4 + (((C) >> 4) & 0x3), \
   8751                                    4 + (((C) >> 6) & 0x3)); })
   8752 
   8753 #define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
   8754   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
   8755                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
   8756                                       (__v8di)(__m512i)(W)); })
   8757 
   8758 #define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \
   8759   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
   8760                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
   8761                                       (__v8di)_mm512_setzero_si512()); })
   8762 
   8763 static __inline__ __m512d __DEFAULT_FN_ATTRS
   8764 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
   8765 {
   8766   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
   8767                  (__v8di) __X,
   8768                  (__v8df) _mm512_undefined_pd (),
   8769                  (__mmask8) -1);
   8770 }
   8771 
   8772 static __inline__ __m512d __DEFAULT_FN_ATTRS
   8773 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
   8774 {
   8775   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
   8776                  (__v8di) __X,
   8777                  (__v8df) __W,
   8778                  (__mmask8) __U);
   8779 }
   8780 
   8781 static __inline__ __m512d __DEFAULT_FN_ATTRS
   8782 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
   8783 {
   8784   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
   8785                  (__v8di) __X,
   8786                  (__v8df) _mm512_setzero_pd (),
   8787                  (__mmask8) __U);
   8788 }
   8789 
   8790 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8791 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
   8792 {
   8793   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
   8794                  (__v8di) __X,
   8795                  (__v8di) _mm512_setzero_si512 (),
   8796                  __M);
   8797 }
   8798 
   8799 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8800 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
   8801 {
   8802   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
   8803                  (__v8di) __X,
   8804                  (__v8di) _mm512_undefined_epi32 (),
   8805                  (__mmask8) -1);
   8806 }
   8807 
   8808 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8809 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
   8810              __m512i __Y)
   8811 {
   8812   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
   8813                  (__v8di) __X,
   8814                  (__v8di) __W,
   8815                  __M);
   8816 }
   8817 
   8818 static __inline__ __m512 __DEFAULT_FN_ATTRS
   8819 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
   8820 {
   8821   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
   8822                 (__v16si) __X,
   8823                 (__v16sf) _mm512_undefined_ps (),
   8824                 (__mmask16) -1);
   8825 }
   8826 
   8827 static __inline__ __m512 __DEFAULT_FN_ATTRS
   8828 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
   8829 {
   8830   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
   8831                 (__v16si) __X,
   8832                 (__v16sf) __W,
   8833                 (__mmask16) __U);
   8834 }
   8835 
   8836 static __inline__ __m512 __DEFAULT_FN_ATTRS
   8837 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
   8838 {
   8839   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
   8840                 (__v16si) __X,
   8841                 (__v16sf) _mm512_setzero_ps (),
   8842                 (__mmask16) __U);
   8843 }
   8844 
   8845 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8846 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
   8847 {
   8848   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
   8849                  (__v16si) __X,
   8850                  (__v16si) _mm512_setzero_si512 (),
   8851                  __M);
   8852 }
   8853 
   8854 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8855 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
   8856 {
   8857   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
   8858                  (__v16si) __X,
   8859                  (__v16si) _mm512_undefined_epi32 (),
   8860                  (__mmask16) -1);
   8861 }
   8862 
   8863 #define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
   8864 
   8865 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8866 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
   8867              __m512i __Y)
   8868 {
   8869   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
   8870                  (__v16si) __X,
   8871                  (__v16si) __W,
   8872                  __M);
   8873 }
   8874 
   8875 #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
   8876 
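/* 16-bit mask-register operations: bitwise logic (kand/kandn/kor/kxor/kxnor),
 * the kortest all-ones/all-zeros tests, and kunpackb, which concatenates the
 * low bytes of two masks.  A minimal usage sketch (hypothetical m1, m2):
 *   __mmask16 both = _mm512_kand(m1, m2);
 */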
   8877 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   8878 _mm512_kand (__mmask16 __A, __mmask16 __B)
   8879 {
   8880   return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
   8881 }
   8882 
   8883 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   8884 _mm512_kandn (__mmask16 __A, __mmask16 __B)
   8885 {
   8886   return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
   8887 }
   8888 
   8889 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   8890 _mm512_kor (__mmask16 __A, __mmask16 __B)
   8891 {
   8892   return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
   8893 }
   8894 
   8895 static __inline__ int __DEFAULT_FN_ATTRS
   8896 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
   8897 {
   8898   return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
   8899 }
   8900 
   8901 static __inline__ int __DEFAULT_FN_ATTRS
   8902 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
   8903 {
   8904   return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
   8905 }
   8906 
   8907 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   8908 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
   8909 {
   8910   return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
   8911 }
   8912 
   8913 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   8914 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
   8915 {
   8916   return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
   8917 }
   8918 
   8919 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   8920 _mm512_kxor (__mmask16 __A, __mmask16 __B)
   8921 {
   8922   return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
   8923 }
   8924 
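/* Non-temporal (streaming) loads and stores: these use a non-temporal hint so
 * the data does not displace cached working sets, and the address must be
 * 64-byte aligned.  A minimal usage sketch (hypothetical aligned buffer out
 * and value v):
 *   _mm512_stream_pd(out, v);
 */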
   8925 static __inline__ void __DEFAULT_FN_ATTRS
   8926 _mm512_stream_si512 (__m512i * __P, __m512i __A)
   8927 {
   8928   __builtin_nontemporal_store((__v8di)__A, (__v8di*)__P);
   8929 }
   8930 
   8931 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8932 _mm512_stream_load_si512 (void *__P)
   8933 {
   8934   return __builtin_ia32_movntdqa512 ((__v8di *)__P);
   8935 }
   8936 
   8937 static __inline__ void __DEFAULT_FN_ATTRS
   8938 _mm512_stream_pd (double *__P, __m512d __A)
   8939 {
   8940   __builtin_nontemporal_store((__v8df)__A, (__v8df*)__P);
   8941 }
   8942 
   8943 static __inline__ void __DEFAULT_FN_ATTRS
   8944 _mm512_stream_ps (float *__P, __m512 __A)
   8945 {
   8946   __builtin_nontemporal_store((__v16sf)__A, (__v16sf*)__P);
   8947 }
   8948 
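/* Compress: pack the elements selected by the mask contiguously into the low
 * positions of the result; the remaining positions come from W (mask form) or
 * are zeroed (maskz form).  A minimal usage sketch (hypothetical v):
 *   __m512d packed = _mm512_maskz_compress_pd(0xAA, v);  // keep odd lanes
 */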
   8949 static __inline__ __m512d __DEFAULT_FN_ATTRS
   8950 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
   8951 {
   8952   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
   8953                   (__v8df) __W,
   8954                   (__mmask8) __U);
   8955 }
   8956 
   8957 static __inline__ __m512d __DEFAULT_FN_ATTRS
   8958 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
   8959 {
   8960   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
   8961                   (__v8df)
   8962                   _mm512_setzero_pd (),
   8963                   (__mmask8) __U);
   8964 }
   8965 
   8966 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8967 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
   8968 {
   8969   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
   8970                   (__v8di) __W,
   8971                   (__mmask8) __U);
   8972 }
   8973 
   8974 static __inline__ __m512i __DEFAULT_FN_ATTRS
   8975 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
   8976 {
   8977   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
   8978                   (__v8di)
   8979                   _mm512_setzero_si512 (),
   8980                   (__mmask8) __U);
   8981 }
   8982 
   8983 static __inline__ __m512 __DEFAULT_FN_ATTRS
   8984 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
   8985 {
   8986   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
   8987                  (__v16sf) __W,
   8988                  (__mmask16) __U);
   8989 }
   8990 
   8991 static __inline__ __m512 __DEFAULT_FN_ATTRS
   8992 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
   8993 {
   8994   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
   8995                  (__v16sf)
   8996                  _mm512_setzero_ps (),
   8997                  (__mmask16) __U);
   8998 }
   8999 
   9000 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9001 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
   9002 {
   9003   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
   9004                   (__v16si) __W,
   9005                   (__mmask16) __U);
   9006 }
   9007 
   9008 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9009 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
   9010 {
   9011   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
   9012                   (__v16si)
   9013                   _mm512_setzero_si512 (),
   9014                   (__mmask16) __U);
   9015 }
   9016 
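/* Scalar compare-to-mask: compare element 0 of X and Y using predicate P (one
 * of the _CMP_* predicates) and return the result in bit 0 of a __mmask8; the
 * _round_ forms also take an exception-control argument R.  A minimal usage
 * sketch (hypothetical a, b):
 *   __mmask8 lt = _mm_cmp_ss_mask(a, b, _CMP_LT_OS);
 */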
   9017 #define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \
   9018   (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
   9019                                       (__v4sf)(__m128)(Y), (int)(P), \
   9020                                       (__mmask8)-1, (int)(R)); })
   9021 
   9022 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \
   9023   (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
   9024                                       (__v4sf)(__m128)(Y), (int)(P), \
   9025                                       (__mmask8)(M), (int)(R)); })
   9026 
   9027 #define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \
   9028   (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
   9029                                       (__v4sf)(__m128)(Y), (int)(P), \
   9030                                       (__mmask8)-1, \
   9031                                       _MM_FROUND_CUR_DIRECTION); })
   9032 
   9033 #define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \
   9034   (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
   9035                                       (__v4sf)(__m128)(Y), (int)(P), \
   9036                                       (__mmask8)(M), \
   9037                                       _MM_FROUND_CUR_DIRECTION); })
   9038 
   9039 #define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \
   9040   (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
   9041                                       (__v2df)(__m128d)(Y), (int)(P), \
   9042                                       (__mmask8)-1, (int)(R)); })
   9043 
   9044 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \
   9045   (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
   9046                                       (__v2df)(__m128d)(Y), (int)(P), \
   9047                                       (__mmask8)(M), (int)(R)); })
   9048 
   9049 #define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \
   9050   (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
   9051                                       (__v2df)(__m128d)(Y), (int)(P), \
   9052                                       (__mmask8)-1, \
   9053                                       _MM_FROUND_CUR_DIRECTION); })
   9054 
   9055 #define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \
   9056   (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
   9057                                       (__v2df)(__m128d)(Y), (int)(P), \
   9058                                       (__mmask8)(M), \
   9059                                       _MM_FROUND_CUR_DIRECTION); })
   9060 
   9061 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9062 _mm512_movehdup_ps (__m512 __A)
   9063 {
   9064   return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
   9065                          1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
   9066 }
   9067 
   9068 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9069 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
   9070 {
   9071   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
   9072                                              (__v16sf)_mm512_movehdup_ps(__A),
   9073                                              (__v16sf)__W);
   9074 }
   9075 
   9076 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9077 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
   9078 {
   9079   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
   9080                                              (__v16sf)_mm512_movehdup_ps(__A),
   9081                                              (__v16sf)_mm512_setzero_ps());
   9082 }
   9083 
   9084 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9085 _mm512_moveldup_ps (__m512 __A)
   9086 {
   9087   return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
   9088                          0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
   9089 }
   9090 
   9091 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9092 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
   9093 {
   9094   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
   9095                                              (__v16sf)_mm512_moveldup_ps(__A),
   9096                                              (__v16sf)__W);
   9097 }
   9098 
   9099 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9100 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
   9101 {
   9102   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
   9103                                              (__v16sf)_mm512_moveldup_ps(__A),
   9104                                              (__v16sf)_mm512_setzero_ps());
   9105 }
   9106 
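/* Masked scalar move/load/store: element 0 is moved, loaded or stored only
 * when bit 0 of the mask is set; the register results otherwise take element 0
 * from W (mask forms) or zero (maskz forms), with the upper elements copied
 * from the first vector operand.  A minimal usage sketch (hypothetical a, b):
 *   __m128d r = _mm_maskz_move_sd(0x1, a, b);  // r = { b[0], a[1] }
 */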
   9107 static __inline__ __m128 __DEFAULT_FN_ATTRS
   9108 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   9109 {
   9110   __m128 res = __A;
   9111   res[0] = (__U & 1) ? __B[0] : __W[0];
   9112   return res;
   9113 }
   9114 
   9115 static __inline__ __m128 __DEFAULT_FN_ATTRS
   9116 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
   9117 {
   9118   __m128 res = __A;
   9119   res[0] = (__U & 1) ? __B[0] : 0;
   9120   return res;
   9121 }
   9122 
   9123 static __inline__ __m128d __DEFAULT_FN_ATTRS
   9124 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   9125 {
   9126   __m128d res = __A;
   9127   res[0] = (__U & 1) ? __B[0] : __W[0];
   9128   return res;
   9129 }
   9130 
   9131 static __inline__ __m128d __DEFAULT_FN_ATTRS
   9132 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
   9133 {
   9134   __m128d res = __A;
   9135   res[0] = (__U & 1) ? __B[0] : 0;
   9136   return res;
   9137 }
   9138 
   9139 static __inline__ void __DEFAULT_FN_ATTRS
   9140 _mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
   9141 {
   9142   __builtin_ia32_storess128_mask ((__v16sf *)__W,
   9143                 (__v16sf) _mm512_castps128_ps512(__A),
   9144                 (__mmask16) __U & (__mmask16)1);
   9145 }
   9146 
   9147 static __inline__ void __DEFAULT_FN_ATTRS
   9148 _mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
   9149 {
   9150   __builtin_ia32_storesd128_mask ((__v8df *)__W,
   9151                 (__v8df) _mm512_castpd128_pd512(__A),
   9152                 (__mmask8) __U & 1);
   9153 }
   9154 
   9155 static __inline__ __m128 __DEFAULT_FN_ATTRS
   9156 _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
   9157 {
   9158   __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
   9159                                                 (__v4sf) {0.0, 0.0, 0.0, 0.0},
   9160                                                 0, 4, 4, 4);
   9161 
   9162   return (__m128) __builtin_shufflevector(
   9163                            __builtin_ia32_loadss128_mask ((__v16sf *) __A,
   9164                                       (__v16sf) _mm512_castps128_ps512(src),
   9165                                       (__mmask16) __U & 1),
   9166                            _mm512_undefined_ps(), 0, 1, 2, 3);
   9167 }
   9168 
   9169 static __inline__ __m128 __DEFAULT_FN_ATTRS
   9170 _mm_maskz_load_ss (__mmask8 __U, const float* __A)
   9171 {
   9172   return (__m128) __builtin_shufflevector(
   9173                            __builtin_ia32_loadss128_mask ((__v16sf *) __A,
   9174                                       (__v16sf) _mm512_setzero_ps(),
   9175                                       (__mmask16) __U & 1),
   9176                            _mm512_undefined_ps(), 0, 1, 2, 3);
   9177 }
   9178 
   9179 static __inline__ __m128d __DEFAULT_FN_ATTRS
   9180 _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
   9181 {
   9182   __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
   9183                                                  (__v2df) {0.0, 0.0}, 0, 2);
   9184 
   9185   return (__m128d) __builtin_shufflevector(
   9186                             __builtin_ia32_loadsd128_mask ((__v8df *) __A,
   9187                                       (__v8df) _mm512_castpd128_pd512(src),
   9188                                       (__mmask8) __U & 1),
   9189                             _mm512_undefined_pd(), 0, 1);
   9190 }
   9191 
   9192 static __inline__ __m128d __DEFAULT_FN_ATTRS
   9193 _mm_maskz_load_sd (__mmask8 __U, const double* __A)
   9194 {
   9195   return (__m128d) __builtin_shufflevector(
   9196                             __builtin_ia32_loadsd128_mask ((__v8df *) __A,
   9197                                       (__v8df) _mm512_setzero_pd(),
   9198                                       (__mmask8) __U & 1),
   9199                             _mm512_undefined_pd(), 0, 1);
   9200 }
   9201 
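/* _mm512_shuffle_epi32 applies the same PSHUFD-style 2-bit selectors from the
 * immediate I independently within each 128-bit lane.  A minimal usage sketch
 * (hypothetical v):
 *   __m512i r = _mm512_shuffle_epi32(v, 0xB1);  // swap adjacent elements
 */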
   9202 #define _mm512_shuffle_epi32(A, I) __extension__ ({ \
   9203   (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
   9204                                    (__v16si)_mm512_undefined_epi32(), \
   9205                                    0  + (((I) >> 0) & 0x3), \
   9206                                    0  + (((I) >> 2) & 0x3), \
   9207                                    0  + (((I) >> 4) & 0x3), \
   9208                                    0  + (((I) >> 6) & 0x3), \
   9209                                    4  + (((I) >> 0) & 0x3), \
   9210                                    4  + (((I) >> 2) & 0x3), \
   9211                                    4  + (((I) >> 4) & 0x3), \
   9212                                    4  + (((I) >> 6) & 0x3), \
   9213                                    8  + (((I) >> 0) & 0x3), \
   9214                                    8  + (((I) >> 2) & 0x3), \
   9215                                    8  + (((I) >> 4) & 0x3), \
   9216                                    8  + (((I) >> 6) & 0x3), \
   9217                                    12 + (((I) >> 0) & 0x3), \
   9218                                    12 + (((I) >> 2) & 0x3), \
   9219                                    12 + (((I) >> 4) & 0x3), \
   9220                                    12 + (((I) >> 6) & 0x3)); })
   9221 
   9222 #define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
   9223   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
   9224                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
   9225                                       (__v16si)(__m512i)(W)); })
   9226 
   9227 #define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
   9228   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
   9229                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
   9230                                       (__v16si)_mm512_setzero_si512()); })
   9231 
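/* Expand: the inverse of compress above; consecutive source elements (or, for
 * expandloadu, consecutive elements loaded from unaligned memory) are placed
 * at the destination positions whose mask bit is set, with the remaining
 * positions taken from W or zeroed.  A minimal usage sketch (hypothetical v):
 *   __m512d r = _mm512_maskz_expand_pd(0xF0, v);  // v[0..3] into lanes 4..7
 */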
   9232 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9233 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
   9234 {
   9235   return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
   9236                 (__v8df) __W,
   9237                 (__mmask8) __U);
   9238 }
   9239 
   9240 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9241 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
   9242 {
   9243   return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
   9244                 (__v8df) _mm512_setzero_pd (),
   9245                 (__mmask8) __U);
   9246 }
   9247 
   9248 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9249 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
   9250 {
   9251   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
   9252                 (__v8di) __W,
   9253                 (__mmask8) __U);
   9254 }
   9255 
   9256 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9257 _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
   9258 {
   9259   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
    9260                 (__v8di) _mm512_setzero_si512 (),
   9261                 (__mmask8) __U);
   9262 }
   9263 
   9264 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9265 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
   9266 {
   9267   return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
   9268               (__v8df) __W,
   9269               (__mmask8) __U);
   9270 }
   9271 
   9272 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9273 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
   9274 {
   9275   return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
   9276               (__v8df) _mm512_setzero_pd(),
   9277               (__mmask8) __U);
   9278 }
   9279 
   9280 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9281 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
   9282 {
   9283   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
   9284               (__v8di) __W,
   9285               (__mmask8) __U);
   9286 }
   9287 
   9288 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9289 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
   9290 {
   9291   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
    9292               (__v8di) _mm512_setzero_si512(),
   9293               (__mmask8) __U);
   9294 }
   9295 
   9296 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9297 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
   9298 {
   9299   return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
   9300                    (__v16sf) __W,
   9301                    (__mmask16) __U);
   9302 }
   9303 
   9304 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9305 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
   9306 {
   9307   return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
   9308                    (__v16sf) _mm512_setzero_ps(),
   9309                    (__mmask16) __U);
   9310 }
   9311 
   9312 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9313 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
   9314 {
   9315   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
   9316               (__v16si) __W,
   9317               (__mmask16) __U);
   9318 }
   9319 
   9320 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9321 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
   9322 {
   9323   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
    9324               (__v16si) _mm512_setzero_si512(),
   9325               (__mmask16) __U);
   9326 }
   9327 
   9328 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9329 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
   9330 {
   9331   return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
   9332                (__v16sf) __W,
   9333                (__mmask16) __U);
   9334 }
   9335 
   9336 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9337 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
   9338 {
   9339   return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
   9340                (__v16sf) _mm512_setzero_ps(),
   9341                (__mmask16) __U);
   9342 }
   9343 
   9344 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9345 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
   9346 {
   9347   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
   9348                 (__v16si) __W,
   9349                 (__mmask16) __U);
   9350 }
   9351 
   9352 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9353 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
   9354 {
   9355   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
    9356                 (__v16si) _mm512_setzero_si512(),
   9357                 (__mmask16) __U);
   9358 }
   9359 
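// Illustrative only (not part of this header): a minimal sketch of how a caller
// might use the expand-load intrinsics above. With mask 0x0F, four doubles are
// read contiguously from `src` (a hypothetical caller-provided pointer) and
// placed in elements 0..3 of the result; the remaining elements are zeroed by
// the maskz form or taken from `fallback` by the merge-masking form.
//
//   __m512d lo4    = _mm512_maskz_expandloadu_pd((__mmask8)0x0F, src);
//   __m512d merged = _mm512_mask_expandloadu_pd(fallback, (__mmask8)0x0F, src);
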
   9360 #define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \
   9361   (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
   9362                                            (__v8df)_mm512_undefined_pd(), \
   9363                                            (__mmask8)-1, (int)(R)); })
   9364 
   9365 #define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \
   9366   (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
   9367                                            (__v8df)(__m512d)(W), \
   9368                                            (__mmask8)(U), (int)(R)); })
   9369 
   9370 #define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \
   9371   (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
   9372                                            (__v8df)_mm512_setzero_pd(), \
   9373                                            (__mmask8)(U), (int)(R)); })
   9374 
   9375 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9376 _mm512_cvtps_pd (__m256 __A)
   9377 {
   9378   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
   9379                 (__v8df)
   9380                 _mm512_undefined_pd (),
   9381                 (__mmask8) -1,
   9382                 _MM_FROUND_CUR_DIRECTION);
   9383 }
   9384 
   9385 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9386 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
   9387 {
   9388   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
   9389                 (__v8df) __W,
   9390                 (__mmask8) __U,
   9391                 _MM_FROUND_CUR_DIRECTION);
   9392 }
   9393 
   9394 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9395 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
   9396 {
   9397   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
   9398                 (__v8df)
   9399                 _mm512_setzero_pd (),
   9400                 (__mmask8) __U,
   9401                 _MM_FROUND_CUR_DIRECTION);
   9402 }
   9403 
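// Illustrative only (not part of this header): a minimal sketch showing the two
// ways defined above to widen eight floats to eight doubles; `f8` is a
// hypothetical __m256 value supplied by the caller.
//
//   __m512d d8a = _mm512_cvtps_pd(f8);
//   __m512d d8b = _mm512_cvt_roundps_pd(f8, _MM_FROUND_CUR_DIRECTION);
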
    9404 static __inline__ __m512d __DEFAULT_FN_ATTRS
    9405 _mm512_cvtpslo_pd (__m512 __A)
    9406 {
    9407   return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
    9408 }
   9409 
    9410 static __inline__ __m512d __DEFAULT_FN_ATTRS
    9411 _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
    9412 {
    9413   return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
    9414 }
   9415 
   9416 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9417 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
   9418 {
   9419   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
   9420               (__v8df) __A,
   9421               (__v8df) __W);
   9422 }
   9423 
   9424 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9425 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
   9426 {
   9427   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
   9428               (__v8df) __A,
   9429               (__v8df) _mm512_setzero_pd ());
   9430 }
   9431 
   9432 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9433 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
   9434 {
   9435   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
   9436              (__v16sf) __A,
   9437              (__v16sf) __W);
   9438 }
   9439 
   9440 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9441 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
   9442 {
   9443   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
   9444              (__v16sf) __A,
   9445              (__v16sf) _mm512_setzero_ps ());
   9446 }
   9447 
   9448 static __inline__ void __DEFAULT_FN_ATTRS
   9449 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
   9450 {
   9451   __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
   9452             (__mmask8) __U);
   9453 }
   9454 
   9455 static __inline__ void __DEFAULT_FN_ATTRS
   9456 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
   9457 {
   9458   __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
   9459             (__mmask8) __U);
   9460 }
   9461 
   9462 static __inline__ void __DEFAULT_FN_ATTRS
   9463 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
   9464 {
   9465   __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
   9466             (__mmask16) __U);
   9467 }
   9468 
   9469 static __inline__ void __DEFAULT_FN_ATTRS
   9470 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
   9471 {
   9472   __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
   9473             (__mmask16) __U);
   9474 }
   9475 
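// Illustrative only (not part of this header): a minimal sketch of a masked
// compress-store using the intrinsics above. Only the elements of `v` selected
// by the mask are written, packed contiguously starting at `dst` (a
// hypothetical caller-provided buffer); bytes beyond the written elements are
// left untouched.
//
//   _mm512_mask_compressstoreu_pd(dst, (__mmask8)0xA5, v);   // 0xA5 has 4 set bits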
   9476 #define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \
   9477   (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
   9478                                              (__v2df)(__m128d)(B), \
   9479                                              (__v4sf)_mm_undefined_ps(), \
   9480                                              (__mmask8)-1, (int)(R)); })
   9481 
   9482 #define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \
   9483   (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
   9484                                              (__v2df)(__m128d)(B), \
   9485                                              (__v4sf)(__m128)(W), \
   9486                                              (__mmask8)(U), (int)(R)); })
   9487 
   9488 #define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \
   9489   (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
   9490                                              (__v2df)(__m128d)(B), \
   9491                                              (__v4sf)_mm_setzero_ps(), \
   9492                                              (__mmask8)(U), (int)(R)); })
   9493 
   9494 static __inline__ __m128 __DEFAULT_FN_ATTRS
   9495 _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
   9496 {
   9497   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
   9498                                              (__v2df)(__B),
   9499                                              (__v4sf)(__W),
   9500                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
   9501 }
   9502 
   9503 static __inline__ __m128 __DEFAULT_FN_ATTRS
   9504 _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
   9505 {
   9506   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
   9507                                              (__v2df)(__B),
   9508                                              (__v4sf)_mm_setzero_ps(),
   9509                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
   9510 }
   9511 
   9512 #define _mm_cvtss_i32 _mm_cvtss_si32
   9513 #define _mm_cvtsd_i32 _mm_cvtsd_si32
   9514 #define _mm_cvti32_sd _mm_cvtsi32_sd
   9515 #define _mm_cvti32_ss _mm_cvtsi32_ss
   9516 #ifdef __x86_64__
   9517 #define _mm_cvtss_i64 _mm_cvtss_si64
   9518 #define _mm_cvtsd_i64 _mm_cvtsd_si64
   9519 #define _mm_cvti64_sd _mm_cvtsi64_sd
   9520 #define _mm_cvti64_ss _mm_cvtsi64_ss
   9521 #endif
   9522 
   9523 #ifdef __x86_64__
   9524 #define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \
   9525   (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
   9526                                      (int)(R)); })
   9527 
   9528 #define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \
   9529   (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
   9530                                      (int)(R)); })
   9531 #endif
   9532 
   9533 #define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \
   9534   (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })
   9535 
   9536 #define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \
   9537   (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })
   9538 
   9539 #ifdef __x86_64__
   9540 #define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \
   9541   (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
   9542                                     (int)(R)); })
   9543 
   9544 #define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \
   9545   (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
   9546                                     (int)(R)); })
   9547 #endif
   9548 
   9549 #define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \
   9550   (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
   9551                                               (__v4sf)(__m128)(B), \
   9552                                               (__v2df)_mm_undefined_pd(), \
   9553                                               (__mmask8)-1, (int)(R)); })
   9554 
   9555 #define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \
   9556   (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
   9557                                               (__v4sf)(__m128)(B), \
   9558                                               (__v2df)(__m128d)(W), \
   9559                                               (__mmask8)(U), (int)(R)); })
   9560 
   9561 #define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \
   9562   (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
   9563                                               (__v4sf)(__m128)(B), \
   9564                                               (__v2df)_mm_setzero_pd(), \
   9565                                               (__mmask8)(U), (int)(R)); })
   9566 
   9567 static __inline__ __m128d __DEFAULT_FN_ATTRS
   9568 _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
   9569 {
   9570   return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
   9571                                               (__v4sf)(__B),
   9572                                               (__v2df)(__W),
   9573                                               (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
   9574 }
   9575 
   9576 static __inline__ __m128d __DEFAULT_FN_ATTRS
   9577 _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
   9578 {
   9579   return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
   9580                                               (__v4sf)(__B),
   9581                                               (__v2df)_mm_setzero_pd(),
   9582                                               (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
   9583 }
   9584 
   9585 static __inline__ __m128d __DEFAULT_FN_ATTRS
   9586 _mm_cvtu32_sd (__m128d __A, unsigned __B)
   9587 {
   9588   return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
   9589 }
   9590 
   9591 #ifdef __x86_64__
   9592 #define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \
   9593   (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
   9594                                       (unsigned long long)(B), (int)(R)); })
   9595 
   9596 static __inline__ __m128d __DEFAULT_FN_ATTRS
   9597 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
   9598 {
   9599   return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
   9600                  _MM_FROUND_CUR_DIRECTION);
   9601 }
   9602 #endif
   9603 
   9604 #define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \
   9605   (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
   9606                                      (int)(R)); })
   9607 
   9608 static __inline__ __m128 __DEFAULT_FN_ATTRS
   9609 _mm_cvtu32_ss (__m128 __A, unsigned __B)
   9610 {
   9611   return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
   9612                 _MM_FROUND_CUR_DIRECTION);
   9613 }
   9614 
   9615 #ifdef __x86_64__
   9616 #define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \
   9617   (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
   9618                                      (unsigned long long)(B), (int)(R)); })
   9619 
   9620 static __inline__ __m128 __DEFAULT_FN_ATTRS
   9621 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
   9622 {
   9623   return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
   9624                 _MM_FROUND_CUR_DIRECTION);
   9625 }
   9626 #endif
   9627 
   9628 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9629 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
   9630 {
   9631   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
   9632                  __M);
   9633 }
   9634 
   9635 #ifdef __x86_64__
   9636 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9637 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
   9638 {
   9639   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
   9640                  __M);
   9641 }
   9642 #endif
   9643 
   9644 static  __inline __m512i __DEFAULT_FN_ATTRS
   9645 _mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
   9646     char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
   9647     char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
   9648     char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
   9649     char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
   9650     char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
   9651     char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
   9652     char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
   9653     char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
   9654     char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
   9655     char __e4, char __e3, char __e2, char __e1, char __e0) {
   9656 
   9657   return __extension__ (__m512i)(__v64qi)
   9658     {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
   9659      __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
   9660      __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
   9661      __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
   9662      __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
   9663      __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
   9664      __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
   9665      __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
   9666 }
   9667 
   9668 static  __inline __m512i __DEFAULT_FN_ATTRS
   9669 _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
   9670     short __e27, short __e26, short __e25, short __e24, short __e23,
   9671     short __e22, short __e21, short __e20, short __e19, short __e18,
   9672     short __e17, short __e16, short __e15, short __e14, short __e13,
   9673     short __e12, short __e11, short __e10, short __e9, short __e8,
   9674     short __e7, short __e6, short __e5, short __e4, short __e3,
   9675     short __e2, short __e1, short __e0) {
   9676   return __extension__ (__m512i)(__v32hi)
   9677     {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
   9678      __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
   9679      __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
   9680      __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
   9681 }
   9682 
   9683 static __inline __m512i __DEFAULT_FN_ATTRS
   9684 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
   9685      int __E, int __F, int __G, int __H,
   9686      int __I, int __J, int __K, int __L,
   9687      int __M, int __N, int __O, int __P)
   9688 {
   9689   return __extension__ (__m512i)(__v16si)
   9690   { __P, __O, __N, __M, __L, __K, __J, __I,
   9691     __H, __G, __F, __E, __D, __C, __B, __A };
   9692 }
   9693 
   9694 #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,           \
   9695        e8,e9,e10,e11,e12,e13,e14,e15)          \
   9696   _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
   9697                    (e5),(e4),(e3),(e2),(e1),(e0))
   9698 
   9699 static __inline__ __m512i __DEFAULT_FN_ATTRS
   9700 _mm512_set_epi64 (long long __A, long long __B, long long __C,
   9701      long long __D, long long __E, long long __F,
   9702      long long __G, long long __H)
   9703 {
   9704   return __extension__ (__m512i) (__v8di)
   9705   { __H, __G, __F, __E, __D, __C, __B, __A };
   9706 }
   9707 
   9708 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)           \
   9709   _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
   9710 
   9711 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9712 _mm512_set_pd (double __A, double __B, double __C, double __D,
   9713         double __E, double __F, double __G, double __H)
   9714 {
   9715   return __extension__ (__m512d)
   9716   { __H, __G, __F, __E, __D, __C, __B, __A };
   9717 }
   9718 
   9719 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)              \
   9720   _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
   9721 
   9722 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9723 _mm512_set_ps (float __A, float __B, float __C, float __D,
   9724         float __E, float __F, float __G, float __H,
   9725         float __I, float __J, float __K, float __L,
   9726         float __M, float __N, float __O, float __P)
   9727 {
   9728   return __extension__ (__m512)
   9729   { __P, __O, __N, __M, __L, __K, __J, __I,
   9730     __H, __G, __F, __E, __D, __C, __B, __A };
   9731 }
   9732 
   9733 #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
   9734   _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
   9735                 (e4),(e3),(e2),(e1),(e0))
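
// Illustrative only (not part of this header): the _mm512_set_* forms above take
// their arguments from the highest element down to element 0, while the
// _mm512_setr_* forms take them in memory order (element 0 first). The two
// calls below therefore build the same vector {0, 1, 2, ..., 15}.
//
//   __m512i a = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8,
//                                 7,  6,  5,  4,  3,  2, 1, 0);
//   __m512i b = _mm512_setr_epi32( 0,  1,  2,  3,  4,  5, 6, 7,
//                                  8,  9, 10, 11, 12, 13, 14, 15);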
   9736 
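// _mm512_abs_ps and _mm512_abs_pd below compute the absolute value by clearing
// the IEEE-754 sign bit: the element bits are ANDed with 0x7FFFFFFF (float) or
// 0x7FFFFFFFFFFFFFFF (double).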
   9737 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9738 _mm512_abs_ps(__m512 __A)
   9739 {
    9740   return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF), (__m512i)__A);
   9741 }
   9742 
   9743 static __inline__ __m512 __DEFAULT_FN_ATTRS
   9744 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
   9745 {
    9746   return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF), (__m512i)__A);
   9747 }
   9748 
   9749 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9750 _mm512_abs_pd(__m512d __A)
   9751 {
    9752   return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF), (__v8di)__A);
   9753 }
   9754 
   9755 static __inline__ __m512d __DEFAULT_FN_ATTRS
   9756 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
   9757 {
   9758   return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
   9759 }
   9760 
   9761 // Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
   9762 // outputs. This class of vector operation forms the basis of many scientific
    9763 // computations. In vector-reduction arithmetic, the evaluation is independent
    9764 // of the order of the input elements of V.
    9765 
    9766 // We use a bisection method: at each step, the vector produced by the previous
    9767 // step is split in half, and the operation is applied to the two halves.
    9768 // This takes log2(n) steps, where n is the number of elements in the vector.
    9769 
    9770 // Vec512 - Vector with size 512.
    9771 // Operator - Can be one of the following: +,*,&,|
    9772 // T2 - Can get 'i' for int and 'f' for float.
    9773 // T1 - Can get 'i' for int and 'd' for double.
   9774 
   9775 #define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1)         \
   9776   __extension__({                                                      \
   9777     __m256##T1 Vec256 = __builtin_shufflevector(                       \
   9778                             (__v8d##T2)Vec512,                         \
   9779                             (__v8d##T2)Vec512,                         \
   9780                             0, 1, 2, 3)                                \
   9781                         Operator                                       \
   9782                         __builtin_shufflevector(                       \
   9783                             (__v8d##T2)Vec512,                         \
   9784                             (__v8d##T2)Vec512,                         \
   9785                             4, 5, 6, 7);                               \
   9786     __m128##T1 Vec128 = __builtin_shufflevector(                       \
   9787                             (__v4d##T2)Vec256,                         \
   9788                             (__v4d##T2)Vec256,                         \
   9789                             0, 1)                                      \
   9790                         Operator                                       \
   9791                         __builtin_shufflevector(                       \
   9792                             (__v4d##T2)Vec256,                         \
   9793                             (__v4d##T2)Vec256,                         \
   9794                             2, 3);                                     \
   9795     Vec128 = __builtin_shufflevector((__v2d##T2)Vec128,                \
   9796                                      (__v2d##T2)Vec128, 0, -1)         \
   9797              Operator                                                  \
   9798              __builtin_shufflevector((__v2d##T2)Vec128,                \
   9799                                      (__v2d##T2)Vec128, 1, -1);        \
   9800     return Vec128[0];                                                  \
   9801   })
   9802 
   9803 static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) {
   9804   _mm512_reduce_operator_64bit(__W, +, i, i);
   9805 }
   9806 
   9807 static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) {
   9808   _mm512_reduce_operator_64bit(__W, *, i, i);
   9809 }
   9810 
   9811 static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) {
   9812   _mm512_reduce_operator_64bit(__W, &, i, i);
   9813 }
   9814 
   9815 static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) {
   9816   _mm512_reduce_operator_64bit(__W, |, i, i);
   9817 }
   9818 
   9819 static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) {
   9820   _mm512_reduce_operator_64bit(__W, +, f, d);
   9821 }
   9822 
   9823 static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) {
   9824   _mm512_reduce_operator_64bit(__W, *, f, d);
   9825 }
   9826 
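// Illustrative only (not part of this header): a minimal sketch of what the
// 64-bit reductions above compute. For integer element types the result equals
// the sequential reduction, e.g. _mm512_reduce_add_epi64(v) returns
// v[0] + v[1] + ... + v[7].
//
//   __m512i v   = _mm512_set1_epi64(3);
//   long long s = _mm512_reduce_add_epi64(v);   // s == 24
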
   9827 // Vec512 - Vector with size 512.
   9828 // Vec512Neutral - All vector elements set to the identity element.
   9829 // Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0}
    9830 // Operator - Can be one of the following: +,*,&,|
    9831 // Mask - Intrinsic Mask
    9832 // T2 - Can get 'i' for int and 'f' for float.
    9833 // T1 - Can get 'i' for int and 'd' for packed double-precision.
    9834 // T3 - Can be 'pd' for packed double or 'q' for q-word.
   9835 
   9836 #define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator,     \
   9837                                           Mask, T2, T1, T3)                    \
   9838   __extension__({                                                              \
   9839     Vec512 = __builtin_ia32_select##T3##_512(                                  \
   9840                  (__mmask8)Mask,                                               \
   9841                  (__v8d##T2)Vec512,                                            \
   9842                  (__v8d##T2)Vec512Neutral);                                    \
   9843     _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1);                    \
   9844   })
   9845 
   9846 static __inline__ long long __DEFAULT_FN_ATTRS
   9847 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
   9848   _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), +, __M, i, i, q);
   9849 }
   9850 
   9851 static __inline__ long long __DEFAULT_FN_ATTRS
   9852 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
   9853   _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(1), *, __M, i, i, q);
   9854 }
   9855 
   9856 static __inline__ long long __DEFAULT_FN_ATTRS
   9857 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
   9858   _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
   9859                                     &, __M,  i, i, q);
   9860 }
   9861 
   9862 static __inline__ long long __DEFAULT_FN_ATTRS
   9863 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
   9864   _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M,
   9865                                     i, i, q);
   9866 }
   9867 
   9868 static __inline__ double __DEFAULT_FN_ATTRS
   9869 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
   9870   _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M,
   9871                                     f, d, pd);
   9872 }
   9873 
   9874 static __inline__ double __DEFAULT_FN_ATTRS
   9875 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
   9876   _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M,
   9877                                     f, d, pd);
   9878 }
   9879 
   9880 // Vec512 - Vector with size 512.
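// Illustrative only (not part of this header): a minimal sketch of the masked
// reductions above. Elements whose mask bit is clear are first replaced by the
// identity element of the operator, so they do not contribute to the result.
//
//   __m512i v   = _mm512_set1_epi64(5);
//   long long s = _mm512_mask_reduce_add_epi64((__mmask8)0x0F, v);   // s == 20
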
   9881 // Operator - Can be one of following: +,*,&,|
   9882 // T2 - Can get 'i' for int and ' ' for packed single.
   9883 // T1 - Can get 'i' for int and 'f' for float.
   9884 
   9885 #define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \
   9886     __m256##T1 Vec256 =                                                        \
   9887             (__m256##T1)(__builtin_shufflevector(                              \
   9888                                     (__v16s##T2)Vec512,                        \
   9889                                     (__v16s##T2)Vec512,                        \
   9890                                     0, 1, 2, 3, 4, 5, 6, 7)                    \
   9891                                 Operator                                       \
   9892                          __builtin_shufflevector(                              \
   9893                                     (__v16s##T2)Vec512,                        \
   9894                                     (__v16s##T2)Vec512,                        \
   9895                                     8, 9, 10, 11, 12, 13, 14, 15));            \
   9896     __m128##T1 Vec128 =                                                        \
   9897              (__m128##T1)(__builtin_shufflevector(                             \
   9898                                     (__v8s##T2)Vec256,                         \
   9899                                     (__v8s##T2)Vec256,                         \
   9900                                     0, 1, 2, 3)                                \
   9901                                 Operator                                       \
   9902                           __builtin_shufflevector(                             \
   9903                                     (__v8s##T2)Vec256,                         \
   9904                                     (__v8s##T2)Vec256,                         \
   9905                                     4, 5, 6, 7));                              \
   9906     Vec128 = (__m128##T1)(__builtin_shufflevector(                             \
   9907                                     (__v4s##T2)Vec128,                         \
   9908                                     (__v4s##T2)Vec128,                         \
   9909                                     0, 1, -1, -1)                              \
   9910                                 Operator                                       \
   9911                           __builtin_shufflevector(                             \
   9912                                     (__v4s##T2)Vec128,                         \
   9913                                     (__v4s##T2)Vec128,                         \
   9914                                     2, 3, -1, -1));                            \
   9915     Vec128 = (__m128##T1)(__builtin_shufflevector(                             \
   9916                                     (__v4s##T2)Vec128,                         \
   9917                                     (__v4s##T2)Vec128,                         \
   9918                                     0, -1, -1, -1)                             \
   9919                                 Operator                                       \
   9920                           __builtin_shufflevector(                             \
   9921                                     (__v4s##T2)Vec128,                         \
   9922                                     (__v4s##T2)Vec128,                         \
   9923                                     1, -1, -1, -1));                           \
   9924     return Vec128[0];                                                          \
   9925   })
   9926 
   9927 static __inline__ int __DEFAULT_FN_ATTRS
   9928 _mm512_reduce_add_epi32(__m512i __W) {
   9929   _mm512_reduce_operator_32bit(__W, +, i, i);
   9930 }
   9931 
   9932 static __inline__ int __DEFAULT_FN_ATTRS
   9933 _mm512_reduce_mul_epi32(__m512i __W) {
   9934   _mm512_reduce_operator_32bit(__W, *, i, i);
   9935 }
   9936 
   9937 static __inline__ int __DEFAULT_FN_ATTRS
   9938 _mm512_reduce_and_epi32(__m512i __W) {
   9939   _mm512_reduce_operator_32bit(__W, &, i, i);
   9940 }
   9941 
   9942 static __inline__ int __DEFAULT_FN_ATTRS
   9943 _mm512_reduce_or_epi32(__m512i __W) {
   9944   _mm512_reduce_operator_32bit(__W, |, i, i);
   9945 }
   9946 
   9947 static __inline__ float __DEFAULT_FN_ATTRS
   9948 _mm512_reduce_add_ps(__m512 __W) {
   9949   _mm512_reduce_operator_32bit(__W, +, f, );
   9950 }
   9951 
   9952 static __inline__ float __DEFAULT_FN_ATTRS
   9953 _mm512_reduce_mul_ps(__m512 __W) {
   9954   _mm512_reduce_operator_32bit(__W, *, f, );
   9955 }
   9956 
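// Illustrative only (not part of this header): the reductions above combine
// elements pairwise (bisection), so _mm512_reduce_add_ps/_mm512_reduce_mul_ps
// may differ from a strictly sequential scalar loop by ordinary floating-point
// rounding; the integer reductions are exact.
//
//   __m512i v = _mm512_set1_epi32(2);
//   int sum   = _mm512_reduce_add_epi32(v);   // sum == 32
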
   9957 // Vec512 - Vector with size 512.
   9958 // Vec512Neutral - All vector elements set to the identity element.
   9959 // Identity element: {+,0},{*,1},{&,0xFFFFFFFF},{|,0}
    9960 // Operator - Can be one of the following: +,*,&,|
    9961 // Mask - Intrinsic Mask
    9962 // T2 - Can get 'i' for int and 'f' for float.
    9963 // T1 - Can get 'i' for int and ' ' (empty) for packed single.
    9964 // T3 - Can be 'ps' for packed single or 'd' for d-word.
   9965 
   9966 #define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator,     \
   9967                                           Mask, T2, T1, T3)                    \
   9968   __extension__({                                                              \
   9969     Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
   9970                              (__mmask16)Mask,                                  \
   9971                              (__v16s##T2)Vec512,                               \
   9972                              (__v16s##T2)Vec512Neutral);                       \
   9973     _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1);                    \
   9974   })
   9975 
   9976 static __inline__ int __DEFAULT_FN_ATTRS
   9977 _mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
   9978   _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), +, __M, i, i, d);
   9979 }
   9980 
   9981 static __inline__ int __DEFAULT_FN_ATTRS
   9982 _mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
   9983   _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(1), *, __M, i, i, d);
   9984 }
   9985 
   9986 static __inline__ int __DEFAULT_FN_ATTRS
   9987 _mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
   9988   _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M,
   9989                                     i, i, d);
   9990 }
   9991 
   9992 static __inline__ int __DEFAULT_FN_ATTRS
   9993 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
   9994   _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), |, __M, i, i, d);
   9995 }
   9996 
   9997 static __inline__ float __DEFAULT_FN_ATTRS
   9998 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
   9999   _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(0), +, __M, f, , ps);
   10000 }
   10001 
   10002 static __inline__ float __DEFAULT_FN_ATTRS
   10003 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
   10004   _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps);
   10005 }
   10006 
    10007 // We use a bisection method: at each step, the vector produced by the previous
    10008 // step is split in half, and the operation is applied to the two halves.
    10009 // This takes log2(n) steps, where n is the number of elements in the vector.
    10010 // This macro uses only intrinsics from the AVX512F feature.
    10011 
    10012 // Vec512 - Vector with size 512.
    10013 // IntrinName - Can be one of the following: {max|min}_{epi64|epu64|pd}, for
    10014 //              example: _mm512_max_epi64
    10015 // T1 - Can get 'i' for int and 'd' for double. [__m512{i|d}]
    10016 // T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
   10017 
   10018 #define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \
   10019         Vec512 = _mm512_##IntrinName(                                          \
   10020                                 (__m512##T1)__builtin_shufflevector(           \
   10021                                                 (__v8d##T2)Vec512,             \
   10022                                                 (__v8d##T2)Vec512,             \
   10023                                                  0, 1, 2, 3, -1, -1, -1, -1),  \
   10024                                 (__m512##T1)__builtin_shufflevector(           \
   10025                                                 (__v8d##T2)Vec512,             \
   10026                                                 (__v8d##T2)Vec512,             \
   10027                                                  4, 5, 6, 7, -1, -1, -1, -1)); \
   10028         Vec512 = _mm512_##IntrinName(                                          \
   10029                                 (__m512##T1)__builtin_shufflevector(           \
   10030                                                 (__v8d##T2)Vec512,             \
   10031                                                 (__v8d##T2)Vec512,             \
   10032                                                  0, 1, -1, -1, -1, -1, -1, -1),\
   10033                                 (__m512##T1)__builtin_shufflevector(           \
   10034                                                 (__v8d##T2)Vec512,             \
   10035                                                 (__v8d##T2)Vec512,             \
   10036                                                  2, 3, -1, -1, -1, -1, -1,     \
   10037                                                  -1));                         \
   10038         Vec512 = _mm512_##IntrinName(                                          \
   10039                                 (__m512##T1)__builtin_shufflevector(           \
   10040                                                 (__v8d##T2)Vec512,             \
   10041                                                 (__v8d##T2)Vec512,             \
   10042                                                 0, -1, -1, -1, -1, -1, -1, -1),\
   10043                                 (__m512##T1)__builtin_shufflevector(           \
   10044                                                 (__v8d##T2)Vec512,             \
   10045                                                 (__v8d##T2)Vec512,             \
   10046                                                 1, -1, -1, -1, -1, -1, -1, -1))\
   10047                                                 ;                              \
   10048     return Vec512[0];                                                          \
   10049   })
   10050 
   10051 static __inline__ long long __DEFAULT_FN_ATTRS
   10052 _mm512_reduce_max_epi64(__m512i __V) {
   10053   _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i);
   10054 }
   10055 
   10056 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
   10057 _mm512_reduce_max_epu64(__m512i __V) {
   10058   _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i);
   10059 }
   10060 
   10061 static __inline__ double __DEFAULT_FN_ATTRS
   10062 _mm512_reduce_max_pd(__m512d __V) {
   10063   _mm512_reduce_maxMin_64bit(__V, max_pd, d, f);
   10064 }
   10065 
    10066 static __inline__ long long __DEFAULT_FN_ATTRS
    10067 _mm512_reduce_min_epi64(__m512i __V) {
   10068   _mm512_reduce_maxMin_64bit(__V, min_epi64, i, i);
   10069 }
   10070 
   10071 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
   10072 _mm512_reduce_min_epu64(__m512i __V) {
   10073   _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i);
   10074 }
   10075 
   10076 static __inline__ double __DEFAULT_FN_ATTRS
   10077 _mm512_reduce_min_pd(__m512d __V) {
   10078   _mm512_reduce_maxMin_64bit(__V, min_pd, d, f);
   10079 }
   10080 
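// Illustrative only (not part of this header): a minimal sketch of the 64-bit
// max/min reductions above.
//
//   __m512i v    = _mm512_setr_epi64(3, -1, 7, 0, 5, 2, 6, 4);
//   long long mx = _mm512_reduce_max_epi64(v);   // mx == 7
//   long long mn = _mm512_reduce_min_epi64(v);   // mn == -1
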
   10081 // Vec512 - Vector with size 512.
    10082 // Vec512Neutral - A 512-bit vector with all elements set to the identity element.
   10083 // Identity element: {max_epi,0x8000000000000000}
   10084 //                   {max_epu,0x0000000000000000}
   10085 //                   {max_pd, 0xFFF0000000000000}
   10086 //                   {min_epi,0x7FFFFFFFFFFFFFFF}
   10087 //                   {min_epu,0xFFFFFFFFFFFFFFFF}
   10088 //                   {min_pd, 0x7FF0000000000000}
   10089 //
    10090 // IntrinName - Can be one of the following: {max|min}_{epi64|epu64|pd}, for
    10091 //              example: _mm512_max_epi64
    10092 // T1 - Can get 'i' for int and 'd' for double. [__m512{i|d}]
    10093 // T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
    10094 // T3 - Can get 'q' for q-word and 'pd' for packed double.
   10095 //      [__builtin_ia32_select{q|pd}_512]
   10096 // Mask - Intrinsic Mask
   10097 
   10098 #define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \
   10099                                         T2, T3, Mask)                          \
   10100   __extension__({                                                              \
   10101     Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
   10102                              (__mmask8)Mask,                                   \
   10103                              (__v8d##T2)Vec512,                                \
   10104                              (__v8d##T2)Vec512Neutral);                        \
   10105     _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2);                    \
   10106   })
   10107 
   10108 static __inline__ long long __DEFAULT_FN_ATTRS
   10109 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
   10110   _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x8000000000000000),
   10111                                   max_epi64, i, i, q, __M);
   10112 }
   10113 
   10114 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
   10115 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
   10116   _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x0000000000000000),
   10117                                   max_epu64, i, i, q, __M);
   10118 }
   10119 
   10120 static __inline__ double __DEFAULT_FN_ATTRS
   10121 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
   10122   _mm512_mask_reduce_maxMin_64bit(__V, -_mm512_set1_pd(__builtin_inf()),
   10123                                   max_pd, d, f, pd, __M);
   10124 }
   10125 
   10126 static __inline__ long long __DEFAULT_FN_ATTRS
   10127 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
   10128   _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),
   10129                                   min_epi64, i, i, q, __M);
   10130 }
   10131 
   10132 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
   10133 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
   10134   _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
   10135                                   min_epu64, i, i, q, __M);
   10136 }
   10137 
   10138 static __inline__ double __DEFAULT_FN_ATTRS
   10139 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
   10140   _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()),
   10141                                   min_pd, d, f, pd, __M);
   10142 }
   10143 
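// Illustrative only (not part of this header): in the masked forms above,
// inactive elements are first replaced by the identity of the comparison
// (-infinity for max_pd, +infinity for min_pd, and the analogous extreme
// values for the integer variants), so a reduction over an all-zero mask
// returns that identity.
//
//   __m512d v = _mm512_set1_pd(1.0);
//   double  m = _mm512_mask_reduce_max_pd((__mmask8)0x00, v);   // m == -infinity
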
   10144 // Vec512 - Vector with size 512.
    10145 // IntrinName - Can be one of the following: {max|min}_{epi32|epu32|ps}, for
    10146 //              example: _mm512_max_epi32
    10147 // T1 - Can get 'i' for int and ' ' (empty) for packed single. [__m512{i|}]
    10148 // T2 - Can get 'i' for int and 'f' for float. [__v16s{i|f}]
   10149 
   10150 #define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2) __extension__({ \
   10151     Vec512 = _mm512_##IntrinName(                                              \
   10152                   (__m512##T1)__builtin_shufflevector(                         \
   10153                                   (__v16s##T2)Vec512,                          \
   10154                                   (__v16s##T2)Vec512,                          \
   10155                                   0, 1, 2, 3, 4, 5, 6, 7,                      \
   10156                                   -1, -1, -1, -1, -1, -1, -1, -1),             \
   10157                   (__m512##T1)__builtin_shufflevector(                         \
   10158                                   (__v16s##T2)Vec512,                          \
   10159                                   (__v16s##T2)Vec512,                          \
   10160                                   8, 9, 10, 11, 12, 13, 14, 15,                \
   10161                                   -1, -1, -1, -1, -1, -1, -1, -1));            \
   10162     Vec512 = _mm512_##IntrinName(                                              \
   10163                   (__m512##T1)__builtin_shufflevector(                         \
   10164                                   (__v16s##T2)Vec512,                          \
   10165                                   (__v16s##T2)Vec512,                          \
   10166                                   0, 1, 2, 3, -1, -1, -1, -1,                  \
   10167                                   -1, -1, -1, -1, -1, -1, -1, -1),             \
   10168                   (__m512##T1)__builtin_shufflevector(                         \
   10169                                   (__v16s##T2)Vec512,                          \
   10170                                   (__v16s##T2)Vec512,                          \
   10171                                   4, 5, 6, 7, -1, -1, -1, -1,                  \
   10172                                   -1, -1, -1, -1, -1, -1, -1, -1));            \
   10173     Vec512 = _mm512_##IntrinName(                                              \
   10174                   (__m512##T1)__builtin_shufflevector(                         \
   10175                                   (__v16s##T2)Vec512,                          \
   10176                                   (__v16s##T2)Vec512,                          \
   10177                                   0, 1, -1, -1, -1, -1, -1, -1,                \
   10178                                   -1, -1, -1, -1, -1, -1, -1, -1),             \
   10179                   (__m512##T1)__builtin_shufflevector(                         \
   10180                                   (__v16s##T2)Vec512,                          \
   10181                                   (__v16s##T2)Vec512,                          \
   10182                                   2, 3, -1, -1, -1, -1, -1, -1,                \
   10183                                   -1, -1, -1, -1, -1, -1, -1, -1));            \
   10184     Vec512 = _mm512_##IntrinName(                                              \
   10185                   (__m512##T1)__builtin_shufflevector(                         \
   10186                                   (__v16s##T2)Vec512,                          \
   10187                                   (__v16s##T2)Vec512,                          \
   10188                                   0,  -1, -1, -1, -1, -1, -1, -1,              \
   10189                                   -1, -1, -1, -1, -1, -1, -1, -1),             \
   10190                   (__m512##T1)__builtin_shufflevector(                         \
   10191                                   (__v16s##T2)Vec512,                          \
   10192                                   (__v16s##T2)Vec512,                          \
   10193                                   1, -1, -1, -1, -1, -1, -1, -1,               \
   10194                                   -1, -1, -1, -1, -1, -1, -1, -1));            \
   10195     return Vec512[0];                                                          \
   10196   })
   10197 
    10198 static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i __V) {
    10199   _mm512_reduce_maxMin_32bit(__V, max_epi32, i, i);
   10200 }
   10201 
   10202 static __inline__ unsigned int __DEFAULT_FN_ATTRS
    10203 _mm512_reduce_max_epu32(__m512i __V) {
    10204   _mm512_reduce_maxMin_32bit(__V, max_epu32, i, i);
   10205 }
   10206 
    10207 static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 __V) {
    10208   _mm512_reduce_maxMin_32bit(__V, max_ps, , f);
   10209 }
   10210 
    10211 static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i __V) {
    10212   _mm512_reduce_maxMin_32bit(__V, min_epi32, i, i);
   10213 }
   10214 
   10215 static __inline__ unsigned int __DEFAULT_FN_ATTRS
    10216 _mm512_reduce_min_epu32(__m512i __V) {
    10217   _mm512_reduce_maxMin_32bit(__V, min_epu32, i, i);
   10218 }
   10219 
    10220 static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 __V) {
    10221   _mm512_reduce_maxMin_32bit(__V, min_ps, , f);
   10222 }
   10223 
   10224 // Vec512 - Vector with size 512.
    10225 // Vec512Neutral - A 512-bit vector with all elements set to the identity element.
   10226 // Identity element: {max_epi,0x80000000}
   10227 //                   {max_epu,0x00000000}
   10228 //                   {max_ps, 0xFF800000}
   10229 //                   {min_epi,0x7FFFFFFF}
   10230 //                   {min_epu,0xFFFFFFFF}
   10231 //                   {min_ps, 0x7F800000}
   10232 //
    10233 // IntrinName - Can be one of the following: {max|min}_{epi32|epu32|ps}, for
    10234 //              example: _mm512_max_epi32
    10235 // T1 - Can get 'i' for int and ' ' (empty) for packed single. [__m512{i|}]
    10236 // T2 - Can get 'i' for int and 'f' for float. [__v16s{i|f}]
    10237 // T3 - Can get 'd' for d-word and 'ps' for packed single.
    10238 //      [__builtin_ia32_select{d|ps}_512]
   10239 // Mask - Intrinsic Mask
   10240 
   10241 #define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \
   10242                                         T2, T3, Mask)                          \
   10243   __extension__({                                                              \
   10244     Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
   10245                                         (__mmask16)Mask,                       \
   10246                                         (__v16s##T2)Vec512,                    \
   10247                                         (__v16s##T2)Vec512Neutral);            \
   10248    _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2);                     \
   10249    })
   10250 
   10251 static __inline__ int __DEFAULT_FN_ATTRS
   10252 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
   10253   _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x80000000), max_epi32,
   10254                                   i, i, d, __M);
   10255 }
   10256 
   10257 static __inline__ unsigned int __DEFAULT_FN_ATTRS
   10258 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
   10259   _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x00000000), max_epu32,
   10260                                   i, i, d, __M);
   10261 }
   10262 
   10263 static __inline__ float __DEFAULT_FN_ATTRS
   10264 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
    10265   _mm512_mask_reduce_maxMin_32bit(__V, -_mm512_set1_ps(__builtin_inff()),
    10266                                   max_ps, , f, ps, __M);
   10267 }
   10268 
   10269 static __inline__ int __DEFAULT_FN_ATTRS
   10270 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
   10271   _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x7FFFFFFF), min_epi32,
   10272                                   i, i, d, __M);
   10273 }
   10274 
   10275 static __inline__ unsigned int __DEFAULT_FN_ATTRS
   10276 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
   10277   _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0xFFFFFFFF), min_epu32,
   10278                                   i, i, d, __M);
   10279 }
   10280 
   10281 static __inline__ float __DEFAULT_FN_ATTRS
   10282 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
    10283   _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()),
    10284                                   min_ps, , f, ps, __M);
   10285 }
   10286 
   10287 #undef __DEFAULT_FN_ATTRS
   10288 
   10289 #endif // __AVX512FINTRIN_H
   10290