/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
     23 #ifndef __IMMINTRIN_H
     24 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
     25 #endif
     26 
     27 #ifndef __AVX512FINTRIN_H
     28 #define __AVX512FINTRIN_H
     29 
     30 typedef char __v64qi __attribute__((__vector_size__(64)));
     31 typedef short __v32hi __attribute__((__vector_size__(64)));
     32 typedef double __v8df __attribute__((__vector_size__(64)));
     33 typedef float __v16sf __attribute__((__vector_size__(64)));
     34 typedef long long __v8di __attribute__((__vector_size__(64)));
     35 typedef int __v16si __attribute__((__vector_size__(64)));
     36 
     37 /* Unsigned types */
     38 typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
     39 typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
     40 typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
     41 typedef unsigned int __v16su __attribute__((__vector_size__(64)));
     42 
     43 typedef float __m512 __attribute__((__vector_size__(64)));
     44 typedef double __m512d __attribute__((__vector_size__(64)));
     45 typedef long long __m512i __attribute__((__vector_size__(64)));
     46 
     47 typedef unsigned char __mmask8;
     48 typedef unsigned short __mmask16;
     49 
     50 /* Rounding mode macros.  */
     51 #define _MM_FROUND_TO_NEAREST_INT   0x00
     52 #define _MM_FROUND_TO_NEG_INF       0x01
     53 #define _MM_FROUND_TO_POS_INF       0x02
     54 #define _MM_FROUND_TO_ZERO          0x03
     55 #define _MM_FROUND_CUR_DIRECTION    0x04
     56 
     57 typedef enum
     58 {
     59   _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
     60   _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
     61   _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
     62   _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
     63   _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
     64   _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
     65   _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
     66   _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
     67   _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
     68   _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
     69   _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
     70   _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
     71   _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
     72   _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
     73   _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
     74   _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
     75   _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
     76   _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
     77   _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
     78   _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
     79   _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
     80   _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
     81   _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
     82   _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
     83   _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
     84   _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
     85   _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
     86   _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
     87   _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
     88   _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
     89   _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
     90   _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
     91   _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
     92   _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
     93   _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
     94   _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
     95   _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
     96   _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
     97   _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
     98   _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
     99   _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
    100   _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
    101   _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
    102   _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
    103   _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
    104   _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
    105   _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
    106   _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
    107   _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
    108   _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
    109   _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
    110   _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
    111   _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
    112   _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
    113   _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
    114   _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
    115   _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
    116   _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
    117   _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
    118   _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
    119   _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
    120   _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
    121   _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
    122   _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
    123   _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
    124   _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
    125   _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
    126   _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
    127   _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
    128   _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
    129   _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
    130   _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
    131   _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
    132   _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
    133   _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
    134   _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
    135   _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
    136   _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
    137   _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
    138   _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
    139   _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
    140   _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
    141   _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
    142   _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
    143   _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
    144   _MM_PERM_DDDD = 0xFF
    145 } _MM_PERM_ENUM;
    146 
    147 typedef enum
    148 {
    149   _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
    150   _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
    151   _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
    152   _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
    153 } _MM_MANTISSA_NORM_ENUM;
    154 
    155 typedef enum
    156 {
    157   _MM_MANT_SIGN_src,    /* sign = sign(SRC)     */
    158   _MM_MANT_SIGN_zero,   /* sign = 0             */
    159   _MM_MANT_SIGN_nan   /* DEST = NaN if sign(SRC) = 1 */
    160 } _MM_MANTISSA_SIGN_ENUM;
    161 
    162 /* Define the default attributes for the functions in this file. */
    163 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
    164 
/* Create vectors with repeated elements */
    166 
    167 static  __inline __m512i __DEFAULT_FN_ATTRS
    168 _mm512_setzero_si512(void)
    169 {
    170   return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
    171 }
    172 
    173 #define _mm512_setzero_epi32 _mm512_setzero_si512
    174 
    175 static __inline__ __m512d __DEFAULT_FN_ATTRS
    176 _mm512_undefined_pd(void)
    177 {
    178   return (__m512d)__builtin_ia32_undef512();
    179 }
    180 
    181 static __inline__ __m512 __DEFAULT_FN_ATTRS
    182 _mm512_undefined(void)
    183 {
    184   return (__m512)__builtin_ia32_undef512();
    185 }
    186 
    187 static __inline__ __m512 __DEFAULT_FN_ATTRS
    188 _mm512_undefined_ps(void)
    189 {
    190   return (__m512)__builtin_ia32_undef512();
    191 }
    192 
    193 static __inline__ __m512i __DEFAULT_FN_ATTRS
    194 _mm512_undefined_epi32(void)
    195 {
    196   return (__m512i)__builtin_ia32_undef512();
    197 }
    198 
    199 static __inline__ __m512i __DEFAULT_FN_ATTRS
    200 _mm512_broadcastd_epi32 (__m128i __A)
    201 {
    202   return (__m512i)__builtin_shufflevector((__v4si) __A,
    203                                           (__v4si)_mm_undefined_si128(),
    204                                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    205 }
    206 
    207 static __inline__ __m512i __DEFAULT_FN_ATTRS
    208 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
    209 {
    210   return (__m512i)__builtin_ia32_selectd_512(__M,
    211                                              (__v16si) _mm512_broadcastd_epi32(__A),
    212                                              (__v16si) __O);
    213 }
    214 
    215 static __inline__ __m512i __DEFAULT_FN_ATTRS
    216 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
    217 {
    218   return (__m512i)__builtin_ia32_selectd_512(__M,
    219                                              (__v16si) _mm512_broadcastd_epi32(__A),
    220                                              (__v16si) _mm512_setzero_si512());
    221 }
    222 
    223 static __inline__ __m512i __DEFAULT_FN_ATTRS
    224 _mm512_broadcastq_epi64 (__m128i __A)
    225 {
    226   return (__m512i)__builtin_shufflevector((__v2di) __A,
    227                                           (__v2di) _mm_undefined_si128(),
    228                                           0, 0, 0, 0, 0, 0, 0, 0);
    229 }
    230 
    231 static __inline__ __m512i __DEFAULT_FN_ATTRS
    232 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
    233 {
    234   return (__m512i)__builtin_ia32_selectq_512(__M,
    235                                              (__v8di) _mm512_broadcastq_epi64(__A),
    236                                              (__v8di) __O);
    237 
    238 }
    239 
    240 static __inline__ __m512i __DEFAULT_FN_ATTRS
    241 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
    242 {
    243   return (__m512i)__builtin_ia32_selectq_512(__M,
    244                                              (__v8di) _mm512_broadcastq_epi64(__A),
    245                                              (__v8di) _mm512_setzero_si512());
    246 }
    247 
    248 static __inline __m512i __DEFAULT_FN_ATTRS
    249 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
    250 {
    251   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
    252                  (__v16si)
    253                  _mm512_setzero_si512 (),
    254                  __M);
    255 }
    256 
    257 static __inline __m512i __DEFAULT_FN_ATTRS
    258 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
    259 {
    260 #ifdef __x86_64__
    261   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
    262                  (__v8di)
    263                  _mm512_setzero_si512 (),
    264                  __M);
    265 #else
    266   return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
    267                  (__v8di)
    268                  _mm512_setzero_si512 (),
    269                  __M);
    270 #endif
    271 }
    272 
    273 static __inline __m512 __DEFAULT_FN_ATTRS
    274 _mm512_setzero_ps(void)
    275 {
    276   return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    277                    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
    278 }
    279 
    280 #define _mm512_setzero _mm512_setzero_ps
    281 
    282 static  __inline __m512d __DEFAULT_FN_ATTRS
    283 _mm512_setzero_pd(void)
    284 {
    285   return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
    286 }
    287 
    288 static __inline __m512 __DEFAULT_FN_ATTRS
    289 _mm512_set1_ps(float __w)
    290 {
    291   return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
    292                    __w, __w, __w, __w, __w, __w, __w, __w  };
    293 }
    294 
    295 static __inline __m512d __DEFAULT_FN_ATTRS
    296 _mm512_set1_pd(double __w)
    297 {
    298   return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
    299 }
    300 
    301 static __inline __m512i __DEFAULT_FN_ATTRS
    302 _mm512_set1_epi8(char __w)
    303 {
    304   return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
    305                              __w, __w, __w, __w, __w, __w, __w, __w,
    306                              __w, __w, __w, __w, __w, __w, __w, __w,
    307                              __w, __w, __w, __w, __w, __w, __w, __w,
    308                              __w, __w, __w, __w, __w, __w, __w, __w,
    309                              __w, __w, __w, __w, __w, __w, __w, __w,
    310                              __w, __w, __w, __w, __w, __w, __w, __w,
    311                              __w, __w, __w, __w, __w, __w, __w, __w  };
    312 }
    313 
    314 static __inline __m512i __DEFAULT_FN_ATTRS
    315 _mm512_set1_epi16(short __w)
    316 {
    317   return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
    318                              __w, __w, __w, __w, __w, __w, __w, __w,
    319                              __w, __w, __w, __w, __w, __w, __w, __w,
    320                              __w, __w, __w, __w, __w, __w, __w, __w };
    321 }
    322 
    323 static __inline __m512i __DEFAULT_FN_ATTRS
    324 _mm512_set1_epi32(int __s)
    325 {
    326   return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
    327                              __s, __s, __s, __s, __s, __s, __s, __s };
    328 }
    329 
    330 static __inline __m512i __DEFAULT_FN_ATTRS
    331 _mm512_set1_epi64(long long __d)
    332 {
    333   return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
    334 }
    335 
    336 static __inline__ __m512 __DEFAULT_FN_ATTRS
    337 _mm512_broadcastss_ps(__m128 __A)
    338 {
    339   return (__m512)__builtin_shufflevector((__v4sf) __A,
    340                                          (__v4sf)_mm_undefined_ps(),
    341                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    342 }
    343 
    344 static __inline __m512i __DEFAULT_FN_ATTRS
    345 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
    346 {
    347   return  (__m512i)(__v16si)
    348    { __D, __C, __B, __A, __D, __C, __B, __A,
    349      __D, __C, __B, __A, __D, __C, __B, __A };
    350 }
    351 
    352 static __inline __m512i __DEFAULT_FN_ATTRS
    353 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
    354        long long __D)
    355 {
    356   return  (__m512i) (__v8di)
    357    { __D, __C, __B, __A, __D, __C, __B, __A };
    358 }
    359 
    360 static __inline __m512d __DEFAULT_FN_ATTRS
    361 _mm512_set4_pd (double __A, double __B, double __C, double __D)
    362 {
    363   return  (__m512d)
    364    { __D, __C, __B, __A, __D, __C, __B, __A };
    365 }
    366 
    367 static __inline __m512 __DEFAULT_FN_ATTRS
    368 _mm512_set4_ps (float __A, float __B, float __C, float __D)
    369 {
    370   return  (__m512)
    371    { __D, __C, __B, __A, __D, __C, __B, __A,
    372      __D, __C, __B, __A, __D, __C, __B, __A };
    373 }
    374 
    375 #define _mm512_setr4_epi32(e0,e1,e2,e3)               \
    376   _mm512_set4_epi32((e3),(e2),(e1),(e0))
    377 
    378 #define _mm512_setr4_epi64(e0,e1,e2,e3)               \
    379   _mm512_set4_epi64((e3),(e2),(e1),(e0))
    380 
    381 #define _mm512_setr4_pd(e0,e1,e2,e3)                \
    382   _mm512_set4_pd((e3),(e2),(e1),(e0))
    383 
    384 #define _mm512_setr4_ps(e0,e1,e2,e3)                \
    385   _mm512_set4_ps((e3),(e2),(e1),(e0))
    386 
    387 static __inline__ __m512d __DEFAULT_FN_ATTRS
    388 _mm512_broadcastsd_pd(__m128d __A)
    389 {
    390   return (__m512d)__builtin_shufflevector((__v2df) __A,
    391                                           (__v2df) _mm_undefined_pd(),
    392                                           0, 0, 0, 0, 0, 0, 0, 0);
    393 }
    394 
/* Cast between vector types */
    396 
    397 static __inline __m512d __DEFAULT_FN_ATTRS
    398 _mm512_castpd256_pd512(__m256d __a)
    399 {
    400   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
    401 }
    402 
    403 static __inline __m512 __DEFAULT_FN_ATTRS
    404 _mm512_castps256_ps512(__m256 __a)
    405 {
    406   return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
    407                                           -1, -1, -1, -1, -1, -1, -1, -1);
    408 }
    409 
    410 static __inline __m128d __DEFAULT_FN_ATTRS
    411 _mm512_castpd512_pd128(__m512d __a)
    412 {
    413   return __builtin_shufflevector(__a, __a, 0, 1);
    414 }
    415 
    416 static __inline __m256d __DEFAULT_FN_ATTRS
    417 _mm512_castpd512_pd256 (__m512d __A)
    418 {
    419   return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
    420 }
    421 
    422 static __inline __m128 __DEFAULT_FN_ATTRS
    423 _mm512_castps512_ps128(__m512 __a)
    424 {
    425   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
    426 }
    427 
    428 static __inline __m256 __DEFAULT_FN_ATTRS
    429 _mm512_castps512_ps256 (__m512 __A)
    430 {
    431   return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
    432 }
    433 
    434 static __inline __m512 __DEFAULT_FN_ATTRS
    435 _mm512_castpd_ps (__m512d __A)
    436 {
    437   return (__m512) (__A);
    438 }
    439 
    440 static __inline __m512i __DEFAULT_FN_ATTRS
    441 _mm512_castpd_si512 (__m512d __A)
    442 {
    443   return (__m512i) (__A);
    444 }
    445 
    446 static __inline__ __m512d __DEFAULT_FN_ATTRS
    447 _mm512_castpd128_pd512 (__m128d __A)
    448 {
    449   return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
    450 }
    451 
    452 static __inline __m512d __DEFAULT_FN_ATTRS
    453 _mm512_castps_pd (__m512 __A)
    454 {
    455   return (__m512d) (__A);
    456 }
    457 
    458 static __inline __m512i __DEFAULT_FN_ATTRS
    459 _mm512_castps_si512 (__m512 __A)
    460 {
    461   return (__m512i) (__A);
    462 }
    463 
    464 static __inline__ __m512 __DEFAULT_FN_ATTRS
    465 _mm512_castps128_ps512 (__m128 __A)
    466 {
    467     return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
    468 }
    469 
    470 static __inline__ __m512i __DEFAULT_FN_ATTRS
    471 _mm512_castsi128_si512 (__m128i __A)
    472 {
    473    return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
    474 }
    475 
    476 static __inline__ __m512i __DEFAULT_FN_ATTRS
    477 _mm512_castsi256_si512 (__m256i __A)
    478 {
    479    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
    480 }
    481 
    482 static __inline __m512 __DEFAULT_FN_ATTRS
    483 _mm512_castsi512_ps (__m512i __A)
    484 {
    485   return (__m512) (__A);
    486 }
    487 
    488 static __inline __m512d __DEFAULT_FN_ATTRS
    489 _mm512_castsi512_pd (__m512i __A)
    490 {
    491   return (__m512d) (__A);
    492 }
    493 
    494 static __inline __m128i __DEFAULT_FN_ATTRS
    495 _mm512_castsi512_si128 (__m512i __A)
    496 {
    497   return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
    498 }
    499 
    500 static __inline __m256i __DEFAULT_FN_ATTRS
    501 _mm512_castsi512_si256 (__m512i __A)
    502 {
    503   return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
    504 }
    505 
/* Bitwise operators */
    507 static __inline__ __m512i __DEFAULT_FN_ATTRS
    508 _mm512_and_epi32(__m512i __a, __m512i __b)
    509 {
    510   return (__m512i)((__v16su)__a & (__v16su)__b);
    511 }
    512 
    513 static __inline__ __m512i __DEFAULT_FN_ATTRS
    514 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
    515 {
    516   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
    517                 (__v16si) _mm512_and_epi32(__a, __b),
    518                 (__v16si) __src);
    519 }
    520 
    521 static __inline__ __m512i __DEFAULT_FN_ATTRS
    522 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
    523 {
    524   return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
    525                                          __k, __a, __b);
    526 }
    527 
    528 static __inline__ __m512i __DEFAULT_FN_ATTRS
    529 _mm512_and_epi64(__m512i __a, __m512i __b)
    530 {
    531   return (__m512i)((__v8du)__a & (__v8du)__b);
    532 }
    533 
    534 static __inline__ __m512i __DEFAULT_FN_ATTRS
    535 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
    536 {
    537     return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
    538                 (__v8di) _mm512_and_epi64(__a, __b),
    539                 (__v8di) __src);
    540 }
    541 
    542 static __inline__ __m512i __DEFAULT_FN_ATTRS
    543 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
    544 {
    545   return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
    546                                          __k, __a, __b);
    547 }
    548 
    549 static __inline__ __m512i __DEFAULT_FN_ATTRS
    550 _mm512_andnot_si512 (__m512i __A, __m512i __B)
    551 {
    552   return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
    553 }
    554 
    555 static __inline__ __m512i __DEFAULT_FN_ATTRS
    556 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
    557 {
    558   return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
    559 }
    560 
    561 static __inline__ __m512i __DEFAULT_FN_ATTRS
    562 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
    563 {
    564   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
    565                                          (__v16si)_mm512_andnot_epi32(__A, __B),
    566                                          (__v16si)__W);
    567 }
    568 
    569 static __inline__ __m512i __DEFAULT_FN_ATTRS
    570 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
    571 {
    572   return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
    573                                            __U, __A, __B);
    574 }
    575 
    576 static __inline__ __m512i __DEFAULT_FN_ATTRS
    577 _mm512_andnot_epi64(__m512i __A, __m512i __B)
    578 {
    579   return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
    580 }
    581 
    582 static __inline__ __m512i __DEFAULT_FN_ATTRS
    583 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
    584 {
    585   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
    586                                           (__v8di)_mm512_andnot_epi64(__A, __B),
    587                                           (__v8di)__W);
    588 }
    589 
    590 static __inline__ __m512i __DEFAULT_FN_ATTRS
    591 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
    592 {
    593   return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
    594                                            __U, __A, __B);
    595 }
    596 
    597 static __inline__ __m512i __DEFAULT_FN_ATTRS
    598 _mm512_or_epi32(__m512i __a, __m512i __b)
    599 {
    600   return (__m512i)((__v16su)__a | (__v16su)__b);
    601 }
    602 
    603 static __inline__ __m512i __DEFAULT_FN_ATTRS
    604 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
    605 {
    606   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
    607                                              (__v16si)_mm512_or_epi32(__a, __b),
    608                                              (__v16si)__src);
    609 }
    610 
    611 static __inline__ __m512i __DEFAULT_FN_ATTRS
    612 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
    613 {
    614   return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
    615 }
    616 
    617 static __inline__ __m512i __DEFAULT_FN_ATTRS
    618 _mm512_or_epi64(__m512i __a, __m512i __b)
    619 {
    620   return (__m512i)((__v8du)__a | (__v8du)__b);
    621 }
    622 
    623 static __inline__ __m512i __DEFAULT_FN_ATTRS
    624 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
    625 {
    626   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
    627                                              (__v8di)_mm512_or_epi64(__a, __b),
    628                                              (__v8di)__src);
    629 }
    630 
    631 static __inline__ __m512i __DEFAULT_FN_ATTRS
    632 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
    633 {
    634   return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
    635 }
    636 
    637 static __inline__ __m512i __DEFAULT_FN_ATTRS
    638 _mm512_xor_epi32(__m512i __a, __m512i __b)
    639 {
    640   return (__m512i)((__v16su)__a ^ (__v16su)__b);
    641 }
    642 
    643 static __inline__ __m512i __DEFAULT_FN_ATTRS
    644 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
    645 {
    646   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
    647                                             (__v16si)_mm512_xor_epi32(__a, __b),
    648                                             (__v16si)__src);
    649 }
    650 
    651 static __inline__ __m512i __DEFAULT_FN_ATTRS
    652 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
    653 {
    654   return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
    655 }
    656 
    657 static __inline__ __m512i __DEFAULT_FN_ATTRS
    658 _mm512_xor_epi64(__m512i __a, __m512i __b)
    659 {
    660   return (__m512i)((__v8du)__a ^ (__v8du)__b);
    661 }
    662 
    663 static __inline__ __m512i __DEFAULT_FN_ATTRS
    664 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
    665 {
    666   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
    667                                              (__v8di)_mm512_xor_epi64(__a, __b),
    668                                              (__v8di)__src);
    669 }
    670 
    671 static __inline__ __m512i __DEFAULT_FN_ATTRS
    672 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
    673 {
    674   return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
    675 }
    676 
    677 static __inline__ __m512i __DEFAULT_FN_ATTRS
    678 _mm512_and_si512(__m512i __a, __m512i __b)
    679 {
    680   return (__m512i)((__v8du)__a & (__v8du)__b);
    681 }
    682 
    683 static __inline__ __m512i __DEFAULT_FN_ATTRS
    684 _mm512_or_si512(__m512i __a, __m512i __b)
    685 {
    686   return (__m512i)((__v8du)__a | (__v8du)__b);
    687 }
    688 
    689 static __inline__ __m512i __DEFAULT_FN_ATTRS
    690 _mm512_xor_si512(__m512i __a, __m512i __b)
    691 {
    692   return (__m512i)((__v8du)__a ^ (__v8du)__b);
    693 }
    694 
/* Arithmetic */
    696 
    697 static __inline __m512d __DEFAULT_FN_ATTRS
    698 _mm512_add_pd(__m512d __a, __m512d __b)
    699 {
    700   return (__m512d)((__v8df)__a + (__v8df)__b);
    701 }
    702 
    703 static __inline __m512 __DEFAULT_FN_ATTRS
    704 _mm512_add_ps(__m512 __a, __m512 __b)
    705 {
    706   return (__m512)((__v16sf)__a + (__v16sf)__b);
    707 }
    708 
    709 static __inline __m512d __DEFAULT_FN_ATTRS
    710 _mm512_mul_pd(__m512d __a, __m512d __b)
    711 {
    712   return (__m512d)((__v8df)__a * (__v8df)__b);
    713 }
    714 
    715 static __inline __m512 __DEFAULT_FN_ATTRS
    716 _mm512_mul_ps(__m512 __a, __m512 __b)
    717 {
    718   return (__m512)((__v16sf)__a * (__v16sf)__b);
    719 }
    720 
    721 static __inline __m512d __DEFAULT_FN_ATTRS
    722 _mm512_sub_pd(__m512d __a, __m512d __b)
    723 {
    724   return (__m512d)((__v8df)__a - (__v8df)__b);
    725 }
    726 
    727 static __inline __m512 __DEFAULT_FN_ATTRS
    728 _mm512_sub_ps(__m512 __a, __m512 __b)
    729 {
    730   return (__m512)((__v16sf)__a - (__v16sf)__b);
    731 }
    732 
    733 static __inline__ __m512i __DEFAULT_FN_ATTRS
    734 _mm512_add_epi64 (__m512i __A, __m512i __B)
    735 {
    736   return (__m512i) ((__v8du) __A + (__v8du) __B);
    737 }
    738 
    739 static __inline__ __m512i __DEFAULT_FN_ATTRS
    740 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
    741 {
    742   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
    743              (__v8di) __B,
    744              (__v8di) __W,
    745              (__mmask8) __U);
    746 }
    747 
    /* Zero-masked add: calls __builtin_ia32_paddq512_mask on __A and __B
     * with _mm512_setzero_si512() as the third operand and __U as the mask.
     * NOTE(review): presumably lanes with a clear bit in __U come out as 0. */
    748 static __inline__ __m512i __DEFAULT_FN_ATTRS
    749 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
    750 {
    751   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
    752              (__v8di) __B,
    753              (__v8di)
    754              _mm512_setzero_si512 (),
    755              (__mmask8) __U);
    756 }
    757 
    /* Lane-wise integer difference __A - __B, computed with the vector -
     * operator on the __v8du view of both operands.
     * NOTE(review): __v8du is declared outside this chunk; presumably
     * unsigned 64-bit lanes. */
    758 static __inline__ __m512i __DEFAULT_FN_ATTRS
    759 _mm512_sub_epi64 (__m512i __A, __m512i __B)
    760 {
    761   return (__m512i) ((__v8du) __A - (__v8du) __B);
    762 }
    763 
    /* Masked subtract: passes __A, __B, __W and __U, in that order, to
     * __builtin_ia32_psubq512_mask (vector operands viewed as __v8di).
     * NOTE(review): presumably computes __A - __B in lanes selected by __U
     * and keeps __W elsewhere -- confirm against the builtin. */
    764 static __inline__ __m512i __DEFAULT_FN_ATTRS
    765 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
    766 {
    767   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
    768              (__v8di) __B,
    769              (__v8di) __W,
    770              (__mmask8) __U);
    771 }
    772 
    /* Zero-masked subtract: calls __builtin_ia32_psubq512_mask on __A and
     * __B with _mm512_setzero_si512() as the third operand and __U as mask.
     * NOTE(review): presumably lanes with a clear bit in __U come out as 0. */
    773 static __inline__ __m512i __DEFAULT_FN_ATTRS
    774 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
    775 {
    776   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
    777              (__v8di) __B,
    778              (__v8di)
    779              _mm512_setzero_si512 (),
    780              (__mmask8) __U);
    781 }
    782 
    /* Lane-wise integer sum __A + __B, computed with the vector + operator
     * after casting both operands to __v16su.
     * NOTE(review): __v16su is declared outside this chunk; presumably
     * unsigned 32-bit lanes. */
    783 static __inline__ __m512i __DEFAULT_FN_ATTRS
    784 _mm512_add_epi32 (__m512i __A, __m512i __B)
    785 {
    786   return (__m512i) ((__v16su) __A + (__v16su) __B);
    787 }
    788 
    /* Masked add: passes __A, __B, __W and __U, in that order, to
     * __builtin_ia32_paddd512_mask (vector operands viewed as __v16si,
     * mask typed __mmask16).
     * NOTE(review): presumably lanes with a clear bit in __U keep __W. */
    789 static __inline__ __m512i __DEFAULT_FN_ATTRS
    790 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
    791 {
    792   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
    793              (__v16si) __B,
    794              (__v16si) __W,
    795              (__mmask16) __U);
    796 }
    797 
    /* Zero-masked add: calls __builtin_ia32_paddd512_mask on __A and __B
     * with _mm512_setzero_si512() as the third operand and __U as the mask.
     * NOTE(review): presumably lanes with a clear bit in __U come out as 0. */
    798 static __inline__ __m512i __DEFAULT_FN_ATTRS
    799 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
    800 {
    801   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
    802              (__v16si) __B,
    803              (__v16si)
    804              _mm512_setzero_si512 (),
    805              (__mmask16) __U);
    806 }
    807 
    /* Lane-wise integer difference __A - __B, computed with the vector -
     * operator on the __v16su view of both operands.
     * NOTE(review): __v16su is declared outside this chunk; presumably
     * unsigned 32-bit lanes. */
    808 static __inline__ __m512i __DEFAULT_FN_ATTRS
    809 _mm512_sub_epi32 (__m512i __A, __m512i __B)
    810 {
    811   return (__m512i) ((__v16su) __A - (__v16su) __B);
    812 }
    813 
    /* Masked subtract: passes __A, __B, __W and __U, in that order, to
     * __builtin_ia32_psubd512_mask (vector operands viewed as __v16si).
     * NOTE(review): presumably computes __A - __B where __U is set and
     * keeps __W elsewhere -- confirm against the builtin. */
    814 static __inline__ __m512i __DEFAULT_FN_ATTRS
    815 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
    816 {
    817   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
    818              (__v16si) __B,
    819              (__v16si) __W,
    820              (__mmask16) __U);
    821 }
    822 
    /* Zero-masked subtract: calls __builtin_ia32_psubd512_mask on __A and
     * __B with _mm512_setzero_si512() as the third operand and __U as mask.
     * NOTE(review): presumably lanes with a clear bit in __U come out as 0. */
    823 static __inline__ __m512i __DEFAULT_FN_ATTRS
    824 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
    825 {
    826   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
    827              (__v16si) __B,
    828              (__v16si)
    829              _mm512_setzero_si512 (),
    830              (__mmask16) __U);
    831 }
    832 
    /* Expands to __builtin_ia32_maxpd512_mask(A, B, W, U, R), casting the
     * vector arguments to __v8df, U to __mmask8 and R to int.
     * NOTE(review): presumably a macro (not a function) because R must be a
     * compile-time constant for the builtin -- confirm. */
    833 #define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
    834   (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
    835                                         (__v8df)(__m512d)(B), \
    836                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
    837                                         (int)(R)); })
    838 
    /* Expands to __builtin_ia32_maxpd512_mask on A and B with
     * _mm512_setzero_pd() as the third operand, U as the mask and R as the
     * final int argument.
     * NOTE(review): presumably lanes with a clear bit in U are zeroed. */
    839 #define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
    840   (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
    841                                         (__v8df)(__m512d)(B), \
    842                                         (__v8df)_mm512_setzero_pd(), \
    843                                         (__mmask8)(U), (int)(R)); })
    844 
    /* Unmasked form: expands to __builtin_ia32_maxpd512_mask on A and B with
     * mask (__mmask8)-1 and _mm512_undefined_pd() as the third operand.
     * NOTE(review): with every mask bit set the third operand is presumably
     * never selected, which is why an undefined value is acceptable. */
    845 #define _mm512_max_round_pd(A, B, R) __extension__ ({ \
    846   (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
    847                                         (__v8df)(__m512d)(B), \
    848                                         (__v8df)_mm512_undefined_pd(), \
    849                                         (__mmask8)-1, (int)(R)); })
    850 
    /* Unmasked max of __A and __B: calls __builtin_ia32_maxpd512_mask with
     * mask (__mmask8) -1, a zero vector as the third operand, and
     * _MM_FROUND_CUR_DIRECTION in the final slot (the one the
     * _mm512_*max_round_pd macros fill with R). */
    851 static  __inline__ __m512d __DEFAULT_FN_ATTRS
    852 _mm512_max_pd(__m512d __A, __m512d __B)
    853 {
    854   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
    855              (__v8df) __B,
    856              (__v8df)
    857              _mm512_setzero_pd (),
    858              (__mmask8) -1,
    859              _MM_FROUND_CUR_DIRECTION);
    860 }
    861 
    /* Masked max: passes __A, __B, __W and __U to
     * __builtin_ia32_maxpd512_mask, with _MM_FROUND_CUR_DIRECTION as the
     * final argument.
     * NOTE(review): presumably lanes with a clear bit in __U keep __W. */
    862 static __inline__ __m512d __DEFAULT_FN_ATTRS
    863 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
    864 {
    865   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
    866                   (__v8df) __B,
    867                   (__v8df) __W,
    868                   (__mmask8) __U,
    869                   _MM_FROUND_CUR_DIRECTION);
    870 }
    871 
    /* Zero-masked max: calls __builtin_ia32_maxpd512_mask on __A and __B
     * with _mm512_setzero_pd() as the third operand, __U as the mask and
     * _MM_FROUND_CUR_DIRECTION as the final argument.
     * NOTE(review): presumably lanes with a clear bit in __U are zeroed. */
    872 static __inline__ __m512d __DEFAULT_FN_ATTRS
    873 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
    874 {
    875   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
    876                   (__v8df) __B,
    877                   (__v8df)
    878                   _mm512_setzero_pd (),
    879                   (__mmask8) __U,
    880                   _MM_FROUND_CUR_DIRECTION);
    881 }
    882 
    /* Expands to __builtin_ia32_maxps512_mask(A, B, W, U, R), casting the
     * vector arguments to __v16sf, U to __mmask16 and R to int.
     * NOTE(review): presumably lanes with a clear bit in U keep W, and R
     * must be a compile-time constant -- confirm. */
    883 #define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
    884   (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
    885                                        (__v16sf)(__m512)(B), \
    886                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
    887                                        (int)(R)); })
    888 
    /* Expands to __builtin_ia32_maxps512_mask on A and B with
     * _mm512_setzero_ps() as the third operand, U as the mask and R as the
     * final int argument.
     * NOTE(review): presumably lanes with a clear bit in U are zeroed. */
    889 #define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
    890   (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
    891                                        (__v16sf)(__m512)(B), \
    892                                        (__v16sf)_mm512_setzero_ps(), \
    893                                        (__mmask16)(U), (int)(R)); })
    894 
    /* Unmasked form: expands to __builtin_ia32_maxps512_mask on A and B with
     * mask (__mmask16)-1 and _mm512_undefined_ps() as the third operand.
     * NOTE(review): with every mask bit set the third operand is presumably
     * never selected. */
    895 #define _mm512_max_round_ps(A, B, R) __extension__ ({ \
    896   (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
    897                                        (__v16sf)(__m512)(B), \
    898                                        (__v16sf)_mm512_undefined_ps(), \
    899                                        (__mmask16)-1, (int)(R)); })
    900 
    /* Unmasked max of __A and __B: calls __builtin_ia32_maxps512_mask with
     * mask (__mmask16) -1, a zero vector as the third operand, and
     * _MM_FROUND_CUR_DIRECTION in the final slot (the one the
     * _mm512_*max_round_ps macros fill with R). */
    901 static  __inline__ __m512 __DEFAULT_FN_ATTRS
    902 _mm512_max_ps(__m512 __A, __m512 __B)
    903 {
    904   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
    905             (__v16sf) __B,
    906             (__v16sf)
    907             _mm512_setzero_ps (),
    908             (__mmask16) -1,
    909             _MM_FROUND_CUR_DIRECTION);
    910 }
    911 
    /* Masked max: passes __A, __B, __W and __U to
     * __builtin_ia32_maxps512_mask, with _MM_FROUND_CUR_DIRECTION as the
     * final argument.
     * NOTE(review): presumably lanes with a clear bit in __U keep __W. */
    912 static __inline__ __m512 __DEFAULT_FN_ATTRS
    913 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
    914 {
    915   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
    916                  (__v16sf) __B,
    917                  (__v16sf) __W,
    918                  (__mmask16) __U,
    919                  _MM_FROUND_CUR_DIRECTION);
    920 }
    921 
    /* Zero-masked max: calls __builtin_ia32_maxps512_mask on __A and __B
     * with _mm512_setzero_ps() as the third operand, __U as the mask and
     * _MM_FROUND_CUR_DIRECTION as the final argument.
     * NOTE(review): presumably lanes with a clear bit in __U are zeroed. */
    922 static __inline__ __m512 __DEFAULT_FN_ATTRS
    923 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
    924 {
    925   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
    926                  (__v16sf) __B,
    927                  (__v16sf)
    928                  _mm512_setzero_ps (),
    929                  (__mmask16) __U,
    930                  _MM_FROUND_CUR_DIRECTION);
    931 }
    932 
    /* Masked scalar-float max on 128-bit vectors: passes __A, __B, __W and
     * __U to __builtin_ia32_maxss_round_mask with _MM_FROUND_CUR_DIRECTION.
     * NOTE(review): presumably only the lowest lane is computed and masked
     * (scalar "ss" form) -- confirm against the builtin. */
    933 static __inline__ __m128 __DEFAULT_FN_ATTRS
    934 _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
    935   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
    936                 (__v4sf) __B,
    937                 (__v4sf) __W,
    938                 (__mmask8) __U,
    939                 _MM_FROUND_CUR_DIRECTION);
    940 }
    941 
    /* Zero-masked scalar-float max: calls __builtin_ia32_maxss_round_mask on
     * __A and __B with _mm_setzero_ps() as the third operand, __U as the
     * mask and _MM_FROUND_CUR_DIRECTION as the final argument.
     * NOTE(review): presumably only bit 0 of __U matters -- confirm. */
    942 static __inline__ __m128 __DEFAULT_FN_ATTRS
    943 _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
    944   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
    945                 (__v4sf) __B,
    946                 (__v4sf)  _mm_setzero_ps (),
    947                 (__mmask8) __U,
    948                 _MM_FROUND_CUR_DIRECTION);
    949 }
    950 
    /* Unmasked form: expands to __builtin_ia32_maxss_round_mask on A and B
     * with mask (__mmask8)-1, _mm_setzero_ps() as the third operand and R as
     * the final int argument. */
    951 #define _mm_max_round_ss(A, B, R) __extension__ ({ \
    952   (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
    953                                           (__v4sf)(__m128)(B), \
    954                                           (__v4sf)_mm_setzero_ps(), \
    955                                           (__mmask8)-1, (int)(R)); })
    956 
    /* Expands to __builtin_ia32_maxss_round_mask(A, B, W, U, R), casting
     * the vector arguments to __v4sf, U to __mmask8 and R to int.
     * NOTE(review): presumably W supplies the low lane when the mask bit is
     * clear -- confirm against the builtin. */
    957 #define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \
    958   (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
    959                                           (__v4sf)(__m128)(B), \
    960                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
    961                                           (int)(R)); })
    962 
    /* Expands to __builtin_ia32_maxss_round_mask on A and B with
     * _mm_setzero_ps() as the third operand, U as the mask and R as the
     * final int argument. */
    963 #define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \
    964   (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
    965                                           (__v4sf)(__m128)(B), \
    966                                           (__v4sf)_mm_setzero_ps(), \
    967                                           (__mmask8)(U), (int)(R)); })
    968 
    /* Masked scalar-double max on 128-bit vectors: passes __A, __B, __W and
     * __U to __builtin_ia32_maxsd_round_mask with _MM_FROUND_CUR_DIRECTION.
     * NOTE(review): presumably only the lowest lane is computed and masked
     * (scalar "sd" form) -- confirm against the builtin. */
    969 static __inline__ __m128d __DEFAULT_FN_ATTRS
    970 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
    971   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
    972                 (__v2df) __B,
    973                 (__v2df) __W,
    974                 (__mmask8) __U,
    975                 _MM_FROUND_CUR_DIRECTION);
    976 }
    977 
    /* Zero-masked scalar-double max: calls __builtin_ia32_maxsd_round_mask
     * on __A and __B with _mm_setzero_pd() as the third operand, __U as the
     * mask and _MM_FROUND_CUR_DIRECTION as the final argument. */
    978 static __inline__ __m128d __DEFAULT_FN_ATTRS
    979 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
    980   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
    981                 (__v2df) __B,
    982                 (__v2df)  _mm_setzero_pd (),
    983                 (__mmask8) __U,
    984                 _MM_FROUND_CUR_DIRECTION);
    985 }
    986 
    /* Unmasked form: expands to __builtin_ia32_maxsd_round_mask on A and B
     * with mask (__mmask8)-1, _mm_setzero_pd() as the third operand and R as
     * the final int argument. */
    987 #define _mm_max_round_sd(A, B, R) __extension__ ({ \
    988   (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
    989                                            (__v2df)(__m128d)(B), \
    990                                            (__v2df)_mm_setzero_pd(), \
    991                                            (__mmask8)-1, (int)(R)); })
    992 
    /* Expands to __builtin_ia32_maxsd_round_mask(A, B, W, U, R), casting
     * the vector arguments to __v2df, U to __mmask8 and R to int.
     * NOTE(review): presumably W supplies the low lane when the mask bit is
     * clear -- confirm against the builtin. */
    993 #define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \
    994   (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
    995                                            (__v2df)(__m128d)(B), \
    996                                            (__v2df)(__m128d)(W), \
    997                                            (__mmask8)(U), (int)(R)); })
    998 
    /* Expands to __builtin_ia32_maxsd_round_mask on A and B with
     * _mm_setzero_pd() as the third operand, U as the mask and R as the
     * final int argument. */
    999 #define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \
   1000   (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
   1001                                            (__v2df)(__m128d)(B), \
   1002                                            (__v2df)_mm_setzero_pd(), \
   1003                                            (__mmask8)(U), (int)(R)); })
   1004 
   /* Unmasked integer max of __A and __B: calls
    * __builtin_ia32_pmaxsd512_mask on the __v16si views with a zero vector
    * as the third operand and mask (__mmask16) -1.
    * NOTE(review): "pmaxsd" presumably means a signed 32-bit compare. */
   1005 static __inline __m512i
   1006 __DEFAULT_FN_ATTRS
   1007 _mm512_max_epi32(__m512i __A, __m512i __B)
   1008 {
   1009   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
   1010               (__v16si) __B,
   1011               (__v16si)
   1012               _mm512_setzero_si512 (),
   1013               (__mmask16) -1);
   1014 }
   1015 
   /* Masked integer max: passes __A, __B, __W and the mask __M (uncast) to
    * __builtin_ia32_pmaxsd512_mask.
    * NOTE(review): presumably lanes with a clear bit in __M keep __W. */
   1016 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1017 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   1018 {
   1019   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
   1020                    (__v16si) __B,
   1021                    (__v16si) __W, __M);
   1022 }
   1023 
   /* Zero-masked integer max: calls __builtin_ia32_pmaxsd512_mask on __A
    * and __B with _mm512_setzero_si512() as the third operand and __M as
    * the mask.
    * NOTE(review): presumably lanes with a clear bit in __M are zeroed. */
   1024 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1025 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
   1026 {
   1027   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
   1028                    (__v16si) __B,
   1029                    (__v16si)
   1030                    _mm512_setzero_si512 (),
   1031                    __M);
   1032 }
   1033 
   /* Unmasked integer max of __A and __B via __builtin_ia32_pmaxud512_mask,
    * with a zero vector as the third operand and mask (__mmask16) -1.
    * NOTE(review): the operands are cast to the signed-looking __v16si even
    * though "pmaxud" suggests an unsigned compare; presumably the cast only
    * satisfies the builtin's prototype -- confirm. */
   1034 static __inline __m512i __DEFAULT_FN_ATTRS
   1035 _mm512_max_epu32(__m512i __A, __m512i __B)
   1036 {
   1037   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
   1038               (__v16si) __B,
   1039               (__v16si)
   1040               _mm512_setzero_si512 (),
   1041               (__mmask16) -1);
   1042 }
   1043 
   /* Masked integer max: passes __A, __B, __W and the mask __M (uncast) to
    * __builtin_ia32_pmaxud512_mask.
    * NOTE(review): presumably an unsigned 32-bit compare in which lanes
    * with a clear bit in __M keep __W -- confirm. */
   1044 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1045 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   1046 {
   1047   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
   1048                    (__v16si) __B,
   1049                    (__v16si) __W, __M);
   1050 }
   1051 
   /* Zero-masked integer max: calls __builtin_ia32_pmaxud512_mask on __A
    * and __B with _mm512_setzero_si512() as the third operand and __M as
    * the mask.
    * NOTE(review): presumably lanes with a clear bit in __M are zeroed. */
   1052 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1053 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
   1054 {
   1055   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
   1056                    (__v16si) __B,
   1057                    (__v16si)
   1058                    _mm512_setzero_si512 (),
   1059                    __M);
   1060 }
   1061 
   /* Unmasked integer max of __A and __B: calls
    * __builtin_ia32_pmaxsq512_mask on the __v8di (long long) views with a
    * zero vector as the third operand and mask (__mmask8) -1. */
   1062 static __inline __m512i __DEFAULT_FN_ATTRS
   1063 _mm512_max_epi64(__m512i __A, __m512i __B)
   1064 {
   1065   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
   1066               (__v8di) __B,
   1067               (__v8di)
   1068               _mm512_setzero_si512 (),
   1069               (__mmask8) -1);
   1070 }
   1071 
   /* Masked integer max: passes __A, __B, __W and the mask __M (uncast) to
    * __builtin_ia32_pmaxsq512_mask.
    * NOTE(review): presumably lanes with a clear bit in __M keep __W. */
   1072 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1073 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
   1074 {
   1075   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
   1076                    (__v8di) __B,
   1077                    (__v8di) __W, __M);
   1078 }
   1079 
   /* Zero-masked integer max: calls __builtin_ia32_pmaxsq512_mask on __A
    * and __B with _mm512_setzero_si512() as the third operand and __M as
    * the mask.
    * NOTE(review): presumably lanes with a clear bit in __M are zeroed. */
   1080 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1081 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
   1082 {
   1083   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
   1084                    (__v8di) __B,
   1085                    (__v8di)
   1086                    _mm512_setzero_si512 (),
   1087                    __M);
   1088 }
   1089 
   /* Unmasked integer max of __A and __B via __builtin_ia32_pmaxuq512_mask,
    * with a zero vector as the third operand and mask (__mmask8) -1.
    * NOTE(review): operands are passed as __v8di (long long) although
    * "pmaxuq" suggests an unsigned compare; presumably the cast only
    * matches the builtin's prototype -- confirm. */
   1090 static __inline __m512i __DEFAULT_FN_ATTRS
   1091 _mm512_max_epu64(__m512i __A, __m512i __B)
   1092 {
   1093   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
   1094               (__v8di) __B,
   1095               (__v8di)
   1096               _mm512_setzero_si512 (),
   1097               (__mmask8) -1);
   1098 }
   1099 
   /* Masked integer max: passes __A, __B, __W and the mask __M (uncast) to
    * __builtin_ia32_pmaxuq512_mask.
    * NOTE(review): presumably lanes with a clear bit in __M keep __W. */
   1100 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1101 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
   1102 {
   1103   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
   1104                    (__v8di) __B,
   1105                    (__v8di) __W, __M);
   1106 }
   1107 
   /* Zero-masked integer max: calls __builtin_ia32_pmaxuq512_mask on __A
    * and __B with _mm512_setzero_si512() as the third operand and __M as
    * the mask.
    * NOTE(review): presumably lanes with a clear bit in __M are zeroed. */
   1108 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1109 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
   1110 {
   1111   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
   1112                    (__v8di) __B,
   1113                    (__v8di)
   1114                    _mm512_setzero_si512 (),
   1115                    __M);
   1116 }
   1117 
   /* Expands to __builtin_ia32_minpd512_mask(A, B, W, U, R), casting the
    * vector arguments to __v8df, U to __mmask8 and R to int.
    * NOTE(review): presumably lanes with a clear bit in U keep W, and R
    * must be a compile-time constant -- confirm. */
   1118 #define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
   1119   (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
   1120                                         (__v8df)(__m512d)(B), \
   1121                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
   1122                                         (int)(R)); })
   1123 
   /* Expands to __builtin_ia32_minpd512_mask on A and B with
    * _mm512_setzero_pd() as the third operand, U as the mask and R as the
    * final int argument.
    * NOTE(review): presumably lanes with a clear bit in U are zeroed. */
   1124 #define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
   1125   (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
   1126                                         (__v8df)(__m512d)(B), \
   1127                                         (__v8df)_mm512_setzero_pd(), \
   1128                                         (__mmask8)(U), (int)(R)); })
   1129 
   /* Unmasked form: expands to __builtin_ia32_minpd512_mask on A and B with
    * mask (__mmask8)-1 and _mm512_undefined_pd() as the third operand.
    * NOTE(review): with every mask bit set the third operand is presumably
    * never selected. */
   1130 #define _mm512_min_round_pd(A, B, R) __extension__ ({ \
   1131   (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
   1132                                         (__v8df)(__m512d)(B), \
   1133                                         (__v8df)_mm512_undefined_pd(), \
   1134                                         (__mmask8)-1, (int)(R)); })
   1135 
   /* Unmasked min of __A and __B: calls __builtin_ia32_minpd512_mask with
    * mask (__mmask8) -1, a zero vector as the third operand, and
    * _MM_FROUND_CUR_DIRECTION in the final slot (the one the
    * _mm512_*min_round_pd macros fill with R). */
   1136 static  __inline__ __m512d __DEFAULT_FN_ATTRS
   1137 _mm512_min_pd(__m512d __A, __m512d __B)
   1138 {
   1139   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
   1140              (__v8df) __B,
   1141              (__v8df)
   1142              _mm512_setzero_pd (),
   1143              (__mmask8) -1,
   1144              _MM_FROUND_CUR_DIRECTION);
   1145 }
   1146 
   /* Masked min: passes __A, __B, __W and __U to
    * __builtin_ia32_minpd512_mask, with _MM_FROUND_CUR_DIRECTION as the
    * final argument.
    * NOTE(review): presumably lanes with a clear bit in __U keep __W. */
   1147 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1148 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   1149 {
   1150   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
   1151                   (__v8df) __B,
   1152                   (__v8df) __W,
   1153                   (__mmask8) __U,
   1154                   _MM_FROUND_CUR_DIRECTION);
   1155 }
   1156 
   /* Expands to __builtin_ia32_minps512_mask(A, B, W, U, R), casting the
    * vector arguments to __v16sf, U to __mmask16 and R to int.
    * NOTE(review): presumably lanes with a clear bit in U keep W, and R
    * must be a compile-time constant -- confirm. */
   1157 #define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
   1158   (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
   1159                                        (__v16sf)(__m512)(B), \
   1160                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   1161                                        (int)(R)); })
   1162 
   /* Expands to __builtin_ia32_minps512_mask on A and B with
    * _mm512_setzero_ps() as the third operand, U as the mask and R as the
    * final int argument.
    * NOTE(review): presumably lanes with a clear bit in U are zeroed. */
   1163 #define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
   1164   (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
   1165                                        (__v16sf)(__m512)(B), \
   1166                                        (__v16sf)_mm512_setzero_ps(), \
   1167                                        (__mmask16)(U), (int)(R)); })
   1168 
   /* Unmasked form: expands to __builtin_ia32_minps512_mask on A and B with
    * mask (__mmask16)-1 and _mm512_undefined_ps() as the third operand.
    * NOTE(review): with every mask bit set the third operand is presumably
    * never selected. */
   1169 #define _mm512_min_round_ps(A, B, R) __extension__ ({ \
   1170   (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
   1171                                        (__v16sf)(__m512)(B), \
   1172                                        (__v16sf)_mm512_undefined_ps(), \
   1173                                        (__mmask16)-1, (int)(R)); })
   1174 
   /* Zero-masked double-precision min: calls __builtin_ia32_minpd512_mask
    * on __A and __B with _mm512_setzero_pd() as the third operand, __U as
    * the mask and _MM_FROUND_CUR_DIRECTION as the final argument.
    * NOTE(review): presumably lanes with a clear bit in __U are zeroed. */
   1175 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1176 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
   1177 {
   1178   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
   1179                   (__v8df) __B,
   1180                   (__v8df)
   1181                   _mm512_setzero_pd (),
   1182                   (__mmask8) __U,
   1183                   _MM_FROUND_CUR_DIRECTION);
   1184 }
   1185 
   /* Unmasked min of __A and __B: calls __builtin_ia32_minps512_mask with
    * mask (__mmask16) -1, a zero vector as the third operand, and
    * _MM_FROUND_CUR_DIRECTION in the final slot (the one the
    * _mm512_*min_round_ps macros fill with R). */
   1186 static  __inline__ __m512 __DEFAULT_FN_ATTRS
   1187 _mm512_min_ps(__m512 __A, __m512 __B)
   1188 {
   1189   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
   1190             (__v16sf) __B,
   1191             (__v16sf)
   1192             _mm512_setzero_ps (),
   1193             (__mmask16) -1,
   1194             _MM_FROUND_CUR_DIRECTION);
   1195 }
   1196 
   /* Masked min: passes __A, __B, __W and __U to
    * __builtin_ia32_minps512_mask, with _MM_FROUND_CUR_DIRECTION as the
    * final argument.
    * NOTE(review): presumably lanes with a clear bit in __U keep __W. */
   1197 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1198 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   1199 {
   1200   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
   1201                  (__v16sf) __B,
   1202                  (__v16sf) __W,
   1203                  (__mmask16) __U,
   1204                  _MM_FROUND_CUR_DIRECTION);
   1205 }
   1206 
   /* Zero-masked min: calls __builtin_ia32_minps512_mask on __A and __B
    * with _mm512_setzero_ps() as the third operand, __U as the mask and
    * _MM_FROUND_CUR_DIRECTION as the final argument.
    * NOTE(review): presumably lanes with a clear bit in __U are zeroed. */
   1207 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1208 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
   1209 {
   1210   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
   1211                  (__v16sf) __B,
   1212                  (__v16sf)
   1213                  _mm512_setzero_ps (),
   1214                  (__mmask16) __U,
   1215                  _MM_FROUND_CUR_DIRECTION);
   1216 }
   1217 
   /* Masked scalar-float min on 128-bit vectors: passes __A, __B, __W and
    * __U to __builtin_ia32_minss_round_mask with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably only the lowest lane is computed and masked
    * (scalar "ss" form) -- confirm against the builtin. */
   1218 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1219 _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   1220   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
   1221                 (__v4sf) __B,
   1222                 (__v4sf) __W,
   1223                 (__mmask8) __U,
   1224                 _MM_FROUND_CUR_DIRECTION);
   1225 }
   1226 
   /* Zero-masked scalar-float min: calls __builtin_ia32_minss_round_mask on
    * __A and __B with _mm_setzero_ps() as the third operand, __U as the
    * mask and _MM_FROUND_CUR_DIRECTION as the final argument. */
   1227 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1228 _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   1229   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
   1230                 (__v4sf) __B,
   1231                 (__v4sf)  _mm_setzero_ps (),
   1232                 (__mmask8) __U,
   1233                 _MM_FROUND_CUR_DIRECTION);
   1234 }
   1235 
   /* Unmasked form: expands to __builtin_ia32_minss_round_mask on A and B
    * with mask (__mmask8)-1, _mm_setzero_ps() as the third operand and R as
    * the final int argument. */
   1236 #define _mm_min_round_ss(A, B, R) __extension__ ({ \
   1237   (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
   1238                                           (__v4sf)(__m128)(B), \
   1239                                           (__v4sf)_mm_setzero_ps(), \
   1240                                           (__mmask8)-1, (int)(R)); })
   1241 
   /* Expands to __builtin_ia32_minss_round_mask(A, B, W, U, R), casting
    * the vector arguments to __v4sf, U to __mmask8 and R to int.
    * NOTE(review): presumably W supplies the low lane when the mask bit is
    * clear -- confirm against the builtin. */
   1242 #define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \
   1243   (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
   1244                                           (__v4sf)(__m128)(B), \
   1245                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
   1246                                           (int)(R)); })
   1247 
   /* Expands to __builtin_ia32_minss_round_mask on A and B with
    * _mm_setzero_ps() as the third operand, U as the mask and R as the
    * final int argument. */
   1248 #define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \
   1249   (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
   1250                                           (__v4sf)(__m128)(B), \
   1251                                           (__v4sf)_mm_setzero_ps(), \
   1252                                           (__mmask8)(U), (int)(R)); })
   1253 
   /* Masked scalar-double min on 128-bit vectors: passes __A, __B, __W and
    * __U to __builtin_ia32_minsd_round_mask with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably only the lowest lane is computed and masked
    * (scalar "sd" form) -- confirm against the builtin. */
   1254 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1255 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   1256   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
   1257                 (__v2df) __B,
   1258                 (__v2df) __W,
   1259                 (__mmask8) __U,
   1260                 _MM_FROUND_CUR_DIRECTION);
   1261 }
   1262 
   /* Zero-masked scalar-double min: calls __builtin_ia32_minsd_round_mask
    * on __A and __B with _mm_setzero_pd() as the third operand, __U as the
    * mask and _MM_FROUND_CUR_DIRECTION as the final argument. */
   1263 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1264 _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   1265   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
   1266                 (__v2df) __B,
   1267                 (__v2df)  _mm_setzero_pd (),
   1268                 (__mmask8) __U,
   1269                 _MM_FROUND_CUR_DIRECTION);
   1270 }
   1271 
   /* Unmasked form: expands to __builtin_ia32_minsd_round_mask on A and B
    * with mask (__mmask8)-1, _mm_setzero_pd() as the third operand and R as
    * the final int argument. */
   1272 #define _mm_min_round_sd(A, B, R) __extension__ ({ \
   1273   (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
   1274                                            (__v2df)(__m128d)(B), \
   1275                                            (__v2df)_mm_setzero_pd(), \
   1276                                            (__mmask8)-1, (int)(R)); })
   1277 
   /* Expands to __builtin_ia32_minsd_round_mask(A, B, W, U, R), casting
    * the vector arguments to __v2df, U to __mmask8 and R to int.
    * NOTE(review): presumably W supplies the low lane when the mask bit is
    * clear -- confirm against the builtin. */
   1278 #define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \
   1279   (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
   1280                                            (__v2df)(__m128d)(B), \
   1281                                            (__v2df)(__m128d)(W), \
   1282                                            (__mmask8)(U), (int)(R)); })
   1283 
   /* Expands to __builtin_ia32_minsd_round_mask on A and B with
    * _mm_setzero_pd() as the third operand, U as the mask and R as the
    * final int argument. */
   1284 #define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \
   1285   (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
   1286                                            (__v2df)(__m128d)(B), \
   1287                                            (__v2df)_mm_setzero_pd(), \
   1288                                            (__mmask8)(U), (int)(R)); })
   1289 
   /* Unmasked integer min of __A and __B: calls
    * __builtin_ia32_pminsd512_mask on the __v16si views with a zero vector
    * as the third operand and mask (__mmask16) -1.
    * NOTE(review): "pminsd" presumably means a signed 32-bit compare. */
   1290 static __inline __m512i
   1291 __DEFAULT_FN_ATTRS
   1292 _mm512_min_epi32(__m512i __A, __m512i __B)
   1293 {
   1294   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
   1295               (__v16si) __B,
   1296               (__v16si)
   1297               _mm512_setzero_si512 (),
   1298               (__mmask16) -1);
   1299 }
   1300 
   /* Masked integer min: passes __A, __B, __W and the mask __M (uncast) to
    * __builtin_ia32_pminsd512_mask.
    * NOTE(review): presumably lanes with a clear bit in __M keep __W. */
   1301 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1302 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   1303 {
   1304   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
   1305                    (__v16si) __B,
   1306                    (__v16si) __W, __M);
   1307 }
   1308 
   /* Zero-masked integer min: calls __builtin_ia32_pminsd512_mask on __A
    * and __B with _mm512_setzero_si512() as the third operand and __M as
    * the mask.
    * NOTE(review): presumably lanes with a clear bit in __M are zeroed. */
   1309 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1310 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
   1311 {
   1312   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
   1313                    (__v16si) __B,
   1314                    (__v16si)
   1315                    _mm512_setzero_si512 (),
   1316                    __M);
   1317 }
   1318 
   /* Unmasked integer min of __A and __B via __builtin_ia32_pminud512_mask,
    * with a zero vector as the third operand and mask (__mmask16) -1.
    * NOTE(review): operands are cast to __v16si although "pminud" suggests
    * an unsigned compare; presumably the cast only matches the builtin's
    * prototype -- confirm. */
   1319 static __inline __m512i __DEFAULT_FN_ATTRS
   1320 _mm512_min_epu32(__m512i __A, __m512i __B)
   1321 {
   1322   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
   1323               (__v16si) __B,
   1324               (__v16si)
   1325               _mm512_setzero_si512 (),
   1326               (__mmask16) -1);
   1327 }
   1328 
   /* Masked integer min: passes __A, __B, __W and the mask __M (uncast) to
    * __builtin_ia32_pminud512_mask.
    * NOTE(review): presumably lanes with a clear bit in __M keep __W. */
   1329 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1330 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   1331 {
   1332   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
   1333                    (__v16si) __B,
   1334                    (__v16si) __W, __M);
   1335 }
   1336 
   /* Zero-masked integer min: calls __builtin_ia32_pminud512_mask on __A
    * and __B with _mm512_setzero_si512() as the third operand and __M as
    * the mask.
    * NOTE(review): presumably lanes with a clear bit in __M are zeroed. */
   1337 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1338 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
   1339 {
   1340   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
   1341                    (__v16si) __B,
   1342                    (__v16si)
   1343                    _mm512_setzero_si512 (),
   1344                    __M);
   1345 }
   1346 
   /* Unmasked integer min of __A and __B: calls
    * __builtin_ia32_pminsq512_mask on the __v8di (long long) views with a
    * zero vector as the third operand and mask (__mmask8) -1. */
   1347 static __inline __m512i __DEFAULT_FN_ATTRS
   1348 _mm512_min_epi64(__m512i __A, __m512i __B)
   1349 {
   1350   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
   1351               (__v8di) __B,
   1352               (__v8di)
   1353               _mm512_setzero_si512 (),
   1354               (__mmask8) -1);
   1355 }
   1356 
   /* Masked integer min: passes __A, __B, __W and the mask __M (uncast) to
    * __builtin_ia32_pminsq512_mask.
    * NOTE(review): presumably lanes with a clear bit in __M keep __W. */
   1357 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1358 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
   1359 {
   1360   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
   1361                    (__v8di) __B,
   1362                    (__v8di) __W, __M);
   1363 }
   1364 
   /* Zero-masked integer min: calls __builtin_ia32_pminsq512_mask on __A
    * and __B with _mm512_setzero_si512() as the third operand and __M as
    * the mask.
    * NOTE(review): presumably lanes with a clear bit in __M are zeroed. */
   1365 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1366 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
   1367 {
   1368   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
   1369                    (__v8di) __B,
   1370                    (__v8di)
   1371                    _mm512_setzero_si512 (),
   1372                    __M);
   1373 }
   1374 
   /* Unmasked integer min of __A and __B via __builtin_ia32_pminuq512_mask,
    * with a zero vector as the third operand and mask (__mmask8) -1.
    * NOTE(review): operands are passed as __v8di (long long) although
    * "pminuq" suggests an unsigned compare; presumably the cast only
    * matches the builtin's prototype -- confirm. */
   1375 static __inline __m512i __DEFAULT_FN_ATTRS
   1376 _mm512_min_epu64(__m512i __A, __m512i __B)
   1377 {
   1378   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
   1379               (__v8di) __B,
   1380               (__v8di)
   1381               _mm512_setzero_si512 (),
   1382               (__mmask8) -1);
   1383 }
   1384 
   /* Masked integer min: passes __A, __B, __W and the mask __M (uncast) to
    * __builtin_ia32_pminuq512_mask.
    * NOTE(review): presumably lanes with a clear bit in __M keep __W. */
   1385 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1386 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
   1387 {
   1388   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
   1389                    (__v8di) __B,
   1390                    (__v8di) __W, __M);
   1391 }
   1392 
   /* Zero-masked integer min: calls __builtin_ia32_pminuq512_mask on __A
    * and __B with _mm512_setzero_si512() as the third operand and __M as
    * the mask.
    * NOTE(review): presumably lanes with a clear bit in __M are zeroed. */
   1393 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1394 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
   1395 {
   1396   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
   1397                    (__v8di) __B,
   1398                    (__v8di)
   1399                    _mm512_setzero_si512 (),
   1400                    __M);
   1401 }
   1402 
   /* Unmasked multiply via __builtin_ia32_pmuldq512_mask. The inputs __X and
    * __Y are passed as __v16si, while the third operand (a zero vector) is
    * __v8di and the mask is (__mmask8) -1.
    * NOTE(review): the type mix suggests 32-bit inputs producing eight
    * 64-bit products (presumably signed) -- confirm against the builtin. */
   1403 static __inline __m512i __DEFAULT_FN_ATTRS
   1404 _mm512_mul_epi32(__m512i __X, __m512i __Y)
   1405 {
   1406   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
   1407               (__v16si) __Y,
   1408               (__v8di)
   1409               _mm512_setzero_si512 (),
   1410               (__mmask8) -1);
   1411 }
   1412 
   /* Masked multiply: passes __X and __Y (as __v16si), __W (as __v8di) and
    * the mask __M (uncast) to __builtin_ia32_pmuldq512_mask.
    * NOTE(review): presumably result lanes are 64-bit and lanes with a
    * clear bit in __M keep __W -- confirm against the builtin. */
   1413 static __inline __m512i __DEFAULT_FN_ATTRS
   1414 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
   1415 {
   1416   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
   1417               (__v16si) __Y,
   1418               (__v8di) __W, __M);
   1419 }
   1420 
   /* Zero-masked multiply: calls __builtin_ia32_pmuldq512_mask on __X and
    * __Y (as __v16si) with _mm512_setzero_si512() (as __v8di) as the third
    * operand and __M as the mask.
    * NOTE(review): presumably lanes with a clear bit in __M are zeroed. */
   1421 static __inline __m512i __DEFAULT_FN_ATTRS
   1422 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
   1423 {
   1424   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
   1425               (__v16si) __Y,
   1426               (__v8di)
   1427               _mm512_setzero_si512 (),
   1428               __M);
   1429 }
   1430 
   /* Unmasked multiply via __builtin_ia32_pmuludq512_mask. The inputs __X
    * and __Y are passed as __v16si, while the third operand (a zero vector)
    * is __v8di and the mask is (__mmask8) -1.
    * NOTE(review): the type mix suggests 32-bit inputs producing eight
    * 64-bit products (presumably unsigned) -- confirm against the builtin. */
   1431 static __inline __m512i __DEFAULT_FN_ATTRS
   1432 _mm512_mul_epu32(__m512i __X, __m512i __Y)
   1433 {
   1434   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
   1435                (__v16si) __Y,
   1436                (__v8di)
   1437                _mm512_setzero_si512 (),
   1438                (__mmask8) -1);
   1439 }
   1440 
   /* Masked multiply: passes __X and __Y (as __v16si), __W (as __v8di) and
    * the mask __M (uncast) to __builtin_ia32_pmuludq512_mask.
    * NOTE(review): presumably result lanes are 64-bit and lanes with a
    * clear bit in __M keep __W -- confirm against the builtin. */
   1441 static __inline __m512i __DEFAULT_FN_ATTRS
   1442 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
   1443 {
   1444   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
   1445                (__v16si) __Y,
   1446                (__v8di) __W, __M);
   1447 }
   1448 
   /* Zero-masked multiply: calls __builtin_ia32_pmuludq512_mask on __X and
    * __Y (as __v16si) with _mm512_setzero_si512() (as __v8di) as the third
    * operand and __M as the mask.
    * NOTE(review): presumably lanes with a clear bit in __M are zeroed. */
   1449 static __inline __m512i __DEFAULT_FN_ATTRS
   1450 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
   1451 {
   1452   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
   1453                (__v16si) __Y,
   1454                (__v8di)
   1455                _mm512_setzero_si512 (),
   1456                __M);
   1457 }
   1458 
   /* Lane-wise integer product __A * __B, computed with the vector *
    * operator on the __v16su view of both operands; the result has the same
    * lane type, so each product is truncated to the lane width.
    * NOTE(review): __v16su is declared outside this chunk; presumably
    * unsigned 32-bit lanes. */
   1459 static __inline __m512i __DEFAULT_FN_ATTRS
   1460 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
   1461 {
   1462   return (__m512i) ((__v16su) __A * (__v16su) __B);
   1463 }
   1464 
   /* Zero-masked multiply: calls __builtin_ia32_pmulld512_mask on __A and
    * __B (as __v16si) with _mm512_setzero_si512() as the third operand and
    * __M as the mask.
    * NOTE(review): presumably lanes with a clear bit in __M are zeroed. */
   1465 static __inline __m512i __DEFAULT_FN_ATTRS
   1466 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
   1467 {
   1468   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
   1469               (__v16si) __B,
   1470               (__v16si)
   1471               _mm512_setzero_si512 (),
   1472               __M);
   1473 }
   1474 
   /* Masked multiply: passes __A, __B, __W (all as __v16si) and the mask
    * __M (uncast) to __builtin_ia32_pmulld512_mask.
    * NOTE(review): presumably lanes with a clear bit in __M keep __W. */
   1475 static __inline __m512i __DEFAULT_FN_ATTRS
   1476 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   1477 {
   1478   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
   1479               (__v16si) __B,
   1480               (__v16si) __W, __M);
   1481 }
   1482 
   /* Expands to __builtin_ia32_sqrtpd512_mask(A, W, U, R), casting the
    * vector arguments to __v8df, U to __mmask8 and R to int.
    * NOTE(review): presumably lanes with a clear bit in U keep W, and R
    * must be a compile-time constant -- confirm. */
   1483 #define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
   1484   (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
   1485                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
   1486                                          (int)(R)); })
   1487 
   1488 #define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
   1489   (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
   1490                                          (__v8df)_mm512_setzero_pd(), \
   1491                                          (__mmask8)(U), (int)(R)); })
   1492 
   1493 #define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
   1494   (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
   1495                                          (__v8df)_mm512_undefined_pd(), \
   1496                                          (__mmask8)-1, (int)(R)); })
   1497 
   1498 static  __inline__ __m512d __DEFAULT_FN_ATTRS
   1499 _mm512_sqrt_pd(__m512d __a)
   1500 {
   1501   return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
   1502                                                 (__v8df) _mm512_setzero_pd (),
   1503                                                 (__mmask8) -1,
   1504                                                 _MM_FROUND_CUR_DIRECTION);
   1505 }
   1506 
   1507 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1508 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
   1509 {
   1510   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
   1511                    (__v8df) __W,
   1512                    (__mmask8) __U,
   1513                    _MM_FROUND_CUR_DIRECTION);
   1514 }
   1515 
   1516 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1517 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
   1518 {
   1519   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
   1520                    (__v8df)
   1521                    _mm512_setzero_pd (),
   1522                    (__mmask8) __U,
   1523                    _MM_FROUND_CUR_DIRECTION);
   1524 }
   1525 
   1526 #define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
   1527   (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
   1528                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
   1529                                         (int)(R)); })
   1530 
   1531 #define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
   1532   (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
   1533                                         (__v16sf)_mm512_setzero_ps(), \
   1534                                         (__mmask16)(U), (int)(R)); })
   1535 
   1536 #define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
   1537   (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
   1538                                         (__v16sf)_mm512_undefined_ps(), \
   1539                                         (__mmask16)-1, (int)(R)); })
   1540 
   1541 static  __inline__ __m512 __DEFAULT_FN_ATTRS
   1542 _mm512_sqrt_ps(__m512 __a)
   1543 {
   1544   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
   1545                                                (__v16sf) _mm512_setzero_ps (),
   1546                                                (__mmask16) -1,
   1547                                                _MM_FROUND_CUR_DIRECTION);
   1548 }
   1549 
   1550 static  __inline__ __m512 __DEFAULT_FN_ATTRS
   1551 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
   1552 {
   1553   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
   1554                                                (__v16sf) __W,
   1555                                                (__mmask16) __U,
   1556                                                _MM_FROUND_CUR_DIRECTION);
   1557 }
   1558 
   1559 static  __inline__ __m512 __DEFAULT_FN_ATTRS
   1560 _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
   1561 {
   1562   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
   1563                                                (__v16sf) _mm512_setzero_ps (),
   1564                                                (__mmask16) __U,
   1565                                                _MM_FROUND_CUR_DIRECTION);
   1566 }
   1567 
   1568 static  __inline__ __m512d __DEFAULT_FN_ATTRS
   1569 _mm512_rsqrt14_pd(__m512d __A)
   1570 {
   1571   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
   1572                  (__v8df)
   1573                  _mm512_setzero_pd (),
   1574                  (__mmask8) -1);}
   1575 
   1576 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1577 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
   1578 {
   1579   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
   1580                   (__v8df) __W,
   1581                   (__mmask8) __U);
   1582 }
   1583 
   1584 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1585 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
   1586 {
   1587   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
   1588                   (__v8df)
   1589                   _mm512_setzero_pd (),
   1590                   (__mmask8) __U);
   1591 }
   1592 
   1593 static  __inline__ __m512 __DEFAULT_FN_ATTRS
   1594 _mm512_rsqrt14_ps(__m512 __A)
   1595 {
   1596   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
   1597                 (__v16sf)
   1598                 _mm512_setzero_ps (),
   1599                 (__mmask16) -1);
   1600 }
   1601 
   1602 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1603 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
   1604 {
   1605   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
   1606                  (__v16sf) __W,
   1607                  (__mmask16) __U);
   1608 }
   1609 
   1610 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1611 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
   1612 {
   1613   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
   1614                  (__v16sf)
   1615                  _mm512_setzero_ps (),
   1616                  (__mmask16) __U);
   1617 }
   1618 
   1619 static  __inline__ __m128 __DEFAULT_FN_ATTRS
   1620 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
   1621 {
   1622   return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
   1623              (__v4sf) __B,
   1624              (__v4sf)
   1625              _mm_setzero_ps (),
   1626              (__mmask8) -1);
   1627 }
   1628 
   1629 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1630 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   1631 {
   1632  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
   1633           (__v4sf) __B,
   1634           (__v4sf) __W,
   1635           (__mmask8) __U);
   1636 }
   1637 
   1638 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1639 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
   1640 {
   1641  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
   1642           (__v4sf) __B,
   1643           (__v4sf) _mm_setzero_ps (),
   1644           (__mmask8) __U);
   1645 }
   1646 
   1647 static  __inline__ __m128d __DEFAULT_FN_ATTRS
   1648 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
   1649 {
   1650   return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
   1651               (__v2df) __B,
   1652               (__v2df)
   1653               _mm_setzero_pd (),
   1654               (__mmask8) -1);
   1655 }
   1656 
   1657 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1658 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   1659 {
   1660  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
   1661           (__v2df) __B,
   1662           (__v2df) __W,
   1663           (__mmask8) __U);
   1664 }
   1665 
   1666 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1667 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
   1668 {
   1669  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
   1670           (__v2df) __B,
   1671           (__v2df) _mm_setzero_pd (),
   1672           (__mmask8) __U);
   1673 }
   1674 
   1675 static  __inline__ __m512d __DEFAULT_FN_ATTRS
   1676 _mm512_rcp14_pd(__m512d __A)
   1677 {
   1678   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
   1679                (__v8df)
   1680                _mm512_setzero_pd (),
   1681                (__mmask8) -1);
   1682 }
   1683 
   1684 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1685 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
   1686 {
   1687   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
   1688                 (__v8df) __W,
   1689                 (__mmask8) __U);
   1690 }
   1691 
   1692 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1693 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
   1694 {
   1695   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
   1696                 (__v8df)
   1697                 _mm512_setzero_pd (),
   1698                 (__mmask8) __U);
   1699 }
   1700 
   1701 static  __inline__ __m512 __DEFAULT_FN_ATTRS
   1702 _mm512_rcp14_ps(__m512 __A)
   1703 {
   1704   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
   1705               (__v16sf)
   1706               _mm512_setzero_ps (),
   1707               (__mmask16) -1);
   1708 }
   1709 
   1710 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1711 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
   1712 {
   1713   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
   1714                    (__v16sf) __W,
   1715                    (__mmask16) __U);
   1716 }
   1717 
   1718 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1719 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
   1720 {
   1721   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
   1722                    (__v16sf)
   1723                    _mm512_setzero_ps (),
   1724                    (__mmask16) __U);
   1725 }
   1726 
   1727 static  __inline__ __m128 __DEFAULT_FN_ATTRS
   1728 _mm_rcp14_ss(__m128 __A, __m128 __B)
   1729 {
   1730   return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
   1731                  (__v4sf) __B,
   1732                  (__v4sf)
   1733                  _mm_setzero_ps (),
   1734                  (__mmask8) -1);
   1735 }
   1736 
   1737 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1738 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   1739 {
   1740  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
   1741           (__v4sf) __B,
   1742           (__v4sf) __W,
   1743           (__mmask8) __U);
   1744 }
   1745 
   1746 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1747 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
   1748 {
   1749  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
   1750           (__v4sf) __B,
   1751           (__v4sf) _mm_setzero_ps (),
   1752           (__mmask8) __U);
   1753 }
   1754 
   1755 static  __inline__ __m128d __DEFAULT_FN_ATTRS
   1756 _mm_rcp14_sd(__m128d __A, __m128d __B)
   1757 {
   1758   return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
   1759             (__v2df) __B,
   1760             (__v2df)
   1761             _mm_setzero_pd (),
   1762             (__mmask8) -1);
   1763 }
   1764 
   1765 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1766 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   1767 {
   1768  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
   1769           (__v2df) __B,
   1770           (__v2df) __W,
   1771           (__mmask8) __U);
   1772 }
   1773 
   1774 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1775 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
   1776 {
   1777  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
   1778           (__v2df) __B,
   1779           (__v2df) _mm_setzero_pd (),
   1780           (__mmask8) __U);
   1781 }
   1782 
   1783 static __inline __m512 __DEFAULT_FN_ATTRS
   1784 _mm512_floor_ps(__m512 __A)
   1785 {
   1786   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
   1787                                                   _MM_FROUND_FLOOR,
   1788                                                   (__v16sf) __A, -1,
   1789                                                   _MM_FROUND_CUR_DIRECTION);
   1790 }
   1791 
   1792 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1793 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
   1794 {
   1795   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
   1796                    _MM_FROUND_FLOOR,
   1797                    (__v16sf) __W, __U,
   1798                    _MM_FROUND_CUR_DIRECTION);
   1799 }
   1800 
   1801 static __inline __m512d __DEFAULT_FN_ATTRS
   1802 _mm512_floor_pd(__m512d __A)
   1803 {
   1804   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
   1805                                                    _MM_FROUND_FLOOR,
   1806                                                    (__v8df) __A, -1,
   1807                                                    _MM_FROUND_CUR_DIRECTION);
   1808 }
   1809 
   1810 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1811 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
   1812 {
   1813   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
   1814                 _MM_FROUND_FLOOR,
   1815                 (__v8df) __W, __U,
   1816                 _MM_FROUND_CUR_DIRECTION);
   1817 }
   1818 
   1819 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1820 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
   1821 {
   1822   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
   1823                    _MM_FROUND_CEIL,
   1824                    (__v16sf) __W, __U,
   1825                    _MM_FROUND_CUR_DIRECTION);
   1826 }
   1827 
   1828 static __inline __m512 __DEFAULT_FN_ATTRS
   1829 _mm512_ceil_ps(__m512 __A)
   1830 {
   1831   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
   1832                                                   _MM_FROUND_CEIL,
   1833                                                   (__v16sf) __A, -1,
   1834                                                   _MM_FROUND_CUR_DIRECTION);
   1835 }
   1836 
   1837 static __inline __m512d __DEFAULT_FN_ATTRS
   1838 _mm512_ceil_pd(__m512d __A)
   1839 {
   1840   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
   1841                                                    _MM_FROUND_CEIL,
   1842                                                    (__v8df) __A, -1,
   1843                                                    _MM_FROUND_CUR_DIRECTION);
   1844 }
   1845 
   1846 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1847 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
   1848 {
   1849   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
   1850                 _MM_FROUND_CEIL,
   1851                 (__v8df) __W, __U,
   1852                 _MM_FROUND_CUR_DIRECTION);
   1853 }
   1854 
   1855 static __inline __m512i __DEFAULT_FN_ATTRS
   1856 _mm512_abs_epi64(__m512i __A)
   1857 {
   1858   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
   1859              (__v8di)
   1860              _mm512_setzero_si512 (),
   1861              (__mmask8) -1);
   1862 }
   1863 
   1864 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1865 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
   1866 {
   1867   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
   1868                   (__v8di) __W,
   1869                   (__mmask8) __U);
   1870 }
   1871 
   1872 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1873 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
   1874 {
   1875   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
   1876                   (__v8di)
   1877                   _mm512_setzero_si512 (),
   1878                   (__mmask8) __U);
   1879 }
   1880 
   1881 static __inline __m512i __DEFAULT_FN_ATTRS
   1882 _mm512_abs_epi32(__m512i __A)
   1883 {
   1884   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
   1885              (__v16si)
   1886              _mm512_setzero_si512 (),
   1887              (__mmask16) -1);
   1888 }
   1889 
   1890 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1891 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
   1892 {
   1893   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
   1894                   (__v16si) __W,
   1895                   (__mmask16) __U);
   1896 }
   1897 
   1898 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1899 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
   1900 {
   1901   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
   1902                   (__v16si)
   1903                   _mm512_setzero_si512 (),
   1904                   (__mmask16) __U);
   1905 }
   1906 
   1907 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1908 _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   1909   return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
   1910                 (__v4sf) __B,
   1911                 (__v4sf) __W,
   1912                 (__mmask8) __U,
   1913                 _MM_FROUND_CUR_DIRECTION);
   1914 }
   1915 
   1916 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1917 _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   1918   return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
   1919                 (__v4sf) __B,
   1920                 (__v4sf)  _mm_setzero_ps (),
   1921                 (__mmask8) __U,
   1922                 _MM_FROUND_CUR_DIRECTION);
   1923 }
   1924 
   1925 #define _mm_add_round_ss(A, B, R) __extension__ ({ \
   1926   (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
   1927                                           (__v4sf)(__m128)(B), \
   1928                                           (__v4sf)_mm_setzero_ps(), \
   1929                                           (__mmask8)-1, (int)(R)); })
   1930 
   1931 #define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \
   1932   (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
   1933                                           (__v4sf)(__m128)(B), \
   1934                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
   1935                                           (int)(R)); })
   1936 
   1937 #define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \
   1938   (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
   1939                                           (__v4sf)(__m128)(B), \
   1940                                           (__v4sf)_mm_setzero_ps(), \
   1941                                           (__mmask8)(U), (int)(R)); })
   1942 
   1943 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1944 _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   1945   return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
   1946                 (__v2df) __B,
   1947                 (__v2df) __W,
   1948                 (__mmask8) __U,
   1949                 _MM_FROUND_CUR_DIRECTION);
   1950 }
   1951 
   1952 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1953 _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   1954   return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
   1955                 (__v2df) __B,
   1956                 (__v2df)  _mm_setzero_pd (),
   1957                 (__mmask8) __U,
   1958                 _MM_FROUND_CUR_DIRECTION);
   1959 }
   1960 #define _mm_add_round_sd(A, B, R) __extension__ ({ \
   1961   (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
   1962                                            (__v2df)(__m128d)(B), \
   1963                                            (__v2df)_mm_setzero_pd(), \
   1964                                            (__mmask8)-1, (int)(R)); })
   1965 
   1966 #define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \
   1967   (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
   1968                                            (__v2df)(__m128d)(B), \
   1969                                            (__v2df)(__m128d)(W), \
   1970                                            (__mmask8)(U), (int)(R)); })
   1971 
   1972 #define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \
   1973   (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
   1974                                            (__v2df)(__m128d)(B), \
   1975                                            (__v2df)_mm_setzero_pd(), \
   1976                                            (__mmask8)(U), (int)(R)); })
   1977 
   1978 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1979 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   1980   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
   1981              (__v8df) __B,
   1982              (__v8df) __W,
   1983              (__mmask8) __U,
   1984              _MM_FROUND_CUR_DIRECTION);
   1985 }
   1986 
   1987 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1988 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   1989   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
   1990              (__v8df) __B,
   1991              (__v8df) _mm512_setzero_pd (),
   1992              (__mmask8) __U,
   1993              _MM_FROUND_CUR_DIRECTION);
   1994 }
   1995 
   1996 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1997 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   1998   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
   1999             (__v16sf) __B,
   2000             (__v16sf) __W,
   2001             (__mmask16) __U,
   2002             _MM_FROUND_CUR_DIRECTION);
   2003 }
   2004 
   2005 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2006 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   2007   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
   2008             (__v16sf) __B,
   2009             (__v16sf) _mm512_setzero_ps (),
   2010             (__mmask16) __U,
   2011             _MM_FROUND_CUR_DIRECTION);
   2012 }
   2013 
   2014 #define _mm512_add_round_pd(A, B, R) __extension__ ({ \
   2015   (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
   2016                                         (__v8df)(__m512d)(B), \
   2017                                         (__v8df)_mm512_setzero_pd(), \
   2018                                         (__mmask8)-1, (int)(R)); })
   2019 
   2020 #define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \
   2021   (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
   2022                                         (__v8df)(__m512d)(B), \
   2023                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
   2024                                         (int)(R)); })
   2025 
   2026 #define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \
   2027   (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
   2028                                         (__v8df)(__m512d)(B), \
   2029                                         (__v8df)_mm512_setzero_pd(), \
   2030                                         (__mmask8)(U), (int)(R)); })
   2031 
   2032 #define _mm512_add_round_ps(A, B, R) __extension__ ({ \
   2033   (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
   2034                                        (__v16sf)(__m512)(B), \
   2035                                        (__v16sf)_mm512_setzero_ps(), \
   2036                                        (__mmask16)-1, (int)(R)); })
   2037 
   2038 #define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \
   2039   (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
   2040                                        (__v16sf)(__m512)(B), \
   2041                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   2042                                        (int)(R)); })
   2043 
   2044 #define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \
   2045   (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
   2046                                        (__v16sf)(__m512)(B), \
   2047                                        (__v16sf)_mm512_setzero_ps(), \
   2048                                        (__mmask16)(U), (int)(R)); })
   2049 
   2050 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2051 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   2052   return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
   2053                 (__v4sf) __B,
   2054                 (__v4sf) __W,
   2055                 (__mmask8) __U,
   2056                 _MM_FROUND_CUR_DIRECTION);
   2057 }
   2058 
   2059 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2060 _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   2061   return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
   2062                 (__v4sf) __B,
   2063                 (__v4sf)  _mm_setzero_ps (),
   2064                 (__mmask8) __U,
   2065                 _MM_FROUND_CUR_DIRECTION);
   2066 }
   2067 #define _mm_sub_round_ss(A, B, R) __extension__ ({ \
   2068   (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
   2069                                           (__v4sf)(__m128)(B), \
   2070                                           (__v4sf)_mm_setzero_ps(), \
   2071                                           (__mmask8)-1, (int)(R)); })
   2072 
   2073 #define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \
   2074   (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
   2075                                           (__v4sf)(__m128)(B), \
   2076                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
   2077                                           (int)(R)); })
   2078 
   2079 #define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \
   2080   (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
   2081                                           (__v4sf)(__m128)(B), \
   2082                                           (__v4sf)_mm_setzero_ps(), \
   2083                                           (__mmask8)(U), (int)(R)); })
   2084 
   2085 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2086 _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   2087   return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
   2088                 (__v2df) __B,
   2089                 (__v2df) __W,
   2090                 (__mmask8) __U,
   2091                 _MM_FROUND_CUR_DIRECTION);
   2092 }
   2093 
   2094 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2095 _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   2096   return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
   2097                 (__v2df) __B,
   2098                 (__v2df)  _mm_setzero_pd (),
   2099                 (__mmask8) __U,
   2100                 _MM_FROUND_CUR_DIRECTION);
   2101 }
   2102 
   2103 #define _mm_sub_round_sd(A, B, R) __extension__ ({ \
   2104   (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
   2105                                            (__v2df)(__m128d)(B), \
   2106                                            (__v2df)_mm_setzero_pd(), \
   2107                                            (__mmask8)-1, (int)(R)); })
   2108 
   2109 #define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \
   2110   (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
   2111                                            (__v2df)(__m128d)(B), \
   2112                                            (__v2df)(__m128d)(W), \
   2113                                            (__mmask8)(U), (int)(R)); })
   2114 
   2115 #define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \
   2116   (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
   2117                                            (__v2df)(__m128d)(B), \
   2118                                            (__v2df)_mm_setzero_pd(), \
   2119                                            (__mmask8)(U), (int)(R)); })
   2120 
   2121 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2122 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   2123   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
   2124              (__v8df) __B,
   2125              (__v8df) __W,
   2126              (__mmask8) __U,
   2127              _MM_FROUND_CUR_DIRECTION);
   2128 }
   2129 
   2130 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2131 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   2132   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
   2133              (__v8df) __B,
   2134              (__v8df)
   2135              _mm512_setzero_pd (),
   2136              (__mmask8) __U,
   2137              _MM_FROUND_CUR_DIRECTION);
   2138 }
   2139 
   2140 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2141 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   2142   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
   2143             (__v16sf) __B,
   2144             (__v16sf) __W,
   2145             (__mmask16) __U,
   2146             _MM_FROUND_CUR_DIRECTION);
   2147 }
   2148 
   2149 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2150 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   2151   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
   2152             (__v16sf) __B,
   2153             (__v16sf)
   2154             _mm512_setzero_ps (),
   2155             (__mmask16) __U,
   2156             _MM_FROUND_CUR_DIRECTION);
   2157 }
   2158 
   2159 #define _mm512_sub_round_pd(A, B, R) __extension__ ({ \
   2160   (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
   2161                                         (__v8df)(__m512d)(B), \
   2162                                         (__v8df)_mm512_setzero_pd(), \
   2163                                         (__mmask8)-1, (int)(R)); })
   2164 
   2165 #define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \
   2166   (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
   2167                                         (__v8df)(__m512d)(B), \
   2168                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
   2169                                         (int)(R)); })
   2170 
   2171 #define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \
   2172   (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
   2173                                         (__v8df)(__m512d)(B), \
   2174                                         (__v8df)_mm512_setzero_pd(), \
   2175                                         (__mmask8)(U), (int)(R)); })
   2176 
   2177 #define _mm512_sub_round_ps(A, B, R) __extension__ ({ \
   2178   (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
   2179                                        (__v16sf)(__m512)(B), \
   2180                                        (__v16sf)_mm512_setzero_ps(), \
   2181                                        (__mmask16)-1, (int)(R)); })
   2182 
   2183 #define _mm512_mask_sub_round_ps(W, U, A, B, R)  __extension__ ({ \
   2184   (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
   2185                                        (__v16sf)(__m512)(B), \
   2186                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   2187                                        (int)(R)); });
   2188 
   2189 #define _mm512_maskz_sub_round_ps(U, A, B, R)  __extension__ ({ \
   2190   (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
   2191                                        (__v16sf)(__m512)(B), \
   2192                                        (__v16sf)_mm512_setzero_ps(), \
   2193                                        (__mmask16)(U), (int)(R)); });
   2194 
   2195 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2196 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   2197   return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
   2198                 (__v4sf) __B,
   2199                 (__v4sf) __W,
   2200                 (__mmask8) __U,
   2201                 _MM_FROUND_CUR_DIRECTION);
   2202 }
   2203 
   2204 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2205 _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   2206   return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
   2207                 (__v4sf) __B,
   2208                 (__v4sf)  _mm_setzero_ps (),
   2209                 (__mmask8) __U,
   2210                 _MM_FROUND_CUR_DIRECTION);
   2211 }
   2212 #define _mm_mul_round_ss(A, B, R) __extension__ ({ \
   2213   (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
   2214                                           (__v4sf)(__m128)(B), \
   2215                                           (__v4sf)_mm_setzero_ps(), \
   2216                                           (__mmask8)-1, (int)(R)); })
   2217 
   2218 #define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \
   2219   (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
   2220                                           (__v4sf)(__m128)(B), \
   2221                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
   2222                                           (int)(R)); })
   2223 
   2224 #define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \
   2225   (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
   2226                                           (__v4sf)(__m128)(B), \
   2227                                           (__v4sf)_mm_setzero_ps(), \
   2228                                           (__mmask8)(U), (int)(R)); })
   2229 
   2230 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2231 _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   2232   return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
   2233                 (__v2df) __B,
   2234                 (__v2df) __W,
   2235                 (__mmask8) __U,
   2236                 _MM_FROUND_CUR_DIRECTION);
   2237 }
   2238 
   2239 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2240 _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   2241   return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
   2242                 (__v2df) __B,
   2243                 (__v2df)  _mm_setzero_pd (),
   2244                 (__mmask8) __U,
   2245                 _MM_FROUND_CUR_DIRECTION);
   2246 }
   2247 
   2248 #define _mm_mul_round_sd(A, B, R) __extension__ ({ \
   2249   (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
   2250                                            (__v2df)(__m128d)(B), \
   2251                                            (__v2df)_mm_setzero_pd(), \
   2252                                            (__mmask8)-1, (int)(R)); })
   2253 
   2254 #define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \
   2255   (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
   2256                                            (__v2df)(__m128d)(B), \
   2257                                            (__v2df)(__m128d)(W), \
   2258                                            (__mmask8)(U), (int)(R)); })
   2259 
   2260 #define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \
   2261   (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
   2262                                            (__v2df)(__m128d)(B), \
   2263                                            (__v2df)_mm_setzero_pd(), \
   2264                                            (__mmask8)(U), (int)(R)); })
   2265 
   2266 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2267 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   2268   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
   2269              (__v8df) __B,
   2270              (__v8df) __W,
   2271              (__mmask8) __U,
   2272              _MM_FROUND_CUR_DIRECTION);
   2273 }
   2274 
   2275 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2276 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   2277   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
   2278              (__v8df) __B,
   2279              (__v8df)
   2280              _mm512_setzero_pd (),
   2281              (__mmask8) __U,
   2282              _MM_FROUND_CUR_DIRECTION);
   2283 }
   2284 
   2285 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2286 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   2287   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
   2288             (__v16sf) __B,
   2289             (__v16sf) __W,
   2290             (__mmask16) __U,
   2291             _MM_FROUND_CUR_DIRECTION);
   2292 }
   2293 
   2294 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2295 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   2296   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
   2297             (__v16sf) __B,
   2298             (__v16sf)
   2299             _mm512_setzero_ps (),
   2300             (__mmask16) __U,
   2301             _MM_FROUND_CUR_DIRECTION);
   2302 }
   2303 
   2304 #define _mm512_mul_round_pd(A, B, R) __extension__ ({ \
   2305   (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
   2306                                         (__v8df)(__m512d)(B), \
   2307                                         (__v8df)_mm512_setzero_pd(), \
   2308                                         (__mmask8)-1, (int)(R)); })
   2309 
   2310 #define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \
   2311   (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
   2312                                         (__v8df)(__m512d)(B), \
   2313                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
   2314                                         (int)(R)); })
   2315 
   2316 #define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \
   2317   (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
   2318                                         (__v8df)(__m512d)(B), \
   2319                                         (__v8df)_mm512_setzero_pd(), \
   2320                                         (__mmask8)(U), (int)(R)); })
   2321 
   2322 #define _mm512_mul_round_ps(A, B, R) __extension__ ({ \
   2323   (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
   2324                                        (__v16sf)(__m512)(B), \
   2325                                        (__v16sf)_mm512_setzero_ps(), \
   2326                                        (__mmask16)-1, (int)(R)); })
   2327 
   2328 #define _mm512_mask_mul_round_ps(W, U, A, B, R)  __extension__ ({ \
   2329   (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
   2330                                        (__v16sf)(__m512)(B), \
   2331                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   2332                                        (int)(R)); });
   2333 
   2334 #define _mm512_maskz_mul_round_ps(U, A, B, R)  __extension__ ({ \
   2335   (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
   2336                                        (__v16sf)(__m512)(B), \
   2337                                        (__v16sf)_mm512_setzero_ps(), \
   2338                                        (__mmask16)(U), (int)(R)); });
   2339 
   2340 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2341 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   2342   return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
   2343                 (__v4sf) __B,
   2344                 (__v4sf) __W,
   2345                 (__mmask8) __U,
   2346                 _MM_FROUND_CUR_DIRECTION);
   2347 }
   2348 
   2349 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2350 _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   2351   return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
   2352                 (__v4sf) __B,
   2353                 (__v4sf)  _mm_setzero_ps (),
   2354                 (__mmask8) __U,
   2355                 _MM_FROUND_CUR_DIRECTION);
   2356 }
   2357 
   2358 #define _mm_div_round_ss(A, B, R) __extension__ ({ \
   2359   (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
   2360                                           (__v4sf)(__m128)(B), \
   2361                                           (__v4sf)_mm_setzero_ps(), \
   2362                                           (__mmask8)-1, (int)(R)); })
   2363 
   2364 #define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \
   2365   (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
   2366                                           (__v4sf)(__m128)(B), \
   2367                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
   2368                                           (int)(R)); })
   2369 
   2370 #define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \
   2371   (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
   2372                                           (__v4sf)(__m128)(B), \
   2373                                           (__v4sf)_mm_setzero_ps(), \
   2374                                           (__mmask8)(U), (int)(R)); })
   2375 
   2376 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2377 _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   2378   return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
   2379                 (__v2df) __B,
   2380                 (__v2df) __W,
   2381                 (__mmask8) __U,
   2382                 _MM_FROUND_CUR_DIRECTION);
   2383 }
   2384 
   2385 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2386 _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   2387   return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
   2388                 (__v2df) __B,
   2389                 (__v2df)  _mm_setzero_pd (),
   2390                 (__mmask8) __U,
   2391                 _MM_FROUND_CUR_DIRECTION);
   2392 }
   2393 
   2394 #define _mm_div_round_sd(A, B, R) __extension__ ({ \
   2395   (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
   2396                                            (__v2df)(__m128d)(B), \
   2397                                            (__v2df)_mm_setzero_pd(), \
   2398                                            (__mmask8)-1, (int)(R)); })
   2399 
   2400 #define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \
   2401   (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
   2402                                            (__v2df)(__m128d)(B), \
   2403                                            (__v2df)(__m128d)(W), \
   2404                                            (__mmask8)(U), (int)(R)); })
   2405 
   2406 #define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \
   2407   (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
   2408                                            (__v2df)(__m128d)(B), \
   2409                                            (__v2df)_mm_setzero_pd(), \
   2410                                            (__mmask8)(U), (int)(R)); })
   2411 
   2412 static __inline __m512d __DEFAULT_FN_ATTRS
   2413 _mm512_div_pd(__m512d __a, __m512d __b)
   2414 {
   2415   return (__m512d)((__v8df)__a/(__v8df)__b);
   2416 }
   2417 
   2418 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2419 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   2420   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
   2421              (__v8df) __B,
   2422              (__v8df) __W,
   2423              (__mmask8) __U,
   2424              _MM_FROUND_CUR_DIRECTION);
   2425 }
   2426 
   2427 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2428 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   2429   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
   2430              (__v8df) __B,
   2431              (__v8df)
   2432              _mm512_setzero_pd (),
   2433              (__mmask8) __U,
   2434              _MM_FROUND_CUR_DIRECTION);
   2435 }
   2436 
   2437 static __inline __m512 __DEFAULT_FN_ATTRS
   2438 _mm512_div_ps(__m512 __a, __m512 __b)
   2439 {
   2440   return (__m512)((__v16sf)__a/(__v16sf)__b);
   2441 }
   2442 
   2443 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2444 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   2445   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
   2446             (__v16sf) __B,
   2447             (__v16sf) __W,
   2448             (__mmask16) __U,
   2449             _MM_FROUND_CUR_DIRECTION);
   2450 }
   2451 
   2452 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2453 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   2454   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
   2455             (__v16sf) __B,
   2456             (__v16sf)
   2457             _mm512_setzero_ps (),
   2458             (__mmask16) __U,
   2459             _MM_FROUND_CUR_DIRECTION);
   2460 }
   2461 
   2462 #define _mm512_div_round_pd(A, B, R) __extension__ ({ \
   2463   (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
   2464                                         (__v8df)(__m512d)(B), \
   2465                                         (__v8df)_mm512_setzero_pd(), \
   2466                                         (__mmask8)-1, (int)(R)); })
   2467 
   2468 #define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \
   2469   (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
   2470                                         (__v8df)(__m512d)(B), \
   2471                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
   2472                                         (int)(R)); })
   2473 
   2474 #define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \
   2475   (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
   2476                                         (__v8df)(__m512d)(B), \
   2477                                         (__v8df)_mm512_setzero_pd(), \
   2478                                         (__mmask8)(U), (int)(R)); })
   2479 
   2480 #define _mm512_div_round_ps(A, B, R) __extension__ ({ \
   2481   (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
   2482                                        (__v16sf)(__m512)(B), \
   2483                                        (__v16sf)_mm512_setzero_ps(), \
   2484                                        (__mmask16)-1, (int)(R)); })
   2485 
   2486 #define _mm512_mask_div_round_ps(W, U, A, B, R)  __extension__ ({ \
   2487   (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
   2488                                        (__v16sf)(__m512)(B), \
   2489                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
   2490                                        (int)(R)); });
   2491 
   2492 #define _mm512_maskz_div_round_ps(U, A, B, R)  __extension__ ({ \
   2493   (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
   2494                                        (__v16sf)(__m512)(B), \
   2495                                        (__v16sf)_mm512_setzero_ps(), \
   2496                                        (__mmask16)(U), (int)(R)); });
   2497 
   2498 #define _mm512_roundscale_ps(A, B) __extension__ ({ \
   2499   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
   2500                                          (__v16sf)(__m512)(A), (__mmask16)-1, \
   2501                                          _MM_FROUND_CUR_DIRECTION); })
   2502 
   2503 #define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({\
   2504   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
   2505                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
   2506                                          _MM_FROUND_CUR_DIRECTION); })
   2507 
   2508 #define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({\
   2509   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
   2510                                          (__v16sf)_mm512_setzero_ps(), \
   2511                                          (__mmask16)(A), \
   2512                                          _MM_FROUND_CUR_DIRECTION); })
   2513 
   2514 #define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \
   2515   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
   2516                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
   2517                                          (int)(R)); })
   2518 
   2519 #define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \
   2520   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
   2521                                          (__v16sf)_mm512_setzero_ps(), \
   2522                                          (__mmask16)(A), (int)(R)); })
   2523 
   2524 #define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \
   2525   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
   2526                                          (__v16sf)_mm512_undefined_ps(), \
   2527                                          (__mmask16)-1, (int)(R)); })
   2528 
   2529 #define _mm512_roundscale_pd(A, B) __extension__ ({ \
   2530   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
   2531                                           (__v8df)(__m512d)(A), (__mmask8)-1, \
   2532                                           _MM_FROUND_CUR_DIRECTION); })
   2533 
   2534 #define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({\
   2535   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
   2536                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
   2537                                           _MM_FROUND_CUR_DIRECTION); })
   2538 
   2539 #define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({\
   2540   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
   2541                                           (__v8df)_mm512_setzero_pd(), \
   2542                                           (__mmask8)(A), \
   2543                                           _MM_FROUND_CUR_DIRECTION); })
   2544 
   2545 #define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \
   2546   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
   2547                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
   2548                                           (int)(R)); })
   2549 
   2550 #define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \
   2551   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
   2552                                           (__v8df)_mm512_setzero_pd(), \
   2553                                           (__mmask8)(A), (int)(R)); })
   2554 
   2555 #define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \
   2556   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
   2557                                           (__v8df)_mm512_undefined_pd(), \
   2558                                           (__mmask8)-1, (int)(R)); })
   2559 
   2560 #define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
   2561   (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
   2562                                            (__v8df)(__m512d)(B), \
   2563                                            (__v8df)(__m512d)(C), (__mmask8)-1, \
   2564                                            (int)(R)); })
   2565 
   2566 
   2567 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
   2568   (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
   2569                                            (__v8df)(__m512d)(B), \
   2570                                            (__v8df)(__m512d)(C), \
   2571                                            (__mmask8)(U), (int)(R)); })
   2572 
   2573 
   2574 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
   2575   (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
   2576                                             (__v8df)(__m512d)(B), \
   2577                                             (__v8df)(__m512d)(C), \
   2578                                             (__mmask8)(U), (int)(R)); })
   2579 
   2580 
   2581 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
   2582   (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
   2583                                             (__v8df)(__m512d)(B), \
   2584                                             (__v8df)(__m512d)(C), \
   2585                                             (__mmask8)(U), (int)(R)); })
   2586 
   2587 
   2588 #define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
   2589   (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
   2590                                            (__v8df)(__m512d)(B), \
   2591                                            -(__v8df)(__m512d)(C), \
   2592                                            (__mmask8)-1, (int)(R)); })
   2593 
   2594 
   2595 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
   2596   (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
   2597                                            (__v8df)(__m512d)(B), \
   2598                                            -(__v8df)(__m512d)(C), \
   2599                                            (__mmask8)(U), (int)(R)); })
   2600 
   2601 
   2602 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
   2603   (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
   2604                                             (__v8df)(__m512d)(B), \
   2605                                             -(__v8df)(__m512d)(C), \
   2606                                             (__mmask8)(U), (int)(R)); })
   2607 
   2608 
   2609 #define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
   2610   (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
   2611                                            (__v8df)(__m512d)(B), \
   2612                                            (__v8df)(__m512d)(C), (__mmask8)-1, \
   2613                                            (int)(R)); })
   2614 
   2615 
   2616 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
   2617   (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
   2618                                             (__v8df)(__m512d)(B), \
   2619                                             (__v8df)(__m512d)(C), \
   2620                                             (__mmask8)(U), (int)(R)); })
   2621 
   2622 
   2623 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
   2624   (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
   2625                                             (__v8df)(__m512d)(B), \
   2626                                             (__v8df)(__m512d)(C), \
   2627                                             (__mmask8)(U), (int)(R)); })
   2628 
   2629 
   2630 #define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
   2631   (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
   2632                                            (__v8df)(__m512d)(B), \
   2633                                            -(__v8df)(__m512d)(C), \
   2634                                            (__mmask8)-1, (int)(R)); })
   2635 
   2636 
   2637 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
   2638   (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
   2639                                             (__v8df)(__m512d)(B), \
   2640                                             -(__v8df)(__m512d)(C), \
   2641                                             (__mmask8)(U), (int)(R)); })
   2642 
   2643 
   2644 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2645 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
   2646 {
   2647   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   2648                                                     (__v8df) __B,
   2649                                                     (__v8df) __C,
   2650                                                     (__mmask8) -1,
   2651                                                     _MM_FROUND_CUR_DIRECTION);
   2652 }
   2653 
   2654 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2655 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   2656 {
   2657   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   2658                                                     (__v8df) __B,
   2659                                                     (__v8df) __C,
   2660                                                     (__mmask8) __U,
   2661                                                     _MM_FROUND_CUR_DIRECTION);
   2662 }
   2663 
   2664 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2665 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   2666 {
   2667   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
   2668                                                      (__v8df) __B,
   2669                                                      (__v8df) __C,
   2670                                                      (__mmask8) __U,
   2671                                                      _MM_FROUND_CUR_DIRECTION);
   2672 }
   2673 
   2674 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2675 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   2676 {
   2677   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
   2678                                                      (__v8df) __B,
   2679                                                      (__v8df) __C,
   2680                                                      (__mmask8) __U,
   2681                                                      _MM_FROUND_CUR_DIRECTION);
   2682 }
   2683 
   2684 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2685 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
   2686 {
   2687   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   2688                                                     (__v8df) __B,
   2689                                                     -(__v8df) __C,
   2690                                                     (__mmask8) -1,
   2691                                                     _MM_FROUND_CUR_DIRECTION);
   2692 }
   2693 
   2694 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2695 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   2696 {
   2697   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   2698                                                     (__v8df) __B,
   2699                                                     -(__v8df) __C,
   2700                                                     (__mmask8) __U,
   2701                                                     _MM_FROUND_CUR_DIRECTION);
   2702 }
   2703 
   2704 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2705 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   2706 {
   2707   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
   2708                                                      (__v8df) __B,
   2709                                                      -(__v8df) __C,
   2710                                                      (__mmask8) __U,
   2711                                                      _MM_FROUND_CUR_DIRECTION);
   2712 }
   2713 
   2714 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2715 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
   2716 {
   2717   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
   2718                                                     (__v8df) __B,
   2719                                                     (__v8df) __C,
   2720                                                     (__mmask8) -1,
   2721                                                     _MM_FROUND_CUR_DIRECTION);
   2722 }
   2723 
   2724 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2725 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   2726 {
   2727   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
   2728                                                      (__v8df) __B,
   2729                                                      (__v8df) __C,
   2730                                                      (__mmask8) __U,
   2731                                                      _MM_FROUND_CUR_DIRECTION);
   2732 }
   2733 
   2734 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2735 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   2736 {
   2737   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
   2738                                                      (__v8df) __B,
   2739                                                      (__v8df) __C,
   2740                                                      (__mmask8) __U,
   2741                                                      _MM_FROUND_CUR_DIRECTION);
   2742 }
   2743 
   2744 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2745 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
   2746 {
   2747   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
   2748                                                     (__v8df) __B,
   2749                                                     -(__v8df) __C,
   2750                                                     (__mmask8) -1,
   2751                                                     _MM_FROUND_CUR_DIRECTION);
   2752 }
   2753 
   2754 static __inline__ __m512d __DEFAULT_FN_ATTRS
   2755 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   2756 {
   2757   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
   2758                                                      (__v8df) __B,
   2759                                                      -(__v8df) __C,
   2760                                                      (__mmask8) __U,
   2761                                                      _MM_FROUND_CUR_DIRECTION);
   2762 }
   2763 
   2764 #define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
   2765   (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
   2766                                           (__v16sf)(__m512)(B), \
   2767                                           (__v16sf)(__m512)(C), (__mmask16)-1, \
   2768                                           (int)(R)); })
   2769 
   2770 
   /* Masked fmadd on 16 floats with caller-chosen R: forwards A, B, C and mask U
    * to the _mask fmadd builtin; R is cast to int and passed last.
    * NOTE(review): by the _mask naming, lanes with a clear bit in U presumably
    * keep the value of A - confirm against the builtin definition. */
   2771 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
   2772   (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
   2773                                           (__v16sf)(__m512)(B), \
   2774                                           (__v16sf)(__m512)(C), \
   2775                                           (__mmask16)(U), (int)(R)); })
   2776 
   2777 
   /* Masked fmadd on 16 floats with caller-chosen R, using the _mask3 builtin:
    * forwards A, B, C and mask U; R is cast to int and passed last.
    * NOTE(review): by the _mask3 naming, lanes with a clear bit in U presumably
    * keep the value of C (the third operand) - confirm. */
   2778 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
   2779   (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
   2780                                            (__v16sf)(__m512)(B), \
   2781                                            (__v16sf)(__m512)(C), \
   2782                                            (__mmask16)(U), (int)(R)); })
   2783 
   2784 
   /* Masked fmadd on 16 floats with caller-chosen R, using the _maskz builtin:
    * forwards A, B, C and mask U; R is cast to int and passed last.
    * NOTE(review): by the _maskz naming, lanes with a clear bit in U are
    * presumably zeroed - confirm. */
   2785 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
   2786   (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
   2787                                            (__v16sf)(__m512)(B), \
   2788                                            (__v16sf)(__m512)(C), \
   2789                                            (__mmask16)(U), (int)(R)); })
   2790 
   2791 
   /* Unmasked fmsub on 16 floats with caller-chosen R: negates C lane-wise and
    * passes (A, B, -C) to the fmadd builtin with mask (__mmask16)-1; R is cast to
    * int and passed last.
    * NOTE(review): presumably A * B - C per lane. */
   2792 #define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
   2793   (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
   2794                                           (__v16sf)(__m512)(B), \
   2795                                           -(__v16sf)(__m512)(C), \
   2796                                           (__mmask16)-1, (int)(R)); })
   2797 
   2798 
   /* Masked fmsub on 16 floats with caller-chosen R: passes (A, B, -C) and mask U
    * to the _mask fmadd builtin; R is cast to int and passed last.
    * NOTE(review): presumably A * B - C where U is set; lanes with a clear bit
    * presumably keep A (the un-negated first operand) - confirm. */
   2799 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
   2800   (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
   2801                                           (__v16sf)(__m512)(B), \
   2802                                           -(__v16sf)(__m512)(C), \
   2803                                           (__mmask16)(U), (int)(R)); })
   2804 
   2805 
   /* Masked fmsub on 16 floats with caller-chosen R: passes (A, B, -C) and mask U
    * to the _maskz fmadd builtin; R is cast to int and passed last.
    * NOTE(review): presumably A * B - C where U is set and zero elsewhere. */
   2806 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
   2807   (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
   2808                                            (__v16sf)(__m512)(B), \
   2809                                            -(__v16sf)(__m512)(C), \
   2810                                            (__mmask16)(U), (int)(R)); })
   2811 
   2812 
   /* Unmasked fnmadd on 16 floats with caller-chosen R: negates A lane-wise and
    * passes (-A, B, C) to the fmadd builtin with mask (__mmask16)-1; R is cast to
    * int and passed last.
    * NOTE(review): presumably -(A * B) + C per lane. */
   2813 #define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
   2814   (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
   2815                                           (__v16sf)(__m512)(B), \
   2816                                           (__v16sf)(__m512)(C), (__mmask16)-1, \
   2817                                           (int)(R)); })
   2818 
   2819 
   /* Masked fnmadd on 16 floats with caller-chosen R: passes (-A, B, C) and mask
    * U to the _mask3 fmadd builtin; R is cast to int and passed last.
    * NOTE(review): presumably -(A * B) + C where U is set; lanes with a clear
    * bit presumably keep C, which is passed un-negated - confirm. */
   2820 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
   2821   (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
   2822                                            (__v16sf)(__m512)(B), \
   2823                                            (__v16sf)(__m512)(C), \
   2824                                            (__mmask16)(U), (int)(R)); })
   2825 
   2826 
   /* Masked fnmadd on 16 floats with caller-chosen R: passes (-A, B, C) and mask
    * U to the _maskz fmadd builtin; R is cast to int and passed last.
    * NOTE(review): presumably -(A * B) + C where U is set and zero elsewhere. */
   2827 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
   2828   (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
   2829                                            (__v16sf)(__m512)(B), \
   2830                                            (__v16sf)(__m512)(C), \
   2831                                            (__mmask16)(U), (int)(R)); })
   2832 
   2833 
   /* Unmasked fnmsub on 16 floats with caller-chosen R: negates both A and C
    * lane-wise and passes (-A, B, -C) to the fmadd builtin with mask
    * (__mmask16)-1; R is cast to int and passed last.
    * NOTE(review): presumably -(A * B) - C per lane. */
   2834 #define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
   2835   (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
   2836                                           (__v16sf)(__m512)(B), \
   2837                                           -(__v16sf)(__m512)(C), \
   2838                                           (__mmask16)-1, (int)(R)); })
   2839 
   2840 
   /* Masked fnmsub on 16 floats with caller-chosen R: passes (-A, B, -C) and mask
    * U to the _maskz fmadd builtin; R is cast to int and passed last.
    * NOTE(review): presumably -(A * B) - C where U is set and zero elsewhere. */
   2841 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
   2842   (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
   2843                                            (__v16sf)(__m512)(B), \
   2844                                            -(__v16sf)(__m512)(C), \
   2845                                            (__mmask16)(U), (int)(R)); })
   2846 
   2847 
   /* Unmasked fmadd on 16 floats (__v16sf): forwards __A, __B, __C unchanged to
    * the fmadd builtin with mask (__mmask16)-1 and _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably __A * __B + __C per lane. */
   2848 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2849 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
   2850 {
   2851   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2852                                                    (__v16sf) __B,
   2853                                                    (__v16sf) __C,
   2854                                                    (__mmask16) -1,
   2855                                                    _MM_FROUND_CUR_DIRECTION);
   2856 }
   2857 
   /* Masked fmadd on 16 floats: forwards __A, __B, __C and mask __U to the _mask
    * fmadd builtin with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): lanes with a clear bit in __U presumably keep __A - confirm
    * against the builtin definition. */
   2858 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2859 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   2860 {
   2861   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2862                                                    (__v16sf) __B,
   2863                                                    (__v16sf) __C,
   2864                                                    (__mmask16) __U,
   2865                                                    _MM_FROUND_CUR_DIRECTION);
   2866 }
   2867 
   /* Masked fmadd on 16 floats via the _mask3 builtin: forwards __A, __B, __C and
    * mask __U with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): lanes with a clear bit in __U presumably keep __C - confirm. */
   2868 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2869 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   2870 {
   2871   return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
   2872                                                     (__v16sf) __B,
   2873                                                     (__v16sf) __C,
   2874                                                     (__mmask16) __U,
   2875                                                     _MM_FROUND_CUR_DIRECTION);
   2876 }
   2877 
   /* Masked fmadd on 16 floats via the _maskz builtin: forwards __A, __B, __C and
    * mask __U with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): lanes with a clear bit in __U are presumably zeroed. */
   2878 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2879 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   2880 {
   2881   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
   2882                                                     (__v16sf) __B,
   2883                                                     (__v16sf) __C,
   2884                                                     (__mmask16) __U,
   2885                                                     _MM_FROUND_CUR_DIRECTION);
   2886 }
   2887 
   /* Unmasked fmsub on 16 floats: negates __C lane-wise and passes
    * (__A, __B, -__C) to the fmadd builtin with mask (__mmask16)-1 and
    * _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably __A * __B - __C per lane. */
   2888 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2889 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
   2890 {
   2891   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2892                                                    (__v16sf) __B,
   2893                                                    -(__v16sf) __C,
   2894                                                    (__mmask16) -1,
   2895                                                    _MM_FROUND_CUR_DIRECTION);
   2896 }
   2897 
   /* Masked fmsub on 16 floats: passes (__A, __B, -__C) and mask __U to the _mask
    * fmadd builtin with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably __A * __B - __C where __U is set; other lanes
    * presumably keep __A, which is passed un-negated - confirm. */
   2898 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2899 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   2900 {
   2901   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2902                                                    (__v16sf) __B,
   2903                                                    -(__v16sf) __C,
   2904                                                    (__mmask16) __U,
   2905                                                    _MM_FROUND_CUR_DIRECTION);
   2906 }
   2907 
   /* Masked fmsub on 16 floats: passes (__A, __B, -__C) and mask __U to the
    * _maskz fmadd builtin with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably __A * __B - __C where __U is set, zero elsewhere. */
   2908 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2909 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   2910 {
   2911   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
   2912                                                     (__v16sf) __B,
   2913                                                     -(__v16sf) __C,
   2914                                                     (__mmask16) __U,
   2915                                                     _MM_FROUND_CUR_DIRECTION);
   2916 }
   2917 
   /* Unmasked fnmadd on 16 floats: negates __A lane-wise and passes
    * (-__A, __B, __C) to the fmadd builtin with mask (__mmask16)-1 and
    * _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably -(__A * __B) + __C per lane. */
   2918 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2919 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
   2920 {
   2921   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
   2922                                                    (__v16sf) __B,
   2923                                                    (__v16sf) __C,
   2924                                                    (__mmask16) -1,
   2925                                                    _MM_FROUND_CUR_DIRECTION);
   2926 }
   2927 
   /* Masked fnmadd on 16 floats: passes (-__A, __B, __C) and mask __U to the
    * _mask3 fmadd builtin with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably -(__A * __B) + __C where __U is set; other lanes
    * presumably keep __C, which is passed un-negated - confirm. */
   2928 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2929 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   2930 {
   2931   return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
   2932                                                     (__v16sf) __B,
   2933                                                     (__v16sf) __C,
   2934                                                     (__mmask16) __U,
   2935                                                     _MM_FROUND_CUR_DIRECTION);
   2936 }
   2937 
   /* Masked fnmadd on 16 floats: passes (-__A, __B, __C) and mask __U to the
    * _maskz fmadd builtin with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably -(__A * __B) + __C where __U is set, zero
    * elsewhere. */
   2938 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2939 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   2940 {
   2941   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
   2942                                                     (__v16sf) __B,
   2943                                                     (__v16sf) __C,
   2944                                                     (__mmask16) __U,
   2945                                                     _MM_FROUND_CUR_DIRECTION);
   2946 }
   2947 
   /* Unmasked fnmsub on 16 floats: negates __A and __C lane-wise and passes
    * (-__A, __B, -__C) to the fmadd builtin with mask (__mmask16)-1 and
    * _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably -(__A * __B) - __C per lane. */
   2948 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2949 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
   2950 {
   2951   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
   2952                                                    (__v16sf) __B,
   2953                                                    -(__v16sf) __C,
   2954                                                    (__mmask16) -1,
   2955                                                    _MM_FROUND_CUR_DIRECTION);
   2956 }
   2957 
   /* Masked fnmsub on 16 floats: passes (-__A, __B, -__C) and mask __U to the
    * _maskz fmadd builtin with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably -(__A * __B) - __C where __U is set, zero
    * elsewhere. */
   2958 static __inline__ __m512 __DEFAULT_FN_ATTRS
   2959 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   2960 {
   2961   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
   2962                                                     (__v16sf) __B,
   2963                                                     -(__v16sf) __C,
   2964                                                     (__mmask16) __U,
   2965                                                     _MM_FROUND_CUR_DIRECTION);
   2966 }
   2967 
   /* Unmasked fmaddsub on 8 doubles (__v8df) with caller-chosen R: forwards A, B,
    * C unchanged to the fmaddsub builtin with mask (__mmask8)-1; R is cast to
    * int and passed last.
    * NOTE(review): presumably A * B with C alternately subtracted and added
    * across lanes; which parity adds is not visible here. */
   2968 #define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
   2969   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
   2970                                               (__v8df)(__m512d)(B), \
   2971                                               (__v8df)(__m512d)(C), \
   2972                                               (__mmask8)-1, (int)(R)); })
   2973 
   2974 
   /* Masked fmaddsub on 8 doubles with caller-chosen R: forwards A, B, C and mask
    * U to the _mask fmaddsub builtin; R is cast to int and passed last.
    * NOTE(review): lanes with a clear bit in U presumably keep A - confirm. */
   2975 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
   2976   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
   2977                                               (__v8df)(__m512d)(B), \
   2978                                               (__v8df)(__m512d)(C), \
   2979                                               (__mmask8)(U), (int)(R)); })
   2980 
   2981 
   /* Masked fmaddsub on 8 doubles with caller-chosen R: forwards A, B, C and mask
    * U to the _mask3 fmaddsub builtin; R is cast to int and passed last.
    * NOTE(review): lanes with a clear bit in U presumably keep C - confirm. */
   2982 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
   2983   (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
   2984                                                (__v8df)(__m512d)(B), \
   2985                                                (__v8df)(__m512d)(C), \
   2986                                                (__mmask8)(U), (int)(R)); })
   2987 
   2988 
   /* Masked fmaddsub on 8 doubles with caller-chosen R: forwards A, B, C and mask
    * U to the _maskz fmaddsub builtin; R is cast to int and passed last.
    * NOTE(review): lanes with a clear bit in U are presumably zeroed. */
   2989 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
   2990   (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
   2991                                                (__v8df)(__m512d)(B), \
   2992                                                (__v8df)(__m512d)(C), \
   2993                                                (__mmask8)(U), (int)(R)); })
   2994 
   2995 
   /* Unmasked fmsubadd on 8 doubles with caller-chosen R: implemented by negating
    * C lane-wise and calling the fmaddsub builtin on (A, B, -C) with mask
    * (__mmask8)-1; R is cast to int and passed last.
    * NOTE(review): negating C presumably swaps which lanes add and which
    * subtract relative to fmaddsub. */
   2996 #define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
   2997   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
   2998                                               (__v8df)(__m512d)(B), \
   2999                                               -(__v8df)(__m512d)(C), \
   3000                                               (__mmask8)-1, (int)(R)); })
   3001 
   3002 
   /* Masked fmsubadd on 8 doubles with caller-chosen R: calls the _mask fmaddsub
    * builtin on (A, B, -C) with mask U; R is cast to int and passed last.
    * NOTE(review): lanes with a clear bit in U presumably keep A, which is
    * passed un-negated - confirm. */
   3003 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
   3004   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
   3005                                               (__v8df)(__m512d)(B), \
   3006                                               -(__v8df)(__m512d)(C), \
   3007                                               (__mmask8)(U), (int)(R)); })
   3008 
   3009 
   /* Masked fmsubadd on 8 doubles with caller-chosen R: calls the _maskz
    * fmaddsub builtin on (A, B, -C) with mask U; R is cast to int and passed
    * last.
    * NOTE(review): lanes with a clear bit in U are presumably zeroed. */
   3010 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
   3011   (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
   3012                                                (__v8df)(__m512d)(B), \
   3013                                                -(__v8df)(__m512d)(C), \
   3014                                                (__mmask8)(U), (int)(R)); })
   3015 
   3016 
   /* Unmasked fmaddsub on 8 doubles (__v8df): forwards __A, __B, __C unchanged to
    * the fmaddsub builtin with mask (__mmask8)-1 and _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably __A * __B with __C alternately subtracted and
    * added across lanes; lane parity is not visible here. */
   3017 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3018 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
   3019 {
   3020   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   3021                                                        (__v8df) __B,
   3022                                                        (__v8df) __C,
   3023                                                        (__mmask8) -1,
   3024                                                        _MM_FROUND_CUR_DIRECTION);
   3025 }
   3026 
   /* Masked fmaddsub on 8 doubles: forwards __A, __B, __C and mask __U to the
    * _mask fmaddsub builtin with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): lanes with a clear bit in __U presumably keep __A. */
   3027 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3028 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   3029 {
   3030   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   3031                                                        (__v8df) __B,
   3032                                                        (__v8df) __C,
   3033                                                        (__mmask8) __U,
   3034                                                        _MM_FROUND_CUR_DIRECTION);
   3035 }
   3036 
   /* Masked fmaddsub on 8 doubles: forwards __A, __B, __C and mask __U to the
    * _mask3 fmaddsub builtin with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): lanes with a clear bit in __U presumably keep __C. */
   3037 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3038 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   3039 {
   3040   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
   3041                                                         (__v8df) __B,
   3042                                                         (__v8df) __C,
   3043                                                         (__mmask8) __U,
   3044                                                         _MM_FROUND_CUR_DIRECTION);
   3045 }
   3046 
   /* Masked fmaddsub on 8 doubles: forwards __A, __B, __C and mask __U to the
    * _maskz fmaddsub builtin with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): lanes with a clear bit in __U are presumably zeroed. */
   3047 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3048 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   3049 {
   3050   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
   3051                                                         (__v8df) __B,
   3052                                                         (__v8df) __C,
   3053                                                         (__mmask8) __U,
   3054                                                         _MM_FROUND_CUR_DIRECTION);
   3055 }
   3056 
   /* Unmasked fmsubadd on 8 doubles: implemented by negating __C lane-wise and
    * calling the fmaddsub builtin on (__A, __B, -__C) with mask (__mmask8)-1 and
    * _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): negating __C presumably swaps the add/subtract lanes
    * relative to fmaddsub. */
   3057 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3058 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
   3059 {
   3060   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   3061                                                        (__v8df) __B,
   3062                                                        -(__v8df) __C,
   3063                                                        (__mmask8) -1,
   3064                                                        _MM_FROUND_CUR_DIRECTION);
   3065 }
   3066 
   /* Masked fmsubadd on 8 doubles: calls the _mask fmaddsub builtin on
    * (__A, __B, -__C) with mask __U and _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): lanes with a clear bit in __U presumably keep __A, which is
    * passed un-negated - confirm. */
   3067 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3068 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   3069 {
   3070   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   3071                                                        (__v8df) __B,
   3072                                                        -(__v8df) __C,
   3073                                                        (__mmask8) __U,
   3074                                                        _MM_FROUND_CUR_DIRECTION);
   3075 }
   3076 
   /* Masked fmsubadd on 8 doubles: calls the _maskz fmaddsub builtin on
    * (__A, __B, -__C) with mask __U and _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): lanes with a clear bit in __U are presumably zeroed. */
   3077 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3078 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   3079 {
   3080   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
   3081                                                         (__v8df) __B,
   3082                                                         -(__v8df) __C,
   3083                                                         (__mmask8) __U,
   3084                                                         _MM_FROUND_CUR_DIRECTION);
   3085 }
   3086 
   /* Unmasked fmaddsub on 16 floats (__v16sf) with caller-chosen R: forwards A,
    * B, C unchanged to the fmaddsub builtin with mask (__mmask16)-1; R is cast
    * to int and passed last.
    * NOTE(review): presumably A * B with C alternately subtracted and added
    * across lanes; lane parity is not visible here. */
   3087 #define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
   3088   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
   3089                                              (__v16sf)(__m512)(B), \
   3090                                              (__v16sf)(__m512)(C), \
   3091                                              (__mmask16)-1, (int)(R)); })
   3092 
   3093 
   /* Masked fmaddsub on 16 floats with caller-chosen R: forwards A, B, C and mask
    * U to the _mask fmaddsub builtin; R is cast to int and passed last.
    * NOTE(review): lanes with a clear bit in U presumably keep A - confirm. */
   3094 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
   3095   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
   3096                                              (__v16sf)(__m512)(B), \
   3097                                              (__v16sf)(__m512)(C), \
   3098                                              (__mmask16)(U), (int)(R)); })
   3099 
   3100 
   /* Masked fmaddsub on 16 floats with caller-chosen R: forwards A, B, C and mask
    * U to the _mask3 fmaddsub builtin; R is cast to int and passed last.
    * NOTE(review): lanes with a clear bit in U presumably keep C - confirm. */
   3101 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
   3102   (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
   3103                                               (__v16sf)(__m512)(B), \
   3104                                               (__v16sf)(__m512)(C), \
   3105                                               (__mmask16)(U), (int)(R)); })
   3106 
   3107 
   /* Masked fmaddsub on 16 floats with caller-chosen R: forwards A, B, C and mask
    * U to the _maskz fmaddsub builtin; R is cast to int and passed last.
    * NOTE(review): lanes with a clear bit in U are presumably zeroed. */
   3108 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
   3109   (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
   3110                                               (__v16sf)(__m512)(B), \
   3111                                               (__v16sf)(__m512)(C), \
   3112                                               (__mmask16)(U), (int)(R)); })
   3113 
   3114 
   /* Unmasked fmsubadd on 16 floats with caller-chosen R: implemented by negating
    * C lane-wise and calling the fmaddsub builtin on (A, B, -C) with mask
    * (__mmask16)-1; R is cast to int and passed last.
    * NOTE(review): negating C presumably swaps the add/subtract lanes relative
    * to fmaddsub. */
   3115 #define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
   3116   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
   3117                                              (__v16sf)(__m512)(B), \
   3118                                              -(__v16sf)(__m512)(C), \
   3119                                              (__mmask16)-1, (int)(R)); })
   3120 
   3121 
   /* Masked fmsubadd on 16 floats with caller-chosen R: calls the _mask fmaddsub
    * builtin on (A, B, -C) with mask U; R is cast to int and passed last.
    * NOTE(review): lanes with a clear bit in U presumably keep A, which is
    * passed un-negated - confirm. */
   3122 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
   3123   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
   3124                                              (__v16sf)(__m512)(B), \
   3125                                              -(__v16sf)(__m512)(C), \
   3126                                              (__mmask16)(U), (int)(R)); })
   3127 
   3128 
   /* Masked fmsubadd on 16 floats with caller-chosen R: calls the _maskz
    * fmaddsub builtin on (A, B, -C) with mask U; R is cast to int and passed
    * last.
    * NOTE(review): lanes with a clear bit in U are presumably zeroed. */
   3129 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
   3130   (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
   3131                                               (__v16sf)(__m512)(B), \
   3132                                               -(__v16sf)(__m512)(C), \
   3133                                               (__mmask16)(U), (int)(R)); })
   3134 
   3135 
   /* Unmasked fmaddsub on 16 floats (__v16sf): forwards __A, __B, __C unchanged
    * to the fmaddsub builtin with mask (__mmask16)-1 and
    * _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably __A * __B with __C alternately subtracted and
    * added across lanes; lane parity is not visible here. */
   3136 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3137 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
   3138 {
   3139   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   3140                                                       (__v16sf) __B,
   3141                                                       (__v16sf) __C,
   3142                                                       (__mmask16) -1,
   3143                                                       _MM_FROUND_CUR_DIRECTION);
   3144 }
   3145 
   /* Masked fmaddsub on 16 floats: forwards __A, __B, __C and mask __U to the
    * _mask fmaddsub builtin with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): lanes with a clear bit in __U presumably keep __A. */
   3146 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3147 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   3148 {
   3149   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   3150                                                       (__v16sf) __B,
   3151                                                       (__v16sf) __C,
   3152                                                       (__mmask16) __U,
   3153                                                       _MM_FROUND_CUR_DIRECTION);
   3154 }
   3155 
   /* Masked fmaddsub on 16 floats: forwards __A, __B, __C and mask __U to the
    * _mask3 fmaddsub builtin with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): lanes with a clear bit in __U presumably keep __C. */
   3156 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3157 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   3158 {
   3159   return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
   3160                                                        (__v16sf) __B,
   3161                                                        (__v16sf) __C,
   3162                                                        (__mmask16) __U,
   3163                                                        _MM_FROUND_CUR_DIRECTION);
   3164 }
   3165 
   /* Masked fmaddsub on 16 floats: forwards __A, __B, __C and mask __U to the
    * _maskz fmaddsub builtin with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): lanes with a clear bit in __U are presumably zeroed. */
   3166 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3167 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   3168 {
   3169   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
   3170                                                        (__v16sf) __B,
   3171                                                        (__v16sf) __C,
   3172                                                        (__mmask16) __U,
   3173                                                        _MM_FROUND_CUR_DIRECTION);
   3174 }
   3175 
   /* Unmasked fmsubadd on 16 floats: implemented by negating __C lane-wise and
    * calling the fmaddsub builtin on (__A, __B, -__C) with mask (__mmask16)-1
    * and _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): negating __C presumably swaps the add/subtract lanes
    * relative to fmaddsub. */
   3176 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3177 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
   3178 {
   3179   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   3180                                                       (__v16sf) __B,
   3181                                                       -(__v16sf) __C,
   3182                                                       (__mmask16) -1,
   3183                                                       _MM_FROUND_CUR_DIRECTION);
   3184 }
   3185 
   /* Masked fmsubadd on 16 floats: calls the _mask fmaddsub builtin on
    * (__A, __B, -__C) with mask __U and _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): lanes with a clear bit in __U presumably keep __A, which is
    * passed un-negated - confirm. */
   3186 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3187 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   3188 {
   3189   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   3190                                                       (__v16sf) __B,
   3191                                                       -(__v16sf) __C,
   3192                                                       (__mmask16) __U,
   3193                                                       _MM_FROUND_CUR_DIRECTION);
   3194 }
   3195 
   /* Masked fmsubadd on 16 floats: calls the _maskz fmaddsub builtin on
    * (__A, __B, -__C) with mask __U and _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): lanes with a clear bit in __U are presumably zeroed. */
   3196 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3197 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   3198 {
   3199   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
   3200                                                        (__v16sf) __B,
   3201                                                        -(__v16sf) __C,
   3202                                                        (__mmask16) __U,
   3203                                                        _MM_FROUND_CUR_DIRECTION);
   3204 }
   3205 
   /* Masked fmsub on 8 doubles (__v8df) with caller-chosen R: forwards A, B, C
    * un-negated, plus mask U, to the dedicated fmsub _mask3 builtin; R is cast
    * to int and passed last.
    * NOTE(review): a dedicated builtin is presumably used instead of negating C
    * because C also supplies the lanes whose mask bit is clear - confirm. */
   3206 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
   3207   (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
   3208                                             (__v8df)(__m512d)(B), \
   3209                                             (__v8df)(__m512d)(C), \
   3210                                             (__mmask8)(U), (int)(R)); })
   3211 
   3212 
   /* Masked fmsub on 8 doubles: forwards __A, __B, __C un-negated, plus mask __U,
    * to the dedicated fmsub _mask3 builtin with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably __A * __B - __C where __U is set; other lanes
    * presumably keep __C - confirm against the builtin definition. */
   3213 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3214 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   3215 {
   3216   return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
   3217                                                      (__v8df) __B,
   3218                                                      (__v8df) __C,
   3219                                                      (__mmask8) __U,
   3220                                                      _MM_FROUND_CUR_DIRECTION);
   3221 }
   3222 
   /* Masked fmsub on 16 floats (__v16sf) with caller-chosen R: forwards A, B, C
    * un-negated, plus mask U, to the dedicated fmsub _mask3 builtin; R is cast
    * to int and passed last.
    * NOTE(review): lanes with a clear bit in U presumably keep C - confirm. */
   3223 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
   3224   (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
   3225                                            (__v16sf)(__m512)(B), \
   3226                                            (__v16sf)(__m512)(C), \
   3227                                            (__mmask16)(U), (int)(R)); })
   3228 
   3229 
   /* Masked fmsub on 16 floats: forwards __A, __B, __C un-negated, plus mask __U,
    * to the dedicated fmsub _mask3 builtin with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably __A * __B - __C where __U is set; other lanes
    * presumably keep __C - confirm. */
   3230 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3231 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   3232 {
   3233   return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
   3234                                                     (__v16sf) __B,
   3235                                                     (__v16sf) __C,
   3236                                                     (__mmask16) __U,
   3237                                                     _MM_FROUND_CUR_DIRECTION);
   3238 }
   3239 
   /* Masked fmsubadd on 8 doubles with caller-chosen R: forwards A, B, C
    * un-negated, plus mask U, to the dedicated fmsubadd _mask3 builtin; R is
    * cast to int and passed last.
    * NOTE(review): lanes with a clear bit in U presumably keep C - confirm. */
   3240 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
   3241   (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
   3242                                                (__v8df)(__m512d)(B), \
   3243                                                (__v8df)(__m512d)(C), \
   3244                                                (__mmask8)(U), (int)(R)); })
   3245 
   3246 
   /* Masked fmsubadd on 8 doubles: forwards __A, __B, __C un-negated, plus mask
    * __U, to the dedicated fmsubadd _mask3 builtin with
    * _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): lanes with a clear bit in __U presumably keep __C. */
   3247 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3248 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   3249 {
   3250   return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
   3251                                                         (__v8df) __B,
   3252                                                         (__v8df) __C,
   3253                                                         (__mmask8) __U,
   3254                                                         _MM_FROUND_CUR_DIRECTION);
   3255 }
   3256 
   /* Masked fmsubadd on 16 floats with caller-chosen R: forwards A, B, C
    * un-negated, plus mask U, to the dedicated fmsubadd _mask3 builtin; R is
    * cast to int and passed last.
    * NOTE(review): lanes with a clear bit in U presumably keep C - confirm. */
   3257 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
   3258   (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
   3259                                               (__v16sf)(__m512)(B), \
   3260                                               (__v16sf)(__m512)(C), \
   3261                                               (__mmask16)(U), (int)(R)); })
   3262 
   3263 
   /* Masked fmsubadd on 16 floats: forwards __A, __B, __C un-negated, plus mask
    * __U, to the dedicated fmsubadd _mask3 builtin with
    * _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): lanes with a clear bit in __U presumably keep __C. */
   3264 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3265 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   3266 {
   3267   return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
   3268                                                        (__v16sf) __B,
   3269                                                        (__v16sf) __C,
   3270                                                        (__mmask16) __U,
   3271                                                        _MM_FROUND_CUR_DIRECTION);
   3272 }
   3273 
   /* Masked fnmadd on 8 doubles with caller-chosen R: forwards A, B, C
    * un-negated, plus mask U, to the dedicated fnmadd _mask builtin; R is cast
    * to int and passed last.
    * NOTE(review): a dedicated builtin is presumably used instead of negating A
    * because A also supplies the lanes whose mask bit is clear - confirm. */
   3274 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
   3275   (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \
   3276                                             (__v8df)(__m512d)(B), \
   3277                                             (__v8df)(__m512d)(C), \
   3278                                             (__mmask8)(U), (int)(R)); })
   3279 
   3280 
   /* Masked fnmadd on 8 doubles: forwards __A, __B, __C un-negated, plus mask
    * __U, to the dedicated fnmadd _mask builtin with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably -(__A * __B) + __C where __U is set; other lanes
    * presumably keep __A - confirm against the builtin definition. */
   3281 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3282 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   3283 {
   3284   return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
   3285                                                      (__v8df) __B,
   3286                                                      (__v8df) __C,
   3287                                                      (__mmask8) __U,
   3288                                                      _MM_FROUND_CUR_DIRECTION);
   3289 }
   3290 
   /* Masked fnmadd on 16 floats with caller-chosen R: forwards A, B, C
    * un-negated, plus mask U, to the dedicated fnmadd _mask builtin; R is cast
    * to int and passed last.
    * NOTE(review): lanes with a clear bit in U presumably keep A - confirm. */
   3291 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
   3292   (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \
   3293                                            (__v16sf)(__m512)(B), \
   3294                                            (__v16sf)(__m512)(C), \
   3295                                            (__mmask16)(U), (int)(R)); })
   3296 
   3297 
   /* Masked fnmadd on 16 floats: forwards __A, __B, __C un-negated, plus mask
    * __U, to the dedicated fnmadd _mask builtin with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably -(__A * __B) + __C where __U is set; other lanes
    * presumably keep __A - confirm. */
   3298 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3299 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   3300 {
   3301   return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
   3302                                                     (__v16sf) __B,
   3303                                                     (__v16sf) __C,
   3304                                                     (__mmask16) __U,
   3305                                                     _MM_FROUND_CUR_DIRECTION);
   3306 }
   3307 
   /* Masked fnmsub on 8 doubles with caller-chosen R: forwards A, B, C
    * un-negated, plus mask U, to the dedicated fnmsub _mask builtin; R is cast
    * to int and passed last.
    * NOTE(review): lanes with a clear bit in U presumably keep A - confirm. */
   3308 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
   3309   (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \
   3310                                             (__v8df)(__m512d)(B), \
   3311                                             (__v8df)(__m512d)(C), \
   3312                                             (__mmask8)(U), (int)(R)); })
   3313 
   3314 
   /* Masked fnmsub on 8 doubles with caller-chosen R: forwards A, B, C
    * un-negated, plus mask U, to the dedicated fnmsub _mask3 builtin; R is cast
    * to int and passed last.
    * NOTE(review): lanes with a clear bit in U presumably keep C - confirm. */
   3315 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
   3316   (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \
   3317                                              (__v8df)(__m512d)(B), \
   3318                                              (__v8df)(__m512d)(C), \
   3319                                              (__mmask8)(U), (int)(R)); })
   3320 
   3321 
   /* Masked fnmsub on 8 doubles: forwards __A, __B, __C un-negated, plus mask
    * __U, to the dedicated fnmsub _mask builtin with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably -(__A * __B) - __C where __U is set; other lanes
    * presumably keep __A - confirm. */
   3322 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3323 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   3324 {
   3325   return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
   3326                                                      (__v8df) __B,
   3327                                                      (__v8df) __C,
   3328                                                      (__mmask8) __U,
   3329                                                      _MM_FROUND_CUR_DIRECTION);
   3330 }
   3331 
   /* Masked fnmsub on 8 doubles: forwards __A, __B, __C un-negated, plus mask
    * __U, to the dedicated fnmsub _mask3 builtin with _MM_FROUND_CUR_DIRECTION.
    * NOTE(review): presumably -(__A * __B) - __C where __U is set; other lanes
    * presumably keep __C - confirm. */
   3332 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3333 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   3334 {
   3335   return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
   3336                                                       (__v8df) __B,
   3337                                                       (__v8df) __C,
   3338                                                       (__mmask8) __U,
   3339                                                       _MM_FROUND_CUR_DIRECTION);
   3340 }
   3341 
   3342 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
   3343   (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \
   3344                                            (__v16sf)(__m512)(B), \
   3345                                            (__v16sf)(__m512)(C), \
   3346                                            (__mmask16)(U), (int)(R)); })
   3347 
   3348 
   3349 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
   3350   (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \
   3351                                             (__v16sf)(__m512)(B), \
   3352                                             (__v16sf)(__m512)(C), \
   3353                                             (__mmask16)(U), (int)(R)); })
   3354 
   3355 
   3356 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3357 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   3358 {
   3359   return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
   3360                                                     (__v16sf) __B,
   3361                                                     (__v16sf) __C,
   3362                                                     (__mmask16) __U,
   3363                                                     _MM_FROUND_CUR_DIRECTION);
   3364 }
   3365 
   3366 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3367 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   3368 {
   3369   return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
   3370                                                      (__v16sf) __B,
   3371                                                      (__v16sf) __C,
   3372                                                      (__mmask16) __U,
   3373                                                      _MM_FROUND_CUR_DIRECTION);
   3374 }
   3375 
   3376 
   3377 
   3378 /* Vector permutations */
   3379 
   3380 static __inline __m512i __DEFAULT_FN_ATTRS
   3381 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
   3382 {
   3383   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
   3384                                                        /* idx */ ,
   3385                                                        (__v16si) __A,
   3386                                                        (__v16si) __B,
   3387                                                        (__mmask16) -1);
   3388 }
   3389 
   3390 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3391 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
   3392                                 __m512i __I, __m512i __B)
   3393 {
   3394   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
   3395                                                         /* idx */ ,
   3396                                                         (__v16si) __A,
   3397                                                         (__v16si) __B,
   3398                                                         (__mmask16) __U);
   3399 }
   3400 
   3401 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3402 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
   3403                                  __m512i __I, __m512i __B)
   3404 {
   3405   return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
   3406                                                         /* idx */ ,
   3407                                                         (__v16si) __A,
   3408                                                         (__v16si) __B,
   3409                                                         (__mmask16) __U);
   3410 }
   3411 
   3412 static __inline __m512i __DEFAULT_FN_ATTRS
   3413 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
   3414 {
   3415   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
   3416                                                        /* idx */ ,
   3417                                                        (__v8di) __A,
   3418                                                        (__v8di) __B,
   3419                                                        (__mmask8) -1);
   3420 }
   3421 
   3422 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3423 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
   3424                                 __m512i __B)
   3425 {
   3426   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
   3427                                                        /* idx */ ,
   3428                                                        (__v8di) __A,
   3429                                                        (__v8di) __B,
   3430                                                        (__mmask8) __U);
   3431 }
   3432 
   3433 
   3434 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3435 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
   3436          __m512i __I, __m512i __B)
   3437 {
   3438   return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
   3439                                                         /* idx */ ,
   3440                                                         (__v8di) __A,
   3441                                                         (__v8di) __B,
   3442                                                         (__mmask8) __U);
   3443 }
   3444 
   3445 #define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
   3446   (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
   3447                                          (__v8di)(__m512i)(B), (int)(I), \
   3448                                          (__v8di)_mm512_setzero_si512(), \
   3449                                          (__mmask8)-1); })
   3450 
   3451 #define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\
   3452   (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
   3453                                          (__v8di)(__m512i)(B), (int)(imm), \
   3454                                          (__v8di)(__m512i)(W), \
   3455                                          (__mmask8)(U)); })
   3456 
   3457 #define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\
   3458   (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
   3459                                          (__v8di)(__m512i)(B), (int)(imm), \
   3460                                          (__v8di)_mm512_setzero_si512(), \
   3461                                          (__mmask8)(U)); })
   3462 
   3463 #define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
   3464   (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
   3465                                          (__v16si)(__m512i)(B), (int)(I), \
   3466                                          (__v16si)_mm512_setzero_si512(), \
   3467                                          (__mmask16)-1); })
   3468 
   3469 #define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\
   3470   (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
   3471                                          (__v16si)(__m512i)(B), (int)(imm), \
   3472                                          (__v16si)(__m512i)(W), \
   3473                                          (__mmask16)(U)); })
   3474 
   3475 #define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\
   3476   (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
   3477                                          (__v16si)(__m512i)(B), (int)(imm), \
   3478                                          (__v16si)_mm512_setzero_si512(), \
   3479                                          (__mmask16)(U)); })
   3480 /* Vector Extract */
   3481 
   3482 #define _mm512_extractf64x4_pd(A, I) __extension__ ({                    \
   3483   (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
   3484                                             (__v4df)_mm256_setzero_si256(), \
   3485                                             (__mmask8)-1); })
   3486 
   3487 #define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\
   3488   (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
   3489                                             (__v4df)(__m256d)(W), \
   3490                                             (__mmask8)(U)); })
   3491 
   3492 #define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\
   3493   (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
   3494                                             (__v4df)_mm256_setzero_pd(), \
   3495                                             (__mmask8)(U)); })
   3496 
   3497 #define _mm512_extractf32x4_ps(A, I) __extension__ ({                    \
   3498   (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
   3499                                            (__v4sf)_mm_setzero_ps(), \
   3500                                            (__mmask8)-1); })
   3501 
   3502 #define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\
   3503   (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
   3504                                            (__v4sf)(__m128)(W), \
   3505                                            (__mmask8)(U)); })
   3506 
   3507 #define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\
   3508   (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
   3509                                            (__v4sf)_mm_setzero_ps(), \
   3510                                            (__mmask8)(U)); })
   3511 /* Vector Blend */
   3512 
   3513 static __inline __m512d __DEFAULT_FN_ATTRS
   3514 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
   3515 {
   3516   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
   3517                  (__v8df) __W,
   3518                  (__v8df) __A);
   3519 }
   3520 
   3521 static __inline __m512 __DEFAULT_FN_ATTRS
   3522 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
   3523 {
   3524   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
   3525                 (__v16sf) __W,
   3526                 (__v16sf) __A);
   3527 }
   3528 
   3529 static __inline __m512i __DEFAULT_FN_ATTRS
   3530 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
   3531 {
   3532   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
   3533                 (__v8di) __W,
   3534                 (__v8di) __A);
   3535 }
   3536 
   3537 static __inline __m512i __DEFAULT_FN_ATTRS
   3538 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
   3539 {
   3540   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
   3541                 (__v16si) __W,
   3542                 (__v16si) __A);
   3543 }
   3544 
   3545 /* Compare */
   3546 
   3547 #define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
   3548   (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
   3549                                           (__v16sf)(__m512)(B), (int)(P), \
   3550                                           (__mmask16)-1, (int)(R)); })
   3551 
   3552 #define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
   3553   (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
   3554                                           (__v16sf)(__m512)(B), (int)(P), \
   3555                                           (__mmask16)(U), (int)(R)); })
   3556 
   3557 #define _mm512_cmp_ps_mask(A, B, P) \
   3558   _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
   3559 
   3560 #define _mm512_mask_cmp_ps_mask(U, A, B, P) \
   3561   _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
   3562 
   3563 #define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
   3564   (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
   3565                                          (__v8df)(__m512d)(B), (int)(P), \
   3566                                          (__mmask8)-1, (int)(R)); })
   3567 
   3568 #define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
   3569   (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
   3570                                          (__v8df)(__m512d)(B), (int)(P), \
   3571                                          (__mmask8)(U), (int)(R)); })
   3572 
   3573 #define _mm512_cmp_pd_mask(A, B, P) \
   3574   _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
   3575 
   3576 #define _mm512_mask_cmp_pd_mask(U, A, B, P) \
   3577   _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
   3578 
   3579 /* Conversion */
   3580 
   3581 #define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
   3582   (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
   3583                                              (__v16si)_mm512_undefined_epi32(), \
   3584                                              (__mmask16)-1, (int)(R)); })
   3585 
   3586 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \
   3587   (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
   3588                                              (__v16si)(__m512i)(W), \
   3589                                              (__mmask16)(U), (int)(R)); })
   3590 
   3591 #define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \
   3592   (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
   3593                                              (__v16si)_mm512_setzero_si512(), \
   3594                                              (__mmask16)(U), (int)(R)); })
   3595 
   3596 
   3597 static __inline __m512i __DEFAULT_FN_ATTRS
   3598 _mm512_cvttps_epu32(__m512 __A)
   3599 {
   3600   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   3601                   (__v16si)
   3602                   _mm512_setzero_si512 (),
   3603                   (__mmask16) -1,
   3604                   _MM_FROUND_CUR_DIRECTION);
   3605 }
   3606 
   3607 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3608 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
   3609 {
   3610   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   3611                    (__v16si) __W,
   3612                    (__mmask16) __U,
   3613                    _MM_FROUND_CUR_DIRECTION);
   3614 }
   3615 
   3616 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3617 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
   3618 {
   3619   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   3620                    (__v16si) _mm512_setzero_si512 (),
   3621                    (__mmask16) __U,
   3622                    _MM_FROUND_CUR_DIRECTION);
   3623 }
   3624 
   3625 #define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
   3626   (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
   3627                                           (__v16sf)_mm512_setzero_ps(), \
   3628                                           (__mmask16)-1, (int)(R)); })
   3629 
   3630 #define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \
   3631   (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
   3632                                           (__v16sf)(__m512)(W), \
   3633                                           (__mmask16)(U), (int)(R)); })
   3634 
   3635 #define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \
   3636   (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
   3637                                           (__v16sf)_mm512_setzero_ps(), \
   3638                                           (__mmask16)(U), (int)(R)); })
   3639 
   3640 #define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
   3641   (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
   3642                                            (__v16sf)_mm512_setzero_ps(), \
   3643                                            (__mmask16)-1, (int)(R)); })
   3644 
   3645 #define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \
   3646   (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
   3647                                            (__v16sf)(__m512)(W), \
   3648                                            (__mmask16)(U), (int)(R)); })
   3649 
   3650 #define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \
   3651   (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
   3652                                            (__v16sf)_mm512_setzero_ps(), \
   3653                                            (__mmask16)(U), (int)(R)); })
   3654 
   3655 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3656 _mm512_cvtepu32_ps (__m512i __A)
   3657 {
   3658   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
   3659                  (__v16sf) _mm512_undefined_ps (),
   3660                  (__mmask16) -1,
   3661                  _MM_FROUND_CUR_DIRECTION);
   3662 }
   3663 
   3664 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3665 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
   3666 {
   3667   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
   3668                  (__v16sf) __W,
   3669                  (__mmask16) __U,
   3670                  _MM_FROUND_CUR_DIRECTION);
   3671 }
   3672 
   3673 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3674 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
   3675 {
   3676   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
   3677                  (__v16sf) _mm512_setzero_ps (),
   3678                  (__mmask16) __U,
   3679                  _MM_FROUND_CUR_DIRECTION);
   3680 }
   3681 
   3682 static __inline __m512d __DEFAULT_FN_ATTRS
   3683 _mm512_cvtepi32_pd(__m256i __A)
   3684 {
   3685   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
   3686                 (__v8df)
   3687                 _mm512_setzero_pd (),
   3688                 (__mmask8) -1);
   3689 }
   3690 
   3691 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3692 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
   3693 {
   3694   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
   3695                 (__v8df) __W,
   3696                 (__mmask8) __U);
   3697 }
   3698 
   3699 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3700 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
   3701 {
   3702   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
   3703                 (__v8df) _mm512_setzero_pd (),
   3704                 (__mmask8) __U);
   3705 }
   3706 
   3707 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3708 _mm512_cvtepi32_ps (__m512i __A)
   3709 {
   3710   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
   3711                 (__v16sf) _mm512_undefined_ps (),
   3712                 (__mmask16) -1,
   3713                 _MM_FROUND_CUR_DIRECTION);
   3714 }
   3715 
   3716 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3717 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
   3718 {
   3719   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
   3720                 (__v16sf) __W,
   3721                 (__mmask16) __U,
   3722                 _MM_FROUND_CUR_DIRECTION);
   3723 }
   3724 
   3725 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3726 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
   3727 {
   3728   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
   3729                 (__v16sf) _mm512_setzero_ps (),
   3730                 (__mmask16) __U,
   3731                 _MM_FROUND_CUR_DIRECTION);
   3732 }
   3733 
   3734 static __inline __m512d __DEFAULT_FN_ATTRS
   3735 _mm512_cvtepu32_pd(__m256i __A)
   3736 {
   3737   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
   3738                 (__v8df)
   3739                 _mm512_setzero_pd (),
   3740                 (__mmask8) -1);
   3741 }
   3742 
   3743 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3744 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
   3745 {
   3746   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
   3747                   (__v8df) __W,
   3748                   (__mmask8) __U);
   3749 }
   3750 
   3751 static __inline__ __m512d __DEFAULT_FN_ATTRS
   3752 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
   3753 {
   3754   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
   3755                   (__v8df) _mm512_setzero_pd (),
   3756                   (__mmask8) __U);
   3757 }
   3758 
   3759 #define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
   3760   (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
   3761                                           (__v8sf)_mm256_setzero_ps(), \
   3762                                           (__mmask8)-1, (int)(R)); })
   3763 
   3764 #define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \
   3765   (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
   3766                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
   3767                                           (int)(R)); })
   3768 
   3769 #define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \
   3770   (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
   3771                                           (__v8sf)_mm256_setzero_ps(), \
   3772                                           (__mmask8)(U), (int)(R)); })
   3773 
   3774 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3775 _mm512_cvtpd_ps (__m512d __A)
   3776 {
   3777   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
   3778                 (__v8sf) _mm256_undefined_ps (),
   3779                 (__mmask8) -1,
   3780                 _MM_FROUND_CUR_DIRECTION);
   3781 }
   3782 
   3783 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3784 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
   3785 {
   3786   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
   3787                 (__v8sf) __W,
   3788                 (__mmask8) __U,
   3789                 _MM_FROUND_CUR_DIRECTION);
   3790 }
   3791 
   3792 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3793 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
   3794 {
   3795   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
   3796                 (__v8sf) _mm256_setzero_ps (),
   3797                 (__mmask8) __U,
   3798                 _MM_FROUND_CUR_DIRECTION);
   3799 }
   3800 
   3801 #define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \
   3802   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   3803                                             (__v16hi)_mm256_undefined_si256(), \
   3804                                             (__mmask16)-1); })
   3805 
   3806 #define _mm512_mask_cvt_roundps_ph(U, W, A, I) __extension__ ({ \
   3807   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   3808                                             (__v16hi)(__m256i)(U), \
   3809                                             (__mmask16)(W)); })
   3810 
   3811 #define _mm512_maskz_cvt_roundps_ph(W, A, I) __extension__ ({ \
   3812   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   3813                                             (__v16hi)_mm256_setzero_si256(), \
   3814                                             (__mmask16)(W)); })
   3815 
   3816 #define _mm512_cvtps_ph(A, I) __extension__ ({ \
   3817   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   3818                                             (__v16hi)_mm256_setzero_si256(), \
   3819                                             (__mmask16)-1); })
   3820 
   3821 #define _mm512_mask_cvtps_ph(U, W, A, I) __extension__ ({ \
   3822   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   3823                                             (__v16hi)(__m256i)(U), \
   3824                                             (__mmask16)(W)); })
   3825 
   3826 #define _mm512_maskz_cvtps_ph(W, A, I) __extension__ ({\
   3827   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
   3828                                             (__v16hi)_mm256_setzero_si256(), \
   3829                                             (__mmask16)(W)); })
   3830 
   3831 #define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \
   3832   (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
   3833                                            (__v16sf)_mm512_undefined_ps(), \
   3834                                            (__mmask16)-1, (int)(R)); })
   3835 
   3836 #define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \
   3837   (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
   3838                                            (__v16sf)(__m512)(W), \
   3839                                            (__mmask16)(U), (int)(R)); })
   3840 
   3841 #define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \
   3842   (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
   3843                                            (__v16sf)_mm512_setzero_ps(), \
   3844                                            (__mmask16)(U), (int)(R)); })
   3845 
   3846 
   3847 static  __inline __m512 __DEFAULT_FN_ATTRS
   3848 _mm512_cvtph_ps(__m256i __A)
   3849 {
   3850   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
   3851                 (__v16sf)
   3852                 _mm512_setzero_ps (),
   3853                 (__mmask16) -1,
   3854                 _MM_FROUND_CUR_DIRECTION);
   3855 }
   3856 
   3857 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3858 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
   3859 {
   3860   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
   3861                  (__v16sf) __W,
   3862                  (__mmask16) __U,
   3863                  _MM_FROUND_CUR_DIRECTION);
   3864 }
   3865 
   3866 static __inline__ __m512 __DEFAULT_FN_ATTRS
   3867 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
   3868 {
   3869   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
   3870                  (__v16sf) _mm512_setzero_ps (),
   3871                  (__mmask16) __U,
   3872                  _MM_FROUND_CUR_DIRECTION);
   3873 }
   3874 
   3875 #define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
   3876   (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
   3877                                             (__v8si)_mm256_setzero_si256(), \
   3878                                             (__mmask8)-1, (int)(R)); })
   3879 
   3880 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \
   3881   (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
   3882                                             (__v8si)(__m256i)(W), \
   3883                                             (__mmask8)(U), (int)(R)); })
   3884 
   3885 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \
   3886   (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
   3887                                             (__v8si)_mm256_setzero_si256(), \
   3888                                             (__mmask8)(U), (int)(R)); })
   3889 
   3890 static __inline __m256i __DEFAULT_FN_ATTRS
   3891 _mm512_cvttpd_epi32(__m512d __a)
   3892 {
   3893   return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
   3894                                                    (__v8si)_mm256_setzero_si256(),
   3895                                                    (__mmask8) -1,
   3896                                                     _MM_FROUND_CUR_DIRECTION);
   3897 }
   3898 
   3899 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3900 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
   3901 {
   3902   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
   3903                   (__v8si) __W,
   3904                   (__mmask8) __U,
   3905                   _MM_FROUND_CUR_DIRECTION);
   3906 }
   3907 
   3908 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3909 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
   3910 {
   3911   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
   3912                   (__v8si) _mm256_setzero_si256 (),
   3913                   (__mmask8) __U,
   3914                   _MM_FROUND_CUR_DIRECTION);
   3915 }
   3916 
   3917 #define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
   3918   (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
   3919                                             (__v16si)_mm512_setzero_si512(), \
   3920                                             (__mmask16)-1, (int)(R)); })
   3921 
   3922 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \
   3923   (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
   3924                                             (__v16si)(__m512i)(W), \
   3925                                             (__mmask16)(U), (int)(R)); })
   3926 
   3927 #define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \
   3928   (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
   3929                                             (__v16si)_mm512_setzero_si512(), \
   3930                                             (__mmask16)(U), (int)(R)); })
   3931 
   3932 static __inline __m512i __DEFAULT_FN_ATTRS
   3933 _mm512_cvttps_epi32(__m512 __a)
   3934 {
   3935   return (__m512i)
   3936     __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
   3937                                      (__v16si) _mm512_setzero_si512 (),
   3938                                      (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
   3939 }
   3940 
   3941 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3942 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
   3943 {
   3944   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
   3945                   (__v16si) __W,
   3946                   (__mmask16) __U,
   3947                   _MM_FROUND_CUR_DIRECTION);
   3948 }
   3949 
   3950 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3951 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
   3952 {
   3953   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
   3954                   (__v16si) _mm512_setzero_si512 (),
   3955                   (__mmask16) __U,
   3956                   _MM_FROUND_CUR_DIRECTION);
   3957 }
   3958 
   3959 #define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
   3960   (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
   3961                                            (__v16si)_mm512_setzero_si512(), \
   3962                                            (__mmask16)-1, (int)(R)); })
   3963 
   3964 #define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \
   3965   (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
   3966                                            (__v16si)(__m512i)(W), \
   3967                                            (__mmask16)(U), (int)(R)); })
   3968 
   /* Zero-masked float -> int32 conversion of A with rounding control R;
      U is the write mask and the pass-through vector is all zeros. */
   #define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \
     (__m512i)__builtin_ia32_cvtps2dq512_mask( \
         (__v16sf)(__m512)(A), \
         (__v16si)_mm512_setzero_si512(), \
         (__mmask16)(U), \
         (int)(R)); })
   3973 
   3974 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3975 _mm512_cvtps_epi32 (__m512 __A)
   3976 {
   3977   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
   3978                  (__v16si) _mm512_undefined_epi32 (),
   3979                  (__mmask16) -1,
   3980                  _MM_FROUND_CUR_DIRECTION);
   3981 }
   3982 
   3983 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3984 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
   3985 {
   3986   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
   3987                  (__v16si) __W,
   3988                  (__mmask16) __U,
   3989                  _MM_FROUND_CUR_DIRECTION);
   3990 }
   3991 
   3992 static __inline__ __m512i __DEFAULT_FN_ATTRS
   3993 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
   3994 {
   3995   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
   3996                  (__v16si)
   3997                  _mm512_setzero_si512 (),
   3998                  (__mmask16) __U,
   3999                  _MM_FROUND_CUR_DIRECTION);
   4000 }
   4001 
   /* Double -> int32 conversion of the 8 lanes of A with rounding control R;
      the 256-bit result uses an all-ones mask and a zero pass-through. */
   #define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
     (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
         (__v8df)(__m512d)(A), \
         (__v8si)_mm256_setzero_si256(), \
         (__mmask8)-1, \
         (int)(R)); })
   4006 
   /* Merge-masked double -> int32 conversion of A with rounding control R;
      W (256-bit) is the pass-through vector and U the write mask. */
   #define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \
     (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
         (__v8df)(__m512d)(A), \
         (__v8si)(__m256i)(W), \
         (__mmask8)(U), \
         (int)(R)); })
   4011 
   /* Zero-masked double -> int32 conversion of A with rounding control R;
      U is the write mask and the pass-through vector is all zeros. */
   #define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \
     (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
         (__v8df)(__m512d)(A), \
         (__v8si)_mm256_setzero_si256(), \
         (__mmask8)(U), \
         (int)(R)); })
   4016 
   4017 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4018 _mm512_cvtpd_epi32 (__m512d __A)
   4019 {
   4020   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
   4021                  (__v8si)
   4022                  _mm256_undefined_si256 (),
   4023                  (__mmask8) -1,
   4024                  _MM_FROUND_CUR_DIRECTION);
   4025 }
   4026 
   4027 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4028 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
   4029 {
   4030   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
   4031                  (__v8si) __W,
   4032                  (__mmask8) __U,
   4033                  _MM_FROUND_CUR_DIRECTION);
   4034 }
   4035 
   4036 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4037 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
   4038 {
   4039   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
   4040                  (__v8si)
   4041                  _mm256_setzero_si256 (),
   4042                  (__mmask8) __U,
   4043                  _MM_FROUND_CUR_DIRECTION);
   4044 }
   4045 
   /* Float -> unsigned int32 conversion of A with rounding control R;
      all-ones mask, zero pass-through. */
   #define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
     (__m512i)__builtin_ia32_cvtps2udq512_mask( \
         (__v16sf)(__m512)(A), \
         (__v16si)_mm512_setzero_si512(), \
         (__mmask16)-1, \
         (int)(R)); })
   4050 
   /* Merge-masked float -> unsigned int32 conversion of A with rounding
      control R; W is the pass-through vector and U the write mask. */
   #define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \
     (__m512i)__builtin_ia32_cvtps2udq512_mask( \
         (__v16sf)(__m512)(A), \
         (__v16si)(__m512i)(W), \
         (__mmask16)(U), \
         (int)(R)); })
   4055 
   /* Zero-masked float -> unsigned int32 conversion of A with rounding
      control R; U is the write mask, the pass-through vector is all zeros. */
   #define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \
     (__m512i)__builtin_ia32_cvtps2udq512_mask( \
         (__v16sf)(__m512)(A), \
         (__v16si)_mm512_setzero_si512(), \
         (__mmask16)(U), \
         (int)(R)); })
   4060 
   4061 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4062 _mm512_cvtps_epu32 ( __m512 __A)
   4063 {
   4064   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
   4065                   (__v16si)\
   4066                   _mm512_undefined_epi32 (),\
   4067                   (__mmask16) -1,\
   4068                   _MM_FROUND_CUR_DIRECTION);\
   4069 }
   4070 
   4071 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4072 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
   4073 {
   4074   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
   4075                   (__v16si) __W,
   4076                   (__mmask16) __U,
   4077                   _MM_FROUND_CUR_DIRECTION);
   4078 }
   4079 
   4080 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4081 _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
   4082 {
   4083   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
   4084                   (__v16si)
   4085                   _mm512_setzero_si512 (),
   4086                   (__mmask16) __U ,
   4087                   _MM_FROUND_CUR_DIRECTION);
   4088 }
   4089 
   /* Double -> unsigned int32 conversion of the 8 lanes of A with rounding
      control R; all-ones mask, zero pass-through, 256-bit result. */
   #define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
     (__m256i)__builtin_ia32_cvtpd2udq512_mask( \
         (__v8df)(__m512d)(A), \
         (__v8si)_mm256_setzero_si256(), \
         (__mmask8)-1, \
         (int)(R)); })
   4094 
   /* Merge-masked double -> unsigned int32 conversion of A with rounding
      control R; W (256-bit) is the pass-through vector and U the write mask.

      W is cast through __m256i before the element-type cast, exactly as
      _mm512_mask_cvt_roundpd_epi32 does, so both macros type-check their
      pass-through argument the same way. */
   #define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \
     (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                               (__v8si)(__m256i)(W), \
                                               (__mmask8)(U), (int)(R)); })
   4099 
   /* Zero-masked double -> unsigned int32 conversion of A with rounding
      control R; U is the write mask, the pass-through vector is all zeros. */
   #define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \
     (__m256i)__builtin_ia32_cvtpd2udq512_mask( \
         (__v8df)(__m512d)(A), \
         (__v8si)_mm256_setzero_si256(), \
         (__mmask8)(U), \
         (int)(R)); })
   4104 
   4105 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4106 _mm512_cvtpd_epu32 (__m512d __A)
   4107 {
   4108   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
   4109                   (__v8si)
   4110                   _mm256_undefined_si256 (),
   4111                   (__mmask8) -1,
   4112                   _MM_FROUND_CUR_DIRECTION);
   4113 }
   4114 
   4115 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4116 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
   4117 {
   4118   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
   4119                   (__v8si) __W,
   4120                   (__mmask8) __U,
   4121                   _MM_FROUND_CUR_DIRECTION);
   4122 }
   4123 
   4124 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4125 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
   4126 {
   4127   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
   4128                   (__v8si)
   4129                   _mm256_setzero_si256 (),
   4130                   (__mmask8) __U,
   4131                   _MM_FROUND_CUR_DIRECTION);
   4132 }
   4133 
   4134 /* Unpack and Interleave */
   4135 
   4136 static __inline __m512d __DEFAULT_FN_ATTRS
   4137 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
   4138 {
   4139   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
   4140                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
   4141 }
   4142 
   4143 static __inline__ __m512d __DEFAULT_FN_ATTRS
   4144 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   4145 {
   4146   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   4147                                            (__v8df)_mm512_unpackhi_pd(__A, __B),
   4148                                            (__v8df)__W);
   4149 }
   4150 
   4151 static __inline__ __m512d __DEFAULT_FN_ATTRS
   4152 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
   4153 {
   4154   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   4155                                            (__v8df)_mm512_unpackhi_pd(__A, __B),
   4156                                            (__v8df)_mm512_setzero_pd());
   4157 }
   4158 
   4159 static __inline __m512d __DEFAULT_FN_ATTRS
   4160 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
   4161 {
   4162   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
   4163                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
   4164 }
   4165 
   4166 static __inline__ __m512d __DEFAULT_FN_ATTRS
   4167 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   4168 {
   4169   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   4170                                            (__v8df)_mm512_unpacklo_pd(__A, __B),
   4171                                            (__v8df)__W);
   4172 }
   4173 
   4174 static __inline__ __m512d __DEFAULT_FN_ATTRS
   4175 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
   4176 {
   4177   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
   4178                                            (__v8df)_mm512_unpacklo_pd(__A, __B),
   4179                                            (__v8df)_mm512_setzero_pd());
   4180 }
   4181 
   4182 static __inline __m512 __DEFAULT_FN_ATTRS
   4183 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
   4184 {
   4185   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
   4186                                          2,    18,    3,    19,
   4187                                          2+4,  18+4,  3+4,  19+4,
   4188                                          2+8,  18+8,  3+8,  19+8,
   4189                                          2+12, 18+12, 3+12, 19+12);
   4190 }
   4191 
   4192 static __inline__ __m512 __DEFAULT_FN_ATTRS
   4193 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   4194 {
   4195   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
   4196                                           (__v16sf)_mm512_unpackhi_ps(__A, __B),
   4197                                           (__v16sf)__W);
   4198 }
   4199 
   4200 static __inline__ __m512 __DEFAULT_FN_ATTRS
   4201 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
   4202 {
   4203   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
   4204                                           (__v16sf)_mm512_unpackhi_ps(__A, __B),
   4205                                           (__v16sf)_mm512_setzero_ps());
   4206 }
   4207 
   4208 static __inline __m512 __DEFAULT_FN_ATTRS
   4209 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
   4210 {
   4211   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
   4212                                          0,    16,    1,    17,
   4213                                          0+4,  16+4,  1+4,  17+4,
   4214                                          0+8,  16+8,  1+8,  17+8,
   4215                                          0+12, 16+12, 1+12, 17+12);
   4216 }
   4217 
   4218 static __inline__ __m512 __DEFAULT_FN_ATTRS
   4219 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   4220 {
   4221   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
   4222                                           (__v16sf)_mm512_unpacklo_ps(__A, __B),
   4223                                           (__v16sf)__W);
   4224 }
   4225 
   4226 static __inline__ __m512 __DEFAULT_FN_ATTRS
   4227 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
   4228 {
   4229   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
   4230                                           (__v16sf)_mm512_unpacklo_ps(__A, __B),
   4231                                           (__v16sf)_mm512_setzero_ps());
   4232 }
   4233 
   4234 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4235 _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
   4236 {
   4237   return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
   4238                                           2,    18,    3,    19,
   4239                                           2+4,  18+4,  3+4,  19+4,
   4240                                           2+8,  18+8,  3+8,  19+8,
   4241                                           2+12, 18+12, 3+12, 19+12);
   4242 }
   4243 
   4244 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4245 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   4246 {
   4247   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
   4248                                        (__v16si)_mm512_unpackhi_epi32(__A, __B),
   4249                                        (__v16si)__W);
   4250 }
   4251 
   4252 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4253 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
   4254 {
   4255   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
   4256                                        (__v16si)_mm512_unpackhi_epi32(__A, __B),
   4257                                        (__v16si)_mm512_setzero_si512());
   4258 }
   4259 
   4260 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4261 _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
   4262 {
   4263   return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
   4264                                           0,    16,    1,    17,
   4265                                           0+4,  16+4,  1+4,  17+4,
   4266                                           0+8,  16+8,  1+8,  17+8,
   4267                                           0+12, 16+12, 1+12, 17+12);
   4268 }
   4269 
   4270 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4271 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   4272 {
   4273   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
   4274                                        (__v16si)_mm512_unpacklo_epi32(__A, __B),
   4275                                        (__v16si)__W);
   4276 }
   4277 
   4278 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4279 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
   4280 {
   4281   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
   4282                                        (__v16si)_mm512_unpacklo_epi32(__A, __B),
   4283                                        (__v16si)_mm512_setzero_si512());
   4284 }
   4285 
   4286 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4287 _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
   4288 {
   4289   return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
   4290                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
   4291 }
   4292 
   4293 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4294 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   4295 {
   4296   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
   4297                                         (__v8di)_mm512_unpackhi_epi64(__A, __B),
   4298                                         (__v8di)__W);
   4299 }
   4300 
   4301 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4302 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
   4303 {
   4304   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
   4305                                         (__v8di)_mm512_unpackhi_epi64(__A, __B),
   4306                                         (__v8di)_mm512_setzero_si512());
   4307 }
   4308 
   4309 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4310 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
   4311 {
   4312   return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
   4313                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
   4314 }
   4315 
   4316 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4317 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   4318 {
   4319   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
   4320                                         (__v8di)_mm512_unpacklo_epi64(__A, __B),
   4321                                         (__v8di)__W);
   4322 }
   4323 
   4324 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4325 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
   4326 {
   4327   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
   4328                                         (__v8di)_mm512_unpacklo_epi64(__A, __B),
   4329                                         (__v8di)_mm512_setzero_si512());
   4330 }
   4331 
   4332 /* Bit Test */
   4333 
   4334 static __inline __mmask16 __DEFAULT_FN_ATTRS
   4335 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
   4336 {
   4337   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
   4338             (__v16si) __B,
   4339             (__mmask16) -1);
   4340 }
   4341 
   4342 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4343 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
   4344 {
   4345   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
   4346                  (__v16si) __B, __U);
   4347 }
   4348 
   4349 static __inline __mmask8 __DEFAULT_FN_ATTRS
   4350 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
   4351 {
   4352   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
   4353                  (__v8di) __B,
   4354                  (__mmask8) -1);
   4355 }
   4356 
   4357 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4358 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
   4359 {
   4360   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
   4361 }
   4362 
   4363 
   4364 /* SIMD load ops */
   4365 
   4366 static __inline __m512i __DEFAULT_FN_ATTRS
   4367 _mm512_loadu_si512 (void const *__P)
   4368 {
   4369   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
   4370                   (__v16si)
   4371                   _mm512_setzero_si512 (),
   4372                   (__mmask16) -1);
   4373 }
   4374 
   4375 static __inline __m512i __DEFAULT_FN_ATTRS
   4376 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
   4377 {
   4378   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
   4379                   (__v16si) __W,
   4380                   (__mmask16) __U);
   4381 }
   4382 
   4383 
   4384 static __inline __m512i __DEFAULT_FN_ATTRS
   4385 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
   4386 {
   4387   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
   4388                                                      (__v16si)
   4389                                                      _mm512_setzero_si512 (),
   4390                                                      (__mmask16) __U);
   4391 }
   4392 
   4393 static __inline __m512i __DEFAULT_FN_ATTRS
   4394 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
   4395 {
   4396   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
   4397                   (__v8di) __W,
   4398                   (__mmask8) __U);
   4399 }
   4400 
   4401 static __inline __m512i __DEFAULT_FN_ATTRS
   4402 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
   4403 {
   4404   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
   4405                                                      (__v8di)
   4406                                                      _mm512_setzero_si512 (),
   4407                                                      (__mmask8) __U);
   4408 }
   4409 
   4410 static __inline __m512 __DEFAULT_FN_ATTRS
   4411 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
   4412 {
   4413   return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
   4414                    (__v16sf) __W,
   4415                    (__mmask16) __U);
   4416 }
   4417 
   4418 static __inline __m512 __DEFAULT_FN_ATTRS
   4419 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
   4420 {
   4421   return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
   4422                                                   (__v16sf)
   4423                                                   _mm512_setzero_ps (),
   4424                                                   (__mmask16) __U);
   4425 }
   4426 
   4427 static __inline __m512d __DEFAULT_FN_ATTRS
   4428 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
   4429 {
   4430   return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
   4431                 (__v8df) __W,
   4432                 (__mmask8) __U);
   4433 }
   4434 
   4435 static __inline __m512d __DEFAULT_FN_ATTRS
   4436 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
   4437 {
   4438   return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
   4439                                                    (__v8df)
   4440                                                    _mm512_setzero_pd (),
   4441                                                    (__mmask8) __U);
   4442 }
   4443 
   4444 static __inline __m512d __DEFAULT_FN_ATTRS
   4445 _mm512_loadu_pd(double const *__p)
   4446 {
   4447   struct __loadu_pd {
   4448     __m512d __v;
   4449   } __attribute__((__packed__, __may_alias__));
   4450   return ((struct __loadu_pd*)__p)->__v;
   4451 }
   4452 
   4453 static __inline __m512 __DEFAULT_FN_ATTRS
   4454 _mm512_loadu_ps(float const *__p)
   4455 {
   4456   struct __loadu_ps {
   4457     __m512 __v;
   4458   } __attribute__((__packed__, __may_alias__));
   4459   return ((struct __loadu_ps*)__p)->__v;
   4460 }
   4461 
   4462 static __inline __m512 __DEFAULT_FN_ATTRS
   4463 _mm512_load_ps(float const *__p)
   4464 {
   4465   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
   4466                                                   (__v16sf)
   4467                                                   _mm512_setzero_ps (),
   4468                                                   (__mmask16) -1);
   4469 }
   4470 
   4471 static __inline __m512 __DEFAULT_FN_ATTRS
   4472 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
   4473 {
   4474   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
   4475                    (__v16sf) __W,
   4476                    (__mmask16) __U);
   4477 }
   4478 
   4479 static __inline __m512 __DEFAULT_FN_ATTRS
   4480 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
   4481 {
   4482   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
   4483                                                   (__v16sf)
   4484                                                   _mm512_setzero_ps (),
   4485                                                   (__mmask16) __U);
   4486 }
   4487 
   4488 static __inline __m512d __DEFAULT_FN_ATTRS
   4489 _mm512_load_pd(double const *__p)
   4490 {
   4491   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
   4492                                                    (__v8df)
   4493                                                    _mm512_setzero_pd (),
   4494                                                    (__mmask8) -1);
   4495 }
   4496 
   4497 static __inline __m512d __DEFAULT_FN_ATTRS
   4498 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
   4499 {
   4500   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
   4501                           (__v8df) __W,
   4502                           (__mmask8) __U);
   4503 }
   4504 
   4505 static __inline __m512d __DEFAULT_FN_ATTRS
   4506 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
   4507 {
   4508   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
   4509                                                    (__v8df)
   4510                                                    _mm512_setzero_pd (),
   4511                                                    (__mmask8) __U);
   4512 }
   4513 
   4514 static __inline __m512i __DEFAULT_FN_ATTRS
   4515 _mm512_load_si512 (void const *__P)
   4516 {
   4517   return *(__m512i *) __P;
   4518 }
   4519 
   4520 static __inline __m512i __DEFAULT_FN_ATTRS
   4521 _mm512_load_epi32 (void const *__P)
   4522 {
   4523   return *(__m512i *) __P;
   4524 }
   4525 
   4526 static __inline __m512i __DEFAULT_FN_ATTRS
   4527 _mm512_load_epi64 (void const *__P)
   4528 {
   4529   return *(__m512i *) __P;
   4530 }
   4531 
   4532 /* SIMD store ops */
   4533 
   4534 static __inline void __DEFAULT_FN_ATTRS
   4535 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
   4536 {
   4537   __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
   4538                                      (__mmask8) __U);
   4539 }
   4540 
   4541 static __inline void __DEFAULT_FN_ATTRS
   4542 _mm512_storeu_si512 (void *__P, __m512i __A)
   4543 {
   4544   __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
   4545             (__mmask16) -1);
   4546 }
   4547 
   4548 static __inline void __DEFAULT_FN_ATTRS
   4549 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
   4550 {
   4551   __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
   4552                                      (__mmask16) __U);
   4553 }
   4554 
   4555 static __inline void __DEFAULT_FN_ATTRS
   4556 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
   4557 {
   4558   __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
   4559 }
   4560 
   4561 static __inline void __DEFAULT_FN_ATTRS
   4562 _mm512_storeu_pd(void *__P, __m512d __A)
   4563 {
   4564   __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1);
   4565 }
   4566 
   4567 static __inline void __DEFAULT_FN_ATTRS
   4568 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
   4569 {
   4570   __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
   4571                                    (__mmask16) __U);
   4572 }
   4573 
   4574 static __inline void __DEFAULT_FN_ATTRS
   4575 _mm512_storeu_ps(void *__P, __m512 __A)
   4576 {
   4577   __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1);
   4578 }
   4579 
   4580 static __inline void __DEFAULT_FN_ATTRS
   4581 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
   4582 {
   4583   __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
   4584 }
   4585 
   4586 static __inline void __DEFAULT_FN_ATTRS
   4587 _mm512_store_pd(void *__P, __m512d __A)
   4588 {
   4589   *(__m512d*)__P = __A;
   4590 }
   4591 
   4592 static __inline void __DEFAULT_FN_ATTRS
   4593 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
   4594 {
   4595   __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
   4596                                    (__mmask16) __U);
   4597 }
   4598 
   4599 static __inline void __DEFAULT_FN_ATTRS
   4600 _mm512_store_ps(void *__P, __m512 __A)
   4601 {
   4602   *(__m512*)__P = __A;
   4603 }
   4604 
   4605 static __inline void __DEFAULT_FN_ATTRS
   4606 _mm512_store_si512 (void *__P, __m512i __A)
   4607 {
   4608   *(__m512i *) __P = __A;
   4609 }
   4610 
   4611 static __inline void __DEFAULT_FN_ATTRS
   4612 _mm512_store_epi32 (void *__P, __m512i __A)
   4613 {
   4614   *(__m512i *) __P = __A;
   4615 }
   4616 
   4617 static __inline void __DEFAULT_FN_ATTRS
   4618 _mm512_store_epi64 (void *__P, __m512i __A)
   4619 {
   4620   *(__m512i *) __P = __A;
   4621 }
   4622 
   4623 /* Mask ops */
   4624 
   4625 static __inline __mmask16 __DEFAULT_FN_ATTRS
   4626 _mm512_knot(__mmask16 __M)
   4627 {
   4628   return __builtin_ia32_knothi(__M);
   4629 }
   4630 
   4631 /* Integer compare */
   4632 
   4633 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4634 _mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
   4635   return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
   4636                                                    (__mmask16)-1);
   4637 }
   4638 
   4639 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4640 _mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4641   return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
   4642                                                    __u);
   4643 }
   4644 
   4645 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4646 _mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
   4647   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
   4648                                                  (__mmask16)-1);
   4649 }
   4650 
   4651 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4652 _mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4653   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
   4654                                                  __u);
   4655 }
   4656 
   4657 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4658 _mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4659   return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
   4660                                                   __u);
   4661 }
   4662 
   4663 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4664 _mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
   4665   return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
   4666                                                   (__mmask8)-1);
   4667 }
   4668 
   4669 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4670 _mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
   4671   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
   4672                                                 (__mmask8)-1);
   4673 }
   4674 
   4675 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4676 _mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4677   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
   4678                                                 __u);
   4679 }
   4680 
   4681 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4682 _mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
   4683   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
   4684                                                 (__mmask16)-1);
   4685 }
   4686 
   4687 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4688 _mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4689   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
   4690                                                 __u);
   4691 }
   4692 
   4693 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4694 _mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
   4695   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
   4696                                                  (__mmask16)-1);
   4697 }
   4698 
   4699 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4700 _mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4701   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
   4702                                                  __u);
   4703 }
   4704 
   4705 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4706 _mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
   4707   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
   4708                                                (__mmask8)-1);
   4709 }
   4710 
   4711 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4712 _mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4713   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
   4714                                                __u);
   4715 }
   4716 
   4717 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4718 _mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
   4719   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
   4720                                                 (__mmask8)-1);
   4721 }
   4722 
   4723 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4724 _mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4725   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
   4726                                                 __u);
   4727 }
   4728 
   4729 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4730 _mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
   4731   return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
   4732                                                    (__mmask16)-1);
   4733 }
   4734 
   4735 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4736 _mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4737   return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
   4738                                                    __u);
   4739 }
   4740 
   4741 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4742 _mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
   4743   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
   4744                                                  (__mmask16)-1);
   4745 }
   4746 
   4747 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4748 _mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4749   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
   4750                                                  __u);
   4751 }
   4752 
   4753 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4754 _mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4755   return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
   4756                                                   __u);
   4757 }
   4758 
   4759 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4760 _mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
   4761   return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
   4762                                                   (__mmask8)-1);
   4763 }
   4764 
   4765 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4766 _mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
   4767   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
   4768                                                 (__mmask8)-1);
   4769 }
   4770 
   4771 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4772 _mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4773   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
   4774                                                 __u);
   4775 }
   4776 
   4777 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4778 _mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
   4779   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
   4780                                                 (__mmask16)-1);
   4781 }
   4782 
   4783 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4784 _mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4785   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
   4786                                                 __u);
   4787 }
   4788 
   4789 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4790 _mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
   4791   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
   4792                                                  (__mmask16)-1);
   4793 }
   4794 
   4795 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4796 _mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4797   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
   4798                                                  __u);
   4799 }
   4800 
   4801 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4802 _mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
   4803   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
   4804                                                (__mmask8)-1);
   4805 }
   4806 
   4807 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4808 _mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4809   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
   4810                                                __u);
   4811 }
   4812 
   4813 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4814 _mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
   4815   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
   4816                                                 (__mmask8)-1);
   4817 }
   4818 
   4819 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4820 _mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4821   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
   4822                                                 __u);
   4823 }
   4824 
   4825 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4826 _mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
   4827   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
   4828                                                 (__mmask16)-1);
   4829 }
   4830 
   4831 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4832 _mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4833   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
   4834                                                 __u);
   4835 }
   4836 
   4837 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4838 _mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
   4839   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
   4840                                                  (__mmask16)-1);
   4841 }
   4842 
   4843 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4844 _mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4845   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
   4846                                                  __u);
   4847 }
   4848 
   4849 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4850 _mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
   4851   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
   4852                                                (__mmask8)-1);
   4853 }
   4854 
   4855 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4856 _mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4857   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
   4858                                                __u);
   4859 }
   4860 
   4861 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4862 _mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
   4863   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
   4864                                                 (__mmask8)-1);
   4865 }
   4866 
   4867 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4868 _mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4869   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
   4870                                                 __u);
   4871 }
   4872 
   4873 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4874 _mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
   4875   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
   4876                                                 (__mmask16)-1);
   4877 }
   4878 
   4879 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4880 _mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4881   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
   4882                                                 __u);
   4883 }
   4884 
   4885 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4886 _mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
   4887   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
   4888                                                  (__mmask16)-1);
   4889 }
   4890 
   4891 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   4892 _mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   4893   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
   4894                                                  __u);
   4895 }
   4896 
   4897 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4898 _mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
   4899   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
   4900                                                (__mmask8)-1);
   4901 }
   4902 
   4903 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4904 _mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4905   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
   4906                                                __u);
   4907 }
   4908 
   4909 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4910 _mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
   4911   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
   4912                                                 (__mmask8)-1);
   4913 }
   4914 
   4915 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   4916 _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   4917   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
   4918                                                 __u);
   4919 }
   4920 
   4921 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4922 _mm512_cvtepi8_epi32 (__m128i __A)
   4923 {
   4924   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
   4925                 (__v16si)
   4926                 _mm512_setzero_si512 (),
   4927                 (__mmask16) -1);
   4928 }
   4929 
   4930 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4931 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
   4932 {
   4933   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
   4934                 (__v16si) __W,
   4935                 (__mmask16) __U);
   4936 }
   4937 
   4938 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4939 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
   4940 {
   4941   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
   4942                 (__v16si)
   4943                 _mm512_setzero_si512 (),
   4944                 (__mmask16) __U);
   4945 }
   4946 
   4947 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4948 _mm512_cvtepi8_epi64 (__m128i __A)
   4949 {
   4950   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
   4951                 (__v8di)
   4952                 _mm512_setzero_si512 (),
   4953                 (__mmask8) -1);
   4954 }
   4955 
   4956 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4957 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
   4958 {
   4959   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
   4960                 (__v8di) __W,
   4961                 (__mmask8) __U);
   4962 }
   4963 
   4964 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4965 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
   4966 {
   4967   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
   4968                 (__v8di)
   4969                 _mm512_setzero_si512 (),
   4970                 (__mmask8) __U);
   4971 }
   4972 
   4973 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4974 _mm512_cvtepi32_epi64 (__m256i __X)
   4975 {
   4976   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
   4977                 (__v8di)
   4978                 _mm512_setzero_si512 (),
   4979                 (__mmask8) -1);
   4980 }
   4981 
   4982 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4983 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
   4984 {
   4985   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
   4986                 (__v8di) __W,
   4987                 (__mmask8) __U);
   4988 }
   4989 
   4990 static __inline__ __m512i __DEFAULT_FN_ATTRS
   4991 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
   4992 {
   4993   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
   4994                 (__v8di)
   4995                 _mm512_setzero_si512 (),
   4996                 (__mmask8) __U);
   4997 }
   4998 
   4999 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5000 _mm512_cvtepi16_epi32 (__m256i __A)
   5001 {
   5002   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
   5003                 (__v16si)
   5004                 _mm512_setzero_si512 (),
   5005                 (__mmask16) -1);
   5006 }
   5007 
   5008 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5009 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
   5010 {
   5011   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
   5012                 (__v16si) __W,
   5013                 (__mmask16) __U);
   5014 }
   5015 
   5016 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5017 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
   5018 {
   5019   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
   5020                 (__v16si)
   5021                 _mm512_setzero_si512 (),
   5022                 (__mmask16) __U);
   5023 }
   5024 
   5025 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5026 _mm512_cvtepi16_epi64 (__m128i __A)
   5027 {
   5028   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
   5029                 (__v8di)
   5030                 _mm512_setzero_si512 (),
   5031                 (__mmask8) -1);
   5032 }
   5033 
   5034 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5035 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
   5036 {
   5037   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
   5038                 (__v8di) __W,
   5039                 (__mmask8) __U);
   5040 }
   5041 
   5042 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5043 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
   5044 {
   5045   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
   5046                 (__v8di)
   5047                 _mm512_setzero_si512 (),
   5048                 (__mmask8) __U);
   5049 }
   5050 
   5051 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5052 _mm512_cvtepu8_epi32 (__m128i __A)
   5053 {
   5054   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
   5055                 (__v16si)
   5056                 _mm512_setzero_si512 (),
   5057                 (__mmask16) -1);
   5058 }
   5059 
   5060 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5061 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
   5062 {
   5063   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
   5064                 (__v16si) __W,
   5065                 (__mmask16) __U);
   5066 }
   5067 
   5068 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5069 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
   5070 {
   5071   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
   5072                 (__v16si)
   5073                 _mm512_setzero_si512 (),
   5074                 (__mmask16) __U);
   5075 }
   5076 
   5077 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5078 _mm512_cvtepu8_epi64 (__m128i __A)
   5079 {
   5080   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
   5081                 (__v8di)
   5082                 _mm512_setzero_si512 (),
   5083                 (__mmask8) -1);
   5084 }
   5085 
   5086 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5087 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
   5088 {
   5089   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
   5090                 (__v8di) __W,
   5091                 (__mmask8) __U);
   5092 }
   5093 
   5094 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5095 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
   5096 {
   5097   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
   5098                 (__v8di)
   5099                 _mm512_setzero_si512 (),
   5100                 (__mmask8) __U);
   5101 }
   5102 
   5103 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5104 _mm512_cvtepu32_epi64 (__m256i __X)
   5105 {
   5106   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
   5107                 (__v8di)
   5108                 _mm512_setzero_si512 (),
   5109                 (__mmask8) -1);
   5110 }
   5111 
   5112 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5113 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
   5114 {
   5115   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
   5116                 (__v8di) __W,
   5117                 (__mmask8) __U);
   5118 }
   5119 
   5120 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5121 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
   5122 {
   5123   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
   5124                 (__v8di)
   5125                 _mm512_setzero_si512 (),
   5126                 (__mmask8) __U);
   5127 }
   5128 
   5129 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5130 _mm512_cvtepu16_epi32 (__m256i __A)
   5131 {
   5132   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
   5133                 (__v16si)
   5134                 _mm512_setzero_si512 (),
   5135                 (__mmask16) -1);
   5136 }
   5137 
   5138 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5139 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
   5140 {
   5141   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
   5142                 (__v16si) __W,
   5143                 (__mmask16) __U);
   5144 }
   5145 
   5146 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5147 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
   5148 {
   5149   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
   5150                 (__v16si)
   5151                 _mm512_setzero_si512 (),
   5152                 (__mmask16) __U);
   5153 }
   5154 
   5155 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5156 _mm512_cvtepu16_epi64 (__m128i __A)
   5157 {
   5158   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
   5159                 (__v8di)
   5160                 _mm512_setzero_si512 (),
   5161                 (__mmask8) -1);
   5162 }
   5163 
   5164 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5165 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
   5166 {
   5167   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
   5168                 (__v8di) __W,
   5169                 (__mmask8) __U);
   5170 }
   5171 
   5172 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5173 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
   5174 {
   5175   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
   5176                 (__v8di)
   5177                 _mm512_setzero_si512 (),
   5178                 (__mmask8) __U);
   5179 }
   5180 
   5181 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5182 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
   5183 {
   5184   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
   5185               (__v16si) __B,
   5186               (__v16si)
   5187               _mm512_setzero_si512 (),
   5188               (__mmask16) -1);
   5189 }
   5190 
   5191 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5192 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   5193 {
   5194   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
   5195               (__v16si) __B,
   5196               (__v16si) __W,
   5197               (__mmask16) __U);
   5198 }
   5199 
   5200 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5201 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
   5202 {
   5203   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
   5204               (__v16si) __B,
   5205               (__v16si)
   5206               _mm512_setzero_si512 (),
   5207               (__mmask16) __U);
   5208 }
   5209 
   5210 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5211 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
   5212 {
   5213   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
   5214               (__v8di) __B,
   5215               (__v8di)
   5216               _mm512_setzero_si512 (),
   5217               (__mmask8) -1);
   5218 }
   5219 
   5220 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5221 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   5222 {
   5223   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
   5224               (__v8di) __B,
   5225               (__v8di) __W,
   5226               (__mmask8) __U);
   5227 }
   5228 
   5229 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5230 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
   5231 {
   5232   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
   5233               (__v8di) __B,
   5234               (__v8di)
   5235               _mm512_setzero_si512 (),
   5236               (__mmask8) __U);
   5237 }
   5238 
   5239 
   5240 
   5241 #define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
   5242   (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
   5243                                          (__v16si)(__m512i)(b), (int)(p), \
   5244                                          (__mmask16)-1); })
   5245 
   5246 #define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
   5247   (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
   5248                                           (__v16si)(__m512i)(b), (int)(p), \
   5249                                           (__mmask16)-1); })
   5250 
   5251 #define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
   5252   (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
   5253                                         (__v8di)(__m512i)(b), (int)(p), \
   5254                                         (__mmask8)-1); })
   5255 
   5256 #define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
   5257   (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
   5258                                          (__v8di)(__m512i)(b), (int)(p), \
   5259                                          (__mmask8)-1); })
   5260 
   5261 #define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
   5262   (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
   5263                                          (__v16si)(__m512i)(b), (int)(p), \
   5264                                          (__mmask16)(m)); })
   5265 
   5266 #define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
   5267   (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
   5268                                           (__v16si)(__m512i)(b), (int)(p), \
   5269                                           (__mmask16)(m)); })
   5270 
   5271 #define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
   5272   (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
   5273                                         (__v8di)(__m512i)(b), (int)(p), \
   5274                                         (__mmask8)(m)); })
   5275 
   5276 #define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
   5277   (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
   5278                                          (__v8di)(__m512i)(b), (int)(p), \
   5279                                          (__mmask8)(m)); })
   5280 
   5281 #define _mm512_rol_epi32(a, b) __extension__ ({ \
   5282   (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
   5283                                         (__v16si)_mm512_setzero_si512(), \
   5284                                         (__mmask16)-1); })
   5285 
   5286 #define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \
   5287   (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
   5288                                         (__v16si)(__m512i)(W), \
   5289                                         (__mmask16)(U)); })
   5290 
   5291 #define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \
   5292   (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
   5293                                         (__v16si)_mm512_setzero_si512(), \
   5294                                         (__mmask16)(U)); })
   5295 
   5296 #define _mm512_rol_epi64(a, b) __extension__ ({ \
   5297   (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
   5298                                         (__v8di)_mm512_setzero_si512(), \
   5299                                         (__mmask8)-1); })
   5300 
   5301 #define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \
   5302   (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
   5303                                         (__v8di)(__m512i)(W), (__mmask8)(U)); })
   5304 
   5305 #define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \
   5306   (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
   5307                                         (__v8di)_mm512_setzero_si512(), \
   5308                                         (__mmask8)(U)); })
   5309 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5310 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
   5311 {
   5312   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
   5313               (__v16si) __B,
   5314               (__v16si)
   5315               _mm512_setzero_si512 (),
   5316               (__mmask16) -1);
   5317 }
   5318 
   5319 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5320 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   5321 {
   5322   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
   5323               (__v16si) __B,
   5324               (__v16si) __W,
   5325               (__mmask16) __U);
   5326 }
   5327 
   5328 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5329 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
   5330 {
   5331   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
   5332               (__v16si) __B,
   5333               (__v16si)
   5334               _mm512_setzero_si512 (),
   5335               (__mmask16) __U);
   5336 }
   5337 
   5338 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5339 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
   5340 {
   5341   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
   5342               (__v8di) __B,
   5343               (__v8di)
   5344               _mm512_setzero_si512 (),
   5345               (__mmask8) -1);
   5346 }
   5347 
   5348 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5349 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   5350 {
   5351   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
   5352               (__v8di) __B,
   5353               (__v8di) __W,
   5354               (__mmask8) __U);
   5355 }
   5356 
   5357 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5358 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
   5359 {
   5360   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
   5361               (__v8di) __B,
   5362               (__v8di)
   5363               _mm512_setzero_si512 (),
   5364               (__mmask8) __U);
   5365 }
   5366 
   5367 #define _mm512_ror_epi32(A, B) __extension__ ({ \
   5368   (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
   5369                                         (__v16si)_mm512_setzero_si512(), \
   5370                                         (__mmask16)-1); })
   5371 
   5372 #define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \
   5373   (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
   5374                                         (__v16si)(__m512i)(W), \
   5375                                         (__mmask16)(U)); })
   5376 
   5377 #define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \
   5378   (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
   5379                                         (__v16si)_mm512_setzero_si512(), \
   5380                                         (__mmask16)(U)); })
   5381 
   5382 #define _mm512_ror_epi64(A, B) __extension__ ({ \
   5383   (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
   5384                                         (__v8di)_mm512_setzero_si512(), \
   5385                                         (__mmask8)-1); })
   5386 
   5387 #define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \
   5388   (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
   5389                                         (__v8di)(__m512i)(W), (__mmask8)(U)); })
   5390 
   5391 #define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \
   5392   (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
   5393                                         (__v8di)_mm512_setzero_si512(), \
   5394                                         (__mmask8)(U)); })
   5395 
   5396 #define _mm512_slli_epi32(A, B) __extension__ ({ \
   5397   (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(__m512i)(A), (int)(B), \
   5398                                          (__v16si)_mm512_setzero_si512(), \
   5399                                          (__mmask16)-1); })
   5400 
   5401 #define _mm512_mask_slli_epi32(W, U, A, B) __extension__ ({ \
   5402   (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(__m512i)(A), (int)(B), \
   5403                                          (__v16si)(__m512i)(W), \
   5404                                          (__mmask16)(U)); })
   5405 
   5406 #define _mm512_maskz_slli_epi32(U, A, B) __extension__ ({ \
   5407   (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(__m512i)(A), (int)(B), \
   5408                                          (__v16si)_mm512_setzero_si512(), \
   5409                                          (__mmask16)(U)); })
   5410 
   5411 #define _mm512_slli_epi64(A, B) __extension__ ({ \
   5412   (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(__m512i)(A), (int)(B), \
   5413                                          (__v8di)_mm512_setzero_si512(), \
   5414                                          (__mmask8)-1); })
   5415 
   5416 #define _mm512_mask_slli_epi64(W, U, A, B) __extension__ ({ \
   5417   (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(__m512i)(A), (int)(B), \
   5418                                          (__v8di)(__m512i)(W), \
   5419                                          (__mmask8)(U)); })
   5420 
   5421 #define _mm512_maskz_slli_epi64(U, A, B) __extension__ ({ \
   5422   (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(__m512i)(A), (int)(B), \
   5423                                          (__v8di)_mm512_setzero_si512(), \
   5424                                          (__mmask8)(U)); })
   5425 
   5426 
   5427 
   5428 #define _mm512_srli_epi32(A, B) __extension__ ({ \
   5429   (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(__m512i)(A), (int)(B), \
   5430                                          (__v16si)_mm512_setzero_si512(), \
   5431                                          (__mmask16)-1); })
   5432 
   5433 #define _mm512_mask_srli_epi32(W, U, A, B) __extension__ ({ \
   5434   (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(__m512i)(A), (int)(B), \
   5435                                          (__v16si)(__m512i)(W), \
   5436                                          (__mmask16)(U)); })
   5437 
   5438 #define _mm512_maskz_srli_epi32(U, A, B) __extension__ ({ \
   5439   (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(__m512i)(A), (int)(B), \
   5440                                          (__v16si)_mm512_setzero_si512(), \
   5441                                          (__mmask16)(U)); })
   5442 
   5443 #define _mm512_srli_epi64(A, B) __extension__ ({ \
   5444   (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(__m512i)(A), (int)(B), \
   5445                                          (__v8di)_mm512_setzero_si512(), \
   5446                                          (__mmask8)-1); })
   5447 
   5448 #define _mm512_mask_srli_epi64(W, U, A, B) __extension__ ({ \
   5449   (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(__m512i)(A), (int)(B), \
   5450                                          (__v8di)(__m512i)(W), \
   5451                                          (__mmask8)(U)); })
   5452 
   5453 #define _mm512_maskz_srli_epi64(U, A, B) __extension__ ({ \
   5454   (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(__m512i)(A), (int)(B), \
   5455                                          (__v8di)_mm512_setzero_si512(), \
   5456                                          (__mmask8)(U)); })
   5457 
   5458 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5459 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
   5460 {
   5461   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
   5462               (__v16si) __W,
   5463               (__mmask16) __U);
   5464 }
   5465 
   5466 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5467 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
   5468 {
   5469   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
   5470               (__v16si)
   5471               _mm512_setzero_si512 (),
   5472               (__mmask16) __U);
   5473 }
   5474 
   5475 static __inline__ void __DEFAULT_FN_ATTRS
   5476 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
   5477 {
   5478   __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
   5479           (__mmask16) __U);
   5480 }
   5481 
   5482 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5483 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
   5484 {
   5485   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
   5486                  (__v16si) __A,
   5487                  (__v16si) __W);
   5488 }
   5489 
   5490 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5491 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
   5492 {
   5493   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
   5494                  (__v16si) __A,
   5495                  (__v16si) _mm512_setzero_si512 ());
   5496 }
   5497 
   5498 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5499 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
   5500 {
   5501   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
   5502                  (__v8di) __A,
   5503                  (__v8di) __W);
   5504 }
   5505 
   5506 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5507 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
   5508 {
   5509   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
   5510                  (__v8di) __A,
   5511                  (__v8di) _mm512_setzero_si512 ());
   5512 }
   5513 
   5514 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5515 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
   5516 {
   5517   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
   5518               (__v8di) __W,
   5519               (__mmask8) __U);
   5520 }
   5521 
   5522 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5523 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
   5524 {
   5525   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
   5526               (__v8di)
   5527               _mm512_setzero_si512 (),
   5528               (__mmask8) __U);
   5529 }
   5530 
   5531 static __inline__ void __DEFAULT_FN_ATTRS
   5532 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
   5533 {
   5534   __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
   5535           (__mmask8) __U);
   5536 }
   5537 
   5538 static __inline__ __m512d __DEFAULT_FN_ATTRS
   5539 _mm512_movedup_pd (__m512d __A)
   5540 {
   5541   return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
   5542                                           0, 0, 2, 2, 4, 4, 6, 6);
   5543 }
   5544 
   5545 static __inline__ __m512d __DEFAULT_FN_ATTRS
   5546 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
   5547 {
   5548   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
   5549                                               (__v8df)_mm512_movedup_pd(__A),
   5550                                               (__v8df)__W);
   5551 }
   5552 
   5553 static __inline__ __m512d __DEFAULT_FN_ATTRS
   5554 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
   5555 {
   5556   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
   5557                                               (__v8df)_mm512_movedup_pd(__A),
   5558                                               (__v8df)_mm512_setzero_pd());
   5559 }
   5560 
   5561 #define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \
   5562   (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
   5563                                              (__v8df)(__m512d)(B), \
   5564                                              (__v8di)(__m512i)(C), (int)(imm), \
   5565                                              (__mmask8)-1, (int)(R)); })
   5566 
   5567 #define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \
   5568   (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
   5569                                              (__v8df)(__m512d)(B), \
   5570                                              (__v8di)(__m512i)(C), (int)(imm), \
   5571                                              (__mmask8)(U), (int)(R)); })
   5572 
   5573 #define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \
   5574   (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
   5575                                              (__v8df)(__m512d)(B), \
   5576                                              (__v8di)(__m512i)(C), (int)(imm), \
   5577                                              (__mmask8)-1, \
   5578                                              _MM_FROUND_CUR_DIRECTION); })
   5579 
   5580 #define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
   5581   (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
   5582                                              (__v8df)(__m512d)(B), \
   5583                                              (__v8di)(__m512i)(C), (int)(imm), \
   5584                                              (__mmask8)(U), \
   5585                                              _MM_FROUND_CUR_DIRECTION); })
   5586 
   5587 #define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \
   5588   (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
   5589                                               (__v8df)(__m512d)(B), \
   5590                                               (__v8di)(__m512i)(C), \
   5591                                               (int)(imm), (__mmask8)(U), \
   5592                                               (int)(R)); })
   5593 
   5594 #define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
   5595   (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
   5596                                               (__v8df)(__m512d)(B), \
   5597                                               (__v8di)(__m512i)(C), \
   5598                                               (int)(imm), (__mmask8)(U), \
   5599                                               _MM_FROUND_CUR_DIRECTION); })
   5600 
   5601 #define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \
   5602   (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
   5603                                             (__v16sf)(__m512)(B), \
   5604                                             (__v16si)(__m512i)(C), (int)(imm), \
   5605                                             (__mmask16)-1, (int)(R)); })
   5606 
   5607 #define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \
   5608   (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
   5609                                             (__v16sf)(__m512)(B), \
   5610                                             (__v16si)(__m512i)(C), (int)(imm), \
   5611                                             (__mmask16)(U), (int)(R)); })
   5612 
   5613 #define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \
   5614   (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
   5615                                             (__v16sf)(__m512)(B), \
   5616                                             (__v16si)(__m512i)(C), (int)(imm), \
   5617                                             (__mmask16)-1, \
   5618                                             _MM_FROUND_CUR_DIRECTION); })
   5619 
   5620 #define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
   5621   (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
   5622                                             (__v16sf)(__m512)(B), \
   5623                                             (__v16si)(__m512i)(C), (int)(imm), \
   5624                                             (__mmask16)(U), \
   5625                                             _MM_FROUND_CUR_DIRECTION); })
   5626 
   5627 #define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
   5628   (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
   5629                                              (__v16sf)(__m512)(B), \
   5630                                              (__v16si)(__m512i)(C), \
   5631                                              (int)(imm), (__mmask16)(U), \
   5632                                              (int)(R)); })
   5633 
   5634 #define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
   5635   (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
   5636                                              (__v16sf)(__m512)(B), \
   5637                                              (__v16si)(__m512i)(C), \
   5638                                              (int)(imm), (__mmask16)(U), \
   5639                                              _MM_FROUND_CUR_DIRECTION); })
   5640 
   5641 #define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \
   5642   (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
   5643                                           (__v2df)(__m128d)(B), \
   5644                                           (__v2di)(__m128i)(C), (int)(imm), \
   5645                                           (__mmask8)-1, (int)(R)); })
   5646 
   5647 #define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \
   5648   (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
   5649                                           (__v2df)(__m128d)(B), \
   5650                                           (__v2di)(__m128i)(C), (int)(imm), \
   5651                                           (__mmask8)(U), (int)(R)); })
   5652 
   5653 #define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \
   5654   (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
   5655                                           (__v2df)(__m128d)(B), \
   5656                                           (__v2di)(__m128i)(C), (int)(imm), \
   5657                                           (__mmask8)-1, \
   5658                                           _MM_FROUND_CUR_DIRECTION); })
   5659 
   5660 #define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \
   5661   (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
   5662                                           (__v2df)(__m128d)(B), \
   5663                                           (__v2di)(__m128i)(C), (int)(imm), \
   5664                                           (__mmask8)(U), \
   5665                                           _MM_FROUND_CUR_DIRECTION); })
   5666 
   5667 #define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \
   5668   (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
   5669                                            (__v2df)(__m128d)(B), \
   5670                                            (__v2di)(__m128i)(C), (int)(imm), \
   5671                                            (__mmask8)(U), (int)(R)); })
   5672 
   5673 #define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \
   5674   (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
   5675                                            (__v2df)(__m128d)(B), \
   5676                                            (__v2di)(__m128i)(C), (int)(imm), \
   5677                                            (__mmask8)(U), \
   5678                                            _MM_FROUND_CUR_DIRECTION); })
   5679 
   5680 #define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \
   5681   (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
   5682                                          (__v4sf)(__m128)(B), \
   5683                                          (__v4si)(__m128i)(C), (int)(imm), \
   5684                                          (__mmask8)-1, (int)(R)); })
   5685 
   5686 #define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \
   5687   (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
   5688                                          (__v4sf)(__m128)(B), \
   5689                                          (__v4si)(__m128i)(C), (int)(imm), \
   5690                                          (__mmask8)(U), (int)(R)); })
   5691 
   5692 #define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \
   5693   (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
   5694                                          (__v4sf)(__m128)(B), \
   5695                                          (__v4si)(__m128i)(C), (int)(imm), \
   5696                                          (__mmask8)-1, \
   5697                                          _MM_FROUND_CUR_DIRECTION); })
   5698 
   5699 #define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \
   5700   (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
   5701                                          (__v4sf)(__m128)(B), \
   5702                                          (__v4si)(__m128i)(C), (int)(imm), \
   5703                                          (__mmask8)(U), \
   5704                                          _MM_FROUND_CUR_DIRECTION); })
   5705 
   5706 #define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \
   5707   (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
   5708                                           (__v4sf)(__m128)(B), \
   5709                                           (__v4si)(__m128i)(C), (int)(imm), \
   5710                                           (__mmask8)(U), (int)(R)); })
   5711 
   5712 #define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \
   5713   (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
   5714                                           (__v4sf)(__m128)(B), \
   5715                                           (__v4si)(__m128i)(C), (int)(imm), \
   5716                                           (__mmask8)(U), \
   5717                                           _MM_FROUND_CUR_DIRECTION); })
   5718 
   5719 #define _mm_getexp_round_sd(A, B, R) __extension__ ({ \
   5720   (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
   5721                                                  (__v2df)(__m128d)(B), \
   5722                                                  (__v2df)_mm_setzero_pd(), \
   5723                                                  (__mmask8)-1, (int)(R)); })
   5724 
   5725 
   5726 static __inline__ __m128d __DEFAULT_FN_ATTRS
   5727 _mm_getexp_sd (__m128d __A, __m128d __B)
   5728 {
   5729   return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
   5730                  (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
   5731 }
   5732 
   5733 static __inline__ __m128d __DEFAULT_FN_ATTRS
   5734 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   5735 {
   5736  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
   5737           (__v2df) __B,
   5738           (__v2df) __W,
   5739           (__mmask8) __U,
   5740           _MM_FROUND_CUR_DIRECTION);
   5741 }
   5742 
   5743 #define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({\
   5744   (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
   5745                                                  (__v2df)(__m128d)(B), \
   5746                                                  (__v2df)(__m128d)(W), \
   5747                                                  (__mmask8)(U), (int)(R)); })
   5748 
   5749 static __inline__ __m128d __DEFAULT_FN_ATTRS
   5750 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
   5751 {
   5752  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
   5753           (__v2df) __B,
   5754           (__v2df) _mm_setzero_pd (),
   5755           (__mmask8) __U,
   5756           _MM_FROUND_CUR_DIRECTION);
   5757 }
   5758 
   5759 #define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({\
   5760   (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
   5761                                                  (__v2df)(__m128d)(B), \
   5762                                                  (__v2df)_mm_setzero_pd(), \
   5763                                                  (__mmask8)(U), (int)(R)); })
   5764 
   5765 #define _mm_getexp_round_ss(A, B, R) __extension__ ({ \
   5766   (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
   5767                                                 (__v4sf)(__m128)(B), \
   5768                                                 (__v4sf)_mm_setzero_ps(), \
   5769                                                 (__mmask8)-1, (int)(R)); })
   5770 
   5771 static __inline__ __m128 __DEFAULT_FN_ATTRS
   5772 _mm_getexp_ss (__m128 __A, __m128 __B)
   5773 {
   5774   return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
   5775                 (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
   5776 }
   5777 
   5778 static __inline__ __m128 __DEFAULT_FN_ATTRS
   5779 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   5780 {
   5781  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
   5782           (__v4sf) __B,
   5783           (__v4sf) __W,
   5784           (__mmask8) __U,
   5785           _MM_FROUND_CUR_DIRECTION);
   5786 }
   5787 
   5788 #define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({\
   5789   (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
   5790                                                 (__v4sf)(__m128)(B), \
   5791                                                 (__v4sf)(__m128)(W), \
   5792                                                 (__mmask8)(U), (int)(R)); })
   5793 
   5794 static __inline__ __m128 __DEFAULT_FN_ATTRS
   5795 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
   5796 {
   5797  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
   5798           (__v4sf) __B,
   5799           (__v4sf) _mm_setzero_pd (),
   5800           (__mmask8) __U,
   5801           _MM_FROUND_CUR_DIRECTION);
   5802 }
   5803 
   5804 #define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({\
   5805   (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
   5806                                                 (__v4sf)(__m128)(B), \
   5807                                                 (__v4sf)_mm_setzero_ps(), \
   5808                                                 (__mmask8)(U), (int)(R)); })
   5809 
   5810 #define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \
   5811   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   5812                                                (__v2df)(__m128d)(B), \
   5813                                                (int)(((D)<<2) | (C)), \
   5814                                                (__v2df)_mm_setzero_pd(), \
   5815                                                (__mmask8)-1, (int)(R)); })
   5816 
   5817 #define _mm_getmant_sd(A, B, C, D)  __extension__ ({ \
   5818   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   5819                                                (__v2df)(__m128d)(B), \
   5820                                                (int)(((D)<<2) | (C)), \
   5821                                                (__v2df)_mm_setzero_pd(), \
   5822                                                (__mmask8)-1, \
   5823                                                _MM_FROUND_CUR_DIRECTION); })
   5824 
   5825 #define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({\
   5826   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   5827                                                (__v2df)(__m128d)(B), \
   5828                                                (int)(((D)<<2) | (C)), \
   5829                                                (__v2df)(__m128d)(W), \
   5830                                                (__mmask8)(U), \
   5831                                                _MM_FROUND_CUR_DIRECTION); })
   5832 
   5833 #define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R)({\
   5834   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   5835                                                (__v2df)(__m128d)(B), \
   5836                                                (int)(((D)<<2) | (C)), \
   5837                                                (__v2df)(__m128d)(W), \
   5838                                                (__mmask8)(U), (int)(R)); })
   5839 
   5840 #define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({\
   5841   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   5842                                                (__v2df)(__m128d)(B), \
   5843                                                (int)(((D)<<2) | (C)), \
   5844                                                (__v2df)_mm_setzero_pd(), \
   5845                                                (__mmask8)(U), \
   5846                                                _MM_FROUND_CUR_DIRECTION); })
   5847 
   5848 #define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({\
   5849   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
   5850                                                (__v2df)(__m128d)(B), \
   5851                                                (int)(((D)<<2) | (C)), \
   5852                                                (__v2df)_mm_setzero_pd(), \
   5853                                                (__mmask8)(U), (int)(R)); })
   5854 
   5855 #define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \
   5856   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   5857                                               (__v4sf)(__m128)(B), \
   5858                                               (int)(((D)<<2) | (C)), \
   5859                                               (__v4sf)_mm_setzero_ps(), \
   5860                                               (__mmask8)-1, (int)(R)); })
   5861 
   5862 #define _mm_getmant_ss(A, B, C, D) __extension__ ({ \
   5863   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   5864                                               (__v4sf)(__m128)(B), \
   5865                                               (int)(((D)<<2) | (C)), \
   5866                                               (__v4sf)_mm_setzero_ps(), \
   5867                                               (__mmask8)-1, \
   5868                                               _MM_FROUND_CUR_DIRECTION); })
   5869 
   5870 #define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({\
   5871   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   5872                                               (__v4sf)(__m128)(B), \
   5873                                               (int)(((D)<<2) | (C)), \
   5874                                               (__v4sf)(__m128)(W), \
   5875                                               (__mmask8)(U), \
   5876                                               _MM_FROUND_CUR_DIRECTION); })
   5877 
   5878 #define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R)({\
   5879   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   5880                                               (__v4sf)(__m128)(B), \
   5881                                               (int)(((D)<<2) | (C)), \
   5882                                               (__v4sf)(__m128)(W), \
   5883                                               (__mmask8)(U), (int)(R)); })
   5884 
   5885 #define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\
   5886   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   5887                                               (__v4sf)(__m128)(B), \
   5888                                               (int)(((D)<<2) | (C)), \
   5889                                               (__v4sf)_mm_setzero_pd(), \
   5890                                               (__mmask8)(U), \
   5891                                               _MM_FROUND_CUR_DIRECTION); })
   5892 
   5893 #define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({\
   5894   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
   5895                                               (__v4sf)(__m128)(B), \
   5896                                               (int)(((D)<<2) | (C)), \
   5897                                               (__v4sf)_mm_setzero_ps(), \
   5898                                               (__mmask8)(U), (int)(R)); })
   5899 
   5900 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
   5901 _mm512_kmov (__mmask16 __A)
   5902 {
   5903   return  __A;
   5904 }
   5905 
   5906 #define _mm_comi_round_sd(A, B, P, R) __extension__ ({\
   5907   (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
   5908                               (int)(P), (int)(R)); })
   5909 
   5910 #define _mm_comi_round_ss(A, B, P, R) __extension__ ({\
   5911   (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
   5912                               (int)(P), (int)(R)); })
   5913 
   5914 #define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
   5915   (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
   5916 
   5917 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5918 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
   5919          __mmask16 __U, __m512i __B)
   5920 {
   5921   return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
   5922                    (__v16si) __I
   5923                    /* idx */ ,
   5924                    (__v16si) __B,
   5925                    (__mmask16) __U);
   5926 }
   5927 
   5928 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5929 _mm512_sll_epi32 (__m512i __A, __m128i __B)
   5930 {
   5931   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
   5932              (__v4si) __B,
   5933              (__v16si)
   5934              _mm512_setzero_si512 (),
   5935              (__mmask16) -1);
   5936 }
   5937 
   5938 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5939 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
   5940 {
   5941   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
   5942              (__v4si) __B,
   5943              (__v16si) __W,
   5944              (__mmask16) __U);
   5945 }
   5946 
   5947 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5948 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
   5949 {
   5950   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
   5951              (__v4si) __B,
   5952              (__v16si)
   5953              _mm512_setzero_si512 (),
   5954              (__mmask16) __U);
   5955 }
   5956 
   5957 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5958 _mm512_sll_epi64 (__m512i __A, __m128i __B)
   5959 {
   5960   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
   5961              (__v2di) __B,
   5962              (__v8di)
   5963              _mm512_setzero_si512 (),
   5964              (__mmask8) -1);
   5965 }
   5966 
   5967 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5968 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
   5969 {
   5970   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
   5971              (__v2di) __B,
   5972              (__v8di) __W,
   5973              (__mmask8) __U);
   5974 }
   5975 
   5976 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5977 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
   5978 {
   5979   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
   5980              (__v2di) __B,
   5981              (__v8di)
   5982              _mm512_setzero_si512 (),
   5983              (__mmask8) __U);
   5984 }
   5985 
   5986 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5987 _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
   5988 {
   5989   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
   5990               (__v16si) __Y,
   5991               (__v16si)
   5992               _mm512_setzero_si512 (),
   5993               (__mmask16) -1);
   5994 }
   5995 
   5996 static __inline__ __m512i __DEFAULT_FN_ATTRS
   5997 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
   5998 {
   5999   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
   6000               (__v16si) __Y,
   6001               (__v16si) __W,
   6002               (__mmask16) __U);
   6003 }
   6004 
   6005 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6006 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
   6007 {
   6008   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
   6009               (__v16si) __Y,
   6010               (__v16si)
   6011               _mm512_setzero_si512 (),
   6012               (__mmask16) __U);
   6013 }
   6014 
   6015 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6016 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
   6017 {
   6018   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
   6019              (__v8di) __Y,
   6020              (__v8di)
   6021              _mm512_undefined_pd (),
   6022              (__mmask8) -1);
   6023 }
   6024 
   6025 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6026 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
   6027 {
   6028   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
   6029              (__v8di) __Y,
   6030              (__v8di) __W,
   6031              (__mmask8) __U);
   6032 }
   6033 
   6034 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6035 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
   6036 {
   6037   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
   6038              (__v8di) __Y,
   6039              (__v8di)
   6040              _mm512_setzero_si512 (),
   6041              (__mmask8) __U);
   6042 }
   6043 
   6044 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6045 _mm512_sra_epi32 (__m512i __A, __m128i __B)
   6046 {
   6047   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
   6048              (__v4si) __B,
   6049              (__v16si)
   6050              _mm512_setzero_si512 (),
   6051              (__mmask16) -1);
   6052 }
   6053 
   6054 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6055 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
   6056 {
   6057   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
   6058              (__v4si) __B,
   6059              (__v16si) __W,
   6060              (__mmask16) __U);
   6061 }
   6062 
   6063 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6064 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
   6065 {
   6066   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
   6067              (__v4si) __B,
   6068              (__v16si)
   6069              _mm512_setzero_si512 (),
   6070              (__mmask16) __U);
   6071 }
   6072 
   6073 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6074 _mm512_sra_epi64 (__m512i __A, __m128i __B)
   6075 {
   6076   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
   6077              (__v2di) __B,
   6078              (__v8di)
   6079              _mm512_setzero_si512 (),
   6080              (__mmask8) -1);
   6081 }
   6082 
   6083 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6084 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
   6085 {
   6086   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
   6087              (__v2di) __B,
   6088              (__v8di) __W,
   6089              (__mmask8) __U);
   6090 }
   6091 
   6092 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6093 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
   6094 {
   6095   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
   6096              (__v2di) __B,
   6097              (__v8di)
   6098              _mm512_setzero_si512 (),
   6099              (__mmask8) __U);
   6100 }
   6101 
   6102 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6103 _mm512_srav_epi32 (__m512i __X, __m512i __Y)
   6104 {
   6105   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
   6106               (__v16si) __Y,
   6107               (__v16si)
   6108               _mm512_setzero_si512 (),
   6109               (__mmask16) -1);
   6110 }
   6111 
   6112 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6113 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
   6114 {
   6115   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
   6116               (__v16si) __Y,
   6117               (__v16si) __W,
   6118               (__mmask16) __U);
   6119 }
   6120 
   6121 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6122 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
   6123 {
   6124   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
   6125               (__v16si) __Y,
   6126               (__v16si)
   6127               _mm512_setzero_si512 (),
   6128               (__mmask16) __U);
   6129 }
   6130 
   6131 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6132 _mm512_srav_epi64 (__m512i __X, __m512i __Y)
   6133 {
   6134   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
   6135              (__v8di) __Y,
   6136              (__v8di)
   6137              _mm512_setzero_si512 (),
   6138              (__mmask8) -1);
   6139 }
   6140 
   6141 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6142 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
   6143 {
   6144   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
   6145              (__v8di) __Y,
   6146              (__v8di) __W,
   6147              (__mmask8) __U);
   6148 }
   6149 
   6150 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6151 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
   6152 {
   6153   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
   6154              (__v8di) __Y,
   6155              (__v8di)
   6156              _mm512_setzero_si512 (),
   6157              (__mmask8) __U);
   6158 }
   6159 
   6160 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6161 _mm512_srl_epi32 (__m512i __A, __m128i __B)
   6162 {
   6163   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
   6164              (__v4si) __B,
   6165              (__v16si)
   6166              _mm512_setzero_si512 (),
   6167              (__mmask16) -1);
   6168 }
   6169 
   6170 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6171 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
   6172 {
   6173   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
   6174              (__v4si) __B,
   6175              (__v16si) __W,
   6176              (__mmask16) __U);
   6177 }
   6178 
   6179 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6180 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
   6181 {
   6182   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
   6183              (__v4si) __B,
   6184              (__v16si)
   6185              _mm512_setzero_si512 (),
   6186              (__mmask16) __U);
   6187 }
   6188 
   6189 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6190 _mm512_srl_epi64 (__m512i __A, __m128i __B)
   6191 {
   6192   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
   6193              (__v2di) __B,
   6194              (__v8di)
   6195              _mm512_setzero_si512 (),
   6196              (__mmask8) -1);
   6197 }
   6198 
   6199 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6200 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
   6201 {
   6202   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
   6203              (__v2di) __B,
   6204              (__v8di) __W,
   6205              (__mmask8) __U);
   6206 }
   6207 
   6208 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6209 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
   6210 {
   6211   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
   6212              (__v2di) __B,
   6213              (__v8di)
   6214              _mm512_setzero_si512 (),
   6215              (__mmask8) __U);
   6216 }
   6217 
   6218 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6219 _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
   6220 {
   6221   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
   6222               (__v16si) __Y,
   6223               (__v16si)
   6224               _mm512_setzero_si512 (),
   6225               (__mmask16) -1);
   6226 }
   6227 
   6228 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6229 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
   6230 {
   6231   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
   6232               (__v16si) __Y,
   6233               (__v16si) __W,
   6234               (__mmask16) __U);
   6235 }
   6236 
   6237 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6238 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
   6239 {
   6240   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
   6241               (__v16si) __Y,
   6242               (__v16si)
   6243               _mm512_setzero_si512 (),
   6244               (__mmask16) __U);
   6245 }
   6246 
   6247 static __inline__ __m512i __DEFAULT_FN_ATTRS
   6248 _mm512_srlv_epi64 (__m512i