/* (Code-browser navigation residue: Home | History | Annotate | Download | only in include) */
/* Copyright (C) 2013-2014 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

     24 #ifndef _IMMINTRIN_H_INCLUDED
     25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
     26 #endif
     27 
     28 #ifndef _AVX512FINTRIN_H_INCLUDED
     29 #define _AVX512FINTRIN_H_INCLUDED
     30 
     31 #ifndef __AVX512F__
     32 #pragma GCC push_options
     33 #pragma GCC target("avx512f")
     34 #define __DISABLE_AVX512F__
     35 #endif /* __AVX512F__ */
     36 
     37 /* Internal data types for implementing the intrinsics.  */
     38 typedef double __v8df __attribute__ ((__vector_size__ (64)));
     39 typedef float __v16sf __attribute__ ((__vector_size__ (64)));
     40 typedef long long __v8di __attribute__ ((__vector_size__ (64)));
     41 typedef int __v16si __attribute__ ((__vector_size__ (64)));
     42 typedef short __v32hi __attribute__ ((__vector_size__ (64)));
     43 typedef char __v64qi __attribute__ ((__vector_size__ (64)));
     44 
     45 /* The Intel API is flexible enough that we must allow aliasing with other
     46    vector types, and their scalar components.  */
     47 typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
     48 typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
     49 typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
     50 
     51 typedef unsigned char  __mmask8;
     52 typedef unsigned short __mmask16;
     53 
     54 extern __inline __m512i
     55 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     56 _mm512_set_epi64 (long long __A, long long __B, long long __C,
     57 		  long long __D, long long __E, long long __F,
     58 		  long long __G, long long __H)
     59 {
     60   return __extension__ (__m512i) (__v8di)
     61 	 { __H, __G, __F, __E, __D, __C, __B, __A };
     62 }
     63 
     64 /* Create the vector [A B C D E F G H I J K L M N O P].  */
     65 extern __inline __m512i
     66 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     67 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
     68 		  int __E, int __F, int __G, int __H,
     69 		  int __I, int __J, int __K, int __L,
     70 		  int __M, int __N, int __O, int __P)
     71 {
     72   return __extension__ (__m512i)(__v16si)
     73 	 { __P, __O, __N, __M, __L, __K, __J, __I,
     74 	   __H, __G, __F, __E, __D, __C, __B, __A };
     75 }
     76 
     77 extern __inline __m512d
     78 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     79 _mm512_set_pd (double __A, double __B, double __C, double __D,
     80 	       double __E, double __F, double __G, double __H)
     81 {
     82   return __extension__ (__m512d)
     83 	 { __H, __G, __F, __E, __D, __C, __B, __A };
     84 }
     85 
     86 extern __inline __m512
     87 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     88 _mm512_set_ps (float __A, float __B, float __C, float __D,
     89 	       float __E, float __F, float __G, float __H,
     90 	       float __I, float __J, float __K, float __L,
     91 	       float __M, float __N, float __O, float __P)
     92 {
     93   return __extension__ (__m512)
     94 	 { __P, __O, __N, __M, __L, __K, __J, __I,
     95 	   __H, __G, __F, __E, __D, __C, __B, __A };
     96 }
     97 
     98 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)			      \
     99   _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)
    100 
    101 #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,			      \
    102 			  e8,e9,e10,e11,e12,e13,e14,e15)		      \
    103   _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
    104 
    105 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)				      \
    106   _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)
    107 
    108 #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
    109   _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
    110 
    111 extern __inline __m512
    112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    113 _mm512_undefined_ps (void)
    114 {
    115   __m512 __Y = __Y;
    116   return __Y;
    117 }
    118 
    119 extern __inline __m512d
    120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    121 _mm512_undefined_pd (void)
    122 {
    123   __m512d __Y = __Y;
    124   return __Y;
    125 }
    126 
    127 extern __inline __m512i
    128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    129 _mm512_undefined_si512 (void)
    130 {
    131   __m512i __Y = __Y;
    132   return __Y;
    133 }
    134 
    135 extern __inline __m512i
    136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    137 _mm512_set1_epi8 (char __A)
    138 {
    139   return __extension__ (__m512i)(__v64qi)
    140 	 { __A, __A, __A, __A, __A, __A, __A, __A,
    141 	   __A, __A, __A, __A, __A, __A, __A, __A,
    142 	   __A, __A, __A, __A, __A, __A, __A, __A,
    143 	   __A, __A, __A, __A, __A, __A, __A, __A,
    144 	   __A, __A, __A, __A, __A, __A, __A, __A,
    145 	   __A, __A, __A, __A, __A, __A, __A, __A,
    146 	   __A, __A, __A, __A, __A, __A, __A, __A,
    147 	   __A, __A, __A, __A, __A, __A, __A, __A };
    148 }
    149 
    150 extern __inline __m512i
    151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    152 _mm512_set1_epi16 (short __A)
    153 {
    154   return __extension__ (__m512i)(__v32hi)
    155 	 { __A, __A, __A, __A, __A, __A, __A, __A,
    156 	   __A, __A, __A, __A, __A, __A, __A, __A,
    157 	   __A, __A, __A, __A, __A, __A, __A, __A,
    158 	   __A, __A, __A, __A, __A, __A, __A, __A };
    159 }
    160 
    161 extern __inline __m512d
    162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    163 _mm512_set1_pd (double __A)
    164 {
    165   return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
    166 						  (__v2df) { __A, },
    167 						  (__v8df)
    168 						  _mm512_undefined_pd (),
    169 						  (__mmask8) -1);
    170 }
    171 
    172 extern __inline __m512
    173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    174 _mm512_set1_ps (float __A)
    175 {
    176   return (__m512) __builtin_ia32_broadcastss512 (__extension__
    177 						 (__v4sf) { __A, },
    178 						 (__v16sf)
    179 						 _mm512_undefined_ps (),
    180 						 (__mmask16) -1);
    181 }
    182 
    183 /* Create the vector [A B C D A B C D A B C D A B C D].  */
    184 extern __inline __m512i
    185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    186 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
    187 {
    188   return __extension__ (__m512i)(__v16si)
    189 	 { __D, __C, __B, __A, __D, __C, __B, __A,
    190 	   __D, __C, __B, __A, __D, __C, __B, __A };
    191 }
    192 
    193 extern __inline __m512i
    194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    195 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
    196 		   long long __D)
    197 {
    198   return __extension__ (__m512i) (__v8di)
    199 	 { __D, __C, __B, __A, __D, __C, __B, __A };
    200 }
    201 
    202 extern __inline __m512d
    203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    204 _mm512_set4_pd (double __A, double __B, double __C, double __D)
    205 {
    206   return __extension__ (__m512d)
    207 	 { __D, __C, __B, __A, __D, __C, __B, __A };
    208 }
    209 
    210 extern __inline __m512
    211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    212 _mm512_set4_ps (float __A, float __B, float __C, float __D)
    213 {
    214   return __extension__ (__m512)
    215 	 { __D, __C, __B, __A, __D, __C, __B, __A,
    216 	   __D, __C, __B, __A, __D, __C, __B, __A };
    217 }
    218 
    219 #define _mm512_setr4_epi64(e0,e1,e2,e3)					      \
    220   _mm512_set4_epi64(e3,e2,e1,e0)
    221 
    222 #define _mm512_setr4_epi32(e0,e1,e2,e3)					      \
    223   _mm512_set4_epi32(e3,e2,e1,e0)
    224 
    225 #define _mm512_setr4_pd(e0,e1,e2,e3)					      \
    226   _mm512_set4_pd(e3,e2,e1,e0)
    227 
    228 #define _mm512_setr4_ps(e0,e1,e2,e3)					      \
    229   _mm512_set4_ps(e3,e2,e1,e0)
    230 
    231 extern __inline __m512
    232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    233 _mm512_setzero_ps (void)
    234 {
    235   return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    236 				 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
    237 }
    238 
    239 extern __inline __m512d
    240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    241 _mm512_setzero_pd (void)
    242 {
    243   return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
    244 }
    245 
    246 extern __inline __m512i
    247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    248 _mm512_setzero_epi32 (void)
    249 {
    250   return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
    251 }
    252 
    253 extern __inline __m512i
    254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    255 _mm512_setzero_si512 (void)
    256 {
    257   return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
    258 }
    259 
    260 extern __inline __m512d
    261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    262 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
    263 {
    264   return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
    265 						  (__v8df) __W,
    266 						  (__mmask8) __U);
    267 }
    268 
    269 extern __inline __m512d
    270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    271 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
    272 {
    273   return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
    274 						  (__v8df)
    275 						  _mm512_setzero_pd (),
    276 						  (__mmask8) __U);
    277 }
    278 
    279 extern __inline __m512
    280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    281 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
    282 {
    283   return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
    284 						 (__v16sf) __W,
    285 						 (__mmask16) __U);
    286 }
    287 
    288 extern __inline __m512
    289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    290 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
    291 {
    292   return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
    293 						 (__v16sf)
    294 						 _mm512_setzero_ps (),
    295 						 (__mmask16) __U);
    296 }
    297 
    298 extern __inline __m512d
    299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    300 _mm512_load_pd (void const *__P)
    301 {
    302   return *(__m512d *) __P;
    303 }
    304 
    305 extern __inline __m512d
    306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    307 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
    308 {
    309   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
    310 						   (__v8df) __W,
    311 						   (__mmask8) __U);
    312 }
    313 
    314 extern __inline __m512d
    315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    316 _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
    317 {
    318   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
    319 						   (__v8df)
    320 						   _mm512_setzero_pd (),
    321 						   (__mmask8) __U);
    322 }
    323 
    324 extern __inline void
    325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    326 _mm512_store_pd (void *__P, __m512d __A)
    327 {
    328   *(__m512d *) __P = __A;
    329 }
    330 
    331 extern __inline void
    332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    333 _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
    334 {
    335   __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
    336 				   (__mmask8) __U);
    337 }
    338 
    339 extern __inline __m512
    340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    341 _mm512_load_ps (void const *__P)
    342 {
    343   return *(__m512 *) __P;
    344 }
    345 
    346 extern __inline __m512
    347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    348 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
    349 {
    350   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
    351 						  (__v16sf) __W,
    352 						  (__mmask16) __U);
    353 }
    354 
    355 extern __inline __m512
    356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    357 _mm512_maskz_load_ps (__mmask16 __U, void const *__P)
    358 {
    359   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
    360 						  (__v16sf)
    361 						  _mm512_setzero_ps (),
    362 						  (__mmask16) __U);
    363 }
    364 
    365 extern __inline void
    366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    367 _mm512_store_ps (void *__P, __m512 __A)
    368 {
    369   *(__m512 *) __P = __A;
    370 }
    371 
    372 extern __inline void
    373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    374 _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
    375 {
    376   __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
    377 				   (__mmask16) __U);
    378 }
    379 
    380 extern __inline __m512i
    381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    382 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
    383 {
    384   return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
    385 						     (__v8di) __W,
    386 						     (__mmask8) __U);
    387 }
    388 
    389 extern __inline __m512i
    390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    391 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
    392 {
    393   return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
    394 						     (__v8di)
    395 						     _mm512_setzero_si512 (),
    396 						     (__mmask8) __U);
    397 }
    398 
    399 extern __inline __m512i
    400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    401 _mm512_load_epi64 (void const *__P)
    402 {
    403   return *(__m512i *) __P;
    404 }
    405 
    406 extern __inline __m512i
    407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    408 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
    409 {
    410   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
    411 							(__v8di) __W,
    412 							(__mmask8) __U);
    413 }
    414 
    415 extern __inline __m512i
    416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    417 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
    418 {
    419   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
    420 							(__v8di)
    421 							_mm512_setzero_si512 (),
    422 							(__mmask8) __U);
    423 }
    424 
    425 extern __inline void
    426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    427 _mm512_store_epi64 (void *__P, __m512i __A)
    428 {
    429   *(__m512i *) __P = __A;
    430 }
    431 
    432 extern __inline void
    433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    434 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
    435 {
    436   __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
    437 					(__mmask8) __U);
    438 }
    439 
    440 extern __inline __m512i
    441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    442 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
    443 {
    444   return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
    445 						     (__v16si) __W,
    446 						     (__mmask16) __U);
    447 }
    448 
    449 extern __inline __m512i
    450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    451 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
    452 {
    453   return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
    454 						     (__v16si)
    455 						     _mm512_setzero_si512 (),
    456 						     (__mmask16) __U);
    457 }
    458 
    459 extern __inline __m512i
    460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    461 _mm512_load_si512 (void const *__P)
    462 {
    463   return *(__m512i *) __P;
    464 }
    465 
    466 extern __inline __m512i
    467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    468 _mm512_load_epi32 (void const *__P)
    469 {
    470   return *(__m512i *) __P;
    471 }
    472 
    473 extern __inline __m512i
    474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    475 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
    476 {
    477   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
    478 							(__v16si) __W,
    479 							(__mmask16) __U);
    480 }
    481 
    482 extern __inline __m512i
    483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    484 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
    485 {
    486   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
    487 							(__v16si)
    488 							_mm512_setzero_si512 (),
    489 							(__mmask16) __U);
    490 }
    491 
    492 extern __inline void
    493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    494 _mm512_store_si512 (void *__P, __m512i __A)
    495 {
    496   *(__m512i *) __P = __A;
    497 }
    498 
    499 extern __inline void
    500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    501 _mm512_store_epi32 (void *__P, __m512i __A)
    502 {
    503   *(__m512i *) __P = __A;
    504 }
    505 
    506 extern __inline void
    507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    508 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
    509 {
    510   __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
    511 					(__mmask16) __U);
    512 }
    513 
    514 extern __inline __m512i
    515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    516 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
    517 {
    518   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
    519 						  (__v16si) __B,
    520 						  (__v16si)
    521 						  _mm512_undefined_si512 (),
    522 						  (__mmask16) -1);
    523 }
    524 
    525 extern __inline __m512i
    526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    527 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
    528 {
    529   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
    530 						  (__v16si) __B,
    531 						  (__v16si)
    532 						  _mm512_setzero_si512 (),
    533 						  __M);
    534 }
    535 
    536 extern __inline __m512i
    537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    538 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
    539 {
    540   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
    541 						  (__v16si) __B,
    542 						  (__v16si) __W, __M);
    543 }
    544 
    545 extern __inline __m512i
    546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    547 _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
    548 {
    549   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
    550 						  (__v16si) __Y,
    551 						  (__v16si)
    552 						  _mm512_undefined_si512 (),
    553 						  (__mmask16) -1);
    554 }
    555 
    556 extern __inline __m512i
    557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    558 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
    559 {
    560   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
    561 						  (__v16si) __Y,
    562 						  (__v16si) __W,
    563 						  (__mmask16) __U);
    564 }
    565 
    566 extern __inline __m512i
    567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    568 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
    569 {
    570   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
    571 						  (__v16si) __Y,
    572 						  (__v16si)
    573 						  _mm512_setzero_si512 (),
    574 						  (__mmask16) __U);
    575 }
    576 
    577 extern __inline __m512i
    578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    579 _mm512_srav_epi32 (__m512i __X, __m512i __Y)
    580 {
    581   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
    582 						  (__v16si) __Y,
    583 						  (__v16si)
    584 						  _mm512_undefined_si512 (),
    585 						  (__mmask16) -1);
    586 }
    587 
    588 extern __inline __m512i
    589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    590 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
    591 {
    592   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
    593 						  (__v16si) __Y,
    594 						  (__v16si) __W,
    595 						  (__mmask16) __U);
    596 }
    597 
    598 extern __inline __m512i
    599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    600 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
    601 {
    602   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
    603 						  (__v16si) __Y,
    604 						  (__v16si)
    605 						  _mm512_setzero_si512 (),
    606 						  (__mmask16) __U);
    607 }
    608 
    609 extern __inline __m512i
    610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    611 _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
    612 {
    613   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
    614 						  (__v16si) __Y,
    615 						  (__v16si)
    616 						  _mm512_undefined_si512 (),
    617 						  (__mmask16) -1);
    618 }
    619 
    620 extern __inline __m512i
    621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    622 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
    623 {
    624   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
    625 						  (__v16si) __Y,
    626 						  (__v16si) __W,
    627 						  (__mmask16) __U);
    628 }
    629 
    630 extern __inline __m512i
    631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    632 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
    633 {
    634   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
    635 						  (__v16si) __Y,
    636 						  (__v16si)
    637 						  _mm512_setzero_si512 (),
    638 						  (__mmask16) __U);
    639 }
    640 
    641 extern __inline __m512i
    642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    643 _mm512_add_epi64 (__m512i __A, __m512i __B)
    644 {
    645   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
    646 						 (__v8di) __B,
    647 						 (__v8di)
    648 						 _mm512_undefined_si512 (),
    649 						 (__mmask8) -1);
    650 }
    651 
    652 extern __inline __m512i
    653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    654 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
    655 {
    656   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
    657 						 (__v8di) __B,
    658 						 (__v8di) __W,
    659 						 (__mmask8) __U);
    660 }
    661 
    662 extern __inline __m512i
    663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    664 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
    665 {
    666   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
    667 						 (__v8di) __B,
    668 						 (__v8di)
    669 						 _mm512_setzero_si512 (),
    670 						 (__mmask8) __U);
    671 }
    672 
    673 extern __inline __m512i
    674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    675 _mm512_sub_epi64 (__m512i __A, __m512i __B)
    676 {
    677   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
    678 						 (__v8di) __B,
    679 						 (__v8di)
    680 						 _mm512_undefined_pd (),
    681 						 (__mmask8) -1);
    682 }
    683 
    684 extern __inline __m512i
    685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    686 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
    687 {
    688   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
    689 						 (__v8di) __B,
    690 						 (__v8di) __W,
    691 						 (__mmask8) __U);
    692 }
    693 
    694 extern __inline __m512i
    695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    696 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
    697 {
    698   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
    699 						 (__v8di) __B,
    700 						 (__v8di)
    701 						 _mm512_setzero_si512 (),
    702 						 (__mmask8) __U);
    703 }
    704 
    705 extern __inline __m512i
    706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    707 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
    708 {
    709   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
    710 						 (__v8di) __Y,
    711 						 (__v8di)
    712 						 _mm512_undefined_pd (),
    713 						 (__mmask8) -1);
    714 }
    715 
    716 extern __inline __m512i
    717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    718 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
    719 {
    720   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
    721 						 (__v8di) __Y,
    722 						 (__v8di) __W,
    723 						 (__mmask8) __U);
    724 }
    725 
    726 extern __inline __m512i
    727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    728 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
    729 {
    730   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
    731 						 (__v8di) __Y,
    732 						 (__v8di)
    733 						 _mm512_setzero_si512 (),
    734 						 (__mmask8) __U);
    735 }
    736 
    737 extern __inline __m512i
    738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    739 _mm512_srav_epi64 (__m512i __X, __m512i __Y)
    740 {
    741   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
    742 						 (__v8di) __Y,
    743 						 (__v8di)
    744 						 _mm512_undefined_si512 (),
    745 						 (__mmask8) -1);
    746 }
    747 
    748 extern __inline __m512i
    749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    750 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
    751 {
    752   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
    753 						 (__v8di) __Y,
    754 						 (__v8di) __W,
    755 						 (__mmask8) __U);
    756 }
    757 
    758 extern __inline __m512i
    759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    760 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
    761 {
    762   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
    763 						 (__v8di) __Y,
    764 						 (__v8di)
    765 						 _mm512_setzero_si512 (),
    766 						 (__mmask8) __U);
    767 }
    768 
    769 extern __inline __m512i
    770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    771 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
    772 {
    773   return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
    774 						 (__v8di) __Y,
    775 						 (__v8di)
    776 						 _mm512_undefined_si512 (),
    777 						 (__mmask8) -1);
    778 }
    779 
    780 extern __inline __m512i
    781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    782 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
    783 {
    784   return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
    785 						 (__v8di) __Y,
    786 						 (__v8di) __W,
    787 						 (__mmask8) __U);
    788 }
    789 
    790 extern __inline __m512i
    791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    792 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
    793 {
    794   return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
    795 						 (__v8di) __Y,
    796 						 (__v8di)
    797 						 _mm512_setzero_si512 (),
    798 						 (__mmask8) __U);
    799 }
    800 
    801 extern __inline __m512i
    802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    803 _mm512_add_epi32 (__m512i __A, __m512i __B)
    804 {
    805   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
    806 						 (__v16si) __B,
    807 						 (__v16si)
    808 						 _mm512_undefined_si512 (),
    809 						 (__mmask16) -1);
    810 }
    811 
    812 extern __inline __m512i
    813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    814 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
    815 {
    816   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
    817 						 (__v16si) __B,
    818 						 (__v16si) __W,
    819 						 (__mmask16) __U);
    820 }
    821 
    822 extern __inline __m512i
    823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    824 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
    825 {
    826   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
    827 						 (__v16si) __B,
    828 						 (__v16si)
    829 						 _mm512_setzero_si512 (),
    830 						 (__mmask16) __U);
    831 }
    832 
    833 extern __inline __m512i
    834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    835 _mm512_mul_epi32 (__m512i __X, __m512i __Y)
    836 {
    837   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
    838 						  (__v16si) __Y,
    839 						  (__v8di)
    840 						  _mm512_undefined_si512 (),
    841 						  (__mmask8) -1);
    842 }
    843 
    844 extern __inline __m512i
    845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    846 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
    847 {
    848   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
    849 						  (__v16si) __Y,
    850 						  (__v8di) __W, __M);
    851 }
    852 
    853 extern __inline __m512i
    854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    855 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
    856 {
    857   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
    858 						  (__v16si) __Y,
    859 						  (__v8di)
    860 						  _mm512_setzero_si512 (),
    861 						  __M);
    862 }
    863 
    864 extern __inline __m512i
    865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    866 _mm512_sub_epi32 (__m512i __A, __m512i __B)
    867 {
    868   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
    869 						 (__v16si) __B,
    870 						 (__v16si)
    871 						 _mm512_undefined_si512 (),
    872 						 (__mmask16) -1);
    873 }
    874 
    875 extern __inline __m512i
    876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    877 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
    878 {
    879   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
    880 						 (__v16si) __B,
    881 						 (__v16si) __W,
    882 						 (__mmask16) __U);
    883 }
    884 
    885 extern __inline __m512i
    886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    887 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
    888 {
    889   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
    890 						 (__v16si) __B,
    891 						 (__v16si)
    892 						 _mm512_setzero_si512 (),
    893 						 (__mmask16) __U);
    894 }
    895 
    896 extern __inline __m512i
    897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    898 _mm512_mul_epu32 (__m512i __X, __m512i __Y)
    899 {
    900   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
    901 						   (__v16si) __Y,
    902 						   (__v8di)
    903 						   _mm512_undefined_si512 (),
    904 						   (__mmask8) -1);
    905 }
    906 
    907 extern __inline __m512i
    908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    909 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
    910 {
    911   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
    912 						   (__v16si) __Y,
    913 						   (__v8di) __W, __M);
    914 }
    915 
    916 extern __inline __m512i
    917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    918 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
    919 {
    920   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
    921 						   (__v16si) __Y,
    922 						   (__v8di)
    923 						   _mm512_setzero_si512 (),
    924 						   __M);
    925 }
    926 
    927 #ifdef __OPTIMIZE__
    928 extern __inline __m512i
    929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    930 _mm512_slli_epi64 (__m512i __A, unsigned int __B)
    931 {
    932   return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
    933 						  (__v8di)
    934 						  _mm512_undefined_si512 (),
    935 						  (__mmask8) -1);
    936 }
    937 
    938 extern __inline __m512i
    939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    940 _mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
    941 			unsigned int __B)
    942 {
    943   return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
    944 						  (__v8di) __W,
    945 						  (__mmask8) __U);
    946 }
    947 
    948 extern __inline __m512i
    949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    950 _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
    951 {
    952   return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
    953 						  (__v8di)
    954 						  _mm512_setzero_si512 (),
    955 						  (__mmask8) __U);
    956 }
    957 #else
    958 #define _mm512_slli_epi64(X, C)						   \
    959   ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    960     (__v8di)(__m512i)_mm512_undefined_si512 (),\
    961     (__mmask8)-1))
    962 
    963 #define _mm512_mask_slli_epi64(W, U, X, C)				   \
    964   ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    965     (__v8di)(__m512i)(W),\
    966     (__mmask8)(U)))
    967 
    968 #define _mm512_maskz_slli_epi64(U, X, C)                                   \
    969   ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    970     (__v8di)(__m512i)_mm512_setzero_si512 (),\
    971     (__mmask8)(U)))
    972 #endif
    973 
    974 extern __inline __m512i
    975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    976 _mm512_sll_epi64 (__m512i __A, __m128i __B)
    977 {
    978   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
    979 						 (__v2di) __B,
    980 						 (__v8di)
    981 						 _mm512_undefined_si512 (),
    982 						 (__mmask8) -1);
    983 }
    984 
    985 extern __inline __m512i
    986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    987 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
    988 {
    989   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
    990 						 (__v2di) __B,
    991 						 (__v8di) __W,
    992 						 (__mmask8) __U);
    993 }
    994 
    995 extern __inline __m512i
    996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    997 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
    998 {
    999   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
   1000 						 (__v2di) __B,
   1001 						 (__v8di)
   1002 						 _mm512_setzero_si512 (),
   1003 						 (__mmask8) __U);
   1004 }
   1005 
   1006 #ifdef __OPTIMIZE__
   1007 extern __inline __m512i
   1008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1009 _mm512_srli_epi64 (__m512i __A, unsigned int __B)
   1010 {
   1011   return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
   1012 						  (__v8di)
   1013 						  _mm512_undefined_si512 (),
   1014 						  (__mmask8) -1);
   1015 }
   1016 
   1017 extern __inline __m512i
   1018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1019 _mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
   1020 			__m512i __A, unsigned int __B)
   1021 {
   1022   return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
   1023 						  (__v8di) __W,
   1024 						  (__mmask8) __U);
   1025 }
   1026 
   1027 extern __inline __m512i
   1028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1029 _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
   1030 {
   1031   return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
   1032 						  (__v8di)
   1033 						  _mm512_setzero_si512 (),
   1034 						  (__mmask8) __U);
   1035 }
   1036 #else
   1037 #define _mm512_srli_epi64(X, C)						   \
   1038   ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
   1039     (__v8di)(__m512i)_mm512_undefined_si512 (),\
   1040     (__mmask8)-1))
   1041 
   1042 #define _mm512_mask_srli_epi64(W, U, X, C)				   \
   1043   ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
   1044     (__v8di)(__m512i)(W),\
   1045     (__mmask8)(U)))
   1046 
   1047 #define _mm512_maskz_srli_epi64(U, X, C)                                   \
   1048   ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
   1049     (__v8di)(__m512i)_mm512_setzero_si512 (),\
   1050     (__mmask8)(U)))
   1051 #endif
   1052 
   1053 extern __inline __m512i
   1054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1055 _mm512_srl_epi64 (__m512i __A, __m128i __B)
   1056 {
   1057   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
   1058 						 (__v2di) __B,
   1059 						 (__v8di)
   1060 						 _mm512_undefined_si512 (),
   1061 						 (__mmask8) -1);
   1062 }
   1063 
   1064 extern __inline __m512i
   1065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1066 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
   1067 {
   1068   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
   1069 						 (__v2di) __B,
   1070 						 (__v8di) __W,
   1071 						 (__mmask8) __U);
   1072 }
   1073 
   1074 extern __inline __m512i
   1075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1076 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
   1077 {
   1078   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
   1079 						 (__v2di) __B,
   1080 						 (__v8di)
   1081 						 _mm512_setzero_si512 (),
   1082 						 (__mmask8) __U);
   1083 }
   1084 
   1085 #ifdef __OPTIMIZE__
   1086 extern __inline __m512i
   1087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1088 _mm512_srai_epi64 (__m512i __A, unsigned int __B)
   1089 {
   1090   return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
   1091 						  (__v8di)
   1092 						  _mm512_undefined_si512 (),
   1093 						  (__mmask8) -1);
   1094 }
   1095 
   1096 extern __inline __m512i
   1097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1098 _mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
   1099 			unsigned int __B)
   1100 {
   1101   return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
   1102 						  (__v8di) __W,
   1103 						  (__mmask8) __U);
   1104 }
   1105 
   1106 extern __inline __m512i
   1107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1108 _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
   1109 {
   1110   return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
   1111 						  (__v8di)
   1112 						  _mm512_setzero_si512 (),
   1113 						  (__mmask8) __U);
   1114 }
   1115 #else
   1116 #define _mm512_srai_epi64(X, C)						   \
   1117   ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
   1118     (__v8di)(__m512i)_mm512_undefined_si512 (),\
   1119     (__mmask8)-1))
   1120 
   1121 #define _mm512_mask_srai_epi64(W, U, X, C)				   \
   1122   ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
   1123     (__v8di)(__m512i)(W),\
   1124     (__mmask8)(U)))
   1125 
   1126 #define _mm512_maskz_srai_epi64(U, X, C)				   \
   1127   ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
   1128     (__v8di)(__m512i)_mm512_setzero_si512 (),\
   1129     (__mmask8)(U)))
   1130 #endif
   1131 
   1132 extern __inline __m512i
   1133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1134 _mm512_sra_epi64 (__m512i __A, __m128i __B)
   1135 {
   1136   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
   1137 						 (__v2di) __B,
   1138 						 (__v8di)
   1139 						 _mm512_undefined_si512 (),
   1140 						 (__mmask8) -1);
   1141 }
   1142 
   1143 extern __inline __m512i
   1144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1145 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
   1146 {
   1147   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
   1148 						 (__v2di) __B,
   1149 						 (__v8di) __W,
   1150 						 (__mmask8) __U);
   1151 }
   1152 
   1153 extern __inline __m512i
   1154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1155 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
   1156 {
   1157   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
   1158 						 (__v2di) __B,
   1159 						 (__v8di)
   1160 						 _mm512_setzero_si512 (),
   1161 						 (__mmask8) __U);
   1162 }
   1163 
   1164 #ifdef __OPTIMIZE__
   1165 extern __inline __m512i
   1166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1167 _mm512_slli_epi32 (__m512i __A, unsigned int __B)
   1168 {
   1169   return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
   1170 						  (__v16si)
   1171 						  _mm512_undefined_si512 (),
   1172 						  (__mmask16) -1);
   1173 }
   1174 
   1175 extern __inline __m512i
   1176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1177 _mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
   1178 			unsigned int __B)
   1179 {
   1180   return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
   1181 						  (__v16si) __W,
   1182 						  (__mmask16) __U);
   1183 }
   1184 
   1185 extern __inline __m512i
   1186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1187 _mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
   1188 {
   1189   return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
   1190 						  (__v16si)
   1191 						  _mm512_setzero_si512 (),
   1192 						  (__mmask16) __U);
   1193 }
   1194 #else
   1195 #define _mm512_slli_epi32(X, C)						    \
   1196   ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
   1197     (__v16si)(__m512i)_mm512_undefined_si512 (),\
   1198     (__mmask16)-1))
   1199 
   1200 #define _mm512_mask_slli_epi32(W, U, X, C)                                  \
   1201   ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
   1202     (__v16si)(__m512i)(W),\
   1203     (__mmask16)(U)))
   1204 
   1205 #define _mm512_maskz_slli_epi32(U, X, C)                                    \
   1206   ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
   1207     (__v16si)(__m512i)_mm512_setzero_si512 (),\
   1208     (__mmask16)(U)))
   1209 #endif
   1210 
   1211 extern __inline __m512i
   1212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1213 _mm512_sll_epi32 (__m512i __A, __m128i __B)
   1214 {
   1215   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
   1216 						 (__v4si) __B,
   1217 						 (__v16si)
   1218 						 _mm512_undefined_si512 (),
   1219 						 (__mmask16) -1);
   1220 }
   1221 
   1222 extern __inline __m512i
   1223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1224 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
   1225 {
   1226   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
   1227 						 (__v4si) __B,
   1228 						 (__v16si) __W,
   1229 						 (__mmask16) __U);
   1230 }
   1231 
   1232 extern __inline __m512i
   1233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1234 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
   1235 {
   1236   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
   1237 						 (__v4si) __B,
   1238 						 (__v16si)
   1239 						 _mm512_setzero_si512 (),
   1240 						 (__mmask16) __U);
   1241 }
   1242 
   1243 #ifdef __OPTIMIZE__
   1244 extern __inline __m512i
   1245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1246 _mm512_srli_epi32 (__m512i __A, unsigned int __B)
   1247 {
   1248   return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
   1249 						  (__v16si)
   1250 						  _mm512_undefined_si512 (),
   1251 						  (__mmask16) -1);
   1252 }
   1253 
   1254 extern __inline __m512i
   1255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1256 _mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
   1257 			__m512i __A, unsigned int __B)
   1258 {
   1259   return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
   1260 						  (__v16si) __W,
   1261 						  (__mmask16) __U);
   1262 }
   1263 
   1264 extern __inline __m512i
   1265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1266 _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
   1267 {
   1268   return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
   1269 						  (__v16si)
   1270 						  _mm512_setzero_si512 (),
   1271 						  (__mmask16) __U);
   1272 }
   1273 #else
   1274 #define _mm512_srli_epi32(X, C)						    \
   1275   ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
   1276     (__v16si)(__m512i)_mm512_undefined_si512 (),\
   1277     (__mmask16)-1))
   1278 
   1279 #define _mm512_mask_srli_epi32(W, U, X, C)                                  \
   1280   ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
   1281     (__v16si)(__m512i)(W),\
   1282     (__mmask16)(U)))
   1283 
   1284 #define _mm512_maskz_srli_epi32(U, X, C)				    \
   1285   ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
   1286     (__v16si)(__m512i)_mm512_setzero_si512 (),\
   1287     (__mmask16)(U)))
   1288 #endif
   1289 
   1290 extern __inline __m512i
   1291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1292 _mm512_srl_epi32 (__m512i __A, __m128i __B)
   1293 {
   1294   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
   1295 						 (__v4si) __B,
   1296 						 (__v16si)
   1297 						 _mm512_undefined_si512 (),
   1298 						 (__mmask16) -1);
   1299 }
   1300 
   1301 extern __inline __m512i
   1302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1303 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
   1304 {
   1305   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
   1306 						 (__v4si) __B,
   1307 						 (__v16si) __W,
   1308 						 (__mmask16) __U);
   1309 }
   1310 
   1311 extern __inline __m512i
   1312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1313 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
   1314 {
   1315   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
   1316 						 (__v4si) __B,
   1317 						 (__v16si)
   1318 						 _mm512_setzero_si512 (),
   1319 						 (__mmask16) __U);
   1320 }
   1321 
   1322 #ifdef __OPTIMIZE__
   1323 extern __inline __m512i
   1324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1325 _mm512_srai_epi32 (__m512i __A, unsigned int __B)
   1326 {
   1327   return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
   1328 						  (__v16si)
   1329 						  _mm512_undefined_si512 (),
   1330 						  (__mmask16) -1);
   1331 }
   1332 
   1333 extern __inline __m512i
   1334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1335 _mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
   1336 			unsigned int __B)
   1337 {
   1338   return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
   1339 						  (__v16si) __W,
   1340 						  (__mmask16) __U);
   1341 }
   1342 
   1343 extern __inline __m512i
   1344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1345 _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
   1346 {
   1347   return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
   1348 						  (__v16si)
   1349 						  _mm512_setzero_si512 (),
   1350 						  (__mmask16) __U);
   1351 }
   1352 #else
   1353 #define _mm512_srai_epi32(X, C)						    \
   1354   ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
   1355     (__v16si)(__m512i)_mm512_undefined_si512 (),\
   1356     (__mmask16)-1))
   1357 
   1358 #define _mm512_mask_srai_epi32(W, U, X, C)				    \
   1359   ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
   1360     (__v16si)(__m512i)(W),\
   1361     (__mmask16)(U)))
   1362 
   1363 #define _mm512_maskz_srai_epi32(U, X, C)				    \
   1364   ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
   1365     (__v16si)(__m512i)_mm512_setzero_si512 (),\
   1366     (__mmask16)(U)))
   1367 #endif
   1368 
   1369 extern __inline __m512i
   1370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1371 _mm512_sra_epi32 (__m512i __A, __m128i __B)
   1372 {
   1373   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
   1374 						 (__v4si) __B,
   1375 						 (__v16si)
   1376 						 _mm512_undefined_si512 (),
   1377 						 (__mmask16) -1);
   1378 }
   1379 
   1380 extern __inline __m512i
   1381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1382 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
   1383 {
   1384   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
   1385 						 (__v4si) __B,
   1386 						 (__v16si) __W,
   1387 						 (__mmask16) __U);
   1388 }
   1389 
   1390 extern __inline __m512i
   1391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1392 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
   1393 {
   1394   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
   1395 						 (__v4si) __B,
   1396 						 (__v16si)
   1397 						 _mm512_setzero_si512 (),
   1398 						 (__mmask16) __U);
   1399 }
   1400 
   1401 #ifdef __OPTIMIZE__
   1402 extern __inline __m128d
   1403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1404 _mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
   1405 {
   1406   return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
   1407 					       (__v2df) __B,
   1408 					       __R);
   1409 }
   1410 
   1411 extern __inline __m128
   1412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1413 _mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
   1414 {
   1415   return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
   1416 					      (__v4sf) __B,
   1417 					      __R);
   1418 }
   1419 
   1420 extern __inline __m128d
   1421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1422 _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
   1423 {
   1424   return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
   1425 					       (__v2df) __B,
   1426 					       __R);
   1427 }
   1428 
   1429 extern __inline __m128
   1430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1431 _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
   1432 {
   1433   return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
   1434 					      (__v4sf) __B,
   1435 					      __R);
   1436 }
   1437 
   1438 #else
   1439 #define _mm_add_round_sd(A, B, C)            \
   1440     (__m128d)__builtin_ia32_addsd_round(A, B, C)
   1441 
   1442 #define _mm_add_round_ss(A, B, C)            \
   1443     (__m128)__builtin_ia32_addss_round(A, B, C)
   1444 
   1445 #define _mm_sub_round_sd(A, B, C)            \
   1446     (__m128d)__builtin_ia32_subsd_round(A, B, C)
   1447 
   1448 #define _mm_sub_round_ss(A, B, C)            \
   1449     (__m128)__builtin_ia32_subss_round(A, B, C)
   1450 #endif
   1451 
   1452 #ifdef __OPTIMIZE__
   1453 extern __inline __m512i
   1454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1455 _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm)
   1456 {
   1457   return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
   1458 						     (__v8di) __B,
   1459 						     (__v8di) __C, imm,
   1460 						     (__mmask8) -1);
   1461 }
   1462 
   1463 extern __inline __m512i
   1464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1465 _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
   1466 				__m512i __C, const int imm)
   1467 {
   1468   return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
   1469 						     (__v8di) __B,
   1470 						     (__v8di) __C, imm,
   1471 						     (__mmask8) __U);
   1472 }
   1473 
   1474 extern __inline __m512i
   1475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1476 _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
   1477 				 __m512i __C, const int imm)
   1478 {
   1479   return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
   1480 						      (__v8di) __B,
   1481 						      (__v8di) __C,
   1482 						      imm, (__mmask8) __U);
   1483 }
   1484 
   1485 extern __inline __m512i
   1486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1487 _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm)
   1488 {
   1489   return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
   1490 						     (__v16si) __B,
   1491 						     (__v16si) __C,
   1492 						     imm, (__mmask16) -1);
   1493 }
   1494 
   1495 extern __inline __m512i
   1496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1497 _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
   1498 				__m512i __C, const int imm)
   1499 {
   1500   return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
   1501 						     (__v16si) __B,
   1502 						     (__v16si) __C,
   1503 						     imm, (__mmask16) __U);
   1504 }
   1505 
   1506 extern __inline __m512i
   1507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1508 _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
   1509 				 __m512i __C, const int imm)
   1510 {
   1511   return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
   1512 						      (__v16si) __B,
   1513 						      (__v16si) __C,
   1514 						      imm, (__mmask16) __U);
   1515 }
   1516 #else
   1517 #define _mm512_ternarylogic_epi64(A, B, C, I)				\
   1518   ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A),	\
   1519     (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
   1520 #define _mm512_mask_ternarylogic_epi64(A, U, B, C, I)			\
   1521   ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A),	\
   1522     (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
   1523 #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I)			\
   1524   ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A),	\
   1525     (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
   1526 #define _mm512_ternarylogic_epi32(A, B, C, I)				\
   1527   ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A),	\
   1528     (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I),		\
   1529     (__mmask16)-1))
   1530 #define _mm512_mask_ternarylogic_epi32(A, U, B, C, I)			\
   1531   ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A),	\
   1532     (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I),		\
   1533     (__mmask16)(U)))
   1534 #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I)			\
   1535   ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A),	\
   1536     (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I),		\
   1537     (__mmask16)(U)))
   1538 #endif
   1539 
   1540 extern __inline __m512d
   1541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1542 _mm512_rcp14_pd (__m512d __A)
   1543 {
   1544   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
   1545 						   (__v8df)
   1546 						   _mm512_undefined_pd (),
   1547 						   (__mmask8) -1);
   1548 }
   1549 
   1550 extern __inline __m512d
   1551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1552 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
   1553 {
   1554   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
   1555 						   (__v8df) __W,
   1556 						   (__mmask8) __U);
   1557 }
   1558 
   1559 extern __inline __m512d
   1560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1561 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
   1562 {
   1563   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
   1564 						   (__v8df)
   1565 						   _mm512_setzero_pd (),
   1566 						   (__mmask8) __U);
   1567 }
   1568 
   1569 extern __inline __m512
   1570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1571 _mm512_rcp14_ps (__m512 __A)
   1572 {
   1573   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
   1574 						  (__v16sf)
   1575 						  _mm512_undefined_ps (),
   1576 						  (__mmask16) -1);
   1577 }
   1578 
   1579 extern __inline __m512
   1580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1581 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
   1582 {
   1583   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
   1584 						  (__v16sf) __W,
   1585 						  (__mmask16) __U);
   1586 }
   1587 
   1588 extern __inline __m512
   1589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1590 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
   1591 {
   1592   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
   1593 						  (__v16sf)
   1594 						  _mm512_setzero_ps (),
   1595 						  (__mmask16) __U);
   1596 }
   1597 
   1598 extern __inline __m128d
   1599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1600 _mm_rcp14_sd (__m128d __A, __m128d __B)
   1601 {
   1602   return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
   1603 					   (__v2df) __A);
   1604 }
   1605 
   1606 extern __inline __m128
   1607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1608 _mm_rcp14_ss (__m128 __A, __m128 __B)
   1609 {
   1610   return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
   1611 					  (__v4sf) __A);
   1612 }
   1613 
   1614 extern __inline __m512d
   1615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1616 _mm512_rsqrt14_pd (__m512d __A)
   1617 {
   1618   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
   1619 						     (__v8df)
   1620 						     _mm512_undefined_pd (),
   1621 						     (__mmask8) -1);
   1622 }
   1623 
   1624 extern __inline __m512d
   1625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1626 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
   1627 {
   1628   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
   1629 						     (__v8df) __W,
   1630 						     (__mmask8) __U);
   1631 }
   1632 
   1633 extern __inline __m512d
   1634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1635 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
   1636 {
   1637   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
   1638 						     (__v8df)
   1639 						     _mm512_setzero_pd (),
   1640 						     (__mmask8) __U);
   1641 }
   1642 
   1643 extern __inline __m512
   1644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1645 _mm512_rsqrt14_ps (__m512 __A)
   1646 {
   1647   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
   1648 						    (__v16sf)
   1649 						    _mm512_undefined_ps (),
   1650 						    (__mmask16) -1);
   1651 }
   1652 
   1653 extern __inline __m512
   1654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1655 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
   1656 {
   1657   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
   1658 						    (__v16sf) __W,
   1659 						    (__mmask16) __U);
   1660 }
   1661 
   1662 extern __inline __m512
   1663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1664 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
   1665 {
   1666   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
   1667 						    (__v16sf)
   1668 						    _mm512_setzero_ps (),
   1669 						    (__mmask16) __U);
   1670 }
   1671 
   1672 extern __inline __m128d
   1673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1674 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
   1675 {
   1676   return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
   1677 					     (__v2df) __A);
   1678 }
   1679 
   1680 extern __inline __m128
   1681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1682 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
   1683 {
   1684   return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
   1685 					    (__v4sf) __A);
   1686 }
   1687 
   1688 #ifdef __OPTIMIZE__
   1689 extern __inline __m512d
   1690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1691 _mm512_sqrt_round_pd (__m512d __A, const int __R)
   1692 {
   1693   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
   1694 						  (__v8df)
   1695 						  _mm512_undefined_pd (),
   1696 						  (__mmask8) -1, __R);
   1697 }
   1698 
   1699 extern __inline __m512d
   1700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1701 _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
   1702 			   const int __R)
   1703 {
   1704   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
   1705 						  (__v8df) __W,
   1706 						  (__mmask8) __U, __R);
   1707 }
   1708 
   1709 extern __inline __m512d
   1710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1711 _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
   1712 {
   1713   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
   1714 						  (__v8df)
   1715 						  _mm512_setzero_pd (),
   1716 						  (__mmask8) __U, __R);
   1717 }
   1718 
   1719 extern __inline __m512
   1720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1721 _mm512_sqrt_round_ps (__m512 __A, const int __R)
   1722 {
   1723   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
   1724 						 (__v16sf)
   1725 						 _mm512_undefined_ps (),
   1726 						 (__mmask16) -1, __R);
   1727 }
   1728 
   1729 extern __inline __m512
   1730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1731 _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
   1732 {
   1733   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
   1734 						 (__v16sf) __W,
   1735 						 (__mmask16) __U, __R);
   1736 }
   1737 
   1738 extern __inline __m512
   1739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1740 _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
   1741 {
   1742   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
   1743 						 (__v16sf)
   1744 						 _mm512_setzero_ps (),
   1745 						 (__mmask16) __U, __R);
   1746 }
   1747 
   1748 extern __inline __m128d
   1749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1750 _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
   1751 {
   1752   return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
   1753 						(__v2df) __A,
   1754 						__R);
   1755 }
   1756 
   1757 extern __inline __m128
   1758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1759 _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
   1760 {
   1761   return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
   1762 					       (__v4sf) __A,
   1763 					       __R);
   1764 }
   1765 #else
   1766 #define _mm512_sqrt_round_pd(A, C)            \
   1767     (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C)
   1768 
   1769 #define _mm512_mask_sqrt_round_pd(W, U, A, C) \
   1770     (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)
   1771 
   1772 #define _mm512_maskz_sqrt_round_pd(U, A, C)   \
   1773     (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
   1774 
   1775 #define _mm512_sqrt_round_ps(A, C)            \
   1776     (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C)
   1777 
   1778 #define _mm512_mask_sqrt_round_ps(W, U, A, C) \
   1779     (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)
   1780 
   1781 #define _mm512_maskz_sqrt_round_ps(U, A, C)   \
   1782     (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
   1783 
   1784 #define _mm_sqrt_round_sd(A, B, C)            \
   1785     (__m128d)__builtin_ia32_sqrtsd_round(A, B, C)
   1786 
   1787 #define _mm_sqrt_round_ss(A, B, C)            \
   1788     (__m128)__builtin_ia32_sqrtss_round(A, B, C)
   1789 #endif
   1790 
   1791 extern __inline __m512i
   1792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1793 _mm512_cvtepi8_epi32 (__m128i __A)
   1794 {
   1795   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
   1796 						    (__v16si)
   1797 						    _mm512_undefined_si512 (),
   1798 						    (__mmask16) -1);
   1799 }
   1800 
   1801 extern __inline __m512i
   1802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1803 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
   1804 {
   1805   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
   1806 						    (__v16si) __W,
   1807 						    (__mmask16) __U);
   1808 }
   1809 
   1810 extern __inline __m512i
   1811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1812 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
   1813 {
   1814   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
   1815 						    (__v16si)
   1816 						    _mm512_setzero_si512 (),
   1817 						    (__mmask16) __U);
   1818 }
   1819 
   1820 extern __inline __m512i
   1821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1822 _mm512_cvtepi8_epi64 (__m128i __A)
   1823 {
   1824   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
   1825 						    (__v8di)
   1826 						    _mm512_undefined_si512 (),
   1827 						    (__mmask8) -1);
   1828 }
   1829 
   1830 extern __inline __m512i
   1831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1832 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
   1833 {
   1834   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
   1835 						    (__v8di) __W,
   1836 						    (__mmask8) __U);
   1837 }
   1838 
   1839 extern __inline __m512i
   1840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1841 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
   1842 {
   1843   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
   1844 						    (__v8di)
   1845 						    _mm512_setzero_si512 (),
   1846 						    (__mmask8) __U);
   1847 }
   1848 
   1849 extern __inline __m512i
   1850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1851 _mm512_cvtepi16_epi32 (__m256i __A)
   1852 {
   1853   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
   1854 						    (__v16si)
   1855 						    _mm512_undefined_si512 (),
   1856 						    (__mmask16) -1);
   1857 }
   1858 
   1859 extern __inline __m512i
   1860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1861 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
   1862 {
   1863   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
   1864 						    (__v16si) __W,
   1865 						    (__mmask16) __U);
   1866 }
   1867 
   1868 extern __inline __m512i
   1869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1870 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
   1871 {
   1872   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
   1873 						    (__v16si)
   1874 						    _mm512_setzero_si512 (),
   1875 						    (__mmask16) __U);
   1876 }
   1877 
   1878 extern __inline __m512i
   1879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1880 _mm512_cvtepi16_epi64 (__m128i __A)
   1881 {
   1882   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
   1883 						    (__v8di)
   1884 						    _mm512_undefined_si512 (),
   1885 						    (__mmask8) -1);
   1886 }
   1887 
   1888 extern __inline __m512i
   1889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1890 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
   1891 {
   1892   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
   1893 						    (__v8di) __W,
   1894 						    (__mmask8) __U);
   1895 }
   1896 
   1897 extern __inline __m512i
   1898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1899 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
   1900 {
   1901   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
   1902 						    (__v8di)
   1903 						    _mm512_setzero_si512 (),
   1904 						    (__mmask8) __U);
   1905 }
   1906 
   1907 extern __inline __m512i
   1908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1909 _mm512_cvtepi32_epi64 (__m256i __X)
   1910 {
   1911   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
   1912 						    (__v8di)
   1913 						    _mm512_undefined_si512 (),
   1914 						    (__mmask8) -1);
   1915 }
   1916 
   1917 extern __inline __m512i
   1918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1919 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
   1920 {
   1921   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
   1922 						    (__v8di) __W,
   1923 						    (__mmask8) __U);
   1924 }
   1925 
   1926 extern __inline __m512i
   1927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1928 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
   1929 {
   1930   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
   1931 						    (__v8di)
   1932 						    _mm512_setzero_si512 (),
   1933 						    (__mmask8) __U);
   1934 }
   1935 
   1936 extern __inline __m512i
   1937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1938 _mm512_cvtepu8_epi32 (__m128i __A)
   1939 {
   1940   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
   1941 						    (__v16si)
   1942 						    _mm512_undefined_si512 (),
   1943 						    (__mmask16) -1);
   1944 }
   1945 
   1946 extern __inline __m512i
   1947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1948 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
   1949 {
   1950   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
   1951 						    (__v16si) __W,
   1952 						    (__mmask16) __U);
   1953 }
   1954 
   1955 extern __inline __m512i
   1956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1957 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
   1958 {
   1959   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
   1960 						    (__v16si)
   1961 						    _mm512_setzero_si512 (),
   1962 						    (__mmask16) __U);
   1963 }
   1964 
   1965 extern __inline __m512i
   1966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1967 _mm512_cvtepu8_epi64 (__m128i __A)
   1968 {
   1969   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
   1970 						    (__v8di)
   1971 						    _mm512_undefined_si512 (),
   1972 						    (__mmask8) -1);
   1973 }
   1974 
   1975 extern __inline __m512i
   1976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1977 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
   1978 {
   1979   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
   1980 						    (__v8di) __W,
   1981 						    (__mmask8) __U);
   1982 }
   1983 
   1984 extern __inline __m512i
   1985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1986 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
   1987 {
   1988   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
   1989 						    (__v8di)
   1990 						    _mm512_setzero_si512 (),
   1991 						    (__mmask8) __U);
   1992 }
   1993 
   1994 extern __inline __m512i
   1995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1996 _mm512_cvtepu16_epi32 (__m256i __A)
   1997 {
   1998   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
   1999 						    (__v16si)
   2000 						    _mm512_undefined_si512 (),
   2001 						    (__mmask16) -1);
   2002 }
   2003 
   2004 extern __inline __m512i
   2005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2006 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
   2007 {
   2008   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
   2009 						    (__v16si) __W,
   2010 						    (__mmask16) __U);
   2011 }
   2012 
   2013 extern __inline __m512i
   2014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2015 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
   2016 {
   2017   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
   2018 						    (__v16si)
   2019 						    _mm512_setzero_si512 (),
   2020 						    (__mmask16) __U);
   2021 }
   2022 
   2023 extern __inline __m512i
   2024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2025 _mm512_cvtepu16_epi64 (__m128i __A)
   2026 {
   2027   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
   2028 						    (__v8di)
   2029 						    _mm512_undefined_si512 (),
   2030 						    (__mmask8) -1);
   2031 }
   2032 
   2033 extern __inline __m512i
   2034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2035 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
   2036 {
   2037   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
   2038 						    (__v8di) __W,
   2039 						    (__mmask8) __U);
   2040 }
   2041 
   2042 extern __inline __m512i
   2043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2044 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
   2045 {
   2046   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
   2047 						    (__v8di)
   2048 						    _mm512_setzero_si512 (),
   2049 						    (__mmask8) __U);
   2050 }
   2051 
   2052 extern __inline __m512i
   2053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2054 _mm512_cvtepu32_epi64 (__m256i __X)
   2055 {
   2056   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
   2057 						    (__v8di)
   2058 						    _mm512_undefined_si512 (),
   2059 						    (__mmask8) -1);
   2060 }
   2061 
   2062 extern __inline __m512i
   2063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2064 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
   2065 {
   2066   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
   2067 						    (__v8di) __W,
   2068 						    (__mmask8) __U);
   2069 }
   2070 
   2071 extern __inline __m512i
   2072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2073 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
   2074 {
   2075   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
   2076 						    (__v8di)
   2077 						    _mm512_setzero_si512 (),
   2078 						    (__mmask8) __U);
   2079 }
   2080 
   2081 #ifdef __OPTIMIZE__
   2082 extern __inline __m512d
   2083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2084 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
   2085 {
   2086   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
   2087 						 (__v8df) __B,
   2088 						 (__v8df)
   2089 						 _mm512_undefined_pd (),
   2090 						 (__mmask8) -1, __R);
   2091 }
   2092 
   2093 extern __inline __m512d
   2094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2095 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
   2096 			  __m512d __B, const int __R)
   2097 {
   2098   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
   2099 						 (__v8df) __B,
   2100 						 (__v8df) __W,
   2101 						 (__mmask8) __U, __R);
   2102 }
   2103 
   2104 extern __inline __m512d
   2105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2106 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
   2107 			   const int __R)
   2108 {
   2109   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
   2110 						 (__v8df) __B,
   2111 						 (__v8df)
   2112 						 _mm512_setzero_pd (),
   2113 						 (__mmask8) __U, __R);
   2114 }
   2115 
   2116 extern __inline __m512
   2117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2118 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
   2119 {
   2120   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
   2121 						(__v16sf) __B,
   2122 						(__v16sf)
   2123 						_mm512_undefined_ps (),
   2124 						(__mmask16) -1, __R);
   2125 }
   2126 
   2127 extern __inline __m512
   2128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2129 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
   2130 			  __m512 __B, const int __R)
   2131 {
   2132   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
   2133 						(__v16sf) __B,
   2134 						(__v16sf) __W,
   2135 						(__mmask16) __U, __R);
   2136 }
   2137 
   2138 extern __inline __m512
   2139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2140 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
   2141 {
   2142   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
   2143 						(__v16sf) __B,
   2144 						(__v16sf)
   2145 						_mm512_setzero_ps (),
   2146 						(__mmask16) __U, __R);
   2147 }
   2148 
   2149 extern __inline __m512d
   2150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2151 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
   2152 {
   2153   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
   2154 						 (__v8df) __B,
   2155 						 (__v8df)
   2156 						 _mm512_undefined_pd (),
   2157 						 (__mmask8) -1, __R);
   2158 }
   2159 
   2160 extern __inline __m512d
   2161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2162 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
   2163 			  __m512d __B, const int __R)
   2164 {
   2165   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
   2166 						 (__v8df) __B,
   2167 						 (__v8df) __W,
   2168 						 (__mmask8) __U, __R);
   2169 }
   2170 
   2171 extern __inline __m512d
   2172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2173 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
   2174 			   const int __R)
   2175 {
   2176   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
   2177 						 (__v8df) __B,
   2178 						 (__v8df)
   2179 						 _mm512_setzero_pd (),
   2180 						 (__mmask8) __U, __R);
   2181 }
   2182 
   2183 extern __inline __m512
   2184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2185 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
   2186 {
   2187   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
   2188 						(__v16sf) __B,
   2189 						(__v16sf)
   2190 						_mm512_undefined_ps (),
   2191 						(__mmask16) -1, __R);
   2192 }
   2193 
   2194 extern __inline __m512
   2195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2196 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
   2197 			  __m512 __B, const int __R)
   2198 {
   2199   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
   2200 						(__v16sf) __B,
   2201 						(__v16sf) __W,
   2202 						(__mmask16) __U, __R);
   2203 }
   2204 
   2205 extern __inline __m512
   2206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2207 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
   2208 {
   2209   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
   2210 						(__v16sf) __B,
   2211 						(__v16sf)
   2212 						_mm512_setzero_ps (),
   2213 						(__mmask16) __U, __R);
   2214 }
   2215 #else
   2216 #define _mm512_add_round_pd(A, B, C)            \
   2217     (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
   2218 
   2219 #define _mm512_mask_add_round_pd(W, U, A, B, C) \
   2220     (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)
   2221 
   2222 #define _mm512_maskz_add_round_pd(U, A, B, C)   \
   2223     (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
   2224 
   2225 #define _mm512_add_round_ps(A, B, C)            \
   2226     (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
   2227 
   2228 #define _mm512_mask_add_round_ps(W, U, A, B, C) \
   2229     (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)
   2230 
   2231 #define _mm512_maskz_add_round_ps(U, A, B, C)   \
   2232     (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
   2233 
   2234 #define _mm512_sub_round_pd(A, B, C)            \
   2235     (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
   2236 
   2237 #define _mm512_mask_sub_round_pd(W, U, A, B, C) \
   2238     (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)
   2239 
   2240 #define _mm512_maskz_sub_round_pd(U, A, B, C)   \
   2241     (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
   2242 
   2243 #define _mm512_sub_round_ps(A, B, C)            \
   2244     (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
   2245 
   2246 #define _mm512_mask_sub_round_ps(W, U, A, B, C) \
   2247     (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)
   2248 
   2249 #define _mm512_maskz_sub_round_ps(U, A, B, C)   \
   2250     (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
   2251 #endif
   2252 
   2253 #ifdef __OPTIMIZE__
   2254 extern __inline __m512d
   2255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2256 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
   2257 {
   2258   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
   2259 						 (__v8df) __B,
   2260 						 (__v8df)
   2261 						 _mm512_undefined_pd (),
   2262 						 (__mmask8) -1, __R);
   2263 }
   2264 
   2265 extern __inline __m512d
   2266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2267 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
   2268 			  __m512d __B, const int __R)
   2269 {
   2270   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
   2271 						 (__v8df) __B,
   2272 						 (__v8df) __W,
   2273 						 (__mmask8) __U, __R);
   2274 }
   2275 
   2276 extern __inline __m512d
   2277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2278 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
   2279 			   const int __R)
   2280 {
   2281   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
   2282 						 (__v8df) __B,
   2283 						 (__v8df)
   2284 						 _mm512_setzero_pd (),
   2285 						 (__mmask8) __U, __R);
   2286 }
   2287 
   2288 extern __inline __m512
   2289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2290 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
   2291 {
   2292   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
   2293 						(__v16sf) __B,
   2294 						(__v16sf)
   2295 						_mm512_undefined_ps (),
   2296 						(__mmask16) -1, __R);
   2297 }
   2298 
   2299 extern __inline __m512
   2300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2301 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
   2302 			  __m512 __B, const int __R)
   2303 {
   2304   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
   2305 						(__v16sf) __B,
   2306 						(__v16sf) __W,
   2307 						(__mmask16) __U, __R);
   2308 }
   2309 
   2310 extern __inline __m512
   2311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2312 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
   2313 {
   2314   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
   2315 						(__v16sf) __B,
   2316 						(__v16sf)
   2317 						_mm512_setzero_ps (),
   2318 						(__mmask16) __U, __R);
   2319 }
   2320 
   2321 extern __inline __m512d
   2322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2323 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
   2324 {
   2325   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
   2326 						 (__v8df) __V,
   2327 						 (__v8df)
   2328 						 _mm512_undefined_pd (),
   2329 						 (__mmask8) -1, __R);
   2330 }
   2331 
   2332 extern __inline __m512d
   2333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2334 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
   2335 			  __m512d __V, const int __R)
   2336 {
   2337   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
   2338 						 (__v8df) __V,
   2339 						 (__v8df) __W,
   2340 						 (__mmask8) __U, __R);
   2341 }
   2342 
   2343 extern __inline __m512d
   2344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2345 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
   2346 			   const int __R)
   2347 {
   2348   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
   2349 						 (__v8df) __V,
   2350 						 (__v8df)
   2351 						 _mm512_setzero_pd (),
   2352 						 (__mmask8) __U, __R);
   2353 }
   2354 
   2355 extern __inline __m512
   2356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2357 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
   2358 {
   2359   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
   2360 						(__v16sf) __B,
   2361 						(__v16sf)
   2362 						_mm512_undefined_ps (),
   2363 						(__mmask16) -1, __R);
   2364 }
   2365 
   2366 extern __inline __m512
   2367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2368 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
   2369 			  __m512 __B, const int __R)
   2370 {
   2371   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
   2372 						(__v16sf) __B,
   2373 						(__v16sf) __W,
   2374 						(__mmask16) __U, __R);
   2375 }
   2376 
   2377 extern __inline __m512
   2378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2379 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
   2380 {
   2381   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
   2382 						(__v16sf) __B,
   2383 						(__v16sf)
   2384 						_mm512_setzero_ps (),
   2385 						(__mmask16) __U, __R);
   2386 }
   2387 
   2388 extern __inline __m128d
   2389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2390 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
   2391 {
   2392   return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
   2393 					       (__v2df) __B,
   2394 					       __R);
   2395 }
   2396 
   2397 extern __inline __m128
   2398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2399 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
   2400 {
   2401   return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
   2402 					      (__v4sf) __B,
   2403 					      __R);
   2404 }
   2405 
   2406 extern __inline __m128d
   2407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2408 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
   2409 {
   2410   return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
   2411 					       (__v2df) __B,
   2412 					       __R);
   2413 }
   2414 
   2415 extern __inline __m128
   2416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2417 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
   2418 {
   2419   return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
   2420 					      (__v4sf) __B,
   2421 					      __R);
   2422 }
   2423 
   2424 #else
   2425 #define _mm512_mul_round_pd(A, B, C)            \
   2426     (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
   2427 
   2428 #define _mm512_mask_mul_round_pd(W, U, A, B, C) \
   2429     (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)
   2430 
   2431 #define _mm512_maskz_mul_round_pd(U, A, B, C)   \
   2432     (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
   2433 
   2434 #define _mm512_mul_round_ps(A, B, C)            \
   2435     (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
   2436 
   2437 #define _mm512_mask_mul_round_ps(W, U, A, B, C) \
   2438     (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)
   2439 
   2440 #define _mm512_maskz_mul_round_ps(U, A, B, C)   \
   2441     (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
   2442 
   2443 #define _mm512_div_round_pd(A, B, C)            \
   2444     (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
   2445 
   2446 #define _mm512_mask_div_round_pd(W, U, A, B, C) \
   2447     (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)
   2448 
   2449 #define _mm512_maskz_div_round_pd(U, A, B, C)   \
   2450     (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
   2451 
   2452 #define _mm512_div_round_ps(A, B, C)            \
   2453     (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
   2454 
   2455 #define _mm512_mask_div_round_ps(W, U, A, B, C) \
   2456     (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)
   2457 
   2458 #define _mm512_maskz_div_round_ps(U, A, B, C)   \
   2459     (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
   2460 
   2461 #define _mm_mul_round_sd(A, B, C)            \
   2462     (__m128d)__builtin_ia32_mulsd_round(A, B, C)
   2463 
   2464 #define _mm_mul_round_ss(A, B, C)            \
   2465     (__m128)__builtin_ia32_mulss_round(A, B, C)
   2466 
   2467 #define _mm_div_round_sd(A, B, C)            \
   2468     (__m128d)__builtin_ia32_divsd_round(A, B, C)
   2469 
   2470 #define _mm_div_round_ss(A, B, C)            \
   2471     (__m128)__builtin_ia32_divss_round(A, B, C)
   2472 #endif
   2473 
   2474 #ifdef __OPTIMIZE__
   2475 extern __inline __m512d
   2476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2477 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
   2478 {
   2479   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
   2480 						 (__v8df) __B,
   2481 						 (__v8df)
   2482 						 _mm512_undefined_pd (),
   2483 						 (__mmask8) -1, __R);
   2484 }
   2485 
   2486 extern __inline __m512d
   2487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2488 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
   2489 			  __m512d __B, const int __R)
   2490 {
   2491   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
   2492 						 (__v8df) __B,
   2493 						 (__v8df) __W,
   2494 						 (__mmask8) __U, __R);
   2495 }
   2496 
   2497 extern __inline __m512d
   2498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2499 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
   2500 			   const int __R)
   2501 {
   2502   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
   2503 						 (__v8df) __B,
   2504 						 (__v8df)
   2505 						 _mm512_setzero_pd (),
   2506 						 (__mmask8) __U, __R);
   2507 }
   2508 
   2509 extern __inline __m512
   2510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2511 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
   2512 {
   2513   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
   2514 						(__v16sf) __B,
   2515 						(__v16sf)
   2516 						_mm512_undefined_ps (),
   2517 						(__mmask16) -1, __R);
   2518 }
   2519 
   2520 extern __inline __m512
   2521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2522 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
   2523 			  __m512 __B, const int __R)
   2524 {
   2525   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
   2526 						(__v16sf) __B,
   2527 						(__v16sf) __W,
   2528 						(__mmask16) __U, __R);
   2529 }
   2530 
   2531 extern __inline __m512
   2532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2533 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
   2534 {
   2535   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
   2536 						(__v16sf) __B,
   2537 						(__v16sf)
   2538 						_mm512_setzero_ps (),
   2539 						(__mmask16) __U, __R);
   2540 }
   2541 
   2542 extern __inline __m512d
   2543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2544 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
   2545 {
   2546   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
   2547 						 (__v8df) __B,
   2548 						 (__v8df)
   2549 						 _mm512_undefined_pd (),
   2550 						 (__mmask8) -1, __R);
   2551 }
   2552 
   2553 extern __inline __m512d
   2554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2555 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
   2556 			  __m512d __B, const int __R)
   2557 {
   2558   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
   2559 						 (__v8df) __B,
   2560 						 (__v8df) __W,
   2561 						 (__mmask8) __U, __R);
   2562 }
   2563 
   2564 extern __inline __m512d
   2565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2566 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
   2567 			   const int __R)
   2568 {
   2569   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
   2570 						 (__v8df) __B,
   2571 						 (__v8df)
   2572 						 _mm512_setzero_pd (),
   2573 						 (__mmask8) __U, __R);
   2574 }
   2575 
   2576 extern __inline __m512
   2577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2578 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
   2579 {
   2580   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
   2581 						(__v16sf) __B,
   2582 						(__v16sf)
   2583 						_mm512_undefined_ps (),
   2584 						(__mmask16) -1, __R);
   2585 }
   2586 
   2587 extern __inline __m512
   2588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2589 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
   2590 			  __m512 __B, const int __R)
   2591 {
   2592   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
   2593 						(__v16sf) __B,
   2594 						(__v16sf) __W,
   2595 						(__mmask16) __U, __R);
   2596 }
   2597 
   2598 extern __inline __m512
   2599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2600 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
   2601 {
   2602   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
   2603 						(__v16sf) __B,
   2604 						(__v16sf)
   2605 						_mm512_setzero_ps (),
   2606 						(__mmask16) __U, __R);
   2607 }
   2608 #else
   /* Macro form: expands to __builtin_ia32_maxpd512_mask with an all-ones
      mask (-1) and _mm512_undefined_pd () as the third operand.  Fully
      parenthesized so it is safe in any expression.  */
   #define _mm512_max_round_pd(A, B,  R) \
     ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), \
        (__v8df) _mm512_undefined_pd (), -1, (R)))
   2611 
   /* Macro form: expands to __builtin_ia32_maxpd512_mask with W as the third
      operand and U as the mask.  Fully parenthesized.  */
   #define _mm512_mask_max_round_pd(W, U,  A, B, R) \
     ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), (W), (U), (R)))
   2614 
   /* Macro form: expands to __builtin_ia32_maxpd512_mask with
      _mm512_setzero_pd () as the third operand and U as the mask.  Fully
      parenthesized.  */
   #define _mm512_maskz_max_round_pd(U, A,  B, R) \
     ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), \
        (__v8df) _mm512_setzero_pd (), (U), (R)))
   2617 
   /* Macro form: expands to __builtin_ia32_maxps512_mask with an all-ones
      mask (-1).  The third operand comes from _mm512_undefined_ps (), the
      single-precision helper, matching the __v16sf operand type and the
      inline-function form of this intrinsic.  Fully parenthesized.  */
   #define _mm512_max_round_ps(A, B,  R) \
     ((__m512) __builtin_ia32_maxps512_mask ((A), (B), \
        (__v16sf) _mm512_undefined_ps (), -1, (R)))
   2620 
   /* Macro form: expands to __builtin_ia32_maxps512_mask with W as the third
      operand and U as the mask.  Fully parenthesized.  */
   #define _mm512_mask_max_round_ps(W, U,  A, B, R) \
     ((__m512) __builtin_ia32_maxps512_mask ((A), (B), (W), (U), (R)))
   2623 
   /* Macro form: expands to __builtin_ia32_maxps512_mask with
      _mm512_setzero_ps () as the third operand and U as the mask.  Fully
      parenthesized.  */
   #define _mm512_maskz_max_round_ps(U, A,  B, R) \
     ((__m512) __builtin_ia32_maxps512_mask ((A), (B), \
        (__v16sf) _mm512_setzero_ps (), (U), (R)))
   2626 
   /* Macro form: expands to __builtin_ia32_minpd512_mask with an all-ones
      mask (-1) and _mm512_undefined_pd () as the third operand.  Fully
      parenthesized so it is safe in any expression.  */
   #define _mm512_min_round_pd(A, B,  R) \
     ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), \
        (__v8df) _mm512_undefined_pd (), -1, (R)))
   2629 
   /* Macro form: expands to __builtin_ia32_minpd512_mask with W as the third
      operand and U as the mask.  Fully parenthesized.  */
   #define _mm512_mask_min_round_pd(W, U,  A, B, R) \
     ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), (W), (U), (R)))
   2632 
   /* Macro form: expands to __builtin_ia32_minpd512_mask with
      _mm512_setzero_pd () as the third operand and U as the mask.  Fully
      parenthesized.  */
   #define _mm512_maskz_min_round_pd(U, A,  B, R) \
     ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), \
        (__v8df) _mm512_setzero_pd (), (U), (R)))
   2635 
   /* Macro form: expands to __builtin_ia32_minps512_mask with an all-ones
      mask (-1) and _mm512_undefined_ps () as the third operand.  Fully
      parenthesized so it is safe in any expression.  */
   #define _mm512_min_round_ps(A, B, R) \
     ((__m512) __builtin_ia32_minps512_mask ((A), (B), \
        (__v16sf) _mm512_undefined_ps (), -1, (R)))
   2638 
   /* Macro form: expands to __builtin_ia32_minps512_mask with W as the third
      operand and U as the mask.  Fully parenthesized.  */
   #define _mm512_mask_min_round_ps(W, U,  A, B, R) \
     ((__m512) __builtin_ia32_minps512_mask ((A), (B), (W), (U), (R)))
   2641 
   /* Macro form: expands to __builtin_ia32_minps512_mask with
      _mm512_setzero_ps () as the third operand and U as the mask.  Fully
      parenthesized.  */
   #define _mm512_maskz_min_round_ps(U, A,  B, R) \
     ((__m512) __builtin_ia32_minps512_mask ((A), (B), \
        (__v16sf) _mm512_setzero_ps (), (U), (R)))
   2644 #endif
   2645 
   2646 #ifdef __OPTIMIZE__
   2647 extern __inline __m512d
   2648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2649 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
   2650 {
   2651   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
   2652 						    (__v8df) __B,
   2653 						    (__v8df)
   2654 						    _mm512_undefined_pd (),
   2655 						    (__mmask8) -1, __R);
   2656 }
   2657 
   2658 extern __inline __m512d
   2659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2660 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
   2661 			     __m512d __B, const int __R)
   2662 {
   2663   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
   2664 						    (__v8df) __B,
   2665 						    (__v8df) __W,
   2666 						    (__mmask8) __U, __R);
   2667 }
   2668 
   2669 extern __inline __m512d
   2670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2671 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
   2672 			      const int __R)
   2673 {
   2674   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
   2675 						    (__v8df) __B,
   2676 						    (__v8df)
   2677 						    _mm512_setzero_pd (),
   2678 						    (__mmask8) __U, __R);
   2679 }
   2680 
   2681 extern __inline __m512
   2682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2683 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
   2684 {
   2685   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
   2686 						   (__v16sf) __B,
   2687 						   (__v16sf)
   2688 						   _mm512_undefined_ps (),
   2689 						   (__mmask16) -1, __R);
   2690 }
   2691 
   2692 extern __inline __m512
   2693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2694 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
   2695 			     __m512 __B, const int __R)
   2696 {
   2697   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
   2698 						   (__v16sf) __B,
   2699 						   (__v16sf) __W,
   2700 						   (__mmask16) __U, __R);
   2701 }
   2702 
   2703 extern __inline __m512
   2704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2705 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
   2706 			      const int __R)
   2707 {
   2708   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
   2709 						   (__v16sf) __B,
   2710 						   (__v16sf)
   2711 						   _mm512_setzero_ps (),
   2712 						   (__mmask16) __U, __R);
   2713 }
   2714 
   2715 extern __inline __m128d
   2716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2717 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
   2718 {
   2719   return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
   2720 						  (__v2df) __B,
   2721 						  __R);
   2722 }
   2723 
   2724 extern __inline __m128
   2725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2726 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
   2727 {
   2728   return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
   2729 						 (__v4sf) __B,
   2730 						 __R);
   2731 }
   2732 #else
   /* Macro form: expands to __builtin_ia32_scalefpd512_mask with an all-ones
      mask (-1) and _mm512_undefined_pd () as the third operand.  Fully
      parenthesized so it is safe in any expression.  */
   #define _mm512_scalef_round_pd(A, B, C) \
     ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), \
        (__v8df) _mm512_undefined_pd (), -1, (C)))
   2735 
   /* Macro form: expands to __builtin_ia32_scalefpd512_mask with W as the
      third operand and U as the mask.  Fully parenthesized.  */
   #define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
     ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), (W), (U), (C)))
   2738 
   /* Macro form: expands to __builtin_ia32_scalefpd512_mask with
      _mm512_setzero_pd () as the third operand and U as the mask.  Fully
      parenthesized.  */
   #define _mm512_maskz_scalef_round_pd(U, A, B, C) \
     ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), \
        (__v8df) _mm512_setzero_pd (), (U), (C)))
   2741 
   /* Macro form: expands to __builtin_ia32_scalefps512_mask with an all-ones
      mask (-1) and _mm512_undefined_ps () as the third operand.  Fully
      parenthesized so it is safe in any expression.  */
   #define _mm512_scalef_round_ps(A, B, C) \
     ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), \
        (__v16sf) _mm512_undefined_ps (), -1, (C)))
   2744 
   /* Macro form: expands to __builtin_ia32_scalefps512_mask with W as the
      third operand and U as the mask.  Fully parenthesized.  */
   #define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
     ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), (W), (U), (C)))
   2747 
   /* Macro form: expands to __builtin_ia32_scalefps512_mask with
      _mm512_setzero_ps () as the third operand and U as the mask.  Fully
      parenthesized.  */
   #define _mm512_maskz_scalef_round_ps(U, A, B, C) \
     ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), \
        (__v16sf) _mm512_setzero_ps (), (U), (C)))
   2750 
   /* Macro form: expands to __builtin_ia32_scalefsd_round (A, B, C), cast to
      __m128d.  Fully parenthesized so it is safe in any expression.  */
   #define _mm_scalef_round_sd(A, B, C) \
     ((__m128d) __builtin_ia32_scalefsd_round ((A), (B), (C)))
   2753 
   /* Macro form: expands to __builtin_ia32_scalefss_round (A, B, C), cast to
      __m128.  Fully parenthesized so it is safe in any expression.  */
   #define _mm_scalef_round_ss(A, B, C) \
     ((__m128) __builtin_ia32_scalefss_round ((A), (B), (C)))
   2756 #endif
   2757 
   2758 #ifdef __OPTIMIZE__
   2759 extern __inline __m512d
   2760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2761 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
   2762 {
   2763   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   2764 						    (__v8df) __B,
   2765 						    (__v8df) __C,
   2766 						    (__mmask8) -1, __R);
   2767 }
   2768 
   2769 extern __inline __m512d
   2770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2771 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
   2772 			    __m512d __C, const int __R)
   2773 {
   2774   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   2775 						    (__v8df) __B,
   2776 						    (__v8df) __C,
   2777 						    (__mmask8) __U, __R);
   2778 }
   2779 
   2780 extern __inline __m512d
   2781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2782 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
   2783 			     __mmask8 __U, const int __R)
   2784 {
   2785   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
   2786 						     (__v8df) __B,
   2787 						     (__v8df) __C,
   2788 						     (__mmask8) __U, __R);
   2789 }
   2790 
   2791 extern __inline __m512d
   2792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2793 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
   2794 			     __m512d __C, const int __R)
   2795 {
   2796   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
   2797 						     (__v8df) __B,
   2798 						     (__v8df) __C,
   2799 						     (__mmask8) __U, __R);
   2800 }
   2801 
   2802 extern __inline __m512
   2803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2804 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
   2805 {
   2806   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2807 						   (__v16sf) __B,
   2808 						   (__v16sf) __C,
   2809 						   (__mmask16) -1, __R);
   2810 }
   2811 
   2812 extern __inline __m512
   2813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2814 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
   2815 			    __m512 __C, const int __R)
   2816 {
   2817   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2818 						   (__v16sf) __B,
   2819 						   (__v16sf) __C,
   2820 						   (__mmask16) __U, __R);
   2821 }
   2822 
   2823 extern __inline __m512
   2824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2825 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
   2826 			     __mmask16 __U, const int __R)
   2827 {
   2828   return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
   2829 						    (__v16sf) __B,
   2830 						    (__v16sf) __C,
   2831 						    (__mmask16) __U, __R);
   2832 }
   2833 
   2834 extern __inline __m512
   2835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2836 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
   2837 			     __m512 __C, const int __R)
   2838 {
   2839   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
   2840 						    (__v16sf) __B,
   2841 						    (__v16sf) __C,
   2842 						    (__mmask16) __U, __R);
   2843 }
   2844 
   2845 extern __inline __m512d
   2846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2847 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
   2848 {
   2849   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   2850 						    (__v8df) __B,
   2851 						    -(__v8df) __C,
   2852 						    (__mmask8) -1, __R);
   2853 }
   2854 
   2855 extern __inline __m512d
   2856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2857 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
   2858 			    __m512d __C, const int __R)
   2859 {
   2860   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   2861 						    (__v8df) __B,
   2862 						    -(__v8df) __C,
   2863 						    (__mmask8) __U, __R);
   2864 }
   2865 
   2866 extern __inline __m512d
   2867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2868 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
   2869 			     __mmask8 __U, const int __R)
   2870 {
   2871   return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
   2872 						     (__v8df) __B,
   2873 						     (__v8df) __C,
   2874 						     (__mmask8) __U, __R);
   2875 }
   2876 
   2877 extern __inline __m512d
   2878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2879 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
   2880 			     __m512d __C, const int __R)
   2881 {
   2882   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
   2883 						     (__v8df) __B,
   2884 						     -(__v8df) __C,
   2885 						     (__mmask8) __U, __R);
   2886 }
   2887 
   2888 extern __inline __m512
   2889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2890 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
   2891 {
   2892   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2893 						   (__v16sf) __B,
   2894 						   -(__v16sf) __C,
   2895 						   (__mmask16) -1, __R);
   2896 }
   2897 
   2898 extern __inline __m512
   2899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2900 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
   2901 			    __m512 __C, const int __R)
   2902 {
   2903   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   2904 						   (__v16sf) __B,
   2905 						   -(__v16sf) __C,
   2906 						   (__mmask16) __U, __R);
   2907 }
   2908 
   2909 extern __inline __m512
   2910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2911 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
   2912 			     __mmask16 __U, const int __R)
   2913 {
   2914   return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
   2915 						    (__v16sf) __B,
   2916 						    (__v16sf) __C,
   2917 						    (__mmask16) __U, __R);
   2918 }
   2919 
   2920 extern __inline __m512
   2921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2922 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
   2923 			     __m512 __C, const int __R)
   2924 {
   2925   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
   2926 						    (__v16sf) __B,
   2927 						    -(__v16sf) __C,
   2928 						    (__mmask16) __U, __R);
   2929 }
   2930 
   2931 extern __inline __m512d
   2932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2933 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
   2934 {
   2935   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   2936 						       (__v8df) __B,
   2937 						       (__v8df) __C,
   2938 						       (__mmask8) -1, __R);
   2939 }
   2940 
   2941 extern __inline __m512d
   2942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2943 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
   2944 			       __m512d __C, const int __R)
   2945 {
   2946   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   2947 						       (__v8df) __B,
   2948 						       (__v8df) __C,
   2949 						       (__mmask8) __U, __R);
   2950 }
   2951 
   2952 extern __inline __m512d
   2953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2954 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
   2955 				__mmask8 __U, const int __R)
   2956 {
   2957   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
   2958 							(__v8df) __B,
   2959 							(__v8df) __C,
   2960 							(__mmask8) __U, __R);
   2961 }
   2962 
   2963 extern __inline __m512d
   2964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2965 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
   2966 				__m512d __C, const int __R)
   2967 {
   2968   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
   2969 							(__v8df) __B,
   2970 							(__v8df) __C,
   2971 							(__mmask8) __U, __R);
   2972 }
   2973 
   2974 extern __inline __m512
   2975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2976 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
   2977 {
   2978   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   2979 						      (__v16sf) __B,
   2980 						      (__v16sf) __C,
   2981 						      (__mmask16) -1, __R);
   2982 }
   2983 
   2984 extern __inline __m512
   2985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2986 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
   2987 			       __m512 __C, const int __R)
   2988 {
   2989   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   2990 						      (__v16sf) __B,
   2991 						      (__v16sf) __C,
   2992 						      (__mmask16) __U, __R);
   2993 }
   2994 
   2995 extern __inline __m512
   2996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2997 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
   2998 				__mmask16 __U, const int __R)
   2999 {
   3000   return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
   3001 						       (__v16sf) __B,
   3002 						       (__v16sf) __C,
   3003 						       (__mmask16) __U, __R);
   3004 }
   3005 
   3006 extern __inline __m512
   3007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3008 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
   3009 				__m512 __C, const int __R)
   3010 {
   3011   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
   3012 						       (__v16sf) __B,
   3013 						       (__v16sf) __C,
   3014 						       (__mmask16) __U, __R);
   3015 }
   3016 
   3017 extern __inline __m512d
   3018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3019 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
   3020 {
   3021   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   3022 						       (__v8df) __B,
   3023 						       -(__v8df) __C,
   3024 						       (__mmask8) -1, __R);
   3025 }
   3026 
   3027 extern __inline __m512d
   3028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3029 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
   3030 			       __m512d __C, const int __R)
   3031 {
   3032   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   3033 						       (__v8df) __B,
   3034 						       -(__v8df) __C,
   3035 						       (__mmask8) __U, __R);
   3036 }
   3037 
   3038 extern __inline __m512d
   3039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3040 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
   3041 				__mmask8 __U, const int __R)
   3042 {
   3043   return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
   3044 							(__v8df) __B,
   3045 							(__v8df) __C,
   3046 							(__mmask8) __U, __R);
   3047 }
   3048 
   3049 extern __inline __m512d
   3050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3051 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
   3052 				__m512d __C, const int __R)
   3053 {
   3054   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
   3055 							(__v8df) __B,
   3056 							-(__v8df) __C,
   3057 							(__mmask8) __U, __R);
   3058 }
   3059 
   3060 extern __inline __m512
   3061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3062 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
   3063 {
   3064   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   3065 						      (__v16sf) __B,
   3066 						      -(__v16sf) __C,
   3067 						      (__mmask16) -1, __R);
   3068 }
   3069 
   3070 extern __inline __m512
   3071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3072 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
   3073 			       __m512 __C, const int __R)
   3074 {
   3075   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   3076 						      (__v16sf) __B,
   3077 						      -(__v16sf) __C,
   3078 						      (__mmask16) __U, __R);
   3079 }
   3080 
   3081 extern __inline __m512
   3082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3083 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
   3084 				__mmask16 __U, const int __R)
   3085 {
   3086   return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
   3087 						       (__v16sf) __B,
   3088 						       (__v16sf) __C,
   3089 						       (__mmask16) __U, __R);
   3090 }
   3091 
   3092 extern __inline __m512
   3093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3094 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
   3095 				__m512 __C, const int __R)
   3096 {
   3097   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
   3098 						       (__v16sf) __B,
   3099 						       -(__v16sf) __C,
   3100 						       (__mmask16) __U, __R);
   3101 }
   3102 
   3103 extern __inline __m512d
   3104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3105 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
   3106 {
   3107   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
   3108 						    (__v8df) __B,
   3109 						    (__v8df) __C,
   3110 						    (__mmask8) -1, __R);
   3111 }
   3112 
   3113 extern __inline __m512d
   3114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3115 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
   3116 			     __m512d __C, const int __R)
   3117 {
   3118   return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
   3119 						     (__v8df) __B,
   3120 						     (__v8df) __C,
   3121 						     (__mmask8) __U, __R);
   3122 }
   3123 
   3124 extern __inline __m512d
   3125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3126 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
   3127 			      __mmask8 __U, const int __R)
   3128 {
   3129   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
   3130 						     (__v8df) __B,
   3131 						     (__v8df) __C,
   3132 						     (__mmask8) __U, __R);
   3133 }
   3134 
   3135 extern __inline __m512d
   3136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3137 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
   3138 			      __m512d __C, const int __R)
   3139 {
   3140   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
   3141 						     (__v8df) __B,
   3142 						     (__v8df) __C,
   3143 						     (__mmask8) __U, __R);
   3144 }
   3145 
   3146 extern __inline __m512
   3147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3148 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
   3149 {
   3150   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
   3151 						   (__v16sf) __B,
   3152 						   (__v16sf) __C,
   3153 						   (__mmask16) -1, __R);
   3154 }
   3155 
   3156 extern __inline __m512
   3157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3158 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
   3159 			     __m512 __C, const int __R)
   3160 {
   3161   return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
   3162 						    (__v16sf) __B,
   3163 						    (__v16sf) __C,
   3164 						    (__mmask16) __U, __R);
   3165 }
   3166 
   3167 extern __inline __m512
   3168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3169 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
   3170 			      __mmask16 __U, const int __R)
   3171 {
   3172   return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
   3173 						    (__v16sf) __B,
   3174 						    (__v16sf) __C,
   3175 						    (__mmask16) __U, __R);
   3176 }
   3177 
   3178 extern __inline __m512
   3179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3180 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
   3181 			      __m512 __C, const int __R)
   3182 {
   3183   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
   3184 						    (__v16sf) __B,
   3185 						    (__v16sf) __C,
   3186 						    (__mmask16) __U, __R);
   3187 }
   3188 
   3189 extern __inline __m512d
   3190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3191 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
   3192 {
   3193   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
   3194 						    (__v8df) __B,
   3195 						    -(__v8df) __C,
   3196 						    (__mmask8) -1, __R);
   3197 }
   3198 
   3199 extern __inline __m512d
   3200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3201 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
   3202 			     __m512d __C, const int __R)
   3203 {
   3204   return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
   3205 						     (__v8df) __B,
   3206 						     (__v8df) __C,
   3207 						     (__mmask8) __U, __R);
   3208 }
   3209 
   3210 extern __inline __m512d
   3211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3212 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
   3213 			      __mmask8 __U, const int __R)
   3214 {
   3215   return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
   3216 						      (__v8df) __B,
   3217 						      (__v8df) __C,
   3218 						      (__mmask8) __U, __R);
   3219 }
   3220 
   3221 extern __inline __m512d
   3222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3223 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
   3224 			      __m512d __C, const int __R)
   3225 {
   3226   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
   3227 						     (__v8df) __B,
   3228 						     -(__v8df) __C,
   3229 						     (__mmask8) __U, __R);
   3230 }
   3231 
   3232 extern __inline __m512
   3233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3234 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
   3235 {
   3236   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
   3237 						   (__v16sf) __B,
   3238 						   -(__v16sf) __C,
   3239 						   (__mmask16) -1, __R);
   3240 }
   3241 
   3242 extern __inline __m512
   3243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3244 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
   3245 			     __m512 __C, const int __R)
   3246 {
   3247   return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
   3248 						    (__v16sf) __B,
   3249 						    (__v16sf) __C,
   3250 						    (__mmask16) __U, __R);
   3251 }
   3252 
   3253 extern __inline __m512
   3254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3255 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
   3256 			      __mmask16 __U, const int __R)
   3257 {
   3258   return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
   3259 						     (__v16sf) __B,
   3260 						     (__v16sf) __C,
   3261 						     (__mmask16) __U, __R);
   3262 }
   3263 
   3264 extern __inline __m512
   3265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3266 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
   3267 			      __m512 __C, const int __R)
   3268 {
   3269   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
   3270 						    (__v16sf) __B,
   3271 						    -(__v16sf) __C,
   3272 						    (__mmask16) __U, __R);
   3273 }
   3274 #else
   /* Macro form: expands to __builtin_ia32_vfmaddpd512_mask (A, B, C) with an
      all-ones mask (-1).  Fully parenthesized so it is safe in any
      expression.  */
   #define _mm512_fmadd_round_pd(A, B, C, R) \
     ((__m512d) __builtin_ia32_vfmaddpd512_mask ((A), (B), (C), -1, (R)))
   3277 
   /* Macro form: expands to __builtin_ia32_vfmaddpd512_mask (A, B, C) with U
      as the mask.  Fully parenthesized.  */
   #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
     ((__m512d) __builtin_ia32_vfmaddpd512_mask ((A), (B), (C), (U), (R)))
   3280 
   /* Macro form: expands to __builtin_ia32_vfmaddpd512_mask3 (A, B, C) with U
      as the mask.  Fully parenthesized.  */
   #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
     ((__m512d) __builtin_ia32_vfmaddpd512_mask3 ((A), (B), (C), (U), (R)))
   3283 
   /* Macro form: expands to __builtin_ia32_vfmaddpd512_maskz (A, B, C) with U
      as the mask.  Fully parenthesized.  */
   #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
     ((__m512d) __builtin_ia32_vfmaddpd512_maskz ((A), (B), (C), (U), (R)))
   3286 
   3287 #define _mm512_fmadd_round_ps(A, B, C, R)            \
   3288     (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R)
   3289 
   3290 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R)    \
   3291     (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R)
   3292 
   3293 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R)   \
   3294     (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R)
   3295 
   3296 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R)   \
   3297     (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
   3298 
   3299 #define _mm512_fmsub_round_pd(A, B, C, R)            \
   3300     (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R)
   3301 
   3302 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R)    \
   3303     (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R)
   3304 
   3305 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R)   \
   3306     (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R)
   3307 
   3308 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R)   \
   3309     (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R)
   3310 
   3311 #define _mm512_fmsub_round_ps(A, B, C, R)            \
   3312     (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R)
   3313 
   3314 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R)    \
   3315     (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R)
   3316 
   3317 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R)   \
   3318     (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
   3319 
   3320 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R)   \
   3321     (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R)
   3322 
   3323 #define _mm512_fmaddsub_round_pd(A, B, C, R)            \
   3324     (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
   3325 
   3326 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R)    \
   3327     (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
   3328 
   3329 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R)   \
   3330     (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R)
   3331 
   3332 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R)   \
   3333     (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R)
   3334 
   3335 #define _mm512_fmaddsub_round_ps(A, B, C, R)            \
   3336     (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R)
   3337 
   3338 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R)    \
   3339     (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R)
   3340 
   3341 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R)   \
   3342     (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R)
   3343 
   3344 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R)   \
   3345     (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R)
   3346 
   3347 #define _mm512_fmsubadd_round_pd(A, B, C, R)            \
   3348     (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R)
   3349 
   3350 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R)    \
   3351     (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R)
   3352 
   3353 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R)   \
   3354     (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R)
   3355 
   3356 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R)   \
   3357     (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R)
   3358 
   3359 #define _mm512_fmsubadd_round_ps(A, B, C, R)            \
   3360     (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R)
   3361 
   3362 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R)    \
   3363     (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R)
   3364 
   3365 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R)   \
   3366     (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R)
   3367 
   3368 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R)   \
   3369     (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
   3370 
   3371 #define _mm512_fnmadd_round_pd(A, B, C, R)            \
   3372     (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R)
   3373 
   3374 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R)    \
   3375     (__m512d)__builtin_ia32_vfnmaddpd512_mask(-(A), B, C, U, R)
   3376 
   3377 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R)   \
   3378     (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R)
   3379 
   3380 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R)   \
   3381     (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R)
   3382 
   3383 #define _mm512_fnmadd_round_ps(A, B, C, R)            \
   3384     (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R)
   3385 
   3386 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R)    \
   3387     (__m512)__builtin_ia32_vfnmaddps512_mask(-(A), B, C, U, R)
   3388 
   3389 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R)   \
   3390     (__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R)
   3391 
   3392 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R)   \
   3393     (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R)
   3394 
   3395 #define _mm512_fnmsub_round_pd(A, B, C, R)            \
   3396     (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R)
   3397 
   3398 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R)    \
   3399     (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
   3400 
   3401 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R)   \
   3402     (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
   3403 
   3404 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R)   \
   3405     (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R)
   3406 
   3407 #define _mm512_fnmsub_round_ps(A, B, C, R)            \
   3408     (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R)
   3409 
   3410 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R)    \
   3411     (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R)
   3412 
   3413 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R)   \
   3414     (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R)
   3415 
   3416 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R)   \
   3417     (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R)
   3418 #endif
   3419 
   3420 extern __inline __m512i
   3421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3422 _mm512_abs_epi64 (__m512i __A)
   3423 {
   3424   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
   3425 						 (__v8di)
   3426 						 _mm512_undefined_si512 (),
   3427 						 (__mmask8) -1);
   3428 }
   3429 
   3430 extern __inline __m512i
   3431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3432 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
   3433 {
   3434   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
   3435 						 (__v8di) __W,
   3436 						 (__mmask8) __U);
   3437 }
   3438 
   3439 extern __inline __m512i
   3440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3441 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
   3442 {
   3443   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
   3444 						 (__v8di)
   3445 						 _mm512_setzero_si512 (),
   3446 						 (__mmask8) __U);
   3447 }
   3448 
   3449 extern __inline __m512i
   3450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3451 _mm512_abs_epi32 (__m512i __A)
   3452 {
   3453   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
   3454 						 (__v16si)
   3455 						 _mm512_undefined_si512 (),
   3456 						 (__mmask16) -1);
   3457 }
   3458 
   3459 extern __inline __m512i
   3460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3461 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
   3462 {
   3463   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
   3464 						 (__v16si) __W,
   3465 						 (__mmask16) __U);
   3466 }
   3467 
   3468 extern __inline __m512i
   3469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3470 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
   3471 {
   3472   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
   3473 						 (__v16si)
   3474 						 _mm512_setzero_si512 (),
   3475 						 (__mmask16) __U);
   3476 }
   3477 
   3478 extern __inline __m512
   3479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3480 _mm512_broadcastss_ps (__m128 __A)
   3481 {
   3482   return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
   3483 						 (__v16sf)
   3484 						 _mm512_undefined_ps (),
   3485 						 (__mmask16) -1);
   3486 }
   3487 
   3488 extern __inline __m512
   3489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3490 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
   3491 {
   3492   return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
   3493 						 (__v16sf) __O, __M);
   3494 }
   3495 
   3496 extern __inline __m512
   3497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3498 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
   3499 {
   3500   return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
   3501 						 (__v16sf)
   3502 						 _mm512_setzero_ps (),
   3503 						 __M);
   3504 }
   3505 
   3506 extern __inline __m512d
   3507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3508 _mm512_broadcastsd_pd (__m128d __A)
   3509 {
   3510   return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
   3511 						  (__v8df)
   3512 						  _mm512_undefined_pd (),
   3513 						  (__mmask8) -1);
   3514 }
   3515 
   3516 extern __inline __m512d
   3517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3518 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
   3519 {
   3520   return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
   3521 						  (__v8df) __O, __M);
   3522 }
   3523 
   3524 extern __inline __m512d
   3525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3526 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
   3527 {
   3528   return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
   3529 						  (__v8df)
   3530 						  _mm512_setzero_pd (),
   3531 						  __M);
   3532 }
   3533 
   3534 extern __inline __m512i
   3535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3536 _mm512_broadcastd_epi32 (__m128i __A)
   3537 {
   3538   return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
   3539 						  (__v16si)
   3540 						  _mm512_undefined_si512 (),
   3541 						  (__mmask16) -1);
   3542 }
   3543 
   3544 extern __inline __m512i
   3545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3546 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
   3547 {
   3548   return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
   3549 						  (__v16si) __O, __M);
   3550 }
   3551 
   3552 extern __inline __m512i
   3553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3554 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
   3555 {
   3556   return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
   3557 						  (__v16si)
   3558 						  _mm512_setzero_si512 (),
   3559 						  __M);
   3560 }
   3561 
   3562 extern __inline __m512i
   3563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3564 _mm512_set1_epi32 (int __A)
   3565 {
   3566   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
   3567 							   (__v16si)
   3568 							   _mm512_undefined_si512 (),
   3569 							   (__mmask16)(-1));
   3570 }
   3571 
   3572 extern __inline __m512i
   3573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3574 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
   3575 {
   3576   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
   3577 							   __M);
   3578 }
   3579 
   3580 extern __inline __m512i
   3581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3582 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
   3583 {
   3584   return (__m512i)
   3585 	 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
   3586 						 (__v16si) _mm512_setzero_si512 (),
   3587 						 __M);
   3588 }
   3589 
   3590 extern __inline __m512i
   3591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3592 _mm512_broadcastq_epi64 (__m128i __A)
   3593 {
   3594   return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
   3595 						  (__v8di)
   3596 						  _mm512_undefined_si512 (),
   3597 						  (__mmask8) -1);
   3598 }
   3599 
   3600 extern __inline __m512i
   3601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3602 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
   3603 {
   3604   return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
   3605 						  (__v8di) __O, __M);
   3606 }
   3607 
   3608 extern __inline __m512i
   3609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3610 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
   3611 {
   3612   return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
   3613 						  (__v8di)
   3614 						  _mm512_setzero_si512 (),
   3615 						  __M);
   3616 }
   3617 
   3618 extern __inline __m512i
   3619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3620 _mm512_set1_epi64 (long long __A)
   3621 {
   3622 #ifdef TARGET_64BIT
   3623   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
   3624 							   (__v8di)
   3625 							   _mm512_undefined_si512 (),
   3626 							   (__mmask8)(-1));
   3627 #else
   3628   return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
   3629 							   (__v8di)
   3630 							   _mm512_undefined_si512 (),
   3631 							   (__mmask8)(-1));
   3632 #endif
   3633 }
   3634 
   3635 extern __inline __m512i
   3636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3637 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
   3638 {
   3639 #ifdef TARGET_64BIT
   3640   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
   3641 							   __M);
   3642 #else
   3643   return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, (__v8di) __O,
   3644 							   __M);
   3645 #endif
   3646 }
   3647 
   3648 extern __inline __m512i
   3649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3650 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
   3651 {
   3652 #ifdef TARGET_64BIT
   3653   return (__m512i)
   3654 	 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
   3655 						 (__v8di) _mm512_setzero_si512 (),
   3656 						 __M);
   3657 #else
   3658   return (__m512i)
   3659 	 __builtin_ia32_pbroadcastq512_mem_mask (__A,
   3660 						 (__v8di) _mm512_setzero_si512 (),
   3661 						 __M);
   3662 #endif
   3663 }
   3664 
   3665 extern __inline __m512
   3666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3667 _mm512_broadcast_f32x4 (__m128 __A)
   3668 {
   3669   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
   3670 						     (__v16sf)
   3671 						     _mm512_undefined_ps (),
   3672 						     (__mmask16) -1);
   3673 }
   3674 
   3675 extern __inline __m512
   3676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3677 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
   3678 {
   3679   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
   3680 						     (__v16sf) __O,
   3681 						     __M);
   3682 }
   3683 
   3684 extern __inline __m512
   3685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3686 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
   3687 {
   3688   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
   3689 						     (__v16sf)
   3690 						     _mm512_setzero_ps (),
   3691 						     __M);
   3692 }
   3693 
   3694 extern __inline __m512i
   3695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3696 _mm512_broadcast_i32x4 (__m128i __A)
   3697 {
   3698   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
   3699 						      (__v16si)
   3700 						      _mm512_undefined_si512 (),
   3701 						      (__mmask16) -1);
   3702 }
   3703 
   3704 extern __inline __m512i
   3705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3706 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
   3707 {
   3708   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
   3709 						      (__v16si) __O,
   3710 						      __M);
   3711 }
   3712 
   3713 extern __inline __m512i
   3714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3715 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
   3716 {
   3717   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
   3718 						      (__v16si)
   3719 						      _mm512_setzero_si512 (),
   3720 						      __M);
   3721 }
   3722 
   3723 extern __inline __m512d
   3724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3725 _mm512_broadcast_f64x4 (__m256d __A)
   3726 {
   3727   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
   3728 						      (__v8df)
   3729 						      _mm512_undefined_pd (),
   3730 						      (__mmask8) -1);
   3731 }
   3732 
   3733 extern __inline __m512d
   3734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3735 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
   3736 {
   3737   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
   3738 						      (__v8df) __O,
   3739 						      __M);
   3740 }
   3741 
   3742 extern __inline __m512d
   3743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3744 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
   3745 {
   3746   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
   3747 						      (__v8df)
   3748 						      _mm512_setzero_pd (),
   3749 						      __M);
   3750 }
   3751 
   3752 extern __inline __m512i
   3753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3754 _mm512_broadcast_i64x4 (__m256i __A)
   3755 {
   3756   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
   3757 						      (__v8di)
   3758 						      _mm512_undefined_si512 (),
   3759 						      (__mmask8) -1);
   3760 }
   3761 
   3762 extern __inline __m512i
   3763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3764 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
   3765 {
   3766   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
   3767 						      (__v8di) __O,
   3768 						      __M);
   3769 }
   3770 
   3771 extern __inline __m512i
   3772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3773 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
   3774 {
   3775   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
   3776 						      (__v8di)
   3777 						      _mm512_setzero_si512 (),
   3778 						      __M);
   3779 }
   3780 
   3781 typedef enum
   3782 {
   3783   _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
   3784   _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
   3785   _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
   3786   _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
   3787   _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
   3788   _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
   3789   _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
   3790   _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
   3791   _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
   3792   _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
   3793   _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
   3794   _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
   3795   _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
   3796   _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
   3797   _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
   3798   _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
   3799   _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
   3800   _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
   3801   _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
   3802   _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
   3803   _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
   3804   _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
   3805   _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
   3806   _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
   3807   _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
   3808   _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
   3809   _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
   3810   _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
   3811   _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
   3812   _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
   3813   _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
   3814   _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
   3815   _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
   3816   _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
   3817   _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
   3818   _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
   3819   _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
   3820   _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
   3821   _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
   3822   _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
   3823   _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
   3824   _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
   3825   _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
   3826   _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
   3827   _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
   3828   _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
   3829   _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
   3830   _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
   3831   _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
   3832   _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
   3833   _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
   3834   _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
   3835   _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
   3836   _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
   3837   _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
   3838   _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
   3839   _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
   3840   _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
   3841   _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
   3842   _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
   3843   _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
   3844   _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
   3845   _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
   3846   _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
   3847   _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
   3848   _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
   3849   _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
   3850   _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
   3851   _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
   3852   _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
   3853   _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
   3854   _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
   3855   _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
   3856   _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
   3857   _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
   3858   _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
   3859   _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
   3860   _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
   3861   _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
   3862   _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
   3863   _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
   3864   _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
   3865   _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
   3866   _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
   3867   _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
   3868   _MM_PERM_DDDD = 0xFF
   3869 } _MM_PERM_ENUM;
   3870 
   3871 #ifdef __OPTIMIZE__
   3872 extern __inline __m512i
   3873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3874 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
   3875 {
   3876   return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
   3877 						  __mask,
   3878 						  (__v16si)
   3879 						  _mm512_undefined_si512 (),
   3880 						  (__mmask16) -1);
   3881 }
   3882 
   3883 extern __inline __m512i
   3884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3885 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
   3886 			   _MM_PERM_ENUM __mask)
   3887 {
   3888   return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
   3889 						  __mask,
   3890 						  (__v16si) __W,
   3891 						  (__mmask16) __U);
   3892 }
   3893 
   3894 extern __inline __m512i
   3895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3896 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
   3897 {
   3898   return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
   3899 						  __mask,
   3900 						  (__v16si)
   3901 						  _mm512_setzero_si512 (),
   3902 						  (__mmask16) __U);
   3903 }
   3904 
   3905 extern __inline __m512i
   3906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3907 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
   3908 {
   3909   return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
   3910 						   (__v8di) __B, __imm,
   3911 						   (__v8di)
   3912 						   _mm512_undefined_si512 (),
   3913 						   (__mmask8) -1);
   3914 }
   3915 
   3916 extern __inline __m512i
   3917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3918 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
   3919 			   __m512i __B, const int __imm)
   3920 {
   3921   return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
   3922 						   (__v8di) __B, __imm,
   3923 						   (__v8di) __W,
   3924 						   (__mmask8) __U);
   3925 }
   3926 
   3927 extern __inline __m512i
   3928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3929 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
   3930 			    const int __imm)
   3931 {
   3932   return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
   3933 						   (__v8di) __B, __imm,
   3934 						   (__v8di)
   3935 						   _mm512_setzero_si512 (),
   3936 						   (__mmask8) __U);
   3937 }
   3938 
   3939 extern __inline __m512i
   3940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3941 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
   3942 {
   3943   return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
   3944 						   (__v16si) __B,
   3945 						   __imm,
   3946 						   (__v16si)
   3947 						   _mm512_undefined_si512 (),
   3948 						   (__mmask16) -1);
   3949 }
   3950 
   3951 extern __inline __m512i
   3952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3953 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
   3954 			   __m512i __B, const int __imm)
   3955 {
   3956   return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
   3957 						   (__v16si) __B,
   3958 						   __imm,
   3959 						   (__v16si) __W,
   3960 						   (__mmask16) __U);
   3961 }
   3962 
   3963 extern __inline __m512i
   3964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3965 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
   3966 			    const int __imm)
   3967 {
   3968   return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
   3969 						   (__v16si) __B,
   3970 						   __imm,
   3971 						   (__v16si)
   3972 						   _mm512_setzero_si512 (),
   3973 						   (__mmask16) __U);
   3974 }
   3975 
   3976 extern __inline __m512d
   3977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3978 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
   3979 {
   3980   return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
   3981 						   (__v8df) __B, __imm,
   3982 						   (__v8df)
   3983 						   _mm512_undefined_pd (),
   3984 						   (__mmask8) -1);
   3985 }
   3986 
   3987 extern __inline __m512d
   3988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3989 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
   3990 			   __m512d __B, const int __imm)
   3991 {
   3992   return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
   3993 						   (__v8df) __B, __imm,
   3994 						   (__v8df) __W,
   3995 						   (__mmask8) __U);
   3996 }
   3997 
   3998 extern __inline __m512d
   3999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4000 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
   4001 			    const int __imm)
   4002 {
   4003   return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
   4004 						   (__v8df) __B, __imm,
   4005 						   (__v8df)
   4006 						   _mm512_setzero_pd (),
   4007 						   (__mmask8) __U);
   4008 }
   4009 
   4010 extern __inline __m512
   4011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4012 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
   4013 {
   4014   return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
   4015 						  (__v16sf) __B, __imm,
   4016 						  (__v16sf)
   4017 						  _mm512_undefined_ps (),
   4018 						  (__mmask16) -1);
   4019 }
   4020 
   4021 extern __inline __m512
   4022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4023 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
   4024 			   __m512 __B, const int __imm)
   4025 {
   4026   return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
   4027 						  (__v16sf) __B, __imm,
   4028 						  (__v16sf) __W,
   4029 						  (__mmask16) __U);
   4030 }
   4031 
   4032 extern __inline __m512
   4033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4034 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
   4035 			    const int __imm)
   4036 {
   4037   return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
   4038 						  (__v16sf) __B, __imm,
   4039 						  (__v16sf)
   4040 						  _mm512_setzero_ps (),
   4041 						  (__mmask16) __U);
   4042 }
   4043 
   4044 #else
   4045 #define _mm512_shuffle_epi32(X, C)                                      \
   4046   ((__m512i)  __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
   4047     (__v16si)(__m512i)_mm512_undefined_si512 (),\
   4048     (__mmask16)-1))
   4049 
   4050 #define _mm512_mask_shuffle_epi32(W, U, X, C)                           \
   4051   ((__m512i)  __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
   4052     (__v16si)(__m512i)(W),\
   4053     (__mmask16)(U)))
   4054 
   4055 #define _mm512_maskz_shuffle_epi32(U, X, C)                             \
   4056   ((__m512i)  __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
   4057     (__v16si)(__m512i)_mm512_setzero_si512 (),\
   4058     (__mmask16)(U)))
   4059 
   4060 #define _mm512_shuffle_i64x2(X, Y, C)                                   \
   4061   ((__m512i)  __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),     \
   4062       (__v8di)(__m512i)(Y), (int)(C),\
   4063     (__v8di)(__m512i)_mm512_undefined_si512 (),\
   4064     (__mmask8)-1))
   4065 
   4066 #define _mm512_mask_shuffle_i64x2(W, U, X, Y, C)                        \
   4067   ((__m512i)  __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),     \
   4068       (__v8di)(__m512i)(Y), (int)(C),\
   4069     (__v8di)(__m512i)(W),\
   4070     (__mmask8)(U)))
   4071 
   4072 #define _mm512_maskz_shuffle_i64x2(U, X, Y, C)                          \
   4073   ((__m512i)  __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),     \
   4074       (__v8di)(__m512i)(Y), (int)(C),\
   4075     (__v8di)(__m512i)_mm512_setzero_si512 (),\
   4076     (__mmask8)(U)))
   4077 
   4078 #define _mm512_shuffle_i32x4(X, Y, C)                                   \
   4079   ((__m512i)  __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),    \
   4080       (__v16si)(__m512i)(Y), (int)(C),\
   4081     (__v16si)(__m512i)_mm512_undefined_si512 (),\
   4082     (__mmask16)-1))
   4083 
   4084 #define _mm512_mask_shuffle_i32x4(W, U, X, Y, C)                        \
   4085   ((__m512i)  __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),    \
   4086       (__v16si)(__m512i)(Y), (int)(C),\
   4087     (__v16si)(__m512i)(W),\
   4088     (__mmask16)(U)))
   4089 
   4090 #define _mm512_maskz_shuffle_i32x4(U, X, Y, C)                          \
   4091   ((__m512i)  __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),    \
   4092       (__v16si)(__m512i)(Y), (int)(C),\
   4093     (__v16si)(__m512i)_mm512_setzero_si512 (),\
   4094     (__mmask16)(U)))
   4095 
   4096 #define _mm512_shuffle_f64x2(X, Y, C)                                   \
   4097   ((__m512d)  __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),     \
   4098       (__v8df)(__m512d)(Y), (int)(C),\
   4099     (__v8df)(__m512d)_mm512_undefined_pd(),\
   4100     (__mmask8)-1))
   4101 
   4102 #define _mm512_mask_shuffle_f64x2(W, U, X, Y, C)                        \
   4103   ((__m512d)  __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),     \
   4104       (__v8df)(__m512d)(Y), (int)(C),\
   4105     (__v8df)(__m512d)(W),\
   4106     (__mmask8)(U)))
   4107 
   4108 #define _mm512_maskz_shuffle_f64x2(U, X, Y, C)                         \
   4109   ((__m512d)  __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),    \
   4110       (__v8df)(__m512d)(Y), (int)(C),\
   4111     (__v8df)(__m512d)_mm512_setzero_pd(),\
   4112     (__mmask8)(U)))
   4113 
   4114 #define _mm512_shuffle_f32x4(X, Y, C)                                  \
   4115   ((__m512)  __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),     \
   4116       (__v16sf)(__m512)(Y), (int)(C),\
   4117     (__v16sf)(__m512)_mm512_undefined_ps(),\
   4118     (__mmask16)-1))
   4119 
   4120 #define _mm512_mask_shuffle_f32x4(W, U, X, Y, C)                       \
   4121   ((__m512)  __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),     \
   4122       (__v16sf)(__m512)(Y), (int)(C),\
   4123     (__v16sf)(__m512)(W),\
   4124     (__mmask16)(U)))
   4125 
   4126 #define _mm512_maskz_shuffle_f32x4(U, X, Y, C)                         \
   4127   ((__m512)  __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),     \
   4128       (__v16sf)(__m512)(Y), (int)(C),\
   4129     (__v16sf)(__m512)_mm512_setzero_ps(),\
   4130     (__mmask16)(U)))
   4131 #endif
   4132 
   4133 extern __inline __m512i
   4134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4135 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
   4136 {
   4137   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
   4138 						  (__v16si) __B,
   4139 						  (__v16si)
   4140 						  _mm512_undefined_si512 (),
   4141 						  (__mmask16) -1);
   4142 }
   4143 
   4144 extern __inline __m512i
   4145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4146 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   4147 {
   4148   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
   4149 						  (__v16si) __B,
   4150 						  (__v16si) __W,
   4151 						  (__mmask16) __U);
   4152 }
   4153 
   4154 extern __inline __m512i
   4155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4156 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
   4157 {
   4158   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
   4159 						  (__v16si) __B,
   4160 						  (__v16si)
   4161 						  _mm512_setzero_si512 (),
   4162 						  (__mmask16) __U);
   4163 }
   4164 
   4165 extern __inline __m512i
   4166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4167 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
   4168 {
   4169   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
   4170 						  (__v16si) __B,
   4171 						  (__v16si)
   4172 						  _mm512_undefined_si512 (),
   4173 						  (__mmask16) -1);
   4174 }
   4175 
   4176 extern __inline __m512i
   4177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4178 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   4179 {
   4180   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
   4181 						  (__v16si) __B,
   4182 						  (__v16si) __W,
   4183 						  (__mmask16) __U);
   4184 }
   4185 
   4186 extern __inline __m512i
   4187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4188 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
   4189 {
   4190   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
   4191 						  (__v16si) __B,
   4192 						  (__v16si)
   4193 						  _mm512_setzero_si512 (),
   4194 						  (__mmask16) __U);
   4195 }
   4196 
   4197 extern __inline __m512i
   4198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4199 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
   4200 {
   4201   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
   4202 						  (__v8di) __B,
   4203 						  (__v8di)
   4204 						  _mm512_undefined_si512 (),
   4205 						  (__mmask8) -1);
   4206 }
   4207 
   4208 extern __inline __m512i
   4209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4210 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   4211 {
   4212   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
   4213 						  (__v8di) __B,
   4214 						  (__v8di) __W,
   4215 						  (__mmask8) __U);
   4216 }
   4217 
   4218 extern __inline __m512i
   4219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4220 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
   4221 {
   4222   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
   4223 						  (__v8di) __B,
   4224 						  (__v8di)
   4225 						  _mm512_setzero_si512 (),
   4226 						  (__mmask8) __U);
   4227 }
   4228 
   4229 extern __inline __m512i
   4230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4231 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
   4232 {
   4233   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
   4234 						  (__v8di) __B,
   4235 						  (__v8di)
   4236 						  _mm512_undefined_si512 (),
   4237 						  (__mmask8) -1);
   4238 }
   4239 
   4240 extern __inline __m512i
   4241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4242 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   4243 {
   4244   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
   4245 						  (__v8di) __B,
   4246 						  (__v8di) __W,
   4247 						  (__mmask8) __U);
   4248 }
   4249 
   4250 extern __inline __m512i
   4251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4252 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
   4253 {
   4254   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
   4255 						  (__v8di) __B,
   4256 						  (__v8di)
   4257 						  _mm512_setzero_si512 (),
   4258 						  (__mmask8) __U);
   4259 }
   4260 
   4261 #ifdef __OPTIMIZE__
   4262 extern __inline __m256i
   4263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4264 _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
   4265 {
   4266   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
   4267 						     (__v8si)
   4268 						     _mm256_undefined_si256 (),
   4269 						     (__mmask8) -1, __R);
   4270 }
   4271 
   4272 extern __inline __m256i
   4273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4274 _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
   4275 				const int __R)
   4276 {
   4277   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
   4278 						     (__v8si) __W,
   4279 						     (__mmask8) __U, __R);
   4280 }
   4281 
   4282 extern __inline __m256i
   4283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4284 _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
   4285 {
   4286   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
   4287 						     (__v8si)
   4288 						     _mm256_setzero_si256 (),
   4289 						     (__mmask8) __U, __R);
   4290 }
   4291 
   4292 extern __inline __m256i
   4293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4294 _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
   4295 {
   4296   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
   4297 						      (__v8si)
   4298 						      _mm256_undefined_si256 (),
   4299 						      (__mmask8) -1, __R);
   4300 }
   4301 
   4302 extern __inline __m256i
   4303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4304 _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
   4305 				const int __R)
   4306 {
   4307   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
   4308 						      (__v8si) __W,
   4309 						      (__mmask8) __U, __R);
   4310 }
   4311 
   4312 extern __inline __m256i
   4313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4314 _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
   4315 {
   4316   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
   4317 						      (__v8si)
   4318 						      _mm256_setzero_si256 (),
   4319 						      (__mmask8) __U, __R);
   4320 }
   4321 #else
   4322 #define _mm512_cvtt_roundpd_epi32(A, B)		     \
   4323     ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
   4324 
   4325 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B)   \
   4326     ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))
   4327 
   4328 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, B)     \
   4329     ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
   4330 
   4331 #define _mm512_cvtt_roundpd_epu32(A, B)		     \
   4332     ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
   4333 
   4334 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B)   \
   4335     ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))
   4336 
   4337 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, B)     \
   4338     ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
   4339 #endif
   4340 
   4341 #ifdef __OPTIMIZE__
   4342 extern __inline __m256i
   4343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4344 _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
   4345 {
   4346   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
   4347 						    (__v8si)
   4348 						    _mm256_undefined_si256 (),
   4349 						    (__mmask8) -1, __R);
   4350 }
   4351 
   4352 extern __inline __m256i
   4353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4354 _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
   4355 			       const int __R)
   4356 {
   4357   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
   4358 						    (__v8si) __W,
   4359 						    (__mmask8) __U, __R);
   4360 }
   4361 
   4362 extern __inline __m256i
   4363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4364 _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
   4365 {
   4366   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
   4367 						    (__v8si)
   4368 						    _mm256_setzero_si256 (),
   4369 						    (__mmask8) __U, __R);
   4370 }
   4371 
   4372 extern __inline __m256i
   4373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4374 _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
   4375 {
   4376   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
   4377 						     (__v8si)
   4378 						     _mm256_undefined_si256 (),
   4379 						     (__mmask8) -1, __R);
   4380 }
   4381 
   4382 extern __inline __m256i
   4383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4384 _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
   4385 			       const int __R)
   4386 {
   4387   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
   4388 						     (__v8si) __W,
   4389 						     (__mmask8) __U, __R);
   4390 }
   4391 
   4392 extern __inline __m256i
   4393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4394 _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
   4395 {
   4396   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
   4397 						     (__v8si)
   4398 						     _mm256_setzero_si256 (),
   4399 						     (__mmask8) __U, __R);
   4400 }
   4401 #else
   4402 #define _mm512_cvt_roundpd_epi32(A, B)		    \
   4403     ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
   4404 
   4405 #define _mm512_mask_cvt_roundpd_epi32(W, U, A, B)   \
   4406     ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B))
   4407 
   4408 #define _mm512_maskz_cvt_roundpd_epi32(U, A, B)     \
   4409     ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
   4410 
   4411 #define _mm512_cvt_roundpd_epu32(A, B)		    \
   4412     ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
   4413 
   4414 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, B)   \
   4415     ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B))
   4416 
   4417 #define _mm512_maskz_cvt_roundpd_epu32(U, A, B)     \
   4418     ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
   4419 #endif
   4420 
   4421 #ifdef __OPTIMIZE__
   4422 extern __inline __m512i
   4423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4424 _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
   4425 {
   4426   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
   4427 						     (__v16si)
   4428 						     _mm512_undefined_si512 (),
   4429 						     (__mmask16) -1, __R);
   4430 }
   4431 
   4432 extern __inline __m512i
   4433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4434 _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
   4435 				const int __R)
   4436 {
   4437   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
   4438 						     (__v16si) __W,
   4439 						     (__mmask16) __U, __R);
   4440 }
   4441 
   4442 extern __inline __m512i
   4443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4444 _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
   4445 {
   4446   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
   4447 						     (__v16si)
   4448 						     _mm512_setzero_si512 (),
   4449 						     (__mmask16) __U, __R);
   4450 }
   4451 
   4452 extern __inline __m512i
   4453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4454 _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
   4455 {
   4456   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   4457 						      (__v16si)
   4458 						      _mm512_undefined_si512 (),
   4459 						      (__mmask16) -1, __R);
   4460 }
   4461 
   4462 extern __inline __m512i
   4463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4464 _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
   4465 				const int __R)
   4466 {
   4467   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   4468 						      (__v16si) __W,
   4469 						      (__mmask16) __U, __R);
   4470 }
   4471 
   4472 extern __inline __m512i
   4473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4474 _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
   4475 {
   4476   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   4477 						      (__v16si)
   4478 						      _mm512_setzero_si512 (),
   4479 						      (__mmask16) __U, __R);
   4480 }
   4481 #else
   4482 #define _mm512_cvtt_roundps_epi32(A, B)		     \
   4483     ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
   4484 
   4485 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, B)   \
   4486     ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))
   4487 
   4488 #define _mm512_maskz_cvtt_roundps_epi32(U, A, B)     \
   4489     ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
   4490 
   4491 #define _mm512_cvtt_roundps_epu32(A, B)		     \
   4492     ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
   4493 
   4494 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, B)   \
   4495     ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))
   4496 
   4497 #define _mm512_maskz_cvtt_roundps_epu32(U, A, B)     \
   4498     ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
   4499 #endif
   4500 
   4501 #ifdef __OPTIMIZE__
   4502 extern __inline __m512i
   4503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4504 _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
   4505 {
   4506   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
   4507 						    (__v16si)
   4508 						    _mm512_undefined_si512 (),
   4509 						    (__mmask16) -1, __R);
   4510 }
   4511 
   4512 extern __inline __m512i
   4513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4514 _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
   4515 			       const int __R)
   4516 {
   4517   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
   4518 						    (__v16si) __W,
   4519 						    (__mmask16) __U, __R);
   4520 }
   4521 
   4522 extern __inline __m512i
   4523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4524 _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
   4525 {
   4526   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
   4527 						    (__v16si)
   4528 						    _mm512_setzero_si512 (),
   4529 						    (__mmask16) __U, __R);
   4530 }
   4531 
   4532 extern __inline __m512i
   4533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4534 _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
   4535 {
   4536   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
   4537 						     (__v16si)
   4538 						     _mm512_undefined_si512 (),
   4539 						     (__mmask16) -1, __R);
   4540 }
   4541 
   4542 extern __inline __m512i
   4543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4544 _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
   4545 			       const int __R)
   4546 {
   4547   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
   4548 						     (__v16si) __W,
   4549 						     (__mmask16) __U, __R);
   4550 }
   4551 
   4552 extern __inline __m512i
   4553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4554 _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
   4555 {
   4556   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
   4557 						     (__v16si)
   4558 						     _mm512_setzero_si512 (),
   4559 						     (__mmask16) __U, __R);
   4560 }
   4561 #else
   4562 #define _mm512_cvt_roundps_epi32(A, B)		    \
   4563     ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
   4564 
   4565 #define _mm512_mask_cvt_roundps_epi32(W, U, A, B)   \
   4566     ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))
   4567 
   4568 #define _mm512_maskz_cvt_roundps_epi32(U, A, B)     \
   4569     ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
   4570 
   4571 #define _mm512_cvt_roundps_epu32(A, B)		    \
   4572     ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
   4573 
   4574 #define _mm512_mask_cvt_roundps_epu32(W, U, A, B)   \
   4575     ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B))
   4576 
   4577 #define _mm512_maskz_cvt_roundps_epu32(U, A, B)     \
   4578     ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
   4579 #endif
   4580 
   4581 extern __inline __m128d
   4582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4583 _mm_cvtu32_sd (__m128d __A, unsigned __B)
   4584 {
   4585   return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
   4586 }
   4587 
   4588 #ifdef __x86_64__
   4589 #ifdef __OPTIMIZE__
   4590 extern __inline __m128d
   4591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4592 _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
   4593 {
   4594   return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
   4595 }
   4596 
   4597 extern __inline __m128d
   4598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4599 _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
   4600 {
   4601   return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
   4602 }
   4603 
   4604 extern __inline __m128d
   4605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4606 _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
   4607 {
   4608   return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
   4609 }
   4610 #else
   4611 #define _mm_cvt_roundu64_sd(A, B, C)   \
   4612     (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C)
   4613 
   4614 #define _mm_cvt_roundi64_sd(A, B, C)   \
   4615     (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
   4616 
   4617 #define _mm_cvt_roundsi64_sd(A, B, C)   \
   4618     (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
   4619 #endif
   4620 
   4621 #endif
   4622 
   4623 #ifdef __OPTIMIZE__
   4624 extern __inline __m128
   4625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4626 _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
   4627 {
   4628   return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
   4629 }
   4630 
   4631 extern __inline __m128
   4632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4633 _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
   4634 {
   4635   return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
   4636 }
   4637 
   4638 extern __inline __m128
   4639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4640 _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
   4641 {
   4642   return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
   4643 }
   4644 #else
   4645 #define _mm_cvt_roundu32_ss(A, B, C)   \
   4646     (__m128)__builtin_ia32_cvtusi2ss32(A, B, C)
   4647 
   4648 #define _mm_cvt_roundi32_ss(A, B, C)   \
   4649     (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
   4650 
   4651 #define _mm_cvt_roundsi32_ss(A, B, C)   \
   4652     (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
   4653 #endif
   4654 
   4655 #ifdef __x86_64__
   4656 #ifdef __OPTIMIZE__
   4657 extern __inline __m128
   4658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4659 _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
   4660 {
   4661   return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
   4662 }
   4663 
   4664 extern __inline __m128
   4665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4666 _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
   4667 {
   4668   return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
   4669 }
   4670 
   4671 extern __inline __m128
   4672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4673 _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
   4674 {
   4675   return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
   4676 }
   4677 #else
   4678 #define _mm_cvt_roundu64_ss(A, B, C)   \
   4679     (__m128)__builtin_ia32_cvtusi2ss64(A, B, C)
   4680 
   4681 #define _mm_cvt_roundi64_ss(A, B, C)   \
   4682     (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
   4683 
   4684 #define _mm_cvt_roundsi64_ss(A, B, C)   \
   4685     (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
   4686 #endif
   4687 
   4688 #endif
   4689 
   4690 extern __inline __m128i
   4691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4692 _mm512_cvtepi32_epi8 (__m512i __A)
   4693 {
   4694   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
   4695 						  (__v16qi)
   4696 						  _mm_undefined_si128 (),
   4697 						  (__mmask16) -1);
   4698 }
   4699 
   4700 extern __inline void
   4701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4702 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
   4703 {
   4704   __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
   4705 }
   4706 
   4707 extern __inline __m128i
   4708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4709 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
   4710 {
   4711   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
   4712 						  (__v16qi) __O, __M);
   4713 }
   4714 
   4715 extern __inline __m128i
   4716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4717 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
   4718 {
   4719   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
   4720 						  (__v16qi)
   4721 						  _mm_setzero_si128 (),
   4722 						  __M);
   4723 }
   4724 
   4725 extern __inline __m128i
   4726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4727 _mm512_cvtsepi32_epi8 (__m512i __A)
   4728 {
   4729   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
   4730 						   (__v16qi)
   4731 						   _mm_undefined_si128 (),
   4732 						   (__mmask16) -1);
   4733 }
   4734 
   4735 extern __inline void
   4736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4737 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
   4738 {
   4739   __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
   4740 }
   4741 
   4742 extern __inline __m128i
   4743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4744 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
   4745 {
   4746   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
   4747 						   (__v16qi) __O, __M);
   4748 }
   4749 
   4750 extern __inline __m128i
   4751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4752 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
   4753 {
   4754   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
   4755 						   (__v16qi)
   4756 						   _mm_setzero_si128 (),
   4757 						   __M);
   4758 }
   4759 
   4760 extern __inline __m128i
   4761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4762 _mm512_cvtusepi32_epi8 (__m512i __A)
   4763 {
   4764   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
   4765 						    (__v16qi)
   4766 						    _mm_undefined_si128 (),
   4767 						    (__mmask16) -1);
   4768 }
   4769 
   4770 extern __inline void
   4771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4772 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
   4773 {
   4774   __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
   4775 }
   4776 
   4777 extern __inline __m128i
   4778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4779 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
   4780 {
   4781   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
   4782 						    (__v16qi) __O,
   4783 						    __M);
   4784 }
   4785 
   4786 extern __inline __m128i
   4787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4788 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
   4789 {
   4790   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
   4791 						    (__v16qi)
   4792 						    _mm_setzero_si128 (),
   4793 						    __M);
   4794 }
   4795 
   4796 extern __inline __m256i
   4797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4798 _mm512_cvtepi32_epi16 (__m512i __A)
   4799 {
   4800   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
   4801 						  (__v16hi)
   4802 						  _mm256_undefined_si256 (),
   4803 						  (__mmask16) -1);
   4804 }
   4805 
   4806 extern __inline void
   4807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4808 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
   4809 {
   4810   __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
   4811 }
   4812 
   4813 extern __inline __m256i
   4814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4815 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
   4816 {
   4817   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
   4818 						  (__v16hi) __O, __M);
   4819 }
   4820 
   4821 extern __inline __m256i
   4822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4823 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
   4824 {
   4825   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
   4826 						  (__v16hi)
   4827 						  _mm256_setzero_si256 (),
   4828 						  __M);
   4829 }
   4830 
   4831 extern __inline __m256i
   4832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4833 _mm512_cvtsepi32_epi16 (__m512i __A)
   4834 {
   4835   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
   4836 						   (__v16hi)
   4837 						   _mm256_undefined_si256 (),
   4838 						   (__mmask16) -1);
   4839 }
   4840 
   4841 extern __inline void
   4842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4843 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
   4844 {
   4845   __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
   4846 }
   4847 
   4848 extern __inline __m256i
   4849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4850 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
   4851 {
   4852   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
   4853 						   (__v16hi) __O, __M);
   4854 }
   4855 
   4856 extern __inline __m256i
   4857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4858 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
   4859 {
   4860   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
   4861 						   (__v16hi)
   4862 						   _mm256_setzero_si256 (),
   4863 						   __M);
   4864 }
   4865 
   4866 extern __inline __m256i
   4867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4868 _mm512_cvtusepi32_epi16 (__m512i __A)
   4869 {
   4870   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
   4871 						    (__v16hi)
   4872 						    _mm256_undefined_si256 (),
   4873 						    (__mmask16) -1);
   4874 }
   4875 
   4876 extern __inline void
   4877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4878 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
   4879 {
   4880   __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
   4881 }
   4882 
   4883 extern __inline __m256i
   4884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4885 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
   4886 {
   4887   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
   4888 						    (__v16hi) __O,
   4889 						    __M);
   4890 }
   4891 
   4892 extern __inline __m256i
   4893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4894 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
   4895 {
   4896   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
   4897 						    (__v16hi)
   4898 						    _mm256_setzero_si256 (),
   4899 						    __M);
   4900 }
   4901 
   4902 extern __inline __m256i
   4903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4904 _mm512_cvtepi64_epi32 (__m512i __A)
   4905 {
   4906   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
   4907 						  (__v8si)
   4908 						  _mm256_undefined_si256 (),
   4909 						  (__mmask8) -1);
   4910 }
   4911 
   4912 extern __inline void
   4913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4914 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
   4915 {
   4916   __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
   4917 }
   4918 
   4919 extern __inline __m256i
   4920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4921 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
   4922 {
   4923   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
   4924 						  (__v8si) __O, __M);
   4925 }
   4926 
   4927 extern __inline __m256i
   4928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4929 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
   4930 {
   4931   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
   4932 						  (__v8si)
   4933 						  _mm256_setzero_si256 (),
   4934 						  __M);
   4935 }
   4936 
   4937 extern __inline __m256i
   4938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4939 _mm512_cvtsepi64_epi32 (__m512i __A)
   4940 {
   4941   __v8si __O;
   4942   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
   4943 						   (__v8si)
   4944 						   _mm256_undefined_si256 (),
   4945 						   (__mmask8) -1);
   4946 }
   4947 
   4948 extern __inline void
   4949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4950 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
   4951 {
   4952   __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
   4953 }
   4954 
   4955 extern __inline __m256i
   4956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4957 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
   4958 {
   4959   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
   4960 						   (__v8si) __O, __M);
   4961 }
   4962 
   4963 extern __inline __m256i
   4964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4965 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
   4966 {
   4967   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
   4968 						   (__v8si)
   4969 						   _mm256_setzero_si256 (),
   4970 						   __M);
   4971 }
   4972 
   4973 extern __inline __m256i
   4974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4975 _mm512_cvtusepi64_epi32 (__m512i __A)
   4976 {
   4977   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
   4978 						    (__v8si)
   4979 						    _mm256_undefined_si256 (),
   4980 						    (__mmask8) -1);
   4981 }
   4982 
   4983 extern __inline void
   4984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4985 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
   4986 {
   4987   __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
   4988 }
   4989 
   4990 extern __inline __m256i
   4991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4992 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
   4993 {
   4994   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
   4995 						    (__v8si) __O, __M);
   4996 }
   4997 
   4998 extern __inline __m256i
   4999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5000 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
   5001 {
   5002   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
   5003 						    (__v8si)
   5004 						    _mm256_setzero_si256 (),
   5005 						    __M);
   5006 }
   5007 
   5008 extern __inline __m128i
   5009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5010 _mm512_cvtepi64_epi16 (__m512i __A)
   5011 {
   5012   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
   5013 						  (__v8hi)
   5014 						  _mm_undefined_si128 (),
   5015 						  (__mmask8) -1);
   5016 }
   5017 
   5018 extern __inline void
   5019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5020 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
   5021 {
   5022   __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
   5023 }
   5024 
   5025 extern __inline __m128i
   5026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5027 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
   5028 {
   5029   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
   5030 						  (__v8hi) __O, __M);
   5031 }
   5032 
   5033 extern __inline __m128i
   5034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5035 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
   5036 {
   5037   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
   5038 						  (__v8hi)
   5039 						  _mm_setzero_si128 (),
   5040 						  __M);
   5041 }
   5042 
   5043 extern __inline __m128i
   5044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5045 _mm512_cvtsepi64_epi16 (__m512i __A)
   5046 {
   5047   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
   5048 						   (__v8hi)
   5049 						   _mm_undefined_si128 (),
   5050 						   (__mmask8) -1);
   5051 }
   5052 
   5053 extern __inline void
   5054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5055 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
   5056 {
   5057   __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
   5058 }
   5059 
   5060 extern __inline __m128i
   5061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5062 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
   5063 {
   5064   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
   5065 						   (__v8hi) __O, __M);
   5066 }
   5067 
   5068 extern __inline __m128i
   5069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5070 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
   5071 {
   5072   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
   5073 						   (__v8hi)
   5074 						   _mm_setzero_si128 (),
   5075 						   __M);
   5076 }
   5077 
   5078 extern __inline __m128i
   5079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5080 _mm512_cvtusepi64_epi16 (__m512i __A)
   5081 {
   5082   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
   5083 						    (__v8hi)
   5084 						    _mm_undefined_si128 (),
   5085 						    (__mmask8) -1);
   5086 }
   5087 
   5088 extern __inline void
   5089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5090 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
   5091 {
   5092   __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
   5093 }
   5094 
   5095 extern __inline __m128i
   5096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5097 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
   5098 {
   5099   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
   5100 						    (__v8hi) __O, __M);
   5101 }
   5102 
   5103 extern __inline __m128i
   5104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5105 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
   5106 {
   5107   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
   5108 						    (__v8hi)
   5109 						    _mm_setzero_si128 (),
   5110 						    __M);
   5111 }
   5112 
   5113 extern __inline __m128i
   5114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5115 _mm512_cvtepi64_epi8 (__m512i __A)
   5116 {
   5117   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
   5118 						  (__v16qi)
   5119 						  _mm_undefined_si128 (),
   5120 						  (__mmask8) -1);
   5121 }
   5122 
   5123 extern __inline void
   5124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5125 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
   5126 {
   5127   __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
   5128 }
   5129 
   5130 extern __inline __m128i
   5131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5132 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
   5133 {
   5134   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
   5135 						  (__v16qi) __O, __M);
   5136 }
   5137 
   5138 extern __inline __m128i
   5139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5140 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
   5141 {
   5142   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
   5143 						  (__v16qi)
   5144 						  _mm_setzero_si128 (),
   5145 						  __M);
   5146 }
   5147 
   5148 extern __inline __m128i
   5149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5150 _mm512_cvtsepi64_epi8 (__m512i __A)
   5151 {
   5152   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
   5153 						   (__v16qi)
   5154 						   _mm_undefined_si128 (),
   5155 						   (__mmask8) -1);
   5156 }
   5157 
   5158 extern __inline void
   5159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5160 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
   5161 {
   5162   __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
   5163 }
   5164 
   5165 extern __inline __m128i
   5166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5167 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
   5168 {
   5169   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
   5170 						   (__v16qi) __O, __M);
   5171 }
   5172 
   5173 extern __inline __m128i
   5174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5175 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
   5176 {
   5177   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
   5178 						   (__v16qi)
   5179 						   _mm_setzero_si128 (),
   5180 						   __M);
   5181 }
   5182 
   5183 extern __inline __m128i
   5184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5185 _mm512_cvtusepi64_epi8 (__m512i __A)
   5186 {
   5187   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
   5188 						    (__v16qi)
   5189 						    _mm_undefined_si128 (),
   5190 						    (__mmask8) -1);
   5191 }
   5192 
   5193 extern __inline void
   5194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5195 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
   5196 {
   5197   __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
   5198 }
   5199 
   5200 extern __inline __m128i
   5201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5202 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
   5203 {
   5204   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
   5205 						    (__v16qi) __O,
   5206 						    __M);
   5207 }
   5208 
   5209 extern __inline __m128i
   5210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5211 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
   5212 {
   5213   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
   5214 						    (__v16qi)
   5215 						    _mm_setzero_si128 (),
   5216 						    __M);
   5217 }
   5218 
   5219 extern __inline __m512d
   5220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5221 _mm512_cvtepi32_pd (__m256i __A)
   5222 {
   5223   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
   5224 						    (__v8df)
   5225 						    _mm512_undefined_pd (),
   5226 						    (__mmask8) -1);
   5227 }
   5228 
   5229 extern __inline __m512d
   5230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5231 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
   5232 {
   5233   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
   5234 						    (__v8df) __W,
   5235 						    (__mmask8) __U);
   5236 }
   5237 
   5238 extern __inline __m512d
   5239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5240 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
   5241 {
   5242   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
   5243 						    (__v8df)
   5244 						    _mm512_setzero_pd (),
   5245 						    (__mmask8) __U);
   5246 }
   5247 
   5248 extern __inline __m512d
   5249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5250 _mm512_cvtepu32_pd (__m256i __A)
   5251 {
   5252   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
   5253 						     (__v8df)
   5254 						     _mm512_undefined_pd (),
   5255 						     (__mmask8) -1);
   5256 }
   5257 
   5258 extern __inline __m512d
   5259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5260 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
   5261 {
   5262   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
   5263 						     (__v8df) __W,
   5264 						     (__mmask8) __U);
   5265 }
   5266 
   5267 extern __inline __m512d
   5268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5269 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
   5270 {
   5271   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
   5272 						     (__v8df)
   5273 						     _mm512_setzero_pd (),
   5274 						     (__mmask8) __U);
   5275 }
   5276 
   5277 #ifdef __OPTIMIZE__
   5278 extern __inline __m512
   5279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5280 _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
   5281 {
   5282   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
   5283 						   (__v16sf)
   5284 						   _mm512_undefined_ps (),
   5285 						   (__mmask16) -1, __R);
   5286 }
   5287 
   5288 extern __inline __m512
   5289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5290 _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
   5291 			       const int __R)
   5292 {
   5293   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
   5294 						   (__v16sf) __W,
   5295 						   (__mmask16) __U, __R);
   5296 }
   5297 
   5298 extern __inline __m512
   5299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5300 _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
   5301 {
   5302   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
   5303 						   (__v16sf)
   5304 						   _mm512_setzero_ps (),
   5305 						   (__mmask16) __U, __R);
   5306 }
   5307 
   5308 extern __inline __m512
   5309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5310 _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
   5311 {
   5312   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
   5313 						    (__v16sf)
   5314 						    _mm512_undefined_ps (),
   5315 						    (__mmask16) -1, __R);
   5316 }
   5317 
   5318 extern __inline __m512
   5319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5320 _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
   5321 			       const int __R)
   5322 {
   5323   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
   5324 						    (__v16sf) __W,
   5325 						    (__mmask16) __U, __R);
   5326 }
   5327 
   5328 extern __inline __m512
   5329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5330 _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
   5331 {
   5332   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
   5333 						    (__v16sf)
   5334 						    _mm512_setzero_ps (),
   5335 						    (__mmask16) __U, __R);
   5336 }
   5337 
   5338 #else
   5339 #define _mm512_cvt_roundepi32_ps(A, B)        \
   5340     (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
   5341 
   5342 #define _mm512_mask_cvt_roundepi32_ps(W, U, A, B)   \
   5343     (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B)
   5344 
   5345 #define _mm512_maskz_cvt_roundepi32_ps(U, A, B)      \
   5346     (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
   5347 
   5348 #define _mm512_cvt_roundepu32_ps(A, B)        \
   5349     (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
   5350 
   5351 #define _mm512_mask_cvt_roundepu32_ps(W, U, A, B)   \
   5352     (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), W, U, B)
   5353 
   5354 #define _mm512_maskz_cvt_roundepu32_ps(U, A, B)      \
   5355     (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
   5356 #endif
   5357 
   5358 #ifdef __OPTIMIZE__
   5359 extern __inline __m256d
   5360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5361 _mm512_extractf64x4_pd (__m512d __A, const int __imm)
   5362 {
   5363   return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
   5364 						     __imm,
   5365 						     (__v4df)
   5366 						     _mm256_undefined_pd (),
   5367 						     (__mmask8) -1);
   5368 }
   5369 
   5370 extern __inline __m256d
   5371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5372 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
   5373 			     const int __imm)
   5374 {
   5375   return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
   5376 						     __imm,
   5377 						     (__v4df) __W,
   5378 						     (__mmask8) __U);
   5379 }
   5380 
   5381 extern __inline __m256d
   5382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5383 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
   5384 {
   5385   return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
   5386 						     __imm,
   5387 						     (__v4df)
   5388 						     _mm256_setzero_pd (),
   5389 						     (__mmask8) __U);
   5390 }
   5391 
   5392 extern __inline __m128
   5393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5394 _mm512_extractf32x4_ps (__m512 __A, const int __imm)
   5395 {
   5396   return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
   5397 						    __imm,
   5398 						    (__v4sf)
   5399 						    _mm_undefined_ps (),
   5400 						    (__mmask8) -1);
   5401 }
   5402 
   5403 extern __inline __m128
   5404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5405 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
   5406 			     const int __imm)
   5407 {
   5408   return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
   5409 						    __imm,
   5410 						    (__v4sf) __W,
   5411 						    (__mmask8) __U);
   5412 }
   5413 
   5414 extern __inline __m128
   5415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5416 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
   5417 {
   5418   return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
   5419 						    __imm,
   5420 						    (__v4sf)
   5421 						    _mm_setzero_ps (),
   5422 						    (__mmask8) __U);
   5423 }
   5424 
   5425 extern __inline __m256i
   5426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5427 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
   5428 {
   5429   return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
   5430 						     __imm,
   5431 						     (__v4di)
   5432 						     _mm256_undefined_si256 (),
   5433 						     (__mmask8) -1);
   5434 }
   5435 
   5436 extern __inline __m256i
   5437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5438 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
   5439 				const int __imm)
   5440 {
   5441   return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
   5442 						     __imm,
   5443 						     (__v4di) __W,
   5444 						     (__mmask8) __U);
   5445 }
   5446 
   5447 extern __inline __m256i
   5448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5449 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
   5450 {
   5451   return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
   5452 						     __imm,
   5453 						     (__v4di)
   5454 						     _mm256_setzero_si256 (),
   5455 						     (__mmask8) __U);
   5456 }
   5457 
   5458 extern __inline __m128i
   5459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5460 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
   5461 {
   5462   return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
   5463 						     __imm,
   5464 						     (__v4si)
   5465 						     _mm_undefined_si128 (),
   5466 						     (__mmask8) -1);
   5467 }
   5468 
   5469 extern __inline __m128i
   5470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5471 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
   5472 				const int __imm)
   5473 {
   5474   return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
   5475 						     __imm,
   5476 						     (__v4si) __W,
   5477 						     (__mmask8) __U);
   5478 }
   5479 
   5480 extern __inline __m128i
   5481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5482 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
   5483 {
   5484   return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
   5485 						     __imm,
   5486 						     (__v4si)
   5487 						     _mm_setzero_si128 (),
   5488 						     (__mmask8) __U);
   5489 }
   5490 #else
   5491 
   5492 #define _mm512_extractf64x4_pd(X, C)                                    \
   5493   ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X),   \
   5494     (int) (C),\
   5495     (__v4df)(__m256d)_mm256_undefined_pd(),\
   5496     (__mmask8)-1))
   5497 
   5498 #define _mm512_mask_extractf64x4_pd(W, U, X, C)                         \
   5499   ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X),   \
   5500     (int) (C),\
   5501     (__v4df)(__m256d)(W),\
   5502     (__mmask8)(U)))
   5503 
   5504 #define _mm512_maskz_extractf64x4_pd(U, X, C)                           \
   5505   ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X),   \
   5506     (int) (C),\
   5507     (__v4df)(__m256d)_mm256_setzero_pd(),\
   5508     (__mmask8)(U)))
   5509 
   5510 #define _mm512_extractf32x4_ps(X, C)                                    \
   5511   ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X),    \
   5512     (int) (C),\
   5513     (__v4sf)(__m128)_mm_undefined_ps(),\
   5514     (__mmask8)-1))
   5515 
   5516 #define _mm512_mask_extractf32x4_ps(W, U, X, C)                         \
   5517   ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X),    \
   5518     (int) (C),\
   5519     (__v4sf)(__m128)(W),\
   5520     (__mmask8)(U)))
   5521 
   5522 #define _mm512_maskz_extractf32x4_ps(U, X, C)                           \
   5523   ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X),    \
   5524     (int) (C),\
   5525     (__v4sf)(__m128)_mm_setzero_ps(),\
   5526     (__mmask8)(U)))
   5527 
   5528 #define _mm512_extracti64x4_epi64(X, C)                                 \
   5529   ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X),   \
   5530     (int) (C),\
   5531     (__v4di)(__m256i)_mm256_undefined_si256 (),\
   5532     (__mmask8)-1))
   5533 
   5534 #define _mm512_mask_extracti64x4_epi64(W, U, X, C)                      \
   5535   ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X),   \
   5536     (int) (C),\
   5537     (__v4di)(__m256i)(W),\
   5538     (__mmask8)(U)))
   5539 
   5540 #define _mm512_maskz_extracti64x4_epi64(U, X, C)                        \
   5541   ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X),   \
   5542     (int) (C),\
   5543     (__v4di)(__m256i)_mm256_setzero_si256 (),\
   5544     (__mmask8)(U)))
   5545 
   5546 #define _mm512_extracti32x4_epi32(X, C)                                 \
   5547   ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X),  \
   5548     (int) (C),\
   5549     (__v4si)(__m128i)_mm_undefined_si128 (),\
   5550     (__mmask8)-1))
   5551 
   5552 #define _mm512_mask_extracti32x4_epi32(W, U, X, C)                      \
   5553   ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X),  \
   5554     (int) (C),\
   5555     (__v4si)(__m128i)(W),\
   5556     (__mmask8)(U)))
   5557 
   5558 #define _mm512_maskz_extracti32x4_epi32(U, X, C)                        \
   5559   ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X),  \
   5560     (int) (C),\
   5561     (__v4si)(__m128i)_mm_setzero_si128 (),\
   5562     (__mmask8)(U)))
   5563 #endif
   5564 
   5565 #ifdef __OPTIMIZE__
   5566 extern __inline __m512i
   5567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5568 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
   5569 {
   5570   return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
   5571 						    (__v4si) __B,
   5572 						    __imm,
   5573 						    (__v16si) __A, -1);
   5574 }
   5575 
   5576 extern __inline __m512
   5577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5578 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
   5579 {
   5580   return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
   5581 						   (__v4sf) __B,
   5582 						   __imm,
   5583 						   (__v16sf) __A, -1);
   5584 }
   5585 
   5586 extern __inline __m512i
   5587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5588 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
   5589 {
   5590   return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
   5591 						    (__v4di) __B,
   5592 						    __imm,
   5593 						    (__v8di)
   5594 						    _mm512_undefined_si512 (),
   5595 						    (__mmask8) -1);
   5596 }
   5597 
   5598 extern __inline __m512i
   5599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5600 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
   5601 			 __m256i __B, const int __imm)
   5602 {
   5603   return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
   5604 						    (__v4di) __B,
   5605 						    __imm,
   5606 						    (__v8di) __W,
   5607 						    (__mmask8) __U);
   5608 }
   5609 
   5610 extern __inline __m512i
   5611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5612 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
   5613 			  const int __imm)
   5614 {
   5615   return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
   5616 						    (__v4di) __B,
   5617 						    __imm,
   5618 						    (__v8di)
   5619 						    _mm512_setzero_si512 (),
   5620 						    (__mmask8) __U);
   5621 }
   5622 
   5623 extern __inline __m512d
   5624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5625 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
   5626 {
   5627   return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
   5628 						    (__v4df) __B,
   5629 						    __imm,
   5630 						    (__v8df)
   5631 						    _mm512_undefined_pd (),
   5632 						    (__mmask8) -1);
   5633 }
   5634 
   5635 extern __inline __m512d
   5636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5637 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
   5638 			 __m256d __B, const int __imm)
   5639 {
   5640   return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
   5641 						    (__v4df) __B,
   5642 						    __imm,
   5643 						    (__v8df) __W,
   5644 						    (__mmask8) __U);
   5645 }
   5646 
   5647 extern __inline __m512d
   5648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5649 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
   5650 			  const int __imm)
   5651 {
   5652   return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
   5653 						    (__v4df) __B,
   5654 						    __imm,
   5655 						    (__v8df)
   5656 						    _mm512_setzero_pd (),
   5657 						    (__mmask8) __U);
   5658 }
   5659 #else
   5660 #define _mm512_insertf32x4(X, Y, C)                                     \
   5661   ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
   5662     (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))
   5663 
   5664 #define _mm512_inserti32x4(X, Y, C)                                     \
   5665   ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
   5666     (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))
   5667 
   5668 #define _mm512_insertf64x4(X, Y, C)                                     \
   5669   ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),    \
   5670     (__v4df)(__m256d) (Y), (int) (C),					\
   5671     (__v8df)(__m512d)_mm512_undefined_pd(),				\
   5672     (__mmask8)-1))
   5673 
   5674 #define _mm512_mask_insertf64x4(W, U, X, Y, C)                          \
   5675   ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),    \
   5676     (__v4df)(__m256d) (Y), (int) (C),					\
   5677     (__v8df)(__m512d)(W),						\
   5678     (__mmask8)(U)))
   5679 
   5680 #define _mm512_maskz_insertf64x4(U, X, Y, C)                            \
   5681   ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),    \
   5682     (__v4df)(__m256d) (Y), (int) (C),					\
   5683     (__v8df)(__m512d)_mm512_setzero_pd(),				\
   5684     (__mmask8)(U)))
   5685 
   5686 #define _mm512_inserti64x4(X, Y, C)                                     \
   5687   ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X),    \
   5688     (__v4di)(__m256i) (Y), (int) (C),					\
   5689     (__v8di)(__m512i)_mm512_undefined_si512 (),				\
   5690     (__mmask8)-1))
   5691 
   5692 #define _mm512_mask_inserti64x4(W, U, X, Y, C)                          \
   5693   ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X),    \
   5694     (__v4di)(__m256i) (Y), (int) (C),\
   5695     (__v8di)(__m512i)(W),\
   5696     (__mmask8)(U)))
   5697 
   5698 #define _mm512_maskz_inserti64x4(U, X, Y, C)                            \
   5699   ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X),    \
   5700     (__v4di)(__m256i) (Y), (int) (C),					\
   5701     (__v8di)(__m512i)_mm512_setzero_si512 (),				\
   5702     (__mmask8)(U)))
   5703 #endif
   5704 
   5705 extern __inline __m512d
   5706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5707 _mm512_loadu_pd (void const *__P)
   5708 {
   5709   return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
   5710 						   (__v8df)
   5711 						   _mm512_undefined_pd (),
   5712 						   (__mmask8) -1);
   5713 }
   5714 
   5715 extern __inline __m512d
   5716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5717 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
   5718 {
   5719   return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
   5720 						   (__v8df) __W,
   5721 						   (__mmask8) __U);
   5722 }
   5723 
   5724 extern __inline __m512d
   5725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5726 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
   5727 {
   5728   return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
   5729 						   (__v8df)
   5730 						   _mm512_setzero_pd (),
   5731 						   (__mmask8) __U);
   5732 }
   5733 
   5734 extern __inline void
   5735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5736 _mm512_storeu_pd (void *__P, __m512d __A)
   5737 {
   5738   __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
   5739 				   (__mmask8) -1);
   5740 }
   5741 
   5742 extern __inline void
   5743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5744 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
   5745 {
   5746   __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
   5747 				   (__mmask8) __U);
   5748 }
   5749 
   5750 extern __inline __m512
   5751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5752 _mm512_loadu_ps (void const *__P)
   5753 {
   5754   return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
   5755 						  (__v16sf)
   5756 						  _mm512_undefined_ps (),
   5757 						  (__mmask16) -1);
   5758 }
   5759 
   5760 extern __inline __m512
   5761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5762 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
   5763 {
   5764   return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
   5765 						  (__v16sf) __W,
   5766 						  (__mmask16) __U);
   5767 }
   5768 
   5769 extern __inline __m512
   5770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5771 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
   5772 {
   5773   return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
   5774 						  (__v16sf)
   5775 						  _mm512_setzero_ps (),
   5776 						  (__mmask16) __U);
   5777 }
   5778 
   5779 extern __inline void
   5780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5781 _mm512_storeu_ps (void *__P, __m512 __A)
   5782 {
   5783   __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
   5784 				   (__mmask16) -1);
   5785 }
   5786 
   5787 extern __inline void
   5788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5789 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
   5790 {
   5791   __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
   5792 				   (__mmask16) __U);
   5793 }
   5794 
   5795 extern __inline __m512i
   5796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5797 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
   5798 {
   5799   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
   5800 						     (__v8di) __W,
   5801 						     (__mmask8) __U);
   5802 }
   5803 
   5804 extern __inline __m512i
   5805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5806 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
   5807 {
   5808   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
   5809 						     (__v8di)
   5810 						     _mm512_setzero_si512 (),
   5811 						     (__mmask8) __U);
   5812 }
   5813 
   5814 extern __inline void
   5815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5816 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
   5817 {
   5818   __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A,
   5819 				     (__mmask8) __U);
   5820 }
   5821 
   5822 extern __inline __m512i
   5823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5824 _mm512_loadu_si512 (void const *__P)
   5825 {
   5826   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
   5827 						     (__v16si)
   5828 						     _mm512_setzero_si512 (),
   5829 						     (__mmask16) -1);
   5830 }
   5831 
   5832 extern __inline __m512i
   5833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5834 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
   5835 {
   5836   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
   5837 						     (__v16si) __W,
   5838 						     (__mmask16) __U);
   5839 }
   5840 
   5841 extern __inline __m512i
   5842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5843 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
   5844 {
   5845   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
   5846 						     (__v16si)
   5847 						     _mm512_setzero_si512 (),
   5848 						     (__mmask16) __U);
   5849 }
   5850 
   5851 extern __inline void
   5852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5853 _mm512_storeu_si512 (void *__P, __m512i __A)
   5854 {
   5855   __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
   5856 				     (__mmask16) -1);
   5857 }
   5858 
   5859 extern __inline void
   5860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5861 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
   5862 {
   5863   __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
   5864 				     (__mmask16) __U);
   5865 }
   5866 
   5867 extern __inline __m512d
   5868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5869 _mm512_permutevar_pd (__m512d __A, __m512i __C)
   5870 {
   5871   return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
   5872 							(__v8di) __C,
   5873 							(__v8df)
   5874 							_mm512_undefined_pd (),
   5875 							(__mmask8) -1);
   5876 }
   5877 
   5878 extern __inline __m512d
   5879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5880 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
   5881 {
   5882   return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
   5883 							(__v8di) __C,
   5884 							(__v8df) __W,
   5885 							(__mmask8) __U);
   5886 }
   5887 
   5888 extern __inline __m512d
   5889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5890 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
   5891 {
   5892   return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
   5893 							(__v8di) __C,
   5894 							(__v8df)
   5895 							_mm512_setzero_pd (),
   5896 							(__mmask8) __U);
   5897 }
   5898 
   5899 extern __inline __m512
   5900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5901 _mm512_permutevar_ps (__m512 __A, __m512i __C)
   5902 {
   5903   return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
   5904 						       (__v16si) __C,
   5905 						       (__v16sf)
   5906 						       _mm512_undefined_ps (),
   5907 						       (__mmask16) -1);
   5908 }
   5909 
   5910 extern __inline __m512
   5911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5912 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
   5913 {
   5914   return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
   5915 						       (__v16si) __C,
   5916 						       (__v16sf) __W,
   5917 						       (__mmask16) __U);
   5918 }
   5919 
   5920 extern __inline __m512
   5921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5922 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
   5923 {
   5924   return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
   5925 						       (__v16si) __C,
   5926 						       (__v16sf)
   5927 						       _mm512_setzero_ps (),
   5928 						       (__mmask16) __U);
   5929 }
   5930 
   5931 extern __inline __m512i
   5932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5933 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
   5934 {
   5935   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
   5936 						       /* idx */ ,
   5937 						       (__v8di) __A,
   5938 						       (__v8di) __B,
   5939 						       (__mmask8) -1);
   5940 }
   5941 
   5942 extern __inline __m512i
   5943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5944 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
   5945 				__m512i __B)
   5946 {
   5947   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
   5948 						       /* idx */ ,
   5949 						       (__v8di) __A,
   5950 						       (__v8di) __B,
   5951 						       (__mmask8) __U);
   5952 }
   5953 
   5954 extern __inline __m512i
   5955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5956 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
   5957 				 __mmask8 __U, __m512i __B)
   5958 {
   5959   return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
   5960 						       (__v8di) __I
   5961 						       /* idx */ ,
   5962 						       (__v8di) __B,
   5963 						       (__mmask8) __U);
   5964 }
   5965 
   5966 extern __inline __m512i
   5967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5968 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
   5969 				 __m512i __I, __m512i __B)
   5970 {
   5971   return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
   5972 							/* idx */ ,
   5973 							(__v8di) __A,
   5974 							(__v8di) __B,
   5975 							(__mmask8) __U);
   5976 }
   5977 
   5978 extern __inline __m512i
   5979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5980 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
   5981 {
   5982   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
   5983 						       /* idx */ ,
   5984 						       (__v16si) __A,
   5985 						       (__v16si) __B,
   5986 						       (__mmask16) -1);
   5987 }
   5988 
   5989 extern __inline __m512i
   5990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5991 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
   5992 				__m512i __I, __m512i __B)
   5993 {
   5994   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
   5995 						       /* idx */ ,
   5996 						       (__v16si) __A,
   5997 						       (__v16si) __B,
   5998 						       (__mmask16) __U);
   5999 }
   6000 
   6001 extern __inline __m512i
   6002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6003 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
   6004 				 __mmask16 __U, __m512i __B)
   6005 {
   6006   return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
   6007 						       (__v16si) __I
   6008 						       /* idx */ ,
   6009 						       (__v16si) __B,
   6010 						       (__mmask16) __U);
   6011 }
   6012 
   6013 extern __inline __m512i
   6014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6015 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
   6016 				 __m512i __I, __m512i __B)
   6017 {
   6018   return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
   6019 							/* idx */ ,
   6020 							(__v16si) __A,
   6021 							(__v16si) __B,
   6022 							(__mmask16) __U);
   6023 }
   6024 
   6025 extern __inline __m512d
   6026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6027 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
   6028 {
   6029   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
   6030 							/* idx */ ,
   6031 							(__v8df) __A,
   6032 							(__v8df) __B,
   6033 							(__mmask8) -1);
   6034 }
   6035 
   6036 extern __inline __m512d
   6037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6038 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
   6039 			     __m512d __B)
   6040 {
   6041   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
   6042 							/* idx */ ,
   6043 							(__v8df) __A,
   6044 							(__v8df) __B,
   6045 							(__mmask8) __U);
   6046 }
   6047 
   6048 extern __inline __m512d
   6049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6050 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
   6051 			      __m512d __B)
   6052 {
   6053   return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
   6054 							(__v8di) __I
   6055 							/* idx */ ,
   6056 							(__v8df) __B,
   6057 							(__mmask8) __U);
   6058 }
   6059 
   6060 extern __inline __m512d
   6061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6062 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
   6063 			      __m512d __B)
   6064 {
   6065   return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
   6066 							 /* idx */ ,
   6067 							 (__v8df) __A,
   6068 							 (__v8df) __B,
   6069 							 (__mmask8) __U);
   6070 }
   6071 
   6072 extern __inline __m512
   6073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6074 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
   6075 {
   6076   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
   6077 						       /* idx */ ,
   6078 						       (__v16sf) __A,
   6079 						       (__v16sf) __B,
   6080 						       (__mmask16) -1);
   6081 }
   6082 
   6083 extern __inline __m512
   6084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6085 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
   6086 {
   6087   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
   6088 						       /* idx */ ,
   6089 						       (__v16sf) __A,
   6090 						       (__v16sf) __B,
   6091 						       (__mmask16) __U);
   6092 }
   6093 
   6094 extern __inline __m512
   6095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6096 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
   6097 			      __m512 __B)
   6098 {
   6099   return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
   6100 						       (__v16si) __I
   6101 						       /* idx */ ,
   6102 						       (__v16sf) __B,
   6103 						       (__mmask16) __U);
   6104 }
   6105 
   6106 extern __inline __m512
   6107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6108 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
   6109 			      __m512 __B)
   6110 {
   6111   return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
   6112 							/* idx */ ,
   6113 							(__v16sf) __A,
   6114 							(__v16sf) __B,
   6115 							(__mmask16) __U);
   6116 }
   6117 
   6118 #ifdef __OPTIMIZE__
   6119 extern __inline __m512d
   6120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6121 _mm512_permute_pd (__m512d __X, const int __C)
   6122 {
   6123   return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
   6124 						     (__v8df)
   6125 						     _mm512_undefined_pd (),
   6126 						     (__mmask8) -1);
   6127 }
   6128 
   6129 extern __inline __m512d
   6130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6131 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
   6132 {
   6133   return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
   6134 						     (__v8df) __W,
   6135 						     (__mmask8) __U);
   6136 }
   6137 
   6138 extern __inline __m512d
   6139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6140 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
   6141 {
   6142   return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
   6143 						     (__v8df)
   6144 						     _mm512_setzero_pd (),
   6145 						     (__mmask8) __U);
   6146 }
   6147 
   6148 extern __inline __m512
   6149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6150 _mm512_permute_ps (__m512 __X, const int __C)
   6151 {
   6152   return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
   6153 						    (__v16sf)
   6154 						    _mm512_undefined_ps (),
   6155 						    (__mmask16) -1);
   6156 }
   6157 
   6158 extern __inline __m512
   6159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6160 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
   6161 {
   6162   return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
   6163 						    (__v16sf) __W,
   6164 						    (__mmask16) __U);
   6165 }
   6166 
   6167 extern __inline __m512
   6168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6169 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
   6170 {
   6171   return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
   6172 						    (__v16sf)
   6173 						    _mm512_setzero_ps (),
   6174 						    (__mmask16) __U);
   6175 }
   6176 #else
   6177 #define _mm512_permute_pd(X, C)							    \
   6178   ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C),	    \
   6179 					      (__v8df)(__m512d)_mm512_undefined_pd(),\
   6180 					      (__mmask8)(-1)))
   6181 
   6182 #define _mm512_mask_permute_pd(W, U, X, C)					    \
   6183   ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C),	    \
   6184 					      (__v8df)(__m512d)(W),		    \
   6185 					      (__mmask8)(U)))
   6186 
   6187 #define _mm512_maskz_permute_pd(U, X, C)					    \
   6188   ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C),	    \
   6189 					      (__v8df)(__m512d)_mm512_setzero_pd(), \
   6190 					      (__mmask8)(U)))
   6191 
   6192 #define _mm512_permute_ps(X, C)							    \
   6193   ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C),	    \
   6194 					      (__v16sf)(__m512)_mm512_undefined_ps(),\
   6195 					      (__mmask16)(-1)))
   6196 
   6197 #define _mm512_mask_permute_ps(W, U, X, C)					    \
   6198   ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C),	    \
   6199 					      (__v16sf)(__m512)(W),		    \
   6200 					      (__mmask16)(U)))
   6201 
   6202 #define _mm512_maskz_permute_ps(U, X, C)					    \
   6203   ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C),	    \
   6204 					      (__v16sf)(__m512)_mm512_setzero_ps(), \
   6205 					      (__mmask16)(U)))
   6206 #endif
   6207 
   6208 #ifdef __OPTIMIZE__
   6209 extern __inline __m512i
   6210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6211 _mm512_permutex_epi64 (__m512i __X, const int __I)
   6212 {
   6213   return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
   6214 						  (__v8di)
   6215 						  _mm512_undefined_si512 (),
   6216 						  (__mmask8) (-1));
   6217 }
   6218 
   6219 extern __inline __m512i
   6220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6221 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
   6222 			    __m512i __X, const int __I)
   6223 {
   6224   return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
   6225 						  (__v8di) __W,
   6226 						  (__mmask8) __M);
   6227 }
   6228 
   6229 extern __inline __m512i
   6230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6231 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
   6232 {
   6233   return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
   6234 						  (__v8di)
   6235 						  _mm512_setzero_si512 (),
   6236 						  (__mmask8) __M);
   6237 }
   6238 
   6239 extern __inline __m512d
   6240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6241 _mm512_permutex_pd (__m512d __X, const int __M)
   6242 {
   6243   return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
   6244 						  (__v8df)
   6245 						  _mm512_undefined_pd (),
   6246 						  (__mmask8) -1);
   6247 }
   6248 
   6249 extern __inline __m512d
   6250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6251 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
   6252 {
   6253   return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
   6254 						  (__v8df) __W,
   6255 						  (__mmask8) __U);
   6256 }
   6257 
   6258 extern __inline __m512d
   6259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6260 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
   6261 {
   6262   return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
   6263 						  (__v8df)
   6264 						  _mm512_setzero_pd (),
   6265 						  (__mmask8) __U);
   6266 }
   6267 #else
   6268 #define _mm512_permutex_pd(X, M)						\
   6269   ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M),	\
   6270 					    (__v8df)(__m512d)_mm512_undefined_pd(),\
   6271 					    (__mmask8)-1))
   6272 
   6273 #define _mm512_mask_permutex_pd(W, U, X, M)					\
   6274   ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M),	\
   6275 					    (__v8df)(__m512d)(W), (__mmask8)(U)))
   6276 
   6277 #define _mm512_maskz_permutex_pd(U, X, M)					\
   6278   ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M),	\
   6279 					    (__v8df)(__m512d)_mm512_setzero_pd(),\
   6280 					    (__mmask8)(U)))
   6281 
   6282 #define _mm512_permutex_epi64(X, I)			          \
   6283   ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
   6284 					    (int)(I),             \
   6285 					    (__v8di)(__m512i)	  \
   6286 					    (_mm512_undefined_si512 ()),\
   6287 					    (__mmask8)(-1)))
   6288 
   6289 #define _mm512_maskz_permutex_epi64(M, X, I)                 \
   6290   ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
   6291 					    (int)(I),             \
   6292 					    (__v8di)(__m512i)     \
   6293 					    (_mm512_setzero_si512 ()),\
   6294 					    (__mmask8)(M)))
   6295 
   6296 #define _mm512_mask_permutex_epi64(W, M, X, I)               \
   6297   ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
   6298 					    (int)(I),             \
   6299 					    (__v8di)(__m512i)(W), \
   6300 					    (__mmask8)(M)))
   6301 #endif
   6302 
   6303 extern __inline __m512i
   6304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6305 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
   6306 {
   6307   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
   6308 						     (__v8di) __X,
   6309 						     (__v8di)
   6310 						     _mm512_setzero_si512 (),
   6311 						     __M);
   6312 }
   6313 
   6314 extern __inline __m512i
   6315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6316 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
   6317 {
   6318   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
   6319 						     (__v8di) __X,
   6320 						     (__v8di)
   6321 						     _mm512_undefined_si512 (),
   6322 						     (__mmask8) -1);
   6323 }
   6324 
   6325 extern __inline __m512i
   6326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6327 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
   6328 			       __m512i __Y)
   6329 {
   6330   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
   6331 						     (__v8di) __X,
   6332 						     (__v8di) __W,
   6333 						     __M);
   6334 }
   6335 
   6336 extern __inline __m512i
   6337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6338 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
   6339 {
   6340   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
   6341 						     (__v16si) __X,
   6342 						     (__v16si)
   6343 						     _mm512_setzero_si512 (),
   6344 						     __M);
   6345 }
   6346 
   6347 extern __inline __m512i
   6348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6349 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
   6350 {
   6351   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
   6352 						     (__v16si) __X,
   6353 						     (__v16si)
   6354 						     _mm512_undefined_si512 (),
   6355 						     (__mmask16) -1);
   6356 }
   6357 
   6358 extern __inline __m512i
   6359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6360 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
   6361 			       __m512i __Y)
   6362 {
   6363   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
   6364 						     (__v16si) __X,
   6365 						     (__v16si) __W,
   6366 						     __M);
   6367 }
   6368 
   6369 extern __inline __m512d
   6370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6371 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
   6372 {
   6373   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
   6374 						     (__v8di) __X,
   6375 						     (__v8df)
   6376 						     _mm512_undefined_pd (),
   6377 						     (__mmask8) -1);
   6378 }
   6379 
   6380 extern __inline __m512d
   6381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6382 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
   6383 {
   6384   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
   6385 						     (__v8di) __X,
   6386 						     (__v8df) __W,
   6387 						     (__mmask8) __U);
   6388 }
   6389 
   6390 extern __inline __m512d
   6391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6392 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
   6393 {
   6394   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
   6395 						     (__v8di) __X,
   6396 						     (__v8df)
   6397 						     _mm512_setzero_pd (),
   6398 						     (__mmask8) __U);
   6399 }
   6400 
   6401 extern __inline __m512
   6402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6403 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
   6404 {
   6405   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
   6406 						    (__v16si) __X,
   6407 						    (__v16sf)
   6408 						    _mm512_undefined_ps (),
   6409 						    (__mmask16) -1);
   6410 }
   6411 
   6412 extern __inline __m512
   6413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6414 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
   6415 {
   6416   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
   6417 						    (__v16si) __X,
   6418 						    (__v16sf) __W,
   6419 						    (__mmask16) __U);
   6420 }
   6421 
   6422 extern __inline __m512
   6423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6424 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
   6425 {
   6426   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
   6427 						    (__v16si) __X,
   6428 						    (__v16sf)
   6429 						    _mm512_setzero_ps (),
   6430 						    (__mmask16) __U);
   6431 }
   6432 
   6433 #ifdef __OPTIMIZE__
   6434 extern __inline __m512
   6435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6436 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
   6437 {
   6438   return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
   6439 						 (__v16sf) __V, __imm,
   6440 						 (__v16sf)
   6441 						 _mm512_undefined_ps (),
   6442 						 (__mmask16) -1);
   6443 }
   6444 
   6445 extern __inline __m512
   6446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6447 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
   6448 			__m512 __V, const int __imm)
   6449 {
   6450   return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
   6451 						 (__v16sf) __V, __imm,
   6452 						 (__v16sf) __W,
   6453 						 (__mmask16) __U);
   6454 }
   6455 
   6456 extern __inline __m512
   6457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6458 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
   6459 {
   6460   return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
   6461 						 (__v16sf) __V, __imm,
   6462 						 (__v16sf)
   6463 						 _mm512_setzero_ps (),
   6464 						 (__mmask16) __U);
   6465 }
   6466 
   6467 extern __inline __m512d
   6468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6469 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
   6470 {
   6471   return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
   6472 						  (__v8df) __V, __imm,
   6473 						  (__v8df)
   6474 						  _mm512_undefined_pd (),
   6475 						  (__mmask8) -1);
   6476 }
   6477 
   6478 extern __inline __m512d
   6479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6480 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
   6481 			__m512d __V, const int __imm)
   6482 {
   6483   return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
   6484 						  (__v8df) __V, __imm,
   6485 						  (__v8df) __W,
   6486 						  (__mmask8) __U);
   6487 }
   6488 
   6489 extern __inline __m512d
   6490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6491 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
   6492 			 const int __imm)
   6493 {
   6494   return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
   6495 						  (__v8df) __V, __imm,
   6496 						  (__v8df)
   6497 						  _mm512_setzero_pd (),
   6498 						  (__mmask8) __U);
   6499 }
   6500 
   6501 extern __inline __m512d
   6502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6503 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
   6504 			  const int __imm, const int __R)
   6505 {
   6506   return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
   6507 						      (__v8df) __B,
   6508 						      (__v8di) __C,
   6509 						      __imm,
   6510 						      (__mmask8) -1, __R);
   6511 }
   6512 
   6513 extern __inline __m512d
   6514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6515 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
   6516 			       __m512i __C, const int __imm, const int __R)
   6517 {
   6518   return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
   6519 						      (__v8df) __B,
   6520 						      (__v8di) __C,
   6521 						      __imm,
   6522 						      (__mmask8) __U, __R);
   6523 }
   6524 
   6525 extern __inline __m512d
   6526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6527 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
   6528 				__m512i __C, const int __imm, const int __R)
   6529 {
   6530   return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
   6531 						       (__v8df) __B,
   6532 						       (__v8di) __C,
   6533 						       __imm,
   6534 						       (__mmask8) __U, __R);
   6535 }
   6536 
   6537 extern __inline __m512
   6538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6539 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
   6540 			  const int __imm, const int __R)
   6541 {
   6542   return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
   6543 						     (__v16sf) __B,
   6544 						     (__v16si) __C,
   6545 						     __imm,
   6546 						     (__mmask16) -1, __R);
   6547 }
   6548 
   6549 extern __inline __m512
   6550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6551 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
   6552 			       __m512i __C, const int __imm, const int __R)
   6553 {
   6554   return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
   6555 						     (__v16sf) __B,
   6556 						     (__v16si) __C,
   6557 						     __imm,
   6558 						     (__mmask16) __U, __R);
   6559 }
   6560 
   6561 extern __inline __m512
   6562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6563 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
   6564 				__m512i __C, const int __imm, const int __R)
   6565 {
   6566   return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
   6567 						      (__v16sf) __B,
   6568 						      (__v16si) __C,
   6569 						      __imm,
   6570 						      (__mmask16) __U, __R);
   6571 }
   6572 
   6573 extern __inline __m128d
   6574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6575 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
   6576 		       const int __imm, const int __R)
   6577 {
   6578   return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
   6579 						   (__v2df) __B,
   6580 						   (__v2di) __C, __imm,
   6581 						   (__mmask8) -1, __R);
   6582 }
   6583 
   6584 extern __inline __m128d
   6585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6586 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
   6587 			    __m128i __C, const int __imm, const int __R)
   6588 {
   6589   return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
   6590 						   (__v2df) __B,
   6591 						   (__v2di) __C, __imm,
   6592 						   (__mmask8) __U, __R);
   6593 }
   6594 
   6595 extern __inline __m128d
   6596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6597 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
   6598 			     __m128i __C, const int __imm, const int __R)
   6599 {
   6600   return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
   6601 						    (__v2df) __B,
   6602 						    (__v2di) __C,
   6603 						    __imm,
   6604 						    (__mmask8) __U, __R);
   6605 }
   6606 
   6607 extern __inline __m128
   6608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6609 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
   6610 		       const int __imm, const int __R)
   6611 {
   6612   return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
   6613 						  (__v4sf) __B,
   6614 						  (__v4si) __C, __imm,
   6615 						  (__mmask8) -1, __R);
   6616 }
   6617 
   6618 extern __inline __m128
   6619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6620 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
   6621 			    __m128i __C, const int __imm, const int __R)
   6622 {
   6623   return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
   6624 						  (__v4sf) __B,
   6625 						  (__v4si) __C, __imm,
   6626 						  (__mmask8) __U, __R);
   6627 }
   6628 
   6629 extern __inline __m128
   6630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6631 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
   6632 			     __m128i __C, const int __imm, const int __R)
   6633 {
   6634   return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
   6635 						   (__v4sf) __B,
   6636 						   (__v4si) __C, __imm,
   6637 						   (__mmask8) __U, __R);
   6638 }
   6639 
   6640 #else
   6641 #define _mm512_shuffle_pd(X, Y, C)                                      \
   6642     ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X),           \
   6643         (__v8df)(__m512d)(Y), (int)(C),\
   6644     (__v8df)(__m512d)_mm512_undefined_pd(),\
   6645     (__mmask8)-1))
   6646 
   6647 #define _mm512_mask_shuffle_pd(W, U, X, Y, C)                           \
   6648     ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X),           \
   6649         (__v8df)(__m512d)(Y), (int)(C),\
   6650     (__v8df)(__m512d)(W),\
   6651     (__mmask8)(U)))
   6652 
   6653 #define _mm512_maskz_shuffle_pd(U, X, Y, C)                             \
   6654     ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X),           \
   6655         (__v8df)(__m512d)(Y), (int)(C),\
   6656     (__v8df)(__m512d)_mm512_setzero_pd(),\
   6657     (__mmask8)(U)))
   6658 
   6659 #define _mm512_shuffle_ps(X, Y, C)                                      \
   6660     ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X),            \
   6661         (__v16sf)(__m512)(Y), (int)(C),\
   6662     (__v16sf)(__m512)_mm512_undefined_ps(),\
   6663     (__mmask16)-1))
   6664 
   6665 #define _mm512_mask_shuffle_ps(W, U, X, Y, C)                           \
   6666     ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X),            \
   6667         (__v16sf)(__m512)(Y), (int)(C),\
   6668     (__v16sf)(__m512)(W),\
   6669     (__mmask16)(U)))
   6670 
   6671 #define _mm512_maskz_shuffle_ps(U, X, Y, C)                             \
   6672     ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X),            \
   6673         (__v16sf)(__m512)(Y), (int)(C),\
   6674     (__v16sf)(__m512)_mm512_setzero_ps(),\
   6675     (__mmask16)(U)))
   6676 
   6677 #define _mm512_fixupimm_round_pd(X, Y, Z, C, R)					\
   6678   ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X),	\
   6679       (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C),		\
   6680       (__mmask8)(-1), (R)))
   6681 
   6682 #define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R)                          \
   6683   ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X),    \
   6684       (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C),             \
   6685       (__mmask8)(U), (R)))
   6686 
   6687 #define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R)                         \
   6688   ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X),   \
   6689       (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C),             \
   6690       (__mmask8)(U), (R)))
   6691 
   6692 #define _mm512_fixupimm_round_ps(X, Y, Z, C, R)					\
   6693   ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X),	\
   6694     (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C),		\
   6695     (__mmask16)(-1), (R)))
   6696 
   6697 #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R)                          \
   6698   ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X),     \
   6699     (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C),              \
   6700     (__mmask16)(U), (R)))
   6701 
   6702 #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R)                         \
   6703   ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X),    \
   6704     (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C),              \
   6705     (__mmask16)(U), (R)))
   6706 
   6707 #define _mm_fixupimm_round_sd(X, Y, Z, C, R)					\
   6708     ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X),	\
   6709       (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C),		\
   6710       (__mmask8)(-1), (R)))
   6711 
   6712 #define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R)				\
   6713     ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X),	\
   6714       (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C),		\
   6715       (__mmask8)(U), (R)))
   6716 
   6717 #define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R)				\
   6718     ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X),	\
   6719       (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C),		\
   6720       (__mmask8)(U), (R)))
   6721 
   6722 #define _mm_fixupimm_round_ss(X, Y, Z, C, R)					\
   6723     ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X),	\
   6724       (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C),		\
   6725       (__mmask8)(-1), (R)))
   6726 
   6727 #define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R)				\
   6728     ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X),	\
   6729       (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C),		\
   6730       (__mmask8)(U), (R)))
   6731 
   6732 #define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R)				\
   6733     ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X),	\
   6734       (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C),		\
   6735       (__mmask8)(U), (R)))
   6736 #endif
   6737 
   6738 extern __inline __m512
   6739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6740 _mm512_movehdup_ps (__m512 __A)
   6741 {
   6742   return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
   6743 						   (__v16sf)
   6744 						   _mm512_undefined_ps (),
   6745 						   (__mmask16) -1);
   6746 }
   6747 
   6748 extern __inline __m512
   6749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6750 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
   6751 {
   6752   return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
   6753 						   (__v16sf) __W,
   6754 						   (__mmask16) __U);
   6755 }
   6756 
   6757 extern __inline __m512
   6758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6759 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
   6760 {
   6761   return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
   6762 						   (__v16sf)
   6763 						   _mm512_setzero_ps (),
   6764 						   (__mmask16) __U);
   6765 }
   6766 
   6767 extern __inline __m512
   6768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6769 _mm512_moveldup_ps (__m512 __A)
   6770 {
   6771   return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
   6772 						   (__v16sf)
   6773 						   _mm512_undefined_ps (),
   6774 						   (__mmask16) -1);
   6775 }
   6776 
   6777 extern __inline __m512
   6778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6779 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
   6780 {
   6781   return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
   6782 						   (__v16sf) __W,
   6783 						   (__mmask16) __U);
   6784 }
   6785 
   6786 extern __inline __m512
   6787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6788 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
   6789 {
   6790   return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
   6791 						   (__v16sf)
   6792 						   _mm512_setzero_ps (),
   6793 						   (__mmask16) __U);
   6794 }
   6795 
   6796 extern __inline __m512i
   6797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6798 _mm512_or_si512 (__m512i __A, __m512i __B)
   6799 {
   6800   return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
   6801 						(__v16si) __B,
   6802 						(__v16si)
   6803 						_mm512_undefined_si512 (),
   6804 						(__mmask16) -1);
   6805 }
   6806 
   6807 extern __inline __m512i
   6808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6809 _mm512_or_epi32 (__m512i __A, __m512i __B)
   6810 {
   6811   return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
   6812 						(__v16si) __B,
   6813 						(__v16si)
   6814 						_mm512_undefined_si512 (),
   6815 						(__mmask16) -1);
   6816 }
   6817 
   6818 extern __inline __m512i
   6819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6820 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   6821 {
   6822   return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
   6823 						(__v16si) __B,
   6824 						(__v16si) __W,
   6825 						(__mmask16) __U);
   6826 }
   6827 
   6828 extern __inline __m512i
   6829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6830 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
   6831 {
   6832   return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
   6833 						(__v16si) __B,
   6834 						(__v16si)
   6835 						_mm512_setzero_si512 (),
   6836 						(__mmask16) __U);
   6837 }
   6838 
   6839 extern __inline __m512i
   6840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6841 _mm512_or_epi64 (__m512i __A, __m512i __B)
   6842 {
   6843   return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
   6844 						(__v8di) __B,
   6845 						(__v8di)
   6846 						_mm512_undefined_si512 (),
   6847 						(__mmask8) -1);
   6848 }
   6849 
   6850 extern __inline __m512i
   6851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6852 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   6853 {
   6854   return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
   6855 						(__v8di) __B,
   6856 						(__v8di) __W,
   6857 						(__mmask8) __U);
   6858 }
   6859 
   6860 extern __inline __m512i
   6861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6862 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
   6863 {
   6864   return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
   6865 						(__v8di) __B,
   6866 						(__v8di)
   6867 						_mm512_setzero_si512 (),
   6868 						(__mmask8) __U);
   6869 }
   6870 
   6871 extern __inline __m512i
   6872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6873 _mm512_xor_si512 (__m512i __A, __m512i __B)
   6874 {
   6875   return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
   6876 						 (__v16si) __B,
   6877 						 (__v16si)
   6878 						 _mm512_undefined_si512 (),
   6879 						 (__mmask16) -1);
   6880 }
   6881 
   6882 extern __inline __m512i
   6883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6884 _mm512_xor_epi32 (__m512i __A, __m512i __B)
   6885 {
   6886   return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
   6887 						 (__v16si) __B,
   6888 						 (__v16si)
   6889 						 _mm512_undefined_si512 (),
   6890 						 (__mmask16) -1);
   6891 }
   6892 
   6893 extern __inline __m512i
   6894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6895 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   6896 {
   6897   return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
   6898 						 (__v16si) __B,
   6899 						 (__v16si) __W,
   6900 						 (__mmask16) __U);
   6901 }
   6902 
   6903 extern __inline __m512i
   6904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6905 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
   6906 {
   6907   return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
   6908 						 (__v16si) __B,
   6909 						 (__v16si)
   6910 						 _mm512_setzero_si512 (),
   6911 						 (__mmask16) __U);
   6912 }
   6913 
   6914 extern __inline __m512i
   6915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6916 _mm512_xor_epi64 (__m512i __A, __m512i __B)
   6917 {
   6918   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
   6919 						 (__v8di) __B,
   6920 						 (__v8di)
   6921 						 _mm512_undefined_si512 (),
   6922 						 (__mmask8) -1);
   6923 }
   6924 
   6925 extern __inline __m512i
   6926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6927 _mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   6928 {
   6929   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
   6930 						 (__v8di) __B,
   6931 						 (__v8di) __W,
   6932 						 (__mmask8) __U);
   6933 }
   6934 
   6935 extern __inline __m512i
   6936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6937 _mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
   6938 {
   6939   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
   6940 						 (__v8di) __B,
   6941 						 (__v8di)
   6942 						 _mm512_setzero_si512 (),
   6943 						 (__mmask8) __U);
   6944 }
   6945 
   6946 #ifdef __OPTIMIZE__
   6947 extern __inline __m512i
   6948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6949 _mm512_rol_epi32 (__m512i __A, const int __B)
   6950 {
   6951   return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
   6952 						 (__v16si)
   6953 						 _mm512_undefined_si512 (),
   6954 						 (__mmask16) -1);
   6955 }
   6956 
   6957 extern __inline __m512i
   6958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6959 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
   6960 {
   6961   return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
   6962 						 (__v16si) __W,
   6963 						 (__mmask16) __U);
   6964 }
   6965 
   6966 extern __inline __m512i
   6967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6968 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
   6969 {
   6970   return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
   6971 						 (__v16si)
   6972 						 _mm512_setzero_si512 (),
   6973 						 (__mmask16) __U);
   6974 }
   6975 
   6976 extern __inline __m512i
   6977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6978 _mm512_ror_epi32 (__m512i __A, int __B)
   6979 {
   6980   return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
   6981 						 (__v16si)
   6982 						 _mm512_undefined_si512 (),
   6983 						 (__mmask16) -1);
   6984 }
   6985 
   6986 extern __inline __m512i
   6987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6988 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
   6989 {
   6990   return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
   6991 						 (__v16si) __W,
   6992 						 (__mmask16) __U);
   6993 }
   6994 
   6995 extern __inline __m512i
   6996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6997 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
   6998 {
   6999   return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
   7000 						 (__v16si)
   7001 						 _mm512_setzero_si512 (),
   7002 						 (__mmask16) __U);
   7003 }
   7004 
   7005 extern __inline __m512i
   7006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7007 _mm512_rol_epi64 (__m512i __A, const int __B)
   7008 {
   7009   return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
   7010 						 (__v8di)
   7011 						 _mm512_undefined_si512 (),
   7012 						 (__mmask8) -1);
   7013 }
   7014 
   7015 extern __inline __m512i
   7016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7017 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
   7018 {
   7019   return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
   7020 						 (__v8di) __W,
   7021 						 (__mmask8) __U);
   7022 }
   7023 
   7024 extern __inline __m512i
   7025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7026 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
   7027 {
   7028   return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
   7029 						 (__v8di)
   7030 						 _mm512_setzero_si512 (),
   7031 						 (__mmask8) __U);
   7032 }
   7033 
   7034 extern __inline __m512i
   7035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7036 _mm512_ror_epi64 (__m512i __A, int __B)
   7037 {
   7038   return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
   7039 						 (__v8di)
   7040 						 _mm512_undefined_si512 (),
   7041 						 (__mmask8) -1);
   7042 }
   7043 
   7044 extern __inline __m512i
   7045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7046 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
   7047 {
   7048   return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
   7049 						 (__v8di) __W,
   7050 						 (__mmask8) __U);
   7051 }
   7052 
   7053 extern __inline __m512i
   7054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7055 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
   7056 {
   7057   return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
   7058 						 (__v8di)
   7059 						 _mm512_setzero_si512 (),
   7060 						 (__mmask8) __U);
   7061 }
   7062 
   7063 #else
   7064 #define _mm512_rol_epi32(A, B)						  \
   7065     ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A),	  \
   7066 					    (int)(B),			  \
   7067 					    (__v16si)_mm512_undefined_si512 (), \
   7068 					    (__mmask16)(-1)))
   7069 #define _mm512_mask_rol_epi32(W, U, A, B)				  \
   7070     ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A),	  \
   7071 					    (int)(B),			  \
   7072 					    (__v16si)(__m512i)(W),	  \
   7073 					    (__mmask16)(U)))
   7074 #define _mm512_maskz_rol_epi32(U, A, B)					  \
   7075     ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A),	  \
   7076 					    (int)(B),			  \
   7077 					    (__v16si)_mm512_setzero_si512 (), \
   7078 					    (__mmask16)(U)))
   7079 #define _mm512_ror_epi32(A, B)						  \
   7080     ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A),	  \
   7081 					    (int)(B),			  \
   7082 					    (__v16si)_mm512_undefined_si512 (), \
   7083 					    (__mmask16)(-1)))
   7084 #define _mm512_mask_ror_epi32(W, U, A, B)				  \
   7085     ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A),	  \
   7086 					    (int)(B),			  \
   7087 					    (__v16si)(__m512i)(W),	  \
   7088 					    (__mmask16)(U)))
   7089 #define _mm512_maskz_ror_epi32(U, A, B)					  \
   7090     ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A),	  \
   7091 					    (int)(B),			  \
   7092 					    (__v16si)_mm512_setzero_si512 (), \
   7093 					    (__mmask16)(U)))
   7094 #define _mm512_rol_epi64(A, B)						  \
   7095     ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A),	  \
   7096 					    (int)(B),			  \
   7097 					    (__v8di)_mm512_undefined_si512 (),  \
   7098 					    (__mmask8)(-1)))
   7099 #define _mm512_mask_rol_epi64(W, U, A, B)				  \
   7100     ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A),	  \
   7101 					    (int)(B),			  \
   7102 					    (__v8di)(__m512i)(W),	  \
   7103 					    (__mmask8)(U)))
   7104 #define _mm512_maskz_rol_epi64(U, A, B)					  \
   7105     ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A),	  \
   7106 					    (int)(B),			  \
   7107 					    (__v8di)_mm512_setzero_si512 (),  \
   7108 					    (__mmask8)(U)))
   7109 
   7110 #define _mm512_ror_epi64(A, B)						  \
   7111     ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A),	  \
   7112 					    (int)(B),			  \
   7113 					    (__v8di)_mm512_undefined_si512 (),  \
   7114 					    (__mmask8)(-1)))
   7115 #define _mm512_mask_ror_epi64(W, U, A, B)				  \
   7116     ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A),	  \
   7117 					    (int)(B),			  \
   7118 					    (__v8di)(__m512i)(W),	  \
   7119 					    (__mmask8)(U)))
   7120 #define _mm512_maskz_ror_epi64(U, A, B)					  \
   7121     ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A),	  \
   7122 					    (int)(B),			  \
   7123 					    (__v8di)_mm512_setzero_si512 (),  \
   7124 					    (__mmask8)(U)))
   7125 #endif
   7126 
   7127 extern __inline __m512i
   7128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7129 _mm512_and_si512 (__m512i __A, __m512i __B)
   7130 {
   7131   return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
   7132 						 (__v16si) __B,
   7133 						 (__v16si)
   7134 						 _mm512_undefined_si512 (),
   7135 						 (__mmask16) -1);
   7136 }
   7137 
   7138 extern __inline __m512i
   7139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7140 _mm512_and_epi32 (__m512i __A, __m512i __B)
   7141 {
   7142   return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
   7143 						 (__v16si) __B,
   7144 						 (__v16si)
   7145 						 _mm512_undefined_si512 (),
   7146 						 (__mmask16) -1);
   7147 }
   7148 
   7149 extern __inline __m512i
   7150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7151 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   7152 {
   7153   return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
   7154 						 (__v16si) __B,
   7155 						 (__v16si) __W,
   7156 						 (__mmask16) __U);
   7157 }
   7158 
   7159 extern __inline __m512i
   7160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7161 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
   7162 {
   7163   return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
   7164 						 (__v16si) __B,
   7165 						 (__v16si)
   7166 						 _mm512_setzero_si512 (),
   7167 						 (__mmask16) __U);
   7168 }
   7169 
   7170 extern __inline __m512i
   7171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7172 _mm512_and_epi64 (__m512i __A, __m512i __B)
   7173 {
   7174   return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
   7175 						 (__v8di) __B,
   7176 						 (__v8di)
   7177 						 _mm512_undefined_si512 (),
   7178 						 (__mmask8) -1);
   7179 }
   7180 
   7181 extern __inline __m512i
   7182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7183 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   7184 {
   7185   return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
   7186 						 (__v8di) __B,
   7187 						 (__v8di) __W, __U);
   7188 }
   7189 
   7190 extern __inline __m512i
   7191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7192 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
   7193 {
   7194   return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
   7195 						 (__v8di) __B,
   7196 						 (__v8di)
   7197 						 _mm512_setzero_pd (),
   7198 						 __U);
   7199 }
   7200 
   7201 extern __inline __m512i
   7202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7203 _mm512_andnot_si512 (__m512i __A, __m512i __B)
   7204 {
   7205   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
   7206 						  (__v16si) __B,
   7207 						  (__v16si)
   7208 						  _mm512_undefined_si512 (),
   7209 						  (__mmask16) -1);
   7210 }
   7211 
   7212 extern __inline __m512i
   7213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7214 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
   7215 {
   7216   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
   7217 						  (__v16si) __B,
   7218 						  (__v16si)
   7219 						  _mm512_undefined_si512 (),
   7220 						  (__mmask16) -1);
   7221 }
   7222 
   7223 extern __inline __m512i
   7224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7225 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
   7226 {
   7227   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
   7228 						  (__v16si) __B,
   7229 						  (__v16si) __W,
   7230 						  (__mmask16) __U);
   7231 }
   7232 
   7233 extern __inline __m512i
   7234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7235 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
   7236 {
   7237   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
   7238 						  (__v16si) __B,
   7239 						  (__v16si)
   7240 						  _mm512_setzero_si512 (),
   7241 						  (__mmask16) __U);
   7242 }
   7243 
   7244 extern __inline __m512i
   7245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7246 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
   7247 {
   7248   return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
   7249 						  (__v8di) __B,
   7250 						  (__v8di)
   7251 						  _mm512_undefined_si512 (),
   7252 						  (__mmask8) -1);
   7253 }
   7254 
   7255 extern __inline __m512i
   7256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7257 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   7258 {
   7259   return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
   7260 						  (__v8di) __B,
   7261 						  (__v8di) __W, __U);
   7262 }
   7263 
   7264 extern __inline __m512i
   7265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7266 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
   7267 {
   7268   return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
   7269 						  (__v8di) __B,
   7270 						  (__v8di)
   7271 						  _mm512_setzero_pd (),
   7272 						  __U);
   7273 }
   7274 
   7275 extern __inline __mmask16
   7276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7277 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
   7278 {
   7279   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
   7280 						(__v16si) __B,
   7281 						(__mmask16) -1);
   7282 }
   7283 
   7284 extern __inline __mmask16
   7285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7286 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
   7287 {
   7288   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
   7289 						(__v16si) __B, __U);
   7290 }
   7291 
   7292 extern __inline __mmask8
   7293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7294 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
   7295 {
   7296   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
   7297 					       (__v8di) __B,
   7298 					       (__mmask8) -1);
   7299 }
   7300 
   7301 extern __inline __mmask8
   7302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7303 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
   7304 {
   7305   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
   7306 }
   7307 
   7308 extern __inline __mmask16
   7309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7310 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
   7311 {
   7312   return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
   7313 						 (__v16si) __B,
   7314 						 (__mmask16) -1);
   7315 }
   7316 
   7317 extern __inline __mmask16
   7318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7319 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
   7320 {
   7321   return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
   7322 						 (__v16si) __B, __U);
   7323 }
   7324 
   7325 extern __inline __mmask8
   7326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7327 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
   7328 {
   7329   return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
   7330 						(__v8di) __B,
   7331 						(__mmask8) -1);
   7332 }
   7333 
   7334 extern __inline __mmask8
   7335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7336 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
   7337 {
   7338   return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
   7339 						(__v8di) __B, __U);
   7340 }
   7341 
   7342 extern __inline __m512i
   7343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7344 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
   7345 {
   7346   return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
   7347 						     (__v16si) __B,
   7348 						     (__v16si)
   7349 						     _mm512_undefined_si512 (),
   7350 						     (__mmask16) -1);
   7351 }
   7352 
   7353 extern __inline __m512i
   7354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7355 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
   7356 			    __m512i __B)
   7357 {
   7358   return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
   7359 						     (__v16si) __B,
   7360 						     (__v16si) __W,
   7361 						     (__mmask16) __U);
   7362 }
   7363 
   7364 extern __inline __m512i
   7365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7366 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
   7367 {
   7368   return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
   7369 						     (__v16si) __B,
   7370 						     (__v16si)
   7371 						     _mm512_setzero_si512 (),
   7372 						     (__mmask16) __U);
   7373 }
   7374 
   7375 extern __inline __m512i
   7376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7377 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
   7378 {
   7379   return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
   7380 						      (__v8di) __B,
   7381 						      (__v8di)
   7382 						      _mm512_undefined_si512 (),
   7383 						      (__mmask8) -1);
   7384 }
   7385 
   7386 extern __inline __m512i
   7387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7388 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   7389 {
   7390   return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
   7391 						      (__v8di) __B,
   7392 						      (__v8di) __W,
   7393 						      (__mmask8) __U);
   7394 }
   7395 
   7396 extern __inline __m512i
   7397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7398 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
   7399 {
   7400   return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
   7401 						      (__v8di) __B,
   7402 						      (__v8di)
   7403 						      _mm512_setzero_si512 (),
   7404 						      (__mmask8) __U);
   7405 }
   7406 
   7407 extern __inline __m512i
   7408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7409 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
   7410 {
   7411   return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
   7412 						     (__v16si) __B,
   7413 						     (__v16si)
   7414 						     _mm512_undefined_si512 (),
   7415 						     (__mmask16) -1);
   7416 }
   7417 
   7418 extern __inline __m512i
   7419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7420 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
   7421 			    __m512i __B)
   7422 {
   7423   return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
   7424 						     (__v16si) __B,
   7425 						     (__v16si) __W,
   7426 						     (__mmask16) __U);
   7427 }
   7428 
   7429 extern __inline __m512i
   7430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7431 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
   7432 {
   7433   return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
   7434 						     (__v16si) __B,
   7435 						     (__v16si)
   7436 						     _mm512_setzero_si512 (),
   7437 						     (__mmask16) __U);
   7438 }
   7439 
   7440 extern __inline __m512i
   7441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7442 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
   7443 {
   7444   return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
   7445 						      (__v8di) __B,
   7446 						      (__v8di)
   7447 						      _mm512_undefined_si512 (),
   7448 						      (__mmask8) -1);
   7449 }
   7450 
   7451 extern __inline __m512i
   7452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7453 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
   7454 {
   7455   return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
   7456 						      (__v8di) __B,
   7457 						      (__v8di) __W,
   7458 						      (__mmask8) __U);
   7459 }
   7460 
   7461 extern __inline __m512i
   7462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7463 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
   7464 {
   7465   return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
   7466 						      (__v8di) __B,
   7467 						      (__v8di)
   7468 						      _mm512_setzero_si512 (),
   7469 						      (__mmask8) __U);
   7470 }
   7471 
   7472 #ifdef __x86_64__
   7473 #ifdef __OPTIMIZE__
   7474 extern __inline unsigned long long
   7475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7476 _mm_cvt_roundss_u64 (__m128 __A, const int __R)
   7477 {
   7478   return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
   7479 }
   7480 
   7481 extern __inline long long
   7482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7483 _mm_cvt_roundss_si64 (__m128 __A, const int __R)
   7484 {
   7485   return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
   7486 }
   7487 
   7488 extern __inline long long
   7489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7490 _mm_cvt_roundss_i64 (__m128 __A, const int __R)
   7491 {
   7492   return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
   7493 }
   7494 
   7495 extern __inline unsigned long long
   7496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7497 _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
   7498 {
   7499   return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
   7500 }
   7501 
   7502 extern __inline long long
   7503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7504 _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
   7505 {
   7506   return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
   7507 }
   7508 
   7509 extern __inline long long
   7510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7511 _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
   7512 {
   7513   return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
   7514 }
   7515 #else
   7516 #define _mm_cvt_roundss_u64(A, B)   \
   7517     ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))
   7518 
   7519 #define _mm_cvt_roundss_si64(A, B)   \
   7520     ((long long)__builtin_ia32_vcvtss2si64(A, B))
   7521 
   7522 #define _mm_cvt_roundss_i64(A, B)   \
   7523     ((long long)__builtin_ia32_vcvtss2si64(A, B))
   7524 
   7525 #define _mm_cvtt_roundss_u64(A, B)  \
   7526     ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))
   7527 
   7528 #define _mm_cvtt_roundss_i64(A, B)  \
   7529     ((long long)__builtin_ia32_vcvttss2si64(A, B))
   7530 
   7531 #define _mm_cvtt_roundss_si64(A, B)  \
   7532     ((long long)__builtin_ia32_vcvttss2si64(A, B))
   7533 #endif
   7534 #endif
   7535 
   7536 #ifdef __OPTIMIZE__
   7537 extern __inline unsigned
   7538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7539 _mm_cvt_roundss_u32 (__m128 __A, const int __R)
   7540 {
   7541   return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
   7542 }
   7543 
   7544 extern __inline int
   7545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7546 _mm_cvt_roundss_si32 (__m128 __A, const int __R)
   7547 {
   7548   return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
   7549 }
   7550 
   7551 extern __inline int
   7552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7553 _mm_cvt_roundss_i32 (__m128 __A, const int __R)
   7554 {
   7555   return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
   7556 }
   7557 
   7558 extern __inline unsigned
   7559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7560 _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
   7561 {
   7562   return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
   7563 }
   7564 
   7565 extern __inline int
   7566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7567 _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
   7568 {
   7569   return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
   7570 }
   7571 
   7572 extern __inline int
   7573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7574 _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
   7575 {
   7576   return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
   7577 }
   7578 #else
   7579 #define _mm_cvt_roundss_u32(A, B)   \
   7580     ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))
   7581 
   7582 #define _mm_cvt_roundss_si32(A, B)   \
   7583     ((int)__builtin_ia32_vcvtss2si32(A, B))
   7584 
   7585 #define _mm_cvt_roundss_i32(A, B)   \
   7586     ((int)__builtin_ia32_vcvtss2si32(A, B))
   7587 
   7588 #define _mm_cvtt_roundss_u32(A, B)  \
   7589     ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))
   7590 
   7591 #define _mm_cvtt_roundss_si32(A, B)  \
   7592     ((int)__builtin_ia32_vcvttss2si32(A, B))
   7593 
   7594 #define _mm_cvtt_roundss_i32(A, B)  \
   7595     ((int)__builtin_ia32_vcvttss2si32(A, B))
   7596 #endif
   7597 
   7598 #ifdef __x86_64__
   7599 #ifdef __OPTIMIZE__
   7600 extern __inline unsigned long long
   7601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7602 _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
   7603 {
   7604   return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
   7605 }
   7606 
   7607 extern __inline long long
   7608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7609 _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
   7610 {
   7611   return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
   7612 }
   7613 
   7614 extern __inline long long
   7615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7616 _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
   7617 {
   7618   return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
   7619 }
   7620 
   7621 extern __inline unsigned long long
   7622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7623 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
   7624 {
   7625   return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
   7626 }
   7627 
   7628 extern __inline long long
   7629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7630 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
   7631 {
   7632   return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
   7633 }
   7634 
   7635 extern __inline long long
   7636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7637 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
   7638 {
   7639   return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
   7640 }
   7641 #else
   7642 #define _mm_cvt_roundsd_u64(A, B)   \
   7643     ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))
   7644 
   7645 #define _mm_cvt_roundsd_si64(A, B)   \
   7646     ((long long)__builtin_ia32_vcvtsd2si64(A, B))
   7647 
   7648 #define _mm_cvt_roundsd_i64(A, B)   \
   7649     ((long long)__builtin_ia32_vcvtsd2si64(A, B))
   7650 
   7651 #define _mm_cvtt_roundsd_u64(A, B)   \
   7652     ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))
   7653 
   7654 #define _mm_cvtt_roundsd_si64(A, B)   \
   7655     ((long long)__builtin_ia32_vcvttsd2si64(A, B))
   7656 
   7657 #define _mm_cvtt_roundsd_i64(A, B)   \
   7658     ((long long)__builtin_ia32_vcvttsd2si64(A, B))
   7659 #endif
   7660 #endif
   7661 
   7662 #ifdef __OPTIMIZE__
   7663 extern __inline unsigned
   7664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7665 _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
   7666 {
   7667   return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
   7668 }
   7669 
   7670 extern __inline int
   7671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7672 _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
   7673 {
   7674   return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
   7675 }
   7676 
   7677 extern __inline int
   7678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7679 _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
   7680 {
   7681   return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
   7682 }
   7683 
   7684 extern __inline unsigned
   7685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7686 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
   7687 {
   7688   return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
   7689 }
   7690 
   7691 extern __inline int
   7692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7693 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
   7694 {
   7695   return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
   7696 }
   7697 
   7698 extern __inline int
   7699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7700 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
   7701 {
   7702   return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
   7703 }
   7704 #else
   7705 #define _mm_cvt_roundsd_u32(A, B)   \
   7706     ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))
   7707 
   7708 #define _mm_cvt_roundsd_si32(A, B)   \
   7709     ((int)__builtin_ia32_vcvtsd2si32(A, B))
   7710 
   7711 #define _mm_cvt_roundsd_i32(A, B)   \
   7712     ((int)__builtin_ia32_vcvtsd2si32(A, B))
   7713 
   7714 #define _mm_cvtt_roundsd_u32(A, B)   \
   7715     ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))
   7716 
   7717 #define _mm_cvtt_roundsd_si32(A, B)   \
   7718     ((int)__builtin_ia32_vcvttsd2si32(A, B))
   7719 
   7720 #define _mm_cvtt_roundsd_i32(A, B)   \
   7721     ((int)__builtin_ia32_vcvttsd2si32(A, B))
   7722 #endif
   7723 
   7724 extern __inline __m512d
   7725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7726 _mm512_movedup_pd (__m512d __A)
   7727 {
   7728   return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
   7729 						   (__v8df)
   7730 						   _mm512_undefined_pd (),
   7731 						   (__mmask8) -1);
   7732 }
   7733 
   7734 extern __inline __m512d
   7735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7736 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
   7737 {
   7738   return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
   7739 						   (__v8df) __W,
   7740 						   (__mmask8) __U);
   7741 }
   7742 
   7743 extern __inline __m512d
   7744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7745 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
   7746 {
   7747   return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
   7748 						   (__v8df)
   7749 						   _mm512_setzero_pd (),
   7750 						   (__mmask8) __U);
   7751 }
   7752 
   7753 extern __inline __m512d
   7754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7755 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
   7756 {
   7757   return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
   7758 						    (__v8df) __B,
   7759 						    (__v8df)
   7760 						    _mm512_undefined_pd (),
   7761 						    (__mmask8) -1);
   7762 }
   7763 
   7764 extern __inline __m512d
   7765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7766 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   7767 {
   7768   return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
   7769 						    (__v8df) __B,
   7770 						    (__v8df) __W,
   7771 						    (__mmask8) __U);
   7772 }
   7773 
   7774 extern __inline __m512d
   7775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7776 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
   7777 {
   7778   return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
   7779 						    (__v8df) __B,
   7780 						    (__v8df)
   7781 						    _mm512_setzero_pd (),
   7782 						    (__mmask8) __U);
   7783 }
   7784 
   7785 extern __inline __m512d
   7786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7787 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
   7788 {
   7789   return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
   7790 						    (__v8df) __B,
   7791 						    (__v8df)
   7792 						    _mm512_undefined_pd (),
   7793 						    (__mmask8) -1);
   7794 }
   7795 
   7796 extern __inline __m512d
   7797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7798 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   7799 {
   7800   return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
   7801 						    (__v8df) __B,
   7802 						    (__v8df) __W,
   7803 						    (__mmask8) __U);
   7804 }
   7805 
   7806 extern __inline __m512d
   7807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7808 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
   7809 {
   7810   return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
   7811 						    (__v8df) __B,
   7812 						    (__v8df)
   7813 						    _mm512_setzero_pd (),
   7814 						    (__mmask8) __U);
   7815 }
   7816 
   7817 extern __inline __m512
   7818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7819 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
   7820 {
   7821   return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
   7822 						   (__v16sf) __B,
   7823 						   (__v16sf)
   7824 						   _mm512_undefined_ps (),
   7825 						   (__mmask16) -1);
   7826 }
   7827 
   7828 extern __inline __m512
   7829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7830 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   7831 {
   7832   return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
   7833 						   (__v16sf) __B,
   7834 						   (__v16sf) __W,
   7835 						   (__mmask16) __U);
   7836 }
   7837 
   7838 extern __inline __m512
   7839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7840 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
   7841 {
   7842   return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
   7843 						   (__v16sf) __B,
   7844 						   (__v16sf)
   7845 						   _mm512_setzero_ps (),
   7846 						   (__mmask16) __U);
   7847 }
   7848 
   7849 #ifdef __OPTIMIZE__
   7850 extern __inline __m512d
   7851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7852 _mm512_cvt_roundps_pd (__m256 __A, const int __R)
   7853 {
   7854   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
   7855 						    (__v8df)
   7856 						    _mm512_undefined_pd (),
   7857 						    (__mmask8) -1, __R);
   7858 }
   7859 
   7860 extern __inline __m512d
   7861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7862 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
   7863 			    const int __R)
   7864 {
   7865   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
   7866 						    (__v8df) __W,
   7867 						    (__mmask8) __U, __R);
   7868 }
   7869 
   7870 extern __inline __m512d
   7871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7872 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
   7873 {
   7874   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
   7875 						    (__v8df)
   7876 						    _mm512_setzero_pd (),
   7877 						    (__mmask8) __U, __R);
   7878 }
   7879 
   7880 extern __inline __m512
   7881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7882 _mm512_cvt_roundph_ps (__m256i __A, const int __R)
   7883 {
   7884   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
   7885 						    (__v16sf)
   7886 						    _mm512_undefined_ps (),
   7887 						    (__mmask16) -1, __R);
   7888 }
   7889 
   7890 extern __inline __m512
   7891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7892 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
   7893 			    const int __R)
   7894 {
   7895   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
   7896 						    (__v16sf) __W,
   7897 						    (__mmask16) __U, __R);
   7898 }
   7899 
   7900 extern __inline __m512
   7901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7902 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
   7903 {
   7904   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
   7905 						    (__v16sf)
   7906 						    _mm512_setzero_ps (),
   7907 						    (__mmask16) __U, __R);
   7908 }
   7909 
   7910 extern __inline __m256i
   7911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7912 _mm512_cvt_roundps_ph (__m512 __A, const int __I)
   7913 {
   7914   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
   7915 						     __I,
   7916 						     (__v16hi)
   7917 						     _mm256_undefined_si256 (),
   7918 						     -1);
   7919 }
   7920 
   7921 extern __inline __m256i
   7922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7923 _mm512_cvtps_ph (__m512 __A, const int __I)
   7924 {
   7925   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
   7926 						     __I,
   7927 						     (__v16hi)
   7928 						     _mm256_undefined_si256 (),
   7929 						     -1);
   7930 }
   7931 
   7932 extern __inline __m256i
   7933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7934 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
   7935 			    const int __I)
   7936 {
   7937   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
   7938 						     __I,
   7939 						     (__v16hi) __U,
   7940 						     (__mmask16) __W);
   7941 }
   7942 
   7943 extern __inline __m256i
   7944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7945 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
   7946 {
   7947   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
   7948 						     __I,
   7949 						     (__v16hi) __U,
   7950 						     (__mmask16) __W);
   7951 }
   7952 
   7953 extern __inline __m256i
   7954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7955 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
   7956 {
   7957   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
   7958 						     __I,
   7959 						     (__v16hi)
   7960 						     _mm256_setzero_si256 (),
   7961 						     (__mmask16) __W);
   7962 }
   7963 
   7964 extern __inline __m256i
   7965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7966 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
   7967 {
   7968   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
   7969 						     __I,
   7970 						     (__v16hi)
   7971 						     _mm256_setzero_si256 (),
   7972 						     (__mmask16) __W);
   7973 }
   7974 #else
   7975 #define _mm512_cvt_roundps_pd(A, B)		 \
   7976     (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B)
   7977 
   7978 #define _mm512_mask_cvt_roundps_pd(W, U, A, B)   \
   7979     (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B)
   7980 
   7981 #define _mm512_maskz_cvt_roundps_pd(U, A, B)     \
   7982     (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B)
   7983 
   7984 #define _mm512_cvt_roundph_ps(A, B)		 \
   7985     (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
   7986 
   7987 #define _mm512_mask_cvt_roundph_ps(W, U, A, B)   \
   7988     (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B)
   7989 
   7990 #define _mm512_maskz_cvt_roundph_ps(U, A, B)     \
   7991     (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B)
   7992 
   7993 #define _mm512_cvt_roundps_ph(A, I)						 \
   7994   ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
   7995     (__v16hi)_mm256_undefined_si256 (), -1))
   7996 #define _mm512_cvtps_ph(A, I)						 \
   7997   ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
   7998     (__v16hi)_mm256_undefined_si256 (), -1))
   7999 #define _mm512_mask_cvt_roundps_ph(U, W, A, I)				 \
   8000   ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
   8001     (__v16hi)(__m256i)(U), (__mmask16) (W)))
   8002 #define _mm512_mask_cvtps_ph(U, W, A, I)				 \
   8003   ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
   8004     (__v16hi)(__m256i)(U), (__mmask16) (W)))
   8005 #define _mm512_maskz_cvt_roundps_ph(W, A, I)					 \
   8006   ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
   8007     (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
   8008 #define _mm512_maskz_cvtps_ph(W, A, I)					 \
   8009   ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
   8010     (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
   8011 #endif
   8012 
   8013 #ifdef __OPTIMIZE__
   8014 extern __inline __m256
   8015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8016 _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
   8017 {
   8018   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
   8019 						   (__v8sf)
   8020 						   _mm256_undefined_ps (),
   8021 						   (__mmask8) -1, __R);
   8022 }
   8023 
   8024 extern __inline __m256
   8025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8026 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
   8027 			    const int __R)
   8028 {
   8029   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
   8030 						   (__v8sf) __W,
   8031 						   (__mmask8) __U, __R);
   8032 }
   8033 
   8034 extern __inline __m256
   8035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8036 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
   8037 {
   8038   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
   8039 						   (__v8sf)
   8040 						   _mm256_setzero_ps (),
   8041 						   (__mmask8) __U, __R);
   8042 }
   8043 
   8044 extern __inline __m128
   8045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8046 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
   8047 {
   8048   return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
   8049 						 (__v2df) __B,
   8050 						 __R);
   8051 }
   8052 
   8053 extern __inline __m128d
   8054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8055 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
   8056 {
   8057   return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
   8058 						  (__v4sf) __B,
   8059 						  __R);
   8060 }
   8061 #else
   8062 #define _mm512_cvt_roundpd_ps(A, B)		 \
   8063     (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B)
   8064 
   8065 #define _mm512_mask_cvt_roundpd_ps(W, U, A, B)   \
   8066     (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B)
   8067 
   8068 #define _mm512_maskz_cvt_roundpd_ps(U, A, B)     \
   8069     (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B)
   8070 
   8071 #define _mm_cvt_roundsd_ss(A, B, C)		 \
   8072     (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)
   8073 
   8074 #define _mm_cvt_roundss_sd(A, B, C)		 \
   8075     (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
   8076 #endif
   8077 
   8078 extern __inline void
   8079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8080 _mm512_stream_si512 (__m512i * __P, __m512i __A)
   8081 {
   8082   __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
   8083 }
   8084 
   8085 extern __inline void
   8086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8087 _mm512_stream_ps (float *__P, __m512 __A)
   8088 {
   8089   __builtin_ia32_movntps512 (__P, (__v16sf) __A);
   8090 }
   8091 
   8092 extern __inline void
   8093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8094 _mm512_stream_pd (double *__P, __m512d __A)
   8095 {
   8096   __builtin_ia32_movntpd512 (__P, (__v8df) __A);
   8097 }
   8098 
   8099 extern __inline __m512i
   8100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8101 _mm512_stream_load_si512 (void *__P)
   8102 {
   8103   return __builtin_ia32_movntdqa512 ((__v8di *)__P);
   8104 }
   8105 
   8106 /* Constants for mantissa extraction */
   8107 typedef enum
   8108 {
   8109   _MM_MANT_NORM_1_2,		/* interval [1, 2)      */
   8110   _MM_MANT_NORM_p5_2,		/* interval [0.5, 2)    */
   8111   _MM_MANT_NORM_p5_1,		/* interval [0.5, 1)    */
   8112   _MM_MANT_NORM_p75_1p5		/* interval [0.75, 1.5) */
   8113 } _MM_MANTISSA_NORM_ENUM;
   8114 
   8115 typedef enum
   8116 {
   8117   _MM_MANT_SIGN_src,		/* sign = sign(SRC)     */
   8118   _MM_MANT_SIGN_zero,		/* sign = 0             */
   8119   _MM_MANT_SIGN_nan		/* DEST = NaN if sign(SRC) = 1 */
   8120 } _MM_MANTISSA_SIGN_ENUM;
   8121 
   8122 #ifdef __OPTIMIZE__
   8123 extern __inline __m128
   8124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8125 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
   8126 {
   8127   return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
   8128 						    (__v4sf) __B,
   8129 						    __R);
   8130 }
   8131 
   8132 extern __inline __m128d
   8133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8134 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
   8135 {
   8136   return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
   8137 						     (__v2df) __B,
   8138 						     __R);
   8139 }
   8140 
   8141 extern __inline __m512
   8142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8143 _mm512_getexp_round_ps (__m512 __A, const int __R)
   8144 {
   8145   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
   8146 						   (__v16sf)
   8147 						   _mm512_undefined_ps (),
   8148 						   (__mmask16) -1, __R);
   8149 }
   8150 
   8151 extern __inline __m512
   8152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8153 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
   8154 			     const int __R)
   8155 {
   8156   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
   8157 						   (__v16sf) __W,
   8158 						   (__mmask16) __U, __R);
   8159 }
   8160 
   8161 extern __inline __m512
   8162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8163 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
   8164 {
   8165   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
   8166 						   (__v16sf)
   8167 						   _mm512_setzero_ps (),
   8168 						   (__mmask16) __U, __R);
   8169 }
   8170 
   8171 extern __inline __m512d
   8172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8173 _mm512_getexp_round_pd (__m512d __A, const int __R)
   8174 {
   8175   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
   8176 						    (__v8df)
   8177 						    _mm512_undefined_pd (),
   8178 						    (__mmask8) -1, __R);
   8179 }
   8180 
   8181 extern __inline __m512d
   8182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8183 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
   8184 			     const int __R)
   8185 {
   8186   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
   8187 						    (__v8df) __W,
   8188 						    (__mmask8) __U, __R);
   8189 }
   8190 
   8191 extern __inline __m512d
   8192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8193 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
   8194 {
   8195   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
   8196 						    (__v8df)
   8197 						    _mm512_setzero_pd (),
   8198 						    (__mmask8) __U, __R);
   8199 }
   8200 
   8201 extern __inline __m512d
   8202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8203 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
   8204 			 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
   8205 {
   8206   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
   8207 						     (__C << 2) | __B,
   8208 						     _mm512_undefined_pd (),
   8209 						     (__mmask8) -1, __R);
   8210 }
   8211 
   8212 extern __inline __m512d
   8213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8214 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
   8215 			      _MM_MANTISSA_NORM_ENUM __B,
   8216 			      _MM_MANTISSA_SIGN_ENUM __C, const int __R)
   8217 {
   8218   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
   8219 						     (__C << 2) | __B,
   8220 						     (__v8df) __W, __U,
   8221 						     __R);
   8222 }
   8223 
   8224 extern __inline __m512d
   8225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8226 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
   8227 			       _MM_MANTISSA_NORM_ENUM __B,
   8228 			       _MM_MANTISSA_SIGN_ENUM __C, const int __R)
   8229 {
   8230   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
   8231 						     (__C << 2) | __B,
   8232 						     (__v8df)
   8233 						     _mm512_setzero_pd (),
   8234 						     __U, __R);
   8235 }
   8236 
   8237 extern __inline __m512
   8238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8239 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
   8240 			 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
   8241 {
   8242   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
   8243 						    (__C << 2) | __B,
   8244 						    _mm512_undefined_ps (),
   8245 						    (__mmask16) -1, __R);
   8246 }
   8247 
   8248 extern __inline __m512
   8249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8250 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
   8251 			      _MM_MANTISSA_NORM_ENUM __B,
   8252 			      _MM_MANTISSA_SIGN_ENUM __C, const int __R)
   8253 {
   8254   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
   8255 						    (__C << 2) | __B,
   8256 						    (__v16sf) __W, __U,
   8257 						    __R);
   8258 }
   8259 
   8260 extern __inline __m512
   8261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8262 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
   8263 			       _MM_MANTISSA_NORM_ENUM __B,
   8264 			       _MM_MANTISSA_SIGN_ENUM __C, const int __R)
   8265 {
   8266   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
   8267 						    (__C << 2) | __B,
   8268 						    (__v16sf)
   8269 						    _mm512_setzero_ps (),
   8270 						    __U, __R);
   8271 }
   8272 
   8273 extern __inline __m128d
   8274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8275 _mm_getmant_round_sd (__m128d __A, __m128d __B,
   8276 		      _MM_MANTISSA_NORM_ENUM __C,
   8277 		      _MM_MANTISSA_SIGN_ENUM __D, const int __R)
   8278 {
   8279   return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
   8280 						  (__v2df) __B,
   8281 						  (__D << 2) | __C,
   8282 						   __R);
   8283 }
   8284 
   8285 extern __inline __m128
   8286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8287 _mm_getmant_round_ss (__m128 __A, __m128 __B,
   8288 		      _MM_MANTISSA_NORM_ENUM __C,
   8289 		      _MM_MANTISSA_SIGN_ENUM __D, const int __R)
   8290 {
   8291   return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
   8292 						  (__v4sf) __B,
   8293 						  (__D << 2) | __C,
   8294 						  __R);
   8295 }
   8296 
   8297 #else
   8298 #define _mm512_getmant_round_pd(X, B, C, R)                                                  \
   8299   ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                 \
   8300                                               (int)(((C)<<2) | (B)),                \
   8301                                               (__v8df)(__m512d)_mm512_undefined_pd(), \
   8302                                               (__mmask8)-1,\
   8303 					      (R)))
   8304 
   8305 #define _mm512_mask_getmant_round_pd(W, U, X, B, C, R)                                       \
   8306   ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                 \
   8307                                               (int)(((C)<<2) | (B)),                \
   8308                                               (__v8df)(__m512d)(W),                 \
   8309                                               (__mmask8)(U),\
   8310 					      (R)))
   8311 
   8312 #define _mm512_maskz_getmant_round_pd(U, X, B, C, R)                                         \
   8313   ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                 \
   8314                                               (int)(((C)<<2) | (B)),                \
   8315                                               (__v8df)(__m512d)_mm512_setzero_pd(), \
   8316                                               (__mmask8)(U),\
   8317 					      (R)))
   8318 #define _mm512_getmant_round_ps(X, B, C, R)                                                  \
   8319   ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                  \
   8320                                              (int)(((C)<<2) | (B)),                 \
   8321                                              (__v16sf)(__m512)_mm512_undefined_ps(), \
   8322                                              (__mmask16)-1,\
   8323 					     (R)))
   8324 
   8325 #define _mm512_mask_getmant_round_ps(W, U, X, B, C, R)                                       \
   8326   ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                  \
   8327                                              (int)(((C)<<2) | (B)),                 \
   8328                                              (__v16sf)(__m512)(W),                  \
   8329                                              (__mmask16)(U),\
   8330 					     (R)))
   8331 
   8332 #define _mm512_maskz_getmant_round_ps(U, X, B, C, R)                                         \
   8333   ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                  \
   8334                                              (int)(((C)<<2) | (B)),                 \
   8335                                              (__v16sf)(__m512)_mm512_setzero_ps(),  \
   8336                                              (__mmask16)(U),\
   8337 					     (R)))
   8338 #define _mm_getmant_round_sd(X, Y, C, D, R)                                                  \
   8339   ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X),                    \
   8340 					    (__v2df)(__m128d)(Y),	\
   8341 					    (int)(((D)<<2) | (C)),	\
   8342 					    (R)))
   8343 
   8344 #define _mm_getmant_round_ss(X, Y, C, D, R)                                                  \
   8345   ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X),                      \
   8346 					   (__v4sf)(__m128)(Y),		\
   8347 					   (int)(((D)<<2) | (C)),	\
   8348 					   (R)))
   8349 
   8350 #define _mm_getexp_round_ss(A, B, R)						      \
   8351   ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
   8352 
   8353 #define _mm_getexp_round_sd(A, B, R)						       \
   8354   ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
   8355 
   8356 #define _mm512_getexp_round_ps(A, R)						\
   8357   ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),		\
   8358   (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))
   8359 
   8360 #define _mm512_mask_getexp_round_ps(W, U, A, R)					\
   8361   ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),		\
   8362   (__v16sf)(__m512)(W), (__mmask16)(U), R))
   8363 
   8364 #define _mm512_maskz_getexp_round_ps(U, A, R)					\
   8365   ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),		\
   8366   (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))
   8367 
   8368 #define _mm512_getexp_round_pd(A, R)						\
   8369   ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),		\
   8370   (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))
   8371 
   8372 #define _mm512_mask_getexp_round_pd(W, U, A, R)					\
   8373   ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),		\
   8374   (__v8df)(__m512d)(W), (__mmask8)(U), R))
   8375 
   8376 #define _mm512_maskz_getexp_round_pd(U, A, R)					\
   8377   ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),		\
   8378   (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
   8379 #endif
   8380 
   8381 #ifdef __OPTIMIZE__
   8382 extern __inline __m512
   8383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8384 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
   8385 {
   8386   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
   8387 						  (__v16sf)
   8388 						  _mm512_undefined_ps (),
   8389 						  -1, __R);
   8390 }
   8391 
   8392 extern __inline __m512
   8393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8394 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
   8395 				 const int __imm, const int __R)
   8396 {
   8397   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
   8398 						  (__v16sf) __A,
   8399 						  (__mmask16) __B, __R);
   8400 }
   8401 
   8402 extern __inline __m512
   8403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8404 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
   8405 				  const int __imm, const int __R)
   8406 {
   8407   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
   8408 						  __imm,
   8409 						  (__v16sf)
   8410 						  _mm512_setzero_ps (),
   8411 						  (__mmask16) __A, __R);
   8412 }
   8413 
   8414 extern __inline __m512d
   8415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8416 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
   8417 {
   8418   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
   8419 						   (__v8df)
   8420 						   _mm512_undefined_pd (),
   8421 						   -1, __R);
   8422 }
   8423 
   8424 extern __inline __m512d
   8425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8426 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
   8427 				 __m512d __C, const int __imm, const int __R)
   8428 {
   8429   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
   8430 						   (__v8df) __A,
   8431 						   (__mmask8) __B, __R);
   8432 }
   8433 
   8434 extern __inline __m512d
   8435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8436 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
   8437 				  const int __imm, const int __R)
   8438 {
   8439   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
   8440 						   __imm,
   8441 						   (__v8df)
   8442 						   _mm512_setzero_pd (),
   8443 						   (__mmask8) __A, __R);
   8444 }
   8445 
   8446 extern __inline __m128
   8447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8448 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
   8449 {
   8450   return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
   8451 						   (__v4sf) __B, __imm, __R);
   8452 }
   8453 
   8454 extern __inline __m128d
   8455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8456 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
   8457 			 const int __R)
   8458 {
   8459   return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
   8460 						    (__v2df) __B, __imm, __R);
   8461 }
   8462 
   8463 #else
   8464 #define _mm512_roundscale_round_ps(A, B, R) \
   8465   ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
   8466     (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
   8467 #define _mm512_mask_roundscale_round_ps(A, B, C, D, R)				\
   8468   ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),	\
   8469 					    (int)(D),			\
   8470 					    (__v16sf)(__m512)(A),	\
   8471 					    (__mmask16)(B), R))
   8472 #define _mm512_maskz_roundscale_round_ps(A, B, C, R)				\
   8473   ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),	\
   8474 					    (int)(C),			\
   8475 					    (__v16sf)_mm512_setzero_ps(),\
   8476 					    (__mmask16)(A), R))
   8477 #define _mm512_roundscale_round_pd(A, B, R) \
   8478   ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
   8479     (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
   8480 #define _mm512_mask_roundscale_round_pd(A, B, C, D, R)				\
   8481   ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),	\
   8482 					     (int)(D),			\
   8483 					     (__v8df)(__m512d)(A),	\
   8484 					     (__mmask8)(B), R))
   8485 #define _mm512_maskz_roundscale_round_pd(A, B, C, R)				\
   8486   ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),	\
   8487 					     (int)(C),			\
   8488 					     (__v8df)_mm512_setzero_pd(),\
   8489 					     (__mmask8)(A), R))
   8490 #define _mm_roundscale_round_ss(A, B, C, R)					\
   8491   ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A),	\
   8492     (__v4sf)(__m128)(B), (int)(C), R))
   8493 #define _mm_roundscale_round_sd(A, B, C, R)					\
   8494   ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A),	\
   8495     (__v2df)(__m128d)(B), (int)(C), R))
   8496 #endif
   8497 
   8498 extern __inline __m512
   8499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8500 _mm512_floor_ps (__m512 __A)
   8501 {
   8502   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
   8503 						  _MM_FROUND_FLOOR,
   8504 						  (__v16sf) __A, -1,
   8505 						  _MM_FROUND_CUR_DIRECTION);
   8506 }
   8507 
   8508 extern __inline __m512d
   8509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8510 _mm512_floor_pd (__m512d __A)
   8511 {
   8512   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
   8513 						   _MM_FROUND_FLOOR,
   8514 						   (__v8df) __A, -1,
   8515 						   _MM_FROUND_CUR_DIRECTION);
   8516 }
   8517 
   8518 extern __inline __m512
   8519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8520 _mm512_ceil_ps (__m512 __A)
   8521 {
   8522   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
   8523 						  _MM_FROUND_CEIL,
   8524 						  (__v16sf) __A, -1,
   8525 						  _MM_FROUND_CUR_DIRECTION);
   8526 }
   8527 
   8528 extern __inline __m512d
   8529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8530 _mm512_ceil_pd (__m512d __A)
   8531 {
   8532   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
   8533 						   _MM_FROUND_CEIL,
   8534 						   (__v8df) __A, -1,
   8535 						   _MM_FROUND_CUR_DIRECTION);
   8536 }
   8537 
   8538 extern __inline __m512
   8539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8540 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
   8541 {
   8542   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
   8543 						  _MM_FROUND_FLOOR,
   8544 						  (__v16sf) __W, __U,
   8545 						  _MM_FROUND_CUR_DIRECTION);
   8546 }
   8547 
   8548 extern __inline __m512d
   8549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8550 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
   8551 {
   8552   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
   8553 						   _MM_FROUND_FLOOR,
   8554 						   (__v8df) __W, __U,
   8555 						   _MM_FROUND_CUR_DIRECTION);
   8556 }
   8557 
   8558 extern __inline __m512
   8559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8560 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
   8561 {
   8562   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
   8563 						  _MM_FROUND_CEIL,
   8564 						  (__v16sf) __W, __U,
   8565 						  _MM_FROUND_CUR_DIRECTION);
   8566 }
   8567 
   8568 extern __inline __m512d
   8569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8570 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
   8571 {
   8572   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
   8573 						   _MM_FROUND_CEIL,
   8574 						   (__v8df) __W, __U,
   8575 						   _MM_FROUND_CUR_DIRECTION);
   8576 }
   8577 
   8578 #ifdef __OPTIMIZE__
   8579 extern __inline __m512i
   8580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8581 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
   8582 {
   8583   return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
   8584 						  (__v16si) __B, __imm,
   8585 						  (__v16si)
   8586 						  _mm512_undefined_si512 (),
   8587 						  (__mmask16) -1);
   8588 }
   8589 
   8590 extern __inline __m512i
   8591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8592 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
   8593 			  __m512i __B, const int __imm)
   8594 {
   8595   return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
   8596 						  (__v16si) __B, __imm,
   8597 						  (__v16si) __W,
   8598 						  (__mmask16) __U);
   8599 }
   8600 
   8601 extern __inline __m512i
   8602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8603 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
   8604 			   const int __imm)
   8605 {
   8606   return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
   8607 						  (__v16si) __B, __imm,
   8608 						  (__v16si)
   8609 						  _mm512_setzero_si512 (),
   8610 						  (__mmask16) __U);
   8611 }
   8612 
   8613 extern __inline __m512i
   8614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8615 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
   8616 {
   8617   return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
   8618 						  (__v8di) __B, __imm,
   8619 						  (__v8di)
   8620 						  _mm512_undefined_si512 (),
   8621 						  (__mmask8) -1);
   8622 }
   8623 
   8624 extern __inline __m512i
   8625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8626 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
   8627 			  __m512i __B, const int __imm)
   8628 {
   8629   return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
   8630 						  (__v8di) __B, __imm,
   8631 						  (__v8di) __W,
   8632 						  (__mmask8) __U);
   8633 }
   8634 
   8635 extern __inline __m512i
   8636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8637 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
   8638 			   const int __imm)
   8639 {
   8640   return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
   8641 						  (__v8di) __B, __imm,
   8642 						  (__v8di)
   8643 						  _mm512_setzero_si512 (),
   8644 						  (__mmask8) __U);
   8645 }
   8646 #else
   8647 #define _mm512_alignr_epi32(X, Y, C)                                        \
   8648     ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),         \
   8649         (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_si512 (),\
   8650         (__mmask16)-1))
   8651 
   8652 #define _mm512_mask_alignr_epi32(W, U, X, Y, C)                             \
   8653     ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),         \
   8654         (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W),             \
   8655         (__mmask16)(U)))
   8656 
   8657 #define _mm512_maskz_alignr_epi32(U, X, Y, C)                               \
   8658     ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),         \
   8659         (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\
   8660         (__mmask16)(U)))
   8661 
   8662 #define _mm512_alignr_epi64(X, Y, C)                                        \
   8663     ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),          \
   8664         (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_si512 (),  \
   8665 	(__mmask8)-1))
   8666 
   8667 #define _mm512_mask_alignr_epi64(W, U, X, Y, C)                             \
   8668     ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),          \
   8669         (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))
   8670 
   8671 #define _mm512_maskz_alignr_epi64(U, X, Y, C)                               \
   8672     ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),          \
   8673         (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\
   8674         (__mmask8)(U)))
   8675 #endif
   8676 
   8677 extern __inline __mmask16
   8678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8679 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
   8680 {
   8681   return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
   8682 						     (__v16si) __B,
   8683 						     (__mmask16) -1);
   8684 }
   8685 
   8686 extern __inline __mmask16
   8687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8688 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
   8689 {
   8690   return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
   8691 						     (__v16si) __B, __U);
   8692 }
   8693 
   8694 extern __inline __mmask8
   8695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8696 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
   8697 {
   8698   return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
   8699 						    (__v8di) __B, __U);
   8700 }
   8701 
   8702 extern __inline __mmask8
   8703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8704 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
   8705 {
   8706   return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
   8707 						    (__v8di) __B,
   8708 						    (__mmask8) -1);
   8709 }
   8710 
   8711 extern __inline __mmask16
   8712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8713 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
   8714 {
   8715   return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
   8716 						     (__v16si) __B,
   8717 						     (__mmask16) -1);
   8718 }
   8719 
   8720 extern __inline __mmask16
   8721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8722 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
   8723 {
   8724   return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
   8725 						     (__v16si) __B, __U);
   8726 }
   8727 
   8728 extern __inline __mmask8
   8729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8730 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
   8731 {
   8732   return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
   8733 						    (__v8di) __B, __U);
   8734 }
   8735 
   8736 extern __inline __mmask8
   8737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8738 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
   8739 {
   8740   return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
   8741 						    (__v8di) __B,
   8742 						    (__mmask8) -1);
   8743 }
   8744 
   8745 extern __inline __mmask16
   8746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8747 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
   8748 {
   8749   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
   8750 						    (__v16si) __Y, 5,
   8751 						    (__mmask16) -1);
   8752 }
   8753 
   8754 extern __inline __mmask16
   8755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8756 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
   8757 {
   8758   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
   8759 						    (__v16si) __Y, 5,
   8760 						    (__mmask16) -1);
   8761 }
   8762 
   8763 extern __inline __mmask8
   8764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8765 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
   8766 {
   8767   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
   8768 						    (__v8di) __Y, 5,
   8769 						    (__mmask8) -1);
   8770 }
   8771 
   8772 extern __inline __mmask8
   8773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8774 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
   8775 {
   8776   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
   8777 						    (__v8di) __Y, 5,
   8778 						    (__mmask8) -1);
   8779 }
   8780 
   8781 extern __inline __mmask16
   8782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8783 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
   8784 {
   8785   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
   8786 						    (__v16si) __Y, 2,
   8787 						    (__mmask16) -1);
   8788 }
   8789 
   8790 extern __inline __mmask16
   8791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8792 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
   8793 {
   8794   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
   8795 						    (__v16si) __Y, 2,
   8796 						    (__mmask16) -1);
   8797 }
   8798 
   8799 extern __inline __mmask8
   8800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8801 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
   8802 {
   8803   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
   8804 						    (__v8di) __Y, 2,
   8805 						    (__mmask8) -1);
   8806 }
   8807 
   8808 extern __inline __mmask8
   8809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8810 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
   8811 {
   8812   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
   8813 						    (__v8di) __Y, 2,
   8814 						    (__mmask8) -1);
   8815 }
   8816 
   8817 extern __inline __mmask16
   8818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8819 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
   8820 {
   8821   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
   8822 						    (__v16si) __Y, 1,
   8823 						    (__mmask16) -1);
   8824 }
   8825 
   8826 extern __inline __mmask16
   8827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8828 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
   8829 {
   8830   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
   8831 						    (__v16si) __Y, 1,
   8832 						    (__mmask16) -1);
   8833 }
   8834 
   8835 extern __inline __mmask8
   8836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8837 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
   8838 {
   8839   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
   8840 						    (__v8di) __Y, 1,
   8841 						    (__mmask8) -1);
   8842 }
   8843 
   8844 extern __inline __mmask8
   8845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8846 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
   8847 {
   8848   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
   8849 						    (__v8di) __Y, 1,
   8850 						    (__mmask8) -1);
   8851 }
   8852 
   8853 extern __inline __mmask16
   8854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8855 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
   8856 {
   8857   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
   8858 						    (__v16si) __Y, 4,
   8859 						    (__mmask16) -1);
   8860 }
   8861 
   8862 extern __inline __mmask16
   8863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8864 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
   8865 {
   8866   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
   8867 						    (__v16si) __Y, 4,
   8868 						    (__mmask16) -1);
   8869 }
   8870 
   8871 extern __inline __mmask8
   8872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8873 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
   8874 {
   8875   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
   8876 						    (__v8di) __Y, 4,
   8877 						    (__mmask8) -1);
   8878 }
   8879 
   8880 extern __inline __mmask8
   8881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8882 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
   8883 {
   8884   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
   8885 						    (__v8di) __Y, 4,
   8886 						    (__mmask8) -1);
   8887 }
   8888 
   8889 #define _MM_CMPINT_EQ	    0x0
   8890 #define _MM_CMPINT_LT	    0x1
   8891 #define _MM_CMPINT_LE	    0x2
   8892 #define _MM_CMPINT_UNUSED   0x3
   8893 #define _MM_CMPINT_NE	    0x4
   8894 #define _MM_CMPINT_NLT	    0x5
   8895 #define _MM_CMPINT_GE	    0x5
   8896 #define _MM_CMPINT_NLE	    0x6
   8897 #define _MM_CMPINT_GT	    0x6
   8898 
   8899 #ifdef __OPTIMIZE__
   8900 extern __inline __mmask8
   8901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8902 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
   8903 {
   8904   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
   8905 						 (__v8di) __Y, __P,
   8906 						 (__mmask8) -1);
   8907 }
   8908 
   8909 extern __inline __mmask16
   8910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8911 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
   8912 {
   8913   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
   8914 						  (__v16si) __Y, __P,
   8915 						  (__mmask16) -1);
   8916 }
   8917 
   8918 extern __inline __mmask8
   8919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8920 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
   8921 {
   8922   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
   8923 						  (__v8di) __Y, __P,
   8924 						  (__mmask8) -1);
   8925 }
   8926 
   8927 extern __inline __mmask16
   8928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8929 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
   8930 {
   8931   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
   8932 						   (__v16si) __Y, __P,
   8933 						   (__mmask16) -1);
   8934 }
   8935 
   8936 extern __inline __mmask8
   8937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8938 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
   8939 			  const int __R)
   8940 {
   8941   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
   8942 						  (__v8df) __Y, __P,
   8943 						  (__mmask8) -1, __R);
   8944 }
   8945 
   8946 extern __inline __mmask16
   8947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8948 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
   8949 {
   8950   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
   8951 						   (__v16sf) __Y, __P,
   8952 						   (__mmask16) -1, __R);
   8953 }
   8954 
   8955 extern __inline __mmask8
   8956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8957 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
   8958 			    const int __P)
   8959 {
   8960   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
   8961 						 (__v8di) __Y, __P,
   8962 						 (__mmask8) __U);
   8963 }
   8964 
   8965 extern __inline __mmask16
   8966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8967 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
   8968 			    const int __P)
   8969 {
   8970   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
   8971 						  (__v16si) __Y, __P,
   8972 						  (__mmask16) __U);
   8973 }
   8974 
   8975 extern __inline __mmask8
   8976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8977 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
   8978 			    const int __P)
   8979 {
   8980   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
   8981 						  (__v8di) __Y, __P,
   8982 						  (__mmask8) __U);
   8983 }
   8984 
   8985 extern __inline __mmask16
   8986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8987 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
   8988 			    const int __P)
   8989 {
   8990   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
   8991 						   (__v16si) __Y, __P,
   8992 						   (__mmask16) __U);
   8993 }
   8994 
   8995 extern __inline __mmask8
   8996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   8997 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
   8998 			       const int __P, const int __R)
   8999 {
   9000   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
   9001 						  (__v8df) __Y, __P,
   9002 						  (__mmask8) __U, __R);
   9003 }
   9004 
   9005 extern __inline __mmask16
   9006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9007 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
   9008 			       const int __P, const int __R)
   9009 {
   9010   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
   9011 						   (__v16sf) __Y, __P,
   9012 						   (__mmask16) __U, __R);
   9013 }
   9014 
   9015 extern __inline __mmask8
   9016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9017 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
   9018 {
   9019   return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
   9020 					       (__v2df) __Y, __P,
   9021 					       (__mmask8) -1, __R);
   9022 }
   9023 
   9024 extern __inline __mmask8
   9025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9026 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
   9027 			    const int __P, const int __R)
   9028 {
   9029   return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
   9030 					       (__v2df) __Y, __P,
   9031 					       (__mmask8) __M, __R);
   9032 }
   9033 
   9034 extern __inline __mmask8
   9035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9036 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
   9037 {
   9038   return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
   9039 					       (__v4sf) __Y, __P,
   9040 					       (__mmask8) -1, __R);
   9041 }
   9042 
   9043 extern __inline __mmask8
   9044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9045 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
   9046 			    const int __P, const int __R)
   9047 {
   9048   return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
   9049 					       (__v4sf) __Y, __P,
   9050 					       (__mmask8) __M, __R);
   9051 }
   9052 
   9053 #else
   9054 #define _mm512_cmp_epi64_mask(X, Y, P)					\
   9055   ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X),	\
   9056 					   (__v8di)(__m512i)(Y), (int)(P),\
   9057 					   (__mmask8)-1))
   9058 
   9059 #define _mm512_cmp_epi32_mask(X, Y, P)					\
   9060   ((__mmask8) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),	\
   9061 					   (__v16si)(__m512i)(Y), (int)(P),\
   9062 					   (__mmask16)-1))
   9063 
   9064 #define _mm512_cmp_epu64_mask(X, Y, P)					\
   9065   ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X),	\
   9066 					    (__v8di)(__m512i)(Y), (int)(P),\
   9067 					    (__mmask8)-1))
   9068 
   9069 #define _mm512_cmp_epu32_mask(X, Y, P)					\
   9070   ((__mmask8) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),	\
   9071 					    (__v16si)(__m512i)(Y), (int)(P),\
   9072 					    (__mmask16)-1))
   9073 
   9074 #define _mm512_cmp_round_pd_mask(X, Y, P, R)					\
   9075   ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),	\
   9076 					    (__v8df)(__m512d)(Y), (int)(P),\
   9077 					    (__mmask8)-1, R))
   9078 
   9079 #define _mm512_cmp_round_ps_mask(X, Y, P, R)					\
   9080   ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),	\
   9081 					     (__v16sf)(__m512)(Y), (int)(P),\
   9082 					     (__mmask16)-1, R))
   9083 
   9084 #define _mm512_mask_cmp_epi64_mask(M, X, Y, P)					\
   9085   ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X),	\
   9086 					   (__v8di)(__m512i)(Y), (int)(P),\
   9087 					   (__mmask8)M))
   9088 
   9089 #define _mm512_mask_cmp_epi32_mask(M, X, Y, P)					\
   9090   ((__mmask8) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),	\
   9091 					   (__v16si)(__m512i)(Y), (int)(P),\
   9092 					   (__mmask16)M))
   9093 
   9094 #define _mm512_mask_cmp_epu64_mask(M, X, Y, P)					\
   9095   ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X),	\
   9096 					    (__v8di)(__m512i)(Y), (int)(P),\
   9097 					    (__mmask8)M))
   9098 
   9099 #define _mm512_mask_cmp_epu32_mask(M, X, Y, P)					\
   9100   ((__mmask8) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),	\
   9101 					    (__v16si)(__m512i)(Y), (int)(P),\
   9102 					    (__mmask16)M))
   9103 
   9104 #define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R)					\
   9105   ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),	\
   9106 					    (__v8df)(__m512d)(Y), (int)(P),\
   9107 					    (__mmask8)M, R))
   9108 
   9109 #define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R)					\
   9110   ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),	\
   9111 					     (__v16sf)(__m512)(Y), (int)(P),\
   9112 					     (__mmask16)M, R))
   9113 
   9114 #define _mm_cmp_round_sd_mask(X, Y, P, R)					\
   9115   ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),		\
   9116 					 (__v2df)(__m128d)(Y), (int)(P),\
   9117 					 (__mmask8)-1, R))
   9118 
   9119 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R)					\
   9120   ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),		\
   9121 					 (__v2df)(__m128d)(Y), (int)(P),\
   9122 					 (M), R))
   9123 
   9124 #define _mm_cmp_round_ss_mask(X, Y, P, R)					\
   9125   ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),		\
   9126 					 (__v4sf)(__m128)(Y), (int)(P), \
   9127 					 (__mmask8)-1, R))
   9128 
   9129 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R)					\
   9130   ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),		\
   9131 					 (__v4sf)(__m128)(Y), (int)(P), \
   9132 					 (M), R))
   9133 #endif
   9134 
   9135 #ifdef __OPTIMIZE__
   9136 extern __inline __m512
   9137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9138 _mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
   9139 {
   9140   __m512 v1_old = _mm512_undefined_ps ();
   9141   __mmask16 mask = 0xFFFF;
   9142 
   9143   return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
   9144 						__addr,
   9145 						(__v16si) __index,
   9146 						mask, __scale);
   9147 }
   9148 
   9149 extern __inline __m512
   9150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9151 _mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask,
   9152 			  __m512i __index, float const *__addr, int __scale)
   9153 {
   9154   return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
   9155 						__addr,
   9156 						(__v16si) __index,
   9157 						__mask, __scale);
   9158 }
   9159 
   9160 extern __inline __m512d
   9161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9162 _mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
   9163 {
   9164   __m512d v1_old = _mm512_undefined_pd ();
   9165   __mmask8 mask = 0xFF;
   9166 
   9167   return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
   9168 						__addr,
   9169 						(__v8si) __index, mask,
   9170 						__scale);
   9171 }
   9172 
   9173 extern __inline __m512d
   9174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9175 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
   9176 			  __m256i __index, double const *__addr, int __scale)
   9177 {
   9178   return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
   9179 						__addr,
   9180 						(__v8si) __index,
   9181 						__mask, __scale);
   9182 }
   9183 
   9184 extern __inline __m256
   9185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9186 _mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
   9187 {
   9188   __m256 v1_old = _mm256_undefined_ps ();
   9189   __mmask8 mask = 0xFF;
   9190 
   9191   return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
   9192 						__addr,
   9193 						(__v8di) __index, mask,
   9194 						__scale);
   9195 }
   9196 
   9197 extern __inline __m256
   9198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9199 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
   9200 			  __m512i __index, float const *__addr, int __scale)
   9201 {
   9202   return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
   9203 						__addr,
   9204 						(__v8di) __index,
   9205 						__mask, __scale);
   9206 }
   9207 
   9208 extern __inline __m512d
   9209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9210 _mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
   9211 {
   9212   __m512d v1_old = _mm512_undefined_pd ();
   9213   __mmask8 mask = 0xFF;
   9214 
   9215   return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
   9216 						__addr,
   9217 						(__v8di) __index, mask,
   9218 						__scale);
   9219 }
   9220 
   9221 extern __inline __m512d
   9222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9223 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
   9224 			  __m512i __index, double const *__addr, int __scale)
   9225 {
   9226   return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
   9227 						__addr,
   9228 						(__v8di) __index,
   9229 						__mask, __scale);
   9230 }
   9231 
   9232 extern __inline __m512i
   9233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9234 _mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
   9235 {
   9236   __m512i v1_old = _mm512_undefined_si512 ();
   9237   __mmask16 mask = 0xFFFF;
   9238 
   9239   return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
   9240 						 __addr,
   9241 						 (__v16si) __index,
   9242 						 mask, __scale);
   9243 }
   9244 
   9245 extern __inline __m512i
   9246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9247 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
   9248 			     __m512i __index, int const *__addr, int __scale)
   9249 {
   9250   return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
   9251 						 __addr,
   9252 						 (__v16si) __index,
   9253 						 __mask, __scale);
   9254 }
   9255 
   9256 extern __inline __m512i
   9257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9258 _mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
   9259 {
   9260   __m512i v1_old = _mm512_undefined_si512 ();
   9261   __mmask8 mask = 0xFF;
   9262 
   9263   return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
   9264 						__addr,
   9265 						(__v8si) __index, mask,
   9266 						__scale);
   9267 }
   9268 
   9269 extern __inline __m512i
   9270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9271 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
   9272 			     __m256i __index, long long const *__addr,
   9273 			     int __scale)
   9274 {
   9275   return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
   9276 						__addr,
   9277 						(__v8si) __index,
   9278 						__mask, __scale);
   9279 }
   9280 
   9281 extern __inline __m256i
   9282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9283 _mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
   9284 {
   9285   __m256i v1_old = _mm256_undefined_si256 ();
   9286   __mmask8 mask = 0xFF;
   9287 
   9288   return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
   9289 						 __addr,
   9290 						 (__v8di) __index,
   9291 						 mask, __scale);
   9292 }
   9293 
   9294 extern __inline __m256i
   9295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9296 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
   9297 			     __m512i __index, int const *__addr, int __scale)
   9298 {
   9299   return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
   9300 						 __addr,
   9301 						 (__v8di) __index,
   9302 						 __mask, __scale);
   9303 }
   9304 
   9305 extern __inline __m512i
   9306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9307 _mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
   9308 {
   9309   __m512i v1_old = _mm512_undefined_si512 ();
   9310   __mmask8 mask = 0xFF;
   9311 
   9312   return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
   9313 						__addr,
   9314 						(__v8di) __index, mask,
   9315 						__scale);
   9316 }
   9317 
   9318 extern __inline __m512i
   9319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9320 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
   9321 			     __m512i __index, long long const *__addr,
   9322 			     int __scale)
   9323 {
   9324   return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
   9325 						__addr,
   9326 						(__v8di) __index,
   9327 						__mask, __scale);
   9328 }
   9329 
   9330 extern __inline void
   9331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9332 _mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale)
   9333 {
   9334   __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
   9335 				 (__v16si) __index, (__v16sf) __v1, __scale);
   9336 }
   9337 
   9338 extern __inline void
   9339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9340 _mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask,
   9341 			   __m512i __index, __m512 __v1, int __scale)
   9342 {
   9343   __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
   9344 				 (__v16sf) __v1, __scale);
   9345 }
   9346 
   9347 extern __inline void
   9348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9349 _mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1,
   9350 		      int __scale)
   9351 {
   9352   __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
   9353 				(__v8si) __index, (__v8df) __v1, __scale);
   9354 }
   9355 
   9356 extern __inline void
   9357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9358 _mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
   9359 			   __m256i __index, __m512d __v1, int __scale)
   9360 {
   9361   __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
   9362 				(__v8df) __v1, __scale);
   9363 }
   9364 
   9365 extern __inline void
   9366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9367 _mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale)
   9368 {
   9369   __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
   9370 				 (__v8di) __index, (__v8sf) __v1, __scale);
   9371 }
   9372 
   9373 extern __inline void
   9374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9375 _mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
   9376 			   __m512i __index, __m256 __v1, int __scale)
   9377 {
   9378   __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
   9379 				 (__v8sf) __v1, __scale);
   9380 }
   9381 
   9382 extern __inline void
   9383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9384 _mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1,
   9385 		      int __scale)
   9386 {
   9387   __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
   9388 				(__v8di) __index, (__v8df) __v1, __scale);
   9389 }
   9390 
   9391 extern __inline void
   9392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9393 _mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
   9394 			   __m512i __index, __m512d __v1, int __scale)
   9395 {
   9396   __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
   9397 				(__v8df) __v1, __scale);
   9398 }
   9399 
   9400 extern __inline void
   9401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9402 _mm512_i32scatter_epi32 (int *__addr, __m512i __index,
   9403 			 __m512i __v1, int __scale)
   9404 {
   9405   __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
   9406 				 (__v16si) __index, (__v16si) __v1, __scale);
   9407 }
   9408 
   9409 extern __inline void
   9410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9411 _mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask,
   9412 			      __m512i __index, __m512i __v1, int __scale)
   9413 {
   9414   __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
   9415 				 (__v16si) __v1, __scale);
   9416 }
   9417 
   9418 extern __inline void
   9419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9420 _mm512_i32scatter_epi64 (long long *__addr, __m256i __index,
   9421 			 __m512i __v1, int __scale)
   9422 {
   9423   __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
   9424 				(__v8si) __index, (__v8di) __v1, __scale);
   9425 }
   9426 
   9427 extern __inline void
   9428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9429 _mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
   9430 			      __m256i __index, __m512i __v1, int __scale)
   9431 {
   9432   __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
   9433 				(__v8di) __v1, __scale);
   9434 }
   9435 
   9436 extern __inline void
   9437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9438 _mm512_i64scatter_epi32 (int *__addr, __m512i __index,
   9439 			 __m256i __v1, int __scale)
   9440 {
   9441   __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
   9442 				 (__v8di) __index, (__v8si) __v1, __scale);
   9443 }
   9444 
   9445 extern __inline void
   9446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9447 _mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
   9448 			      __m512i __index, __m256i __v1, int __scale)
   9449 {
   9450   __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
   9451 				 (__v8si) __v1, __scale);
   9452 }
   9453 
   9454 extern __inline void
   9455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9456 _mm512_i64scatter_epi64 (long long *__addr, __m512i __index,
   9457 			 __m512i __v1, int __scale)
   9458 {
   9459   __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
   9460 				(__v8di) __index, (__v8di) __v1, __scale);
   9461 }
   9462 
   9463 extern __inline void
   9464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9465 _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
   9466 			      __m512i __index, __m512i __v1, int __scale)
   9467 {
   9468   __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
   9469 				(__v8di) __v1, __scale);
   9470 }
   9471 #else
   9472 #define _mm512_i32gather_ps(INDEX, ADDR, SCALE)				\
   9473   (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
   9474 					 (float const *)ADDR,		\
   9475 					 (__v16si)(__m512i)INDEX,	\
   9476 					 (__mmask16)0xFFFF, (int)SCALE)
   9477 
   9478 #define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
   9479   (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD,	\
   9480 					 (float const *)ADDR,		\
   9481 					 (__v16si)(__m512i)INDEX,	\
   9482 					 (__mmask16)MASK, (int)SCALE)
   9483 
   9484 #define _mm512_i32gather_pd(INDEX, ADDR, SCALE)				\
   9485   (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(),	\
   9486 					 (double const *)ADDR,		\
   9487 					 (__v8si)(__m256i)INDEX,	\
   9488 					 (__mmask8)0xFF, (int)SCALE)
   9489 
   9490 #define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
   9491   (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD,	\
   9492 					 (double const *)ADDR,		\
   9493 					 (__v8si)(__m256i)INDEX,	\
   9494 					 (__mmask8)MASK, (int)SCALE)
   9495 
   9496 #define _mm512_i64gather_ps(INDEX, ADDR, SCALE)				\
   9497   (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(),	\
   9498 					 (float const *)ADDR,		\
   9499 					 (__v8di)(__m512i)INDEX,	\
   9500 					 (__mmask8)0xFF, (int)SCALE)
   9501 
   9502 #define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
   9503   (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD,		\
   9504 					 (float const *)ADDR,		\
   9505 					 (__v8di)(__m512i)INDEX,	\
   9506 					 (__mmask8)MASK, (int)SCALE)
   9507 
   9508 #define _mm512_i64gather_pd(INDEX, ADDR, SCALE)				\
   9509   (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(),	\
   9510 					 (double const *)ADDR,		\
   9511 					 (__v8di)(__m512i)INDEX,	\
   9512 					 (__mmask8)0xFF, (int)SCALE)
   9513 
   9514 #define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
   9515   (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD,	\
   9516 					 (double const *)ADDR,		\
   9517 					 (__v8di)(__m512i)INDEX,	\
   9518 					 (__mmask8)MASK, (int)SCALE)
   9519 
   9520 #define _mm512_i32gather_epi32(INDEX, ADDR, SCALE)			\
   9521   (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_si512 (),	\
   9522 					  (int const *)ADDR,		\
   9523 					  (__v16si)(__m512i)INDEX,	\
   9524 					  (__mmask16)0xFFFF, (int)SCALE)
   9525 
   9526 #define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
   9527   (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD,	\
   9528 					  (int const *)ADDR,		\
   9529 					  (__v16si)(__m512i)INDEX,	\
   9530 					  (__mmask16)MASK, (int)SCALE)
   9531 
   9532 #define _mm512_i32gather_epi64(INDEX, ADDR, SCALE)			\
   9533   (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_si512 (),	\
   9534 					 (long long const *)ADDR,	\
   9535 					 (__v8si)(__m256i)INDEX,	\
   9536 					 (__mmask8)0xFF, (int)SCALE)
   9537 
   9538 #define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
   9539   (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD,	\
   9540 					 (long long const *)ADDR,	\
   9541 					 (__v8si)(__m256i)INDEX,	\
   9542 					 (__mmask8)MASK, (int)SCALE)
   9543 
   9544 #define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)			  \
   9545   (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
   9546 					  (int const *)ADDR,		  \
   9547 					  (__v8di)(__m512i)INDEX,	  \
   9548 					  (__mmask8)0xFF, (int)SCALE)
   9549 
   9550 #define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
   9551   (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD,	\
   9552 					  (int const *)ADDR,		\
   9553 					  (__v8di)(__m512i)INDEX,	\
   9554 					  (__mmask8)MASK, (int)SCALE)
   9555 
   9556 #define _mm512_i64gather_epi64(INDEX, ADDR, SCALE)			\
   9557   (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_si512 (),	\
   9558 					 (long long const *)ADDR,	\
   9559 					 (__v8di)(__m512i)INDEX,	\
   9560 					 (__mmask8)0xFF, (int)SCALE)
   9561 
   9562 #define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
   9563   (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD,	\
   9564 					 (long long const *)ADDR,	\
   9565 					 (__v8di)(__m512i)INDEX,	\
   9566 					 (__mmask8)MASK, (int)SCALE)
   9567 
   9568 #define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE)			\
   9569   __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)0xFFFF,	\
   9570 				 (__v16si)(__m512i)INDEX,		\
   9571 				 (__v16sf)(__m512)V1, (int)SCALE)
   9572 
   9573 #define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
   9574   __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)MASK,		\
   9575 				 (__v16si)(__m512i)INDEX,		\
   9576 				 (__v16sf)(__m512)V1, (int)SCALE)
   9577 
   9578 #define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE)			\
   9579   __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)0xFF,		\
   9580 				(__v8si)(__m256i)INDEX,			\
   9581 				(__v8df)(__m512d)V1, (int)SCALE)
   9582 
   9583 #define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
   9584   __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)MASK,		\
   9585 				(__v8si)(__m256i)INDEX,			\
   9586 				(__v8df)(__m512d)V1, (int)SCALE)
   9587 
   9588 #define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE)			\
   9589   __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask8)0xFF,		\
   9590 				 (__v8di)(__m512i)INDEX,		\
   9591 				 (__v8sf)(__m256)V1, (int)SCALE)
   9592 
   9593 #define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
   9594   __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask16)MASK,		\
   9595 				 (__v8di)(__m512i)INDEX,		\
   9596 				 (__v8sf)(__m256)V1, (int)SCALE)
   9597 
   9598 #define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE)			\
   9599   __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)0xFF,		\
   9600 				(__v8di)(__m512i)INDEX,			\
   9601 				(__v8df)(__m512d)V1, (int)SCALE)
   9602 
   9603 #define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
   9604   __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)MASK,		\
   9605 				(__v8di)(__m512i)INDEX,			\
   9606 				(__v8df)(__m512d)V1, (int)SCALE)
   9607 
   9608 #define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE)			\
   9609   __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)0xFFFF,	\
   9610 				 (__v16si)(__m512i)INDEX,		\
   9611 				 (__v16si)(__m512i)V1, (int)SCALE)
   9612 
   9613 #define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)	\
   9614   __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)MASK,		\
   9615 				 (__v16si)(__m512i)INDEX,		\
   9616 				 (__v16si)(__m512i)V1, (int)SCALE)
   9617 
   9618 #define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE)			\
   9619   __builtin_ia32_scattersiv8di ((long long *)ADDR, (__mmask8)0xFF,	\
   9620 				(__v8si)(__m256i)INDEX,			\
   9621 				(__v8di)(__m512i)V1, (int)SCALE)
   9622 
   9623 #define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)	\
   9624   __builtin_ia32_scattersiv8di ((long long *)ADDR, (__mmask8)MASK,	\
   9625 				(__v8si)(__m256i)INDEX,			\
   9626 				(__v8di)(__m512i)V1, (int)SCALE)
   9627 
   9628 #define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE)			\
   9629   __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)0xFF,		\
   9630 				 (__v8di)(__m512i)INDEX,		\
   9631 				 (__v8si)(__m256i)V1, (int)SCALE)
   9632 
   9633 #define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)	\
   9634   __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)MASK,		\
   9635 				 (__v8di)(__m512i)INDEX,		\
   9636 				 (__v8si)(__m256i)V1, (int)SCALE)
   9637 
   9638 #define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE)			\
   9639   __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)0xFF,	\
   9640 				(__v8di)(__m512i)INDEX,			\
   9641 				(__v8di)(__m512i)V1, (int)SCALE)
   9642 
   9643 #define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)	\
   9644   __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)MASK,	\
   9645 				(__v8di)(__m512i)INDEX,			\
   9646 				(__v8di)(__m512i)V1, (int)SCALE)
   9647 #endif
   9648 
   9649 extern __inline __m512d
   9650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9651 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
   9652 {
   9653   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
   9654 						      (__v8df) __W,
   9655 						      (__mmask8) __U);
   9656 }
   9657 
   9658 extern __inline __m512d
   9659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9660 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
   9661 {
   9662   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
   9663 						      (__v8df)
   9664 						      _mm512_setzero_pd (),
   9665 						      (__mmask8) __U);
   9666 }
   9667 
   9668 extern __inline void
   9669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9670 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
   9671 {
   9672   __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
   9673 					  (__mmask8) __U);
   9674 }
   9675 
   9676 extern __inline __m512
   9677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9678 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
   9679 {
   9680   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
   9681 						     (__v16sf) __W,
   9682 						     (__mmask16) __U);
   9683 }
   9684 
   9685 extern __inline __m512
   9686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9687 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
   9688 {
   9689   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
   9690 						     (__v16sf)
   9691 						     _mm512_setzero_ps (),
   9692 						     (__mmask16) __U);
   9693 }
   9694 
   9695 extern __inline void
   9696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9697 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
   9698 {
   9699   __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
   9700 					  (__mmask16) __U);
   9701 }
   9702 
   9703 extern __inline __m512i
   9704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9705 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
   9706 {
   9707   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
   9708 						      (__v8di) __W,
   9709 						      (__mmask8) __U);
   9710 }
   9711 
   9712 extern __inline __m512i
   9713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9714 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
   9715 {
   9716   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
   9717 						      (__v8di)
   9718 						      _mm512_setzero_si512 (),
   9719 						      (__mmask8) __U);
   9720 }
   9721 
   9722 extern __inline void
   9723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9724 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
   9725 {
   9726   __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
   9727 					  (__mmask8) __U);
   9728 }
   9729 
   9730 extern __inline __m512i
   9731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9732 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
   9733 {
   9734   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
   9735 						      (__v16si) __W,
   9736 						      (__mmask16) __U);
   9737 }
   9738 
   9739 extern __inline __m512i
   9740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9741 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
   9742 {
   9743   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
   9744 						      (__v16si)
   9745 						      _mm512_setzero_si512 (),
   9746 						      (__mmask16) __U);
   9747 }
   9748 
   9749 extern __inline void
   9750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9751 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
   9752 {
   9753   __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
   9754 					  (__mmask16) __U);
   9755 }
   9756 
   9757 extern __inline __m512d
   9758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9759 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
   9760 {
   9761   return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
   9762 						    (__v8df) __W,
   9763 						    (__mmask8) __U);
   9764 }
   9765 
   9766 extern __inline __m512d
   9767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9768 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
   9769 {
   9770   return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
   9771 						     (__v8df)
   9772 						     _mm512_setzero_pd (),
   9773 						     (__mmask8) __U);
   9774 }
   9775 
   9776 extern __inline __m512d
   9777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9778 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
   9779 {
   9780   return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
   9781 							(__v8df) __W,
   9782 							(__mmask8) __U);
   9783 }
   9784 
   9785 extern __inline __m512d
   9786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9787 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
   9788 {
   9789   return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
   9790 							 (__v8df)
   9791 							 _mm512_setzero_pd (),
   9792 							 (__mmask8) __U);
   9793 }
   9794 
   9795 extern __inline __m512
   9796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9797 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
   9798 {
   9799   return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
   9800 						   (__v16sf) __W,
   9801 						   (__mmask16) __U);
   9802 }
   9803 
   9804 extern __inline __m512
   9805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9806 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
   9807 {
   9808   return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
   9809 						    (__v16sf)
   9810 						    _mm512_setzero_ps (),
   9811 						    (__mmask16) __U);
   9812 }
   9813 
   9814 extern __inline __m512
   9815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9816 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
   9817 {
   9818   return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
   9819 						       (__v16sf) __W,
   9820 						       (__mmask16) __U);
   9821 }
   9822 
   9823 extern __inline __m512
   9824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9825 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
   9826 {
   9827   return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
   9828 							(__v16sf)
   9829 							_mm512_setzero_ps (),
   9830 							(__mmask16) __U);
   9831 }
   9832 
   9833 extern __inline __m512i
   9834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9835 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
   9836 {
   9837   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
   9838 						    (__v8di) __W,
   9839 						    (__mmask8) __U);
   9840 }
   9841 
   9842 extern __inline __m512i
   9843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9844 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
   9845 {
   9846   return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
   9847 						     (__v8di)
   9848 						     _mm512_setzero_si512 (),
   9849 						     (__mmask8) __U);
   9850 }
   9851 
   9852 extern __inline __m512i
   9853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9854 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
   9855 {
   9856   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
   9857 							(__v8di) __W,
   9858 							(__mmask8) __U);
   9859 }
   9860 
   9861 extern __inline __m512i
   9862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9863 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
   9864 {
   9865   return (__m512i)
   9866 	 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
   9867 					       (__v8di)
   9868 					       _mm512_setzero_si512 (),
   9869 					       (__mmask8) __U);
   9870 }
   9871 
   9872 extern __inline __m512i
   9873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9874 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
   9875 {
   9876   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
   9877 						    (__v16si) __W,
   9878 						    (__mmask16) __U);
   9879 }
   9880 
   9881 extern __inline __m512i
   9882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9883 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
   9884 {
   9885   return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
   9886 						     (__v16si)
   9887 						     _mm512_setzero_si512 (),
   9888 						     (__mmask16) __U);
   9889 }
   9890 
   9891 extern __inline __m512i
   9892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9893 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
   9894 {
   9895   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
   9896 							(__v16si) __W,
   9897 							(__mmask16) __U);
   9898 }
   9899 
   9900 extern __inline __m512i
   9901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9902 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
   9903 {
   9904   return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
   9905 							 (__v16si)
   9906 							 _mm512_setzero_si512
   9907 							 (), (__mmask16) __U);
   9908 }
   9909 
   9910 /* Mask arithmetic operations */
   9911 extern __inline __mmask16
   9912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9913 _mm512_kand (__mmask16 __A, __mmask16 __B)
   9914 {
   9915   return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
   9916 }
   9917 
   9918 extern __inline __mmask16
   9919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9920 _mm512_kandn (__mmask16 __A, __mmask16 __B)
   9921 {
   9922   return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
   9923 }
   9924 
   9925 extern __inline __mmask16
   9926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9927 _mm512_kor (__mmask16 __A, __mmask16 __B)
   9928 {
   9929   return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
   9930 }
   9931 
   9932 extern __inline int
   9933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9934 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
   9935 {
   9936   return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
   9937 						(__mmask16) __B);
   9938 }
   9939 
   9940 extern __inline int
   9941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9942 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
   9943 {
   9944   return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
   9945 						(__mmask16) __B);
   9946 }
   9947 
   9948 extern __inline __mmask16
   9949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9950 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
   9951 {
   9952   return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
   9953 }
   9954 
   9955 extern __inline __mmask16
   9956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9957 _mm512_kxor (__mmask16 __A, __mmask16 __B)
   9958 {
   9959   return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
   9960 }
   9961 
   9962 extern __inline __mmask16
   9963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9964 _mm512_knot (__mmask16 __A)
   9965 {
   9966   return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
   9967 }
   9968 
   9969 extern __inline __mmask16
   9970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9971 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
   9972 {
   9973   return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
   9974 }
   9975 
   9976 #ifdef __OPTIMIZE__
   9977 extern __inline __m512i
   9978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9979 _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
   9980 			  const int __imm)
   9981 {
   9982   return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
   9983 						    (__v4si) __D,
   9984 						    __imm,
   9985 						    (__v16si)
   9986 						    _mm512_setzero_si512 (),
   9987 						    __B);
   9988 }
   9989 
   9990 extern __inline __m512
   9991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   9992 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
   9993 			  const int __imm)
   9994 {
   9995   return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
   9996 						   (__v4sf) __D,
   9997 						   __imm,
   9998 						   (__v16sf)
   9999 						   _mm512_setzero_ps (), __B);
   10000 }
   10001 
   10002 extern __inline __m512i
   10003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10004 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
   10005 			 __m128i __D, const int __imm)
   10006 {
   10007   return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
   10008 						    (__v4si) __D,
   10009 						    __imm,
   10010 						    (__v16si) __A,
   10011 						    __B);
   10012 }
   10013 
   10014 extern __inline __m512
   10015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10016 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
   10017 			 __m128 __D, const int __imm)
   10018 {
   10019   return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
   10020 						   (__v4sf) __D,
   10021 						   __imm,
   10022 						   (__v16sf) __A, __B);
   10023 }
   10024 #else
   10025 #define _mm512_maskz_insertf32x4(A, X, Y, C)                            \
   10026   ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
   10027     (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(),      \
   10028     (__mmask8)(A)))
   10029 
   10030 #define _mm512_maskz_inserti32x4(A, X, Y, C)                            \
   10031   ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
   10032     (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (),     \
   10033     (__mmask8)(A)))
   10034 
   10035 #define _mm512_mask_insertf32x4(A, B, X, Y, C)                          \
   10036   ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
   10037     (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A),             \
   10038 					     (__mmask8)(B)))
   10039 
   10040 #define _mm512_mask_inserti32x4(A, B, X, Y, C)                          \
   10041   ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
   10042     (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A),           \
   10043 					      (__mmask8)(B)))
   10044 #endif
   10045 
   10046 extern __inline __m512i
   10047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10048 _mm512_max_epi64 (__m512i __A, __m512i __B)
   10049 {
   10050   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
   10051 						  (__v8di) __B,
   10052 						  (__v8di)
   10053 						  _mm512_undefined_si512 (),
   10054 						  (__mmask8) -1);
   10055 }
   10056 
   10057 extern __inline __m512i
   10058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10059 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
   10060 {
   10061   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
   10062 						  (__v8di) __B,
   10063 						  (__v8di)
   10064 						  _mm512_setzero_si512 (),
   10065 						  __M);
   10066 }
   10067 
   10068 extern __inline __m512i
   10069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10070 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
   10071 {
   10072   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
   10073 						  (__v8di) __B,
   10074 						  (__v8di) __W, __M);
   10075 }
   10076 
   10077 extern __inline __m512i
   10078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10079 _mm512_min_epi64 (__m512i __A, __m512i __B)
   10080 {
   10081   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
   10082 						  (__v8di) __B,
   10083 						  (__v8di)
   10084 						  _mm512_undefined_si512 (),
   10085 						  (__mmask8) -1);
   10086 }
   10087 
   10088 extern __inline __m512i
   10089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10090 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
   10091 {
   10092   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
   10093 						  (__v8di) __B,
   10094 						  (__v8di) __W, __M);
   10095 }
   10096 
   10097 extern __inline __m512i
   10098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10099 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
   10100 {
   10101   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
   10102 						  (__v8di) __B,
   10103 						  (__v8di)
   10104 						  _mm512_setzero_si512 (),
   10105 						  __M);
   10106 }
   10107 
   10108 extern __inline __m512i
   10109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10110 _mm512_max_epu64 (__m512i __A, __m512i __B)
   10111 {
   10112   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
   10113 						  (__v8di) __B,
   10114 						  (__v8di)
   10115 						  _mm512_undefined_si512 (),
   10116 						  (__mmask8) -1);
   10117 }
   10118 
   10119 extern __inline __m512i
   10120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10121 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
   10122 {
   10123   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
   10124 						  (__v8di) __B,
   10125 						  (__v8di)
   10126 						  _mm512_setzero_si512 (),
   10127 						  __M);
   10128 }
   10129 
   10130 extern __inline __m512i
   10131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10132 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
   10133 {
   10134   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
   10135 						  (__v8di) __B,
   10136 						  (__v8di) __W, __M);
   10137 }
   10138 
   10139 extern __inline __m512i
   10140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10141 _mm512_min_epu64 (__m512i __A, __m512i __B)
   10142 {
   10143   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
   10144 						  (__v8di) __B,
   10145 						  (__v8di)
   10146 						  _mm512_undefined_si512 (),
   10147 						  (__mmask8) -1);
   10148 }
   10149 
   10150 extern __inline __m512i
   10151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10152 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
   10153 {
   10154   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
   10155 						  (__v8di) __B,
   10156 						  (__v8di) __W, __M);
   10157 }
   10158 
   10159 extern __inline __m512i
   10160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10161 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
   10162 {
   10163   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
   10164 						  (__v8di) __B,
   10165 						  (__v8di)
   10166 						  _mm512_setzero_si512 (),
   10167 						  __M);
   10168 }
   10169 
   10170 extern __inline __m512i
   10171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10172 _mm512_max_epi32 (__m512i __A, __m512i __B)
   10173 {
   10174   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
   10175 						  (__v16si) __B,
   10176 						  (__v16si)
   10177 						  _mm512_undefined_si512 (),
   10178 						  (__mmask16) -1);
   10179 }
   10180 
   10181 extern __inline __m512i
   10182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10183 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
   10184 {
   10185   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
   10186 						  (__v16si) __B,
   10187 						  (__v16si)
   10188 						  _mm512_setzero_si512 (),
   10189 						  __M);
   10190 }
   10191 
   10192 extern __inline __m512i
   10193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10194 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   10195 {
   10196   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
   10197 						  (__v16si) __B,
   10198 						  (__v16si) __W, __M);
   10199 }
   10200 
   10201 extern __inline __m512i
   10202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10203 _mm512_min_epi32 (__m512i __A, __m512i __B)
   10204 {
   10205   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
   10206 						  (__v16si) __B,
   10207 						  (__v16si)
   10208 						  _mm512_undefined_si512 (),
   10209 						  (__mmask16) -1);
   10210 }
   10211 
   10212 extern __inline __m512i
   10213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10214 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
   10215 {
   10216   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
   10217 						  (__v16si) __B,
   10218 						  (__v16si)
   10219 						  _mm512_setzero_si512 (),
   10220 						  __M);
   10221 }
   10222 
   10223 extern __inline __m512i
   10224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10225 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   10226 {
   10227   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
   10228 						  (__v16si) __B,
   10229 						  (__v16si) __W, __M);
   10230 }
   10231 
   10232 extern __inline __m512i
   10233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10234 _mm512_max_epu32 (__m512i __A, __m512i __B)
   10235 {
   10236   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
   10237 						  (__v16si) __B,
   10238 						  (__v16si)
   10239 						  _mm512_undefined_si512 (),
   10240 						  (__mmask16) -1);
   10241 }
   10242 
   10243 extern __inline __m512i
   10244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10245 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
   10246 {
   10247   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
   10248 						  (__v16si) __B,
   10249 						  (__v16si)
   10250 						  _mm512_setzero_si512 (),
   10251 						  __M);
   10252 }
   10253 
   10254 extern __inline __m512i
   10255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10256 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   10257 {
   10258   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
   10259 						  (__v16si) __B,
   10260 						  (__v16si) __W, __M);
   10261 }
   10262 
   10263 extern __inline __m512i
   10264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10265 _mm512_min_epu32 (__m512i __A, __m512i __B)
   10266 {
   10267   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
   10268 						  (__v16si) __B,
   10269 						  (__v16si)
   10270 						  _mm512_undefined_si512 (),
   10271 						  (__mmask16) -1);
   10272 }
   10273 
   10274 extern __inline __m512i
   10275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10276 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
   10277 {
   10278   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
   10279 						  (__v16si) __B,
   10280 						  (__v16si)
   10281 						  _mm512_setzero_si512 (),
   10282 						  __M);
   10283 }
   10284 
   10285 extern __inline __m512i
   10286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10287 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
   10288 {
   10289   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
   10290 						  (__v16si) __B,
   10291 						  (__v16si) __W, __M);
   10292 }
   10293 
   10294 extern __inline __m512
   10295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10296 _mm512_unpacklo_ps (__m512 __A, __m512 __B)
   10297 {
   10298   return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
   10299 						   (__v16sf) __B,
   10300 						   (__v16sf)
   10301 						   _mm512_undefined_ps (),
   10302 						   (__mmask16) -1);
   10303 }
   10304 
   10305 extern __inline __m512
   10306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10307 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   10308 {
   10309   return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
   10310 						   (__v16sf) __B,
   10311 						   (__v16sf) __W,
   10312 						   (__mmask16) __U);
   10313 }
   10314 
   10315 extern __inline __m512
   10316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10317 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
   10318 {
   10319   return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
   10320 						   (__v16sf) __B,
   10321 						   (__v16sf)
   10322 						   _mm512_setzero_ps (),
   10323 						   (__mmask16) __U);
   10324 }
   10325 
   10326 #ifdef __OPTIMIZE__
   10327 extern __inline __m128d
   10328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10329 _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
   10330 {
   10331   return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
   10332 					       (__v2df) __B,
   10333 					       __R);
   10334 }
   10335 
   10336 extern __inline __m128
   10337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10338 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
   10339 {
   10340   return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
   10341 					      (__v4sf) __B,
   10342 					      __R);
   10343 }
   10344 
   10345 extern __inline __m128d
   10346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10347 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
   10348 {
   10349   return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
   10350 					       (__v2df) __B,
   10351 					       __R);
   10352 }
   10353 
   10354 extern __inline __m128
   10355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10356 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
   10357 {
   10358   return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
   10359 					      (__v4sf) __B,
   10360 					      __R);
   10361 }
   10362 
   10363 #else
   10364 #define _mm_max_round_sd(A, B, C)            \
   10365     (__m128d)__builtin_ia32_addsd_round(A, B, C)
   10366 
   10367 #define _mm_max_round_ss(A, B, C)            \
   10368     (__m128)__builtin_ia32_addss_round(A, B, C)
   10369 
   10370 #define _mm_min_round_sd(A, B, C)            \
   10371     (__m128d)__builtin_ia32_subsd_round(A, B, C)
   10372 
   10373 #define _mm_min_round_ss(A, B, C)            \
   10374     (__m128)__builtin_ia32_subss_round(A, B, C)
   10375 #endif
   10376 
   10377 extern __inline __m512d
   10378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10379 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
   10380 {
   10381   return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
   10382 						     (__v8df) __W,
   10383 						     (__mmask8) __U);
   10384 }
   10385 
   10386 extern __inline __m512
   10387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10388 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
   10389 {
   10390   return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
   10391 						    (__v16sf) __W,
   10392 						    (__mmask16) __U);
   10393 }
   10394 
   10395 extern __inline __m512i
   10396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10397 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
   10398 {
   10399   return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
   10400 						    (__v8di) __W,
   10401 						    (__mmask8) __U);
   10402 }
   10403 
   10404 extern __inline __m512i
   10405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10406 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
   10407 {
   10408   return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
   10409 						    (__v16si) __W,
   10410 						    (__mmask16) __U);
   10411 }
   10412 
   10413 #ifdef __OPTIMIZE__
   10414 extern __inline __m128d
   10415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10416 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
   10417 {
   10418   return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
   10419 						   (__v2df) __A,
   10420 						   (__v2df) __B,
   10421 						   __R);
   10422 }
   10423 
   10424 extern __inline __m128
   10425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10426 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
   10427 {
   10428   return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
   10429 						  (__v4sf) __A,
   10430 						  (__v4sf) __B,
   10431 						  __R);
   10432 }
   10433 
   10434 extern __inline __m128d
   10435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10436 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
   10437 {
   10438   return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
   10439 						   (__v2df) __A,
   10440 						   -(__v2df) __B,
   10441 						   __R);
   10442 }
   10443 
   10444 extern __inline __m128
   10445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10446 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
   10447 {
   10448   return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
   10449 						  (__v4sf) __A,
   10450 						  -(__v4sf) __B,
   10451 						  __R);
   10452 }
   10453 
   10454 extern __inline __m128d
   10455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10456 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
   10457 {
   10458   return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
   10459 						   -(__v2df) __A,
   10460 						   (__v2df) __B,
   10461 						   __R);
   10462 }
   10463 
   10464 extern __inline __m128
   10465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10466 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
   10467 {
   10468   return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
   10469 						  -(__v4sf) __A,
   10470 						  (__v4sf) __B,
   10471 						  __R);
   10472 }
   10473 
   10474 extern __inline __m128d
   10475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10476 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
   10477 {
   10478   return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
   10479 						   -(__v2df) __A,
   10480 						   -(__v2df) __B,
   10481 						   __R);
   10482 }
   10483 
   10484 extern __inline __m128
   10485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10486 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
   10487 {
   10488   return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
   10489 						  -(__v4sf) __A,
   10490 						  -(__v4sf) __B,
   10491 						  __R);
   10492 }
   10493 #else
   10494 #define _mm_fmadd_round_sd(A, B, C, R)            \
   10495     (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R)
   10496 
   10497 #define _mm_fmadd_round_ss(A, B, C, R)            \
   10498     (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R)
   10499 
   10500 #define _mm_fmsub_round_sd(A, B, C, R)            \
   10501     (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R)
   10502 
   10503 #define _mm_fmsub_round_ss(A, B, C, R)            \
   10504     (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R)
   10505 
   10506 #define _mm_fnmadd_round_sd(A, B, C, R)            \
   10507     (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R)
   10508 
   10509 #define _mm_fnmadd_round_ss(A, B, C, R)            \
   10510    (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R)
   10511 
   10512 #define _mm_fnmsub_round_sd(A, B, C, R)            \
   10513     (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R)
   10514 
   10515 #define _mm_fnmsub_round_ss(A, B, C, R)            \
   10516     (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
   10517 #endif
   10518 
   10519 #ifdef __OPTIMIZE__
   10520 extern __inline int
   10521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10522 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
   10523 {
   10524   return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
   10525 }
   10526 
   10527 extern __inline int
   10528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10529 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
   10530 {
   10531   return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
   10532 }
   10533 #else
   10534 #define _mm_comi_round_ss(A, B, C, D)\
   10535 __builtin_ia32_vcomiss(A, B, C, D)
   10536 #define _mm_comi_round_sd(A, B, C, D)\
   10537 __builtin_ia32_vcomisd(A, B, C, D)
   10538 #endif
   10539 
   10540 extern __inline __m512d
   10541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10542 _mm512_sqrt_pd (__m512d __A)
   10543 {
   10544   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
   10545 						  (__v8df)
   10546 						  _mm512_undefined_pd (),
   10547 						  (__mmask8) -1,
   10548 						  _MM_FROUND_CUR_DIRECTION);
   10549 }
   10550 
   10551 extern __inline __m512d
   10552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10553 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
   10554 {
   10555   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
   10556 						  (__v8df) __W,
   10557 						  (__mmask8) __U,
   10558 						  _MM_FROUND_CUR_DIRECTION);
   10559 }
   10560 
   10561 extern __inline __m512d
   10562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10563 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
   10564 {
   10565   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
   10566 						  (__v8df)
   10567 						  _mm512_setzero_pd (),
   10568 						  (__mmask8) __U,
   10569 						  _MM_FROUND_CUR_DIRECTION);
   10570 }
   10571 
   10572 extern __inline __m512
   10573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10574 _mm512_sqrt_ps (__m512 __A)
   10575 {
   10576   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
   10577 						 (__v16sf)
   10578 						 _mm512_undefined_ps (),
   10579 						 (__mmask16) -1,
   10580 						 _MM_FROUND_CUR_DIRECTION);
   10581 }
   10582 
   10583 extern __inline __m512
   10584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10585 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
   10586 {
   10587   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
   10588 						 (__v16sf) __W,
   10589 						 (__mmask16) __U,
   10590 						 _MM_FROUND_CUR_DIRECTION);
   10591 }
   10592 
   10593 extern __inline __m512
   10594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10595 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
   10596 {
   10597   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
   10598 						 (__v16sf)
   10599 						 _mm512_setzero_ps (),
   10600 						 (__mmask16) __U,
   10601 						 _MM_FROUND_CUR_DIRECTION);
   10602 }
   10603 
   10604 extern __inline __m512d
   10605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10606 _mm512_add_pd (__m512d __A, __m512d __B)
   10607 {
   10608   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
   10609 						 (__v8df) __B,
   10610 						 (__v8df)
   10611 						 _mm512_undefined_pd (),
   10612 						 (__mmask8) -1,
   10613 						 _MM_FROUND_CUR_DIRECTION);
   10614 }
   10615 
   10616 extern __inline __m512d
   10617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10618 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   10619 {
   10620   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
   10621 						 (__v8df) __B,
   10622 						 (__v8df) __W,
   10623 						 (__mmask8) __U,
   10624 						 _MM_FROUND_CUR_DIRECTION);
   10625 }
   10626 
   10627 extern __inline __m512d
   10628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10629 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
   10630 {
   10631   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
   10632 						 (__v8df) __B,
   10633 						 (__v8df)
   10634 						 _mm512_setzero_pd (),
   10635 						 (__mmask8) __U,
   10636 						 _MM_FROUND_CUR_DIRECTION);
   10637 }
   10638 
   10639 extern __inline __m512
   10640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10641 _mm512_add_ps (__m512 __A, __m512 __B)
   10642 {
   10643   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
   10644 						(__v16sf) __B,
   10645 						(__v16sf)
   10646 						_mm512_undefined_ps (),
   10647 						(__mmask16) -1,
   10648 						_MM_FROUND_CUR_DIRECTION);
   10649 }
   10650 
   10651 extern __inline __m512
   10652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10653 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   10654 {
   10655   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
   10656 						(__v16sf) __B,
   10657 						(__v16sf) __W,
   10658 						(__mmask16) __U,
   10659 						_MM_FROUND_CUR_DIRECTION);
   10660 }
   10661 
   10662 extern __inline __m512
   10663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10664 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
   10665 {
   10666   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
   10667 						(__v16sf) __B,
   10668 						(__v16sf)
   10669 						_mm512_setzero_ps (),
   10670 						(__mmask16) __U,
   10671 						_MM_FROUND_CUR_DIRECTION);
   10672 }
   10673 
   10674 extern __inline __m512d
   10675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10676 _mm512_sub_pd (__m512d __A, __m512d __B)
   10677 {
   10678   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
   10679 						 (__v8df) __B,
   10680 						 (__v8df)
   10681 						 _mm512_undefined_pd (),
   10682 						 (__mmask8) -1,
   10683 						 _MM_FROUND_CUR_DIRECTION);
   10684 }
   10685 
   10686 extern __inline __m512d
   10687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10688 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   10689 {
   10690   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
   10691 						 (__v8df) __B,
   10692 						 (__v8df) __W,
   10693 						 (__mmask8) __U,
   10694 						 _MM_FROUND_CUR_DIRECTION);
   10695 }
   10696 
   10697 extern __inline __m512d
   10698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10699 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
   10700 {
   10701   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
   10702 						 (__v8df) __B,
   10703 						 (__v8df)
   10704 						 _mm512_setzero_pd (),
   10705 						 (__mmask8) __U,
   10706 						 _MM_FROUND_CUR_DIRECTION);
   10707 }
   10708 
   10709 extern __inline __m512
   10710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10711 _mm512_sub_ps (__m512 __A, __m512 __B)
   10712 {
   10713   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
   10714 						(__v16sf) __B,
   10715 						(__v16sf)
   10716 						_mm512_undefined_ps (),
   10717 						(__mmask16) -1,
   10718 						_MM_FROUND_CUR_DIRECTION);
   10719 }
   10720 
   10721 extern __inline __m512
   10722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10723 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   10724 {
   10725   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
   10726 						(__v16sf) __B,
   10727 						(__v16sf) __W,
   10728 						(__mmask16) __U,
   10729 						_MM_FROUND_CUR_DIRECTION);
   10730 }
   10731 
   10732 extern __inline __m512
   10733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10734 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
   10735 {
   10736   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
   10737 						(__v16sf) __B,
   10738 						(__v16sf)
   10739 						_mm512_setzero_ps (),
   10740 						(__mmask16) __U,
   10741 						_MM_FROUND_CUR_DIRECTION);
   10742 }
   10743 
   10744 extern __inline __m512d
   10745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10746 _mm512_mul_pd (__m512d __A, __m512d __B)
   10747 {
   10748   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
   10749 						 (__v8df) __B,
   10750 						 (__v8df)
   10751 						 _mm512_undefined_pd (),
   10752 						 (__mmask8) -1,
   10753 						 _MM_FROUND_CUR_DIRECTION);
   10754 }
   10755 
   10756 extern __inline __m512d
   10757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10758 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   10759 {
   10760   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
   10761 						 (__v8df) __B,
   10762 						 (__v8df) __W,
   10763 						 (__mmask8) __U,
   10764 						 _MM_FROUND_CUR_DIRECTION);
   10765 }
   10766 
   10767 extern __inline __m512d
   10768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10769 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
   10770 {
   10771   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
   10772 						 (__v8df) __B,
   10773 						 (__v8df)
   10774 						 _mm512_setzero_pd (),
   10775 						 (__mmask8) __U,
   10776 						 _MM_FROUND_CUR_DIRECTION);
   10777 }
   10778 
   10779 extern __inline __m512
   10780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10781 _mm512_mul_ps (__m512 __A, __m512 __B)
   10782 {
   10783   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
   10784 						(__v16sf) __B,
   10785 						(__v16sf)
   10786 						_mm512_undefined_ps (),
   10787 						(__mmask16) -1,
   10788 						_MM_FROUND_CUR_DIRECTION);
   10789 }
   10790 
   10791 extern __inline __m512
   10792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10793 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   10794 {
   10795   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
   10796 						(__v16sf) __B,
   10797 						(__v16sf) __W,
   10798 						(__mmask16) __U,
   10799 						_MM_FROUND_CUR_DIRECTION);
   10800 }
   10801 
   10802 extern __inline __m512
   10803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10804 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
   10805 {
   10806   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
   10807 						(__v16sf) __B,
   10808 						(__v16sf)
   10809 						_mm512_setzero_ps (),
   10810 						(__mmask16) __U,
   10811 						_MM_FROUND_CUR_DIRECTION);
   10812 }
   10813 
   10814 extern __inline __m512d
   10815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10816 _mm512_div_pd (__m512d __M, __m512d __V)
   10817 {
   10818   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
   10819 						 (__v8df) __V,
   10820 						 (__v8df)
   10821 						 _mm512_undefined_pd (),
   10822 						 (__mmask8) -1,
   10823 						 _MM_FROUND_CUR_DIRECTION);
   10824 }
   10825 
   10826 extern __inline __m512d
   10827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10828 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
   10829 {
   10830   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
   10831 						 (__v8df) __V,
   10832 						 (__v8df) __W,
   10833 						 (__mmask8) __U,
   10834 						 _MM_FROUND_CUR_DIRECTION);
   10835 }
   10836 
   10837 extern __inline __m512d
   10838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10839 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
   10840 {
   10841   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
   10842 						 (__v8df) __V,
   10843 						 (__v8df)
   10844 						 _mm512_setzero_pd (),
   10845 						 (__mmask8) __U,
   10846 						 _MM_FROUND_CUR_DIRECTION);
   10847 }
   10848 
   10849 extern __inline __m512
   10850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10851 _mm512_div_ps (__m512 __A, __m512 __B)
   10852 {
   10853   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
   10854 						(__v16sf) __B,
   10855 						(__v16sf)
   10856 						_mm512_undefined_ps (),
   10857 						(__mmask16) -1,
   10858 						_MM_FROUND_CUR_DIRECTION);
   10859 }
   10860 
   10861 extern __inline __m512
   10862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10863 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   10864 {
   10865   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
   10866 						(__v16sf) __B,
   10867 						(__v16sf) __W,
   10868 						(__mmask16) __U,
   10869 						_MM_FROUND_CUR_DIRECTION);
   10870 }
   10871 
   10872 extern __inline __m512
   10873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10874 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
   10875 {
   10876   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
   10877 						(__v16sf) __B,
   10878 						(__v16sf)
   10879 						_mm512_setzero_ps (),
   10880 						(__mmask16) __U,
   10881 						_MM_FROUND_CUR_DIRECTION);
   10882 }
   10883 
   10884 extern __inline __m512d
   10885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10886 _mm512_max_pd (__m512d __A, __m512d __B)
   10887 {
   10888   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
   10889 						 (__v8df) __B,
   10890 						 (__v8df)
   10891 						 _mm512_undefined_pd (),
   10892 						 (__mmask8) -1,
   10893 						 _MM_FROUND_CUR_DIRECTION);
   10894 }
   10895 
   10896 extern __inline __m512d
   10897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10898 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   10899 {
   10900   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
   10901 						 (__v8df) __B,
   10902 						 (__v8df) __W,
   10903 						 (__mmask8) __U,
   10904 						 _MM_FROUND_CUR_DIRECTION);
   10905 }
   10906 
   10907 extern __inline __m512d
   10908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10909 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
   10910 {
   10911   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
   10912 						 (__v8df) __B,
   10913 						 (__v8df)
   10914 						 _mm512_setzero_pd (),
   10915 						 (__mmask8) __U,
   10916 						 _MM_FROUND_CUR_DIRECTION);
   10917 }
   10918 
   10919 extern __inline __m512
   10920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10921 _mm512_max_ps (__m512 __A, __m512 __B)
   10922 {
   10923   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
   10924 						(__v16sf) __B,
   10925 						(__v16sf)
   10926 						_mm512_undefined_ps (),
   10927 						(__mmask16) -1,
   10928 						_MM_FROUND_CUR_DIRECTION);
   10929 }
   10930 
   10931 extern __inline __m512
   10932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10933 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   10934 {
   10935   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
   10936 						(__v16sf) __B,
   10937 						(__v16sf) __W,
   10938 						(__mmask16) __U,
   10939 						_MM_FROUND_CUR_DIRECTION);
   10940 }
   10941 
   10942 extern __inline __m512
   10943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10944 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
   10945 {
   10946   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
   10947 						(__v16sf) __B,
   10948 						(__v16sf)
   10949 						_mm512_setzero_ps (),
   10950 						(__mmask16) __U,
   10951 						_MM_FROUND_CUR_DIRECTION);
   10952 }
   10953 
   10954 extern __inline __m512d
   10955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10956 _mm512_min_pd (__m512d __A, __m512d __B)
   10957 {
   10958   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
   10959 						 (__v8df) __B,
   10960 						 (__v8df)
   10961 						 _mm512_undefined_pd (),
   10962 						 (__mmask8) -1,
   10963 						 _MM_FROUND_CUR_DIRECTION);
   10964 }
   10965 
   10966 extern __inline __m512d
   10967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10968 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   10969 {
   10970   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
   10971 						 (__v8df) __B,
   10972 						 (__v8df) __W,
   10973 						 (__mmask8) __U,
   10974 						 _MM_FROUND_CUR_DIRECTION);
   10975 }
   10976 
   10977 extern __inline __m512d
   10978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10979 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
   10980 {
   10981   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
   10982 						 (__v8df) __B,
   10983 						 (__v8df)
   10984 						 _mm512_setzero_pd (),
   10985 						 (__mmask8) __U,
   10986 						 _MM_FROUND_CUR_DIRECTION);
   10987 }
   10988 
   10989 extern __inline __m512
   10990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   10991 _mm512_min_ps (__m512 __A, __m512 __B)
   10992 {
   10993   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
   10994 						(__v16sf) __B,
   10995 						(__v16sf)
   10996 						_mm512_undefined_ps (),
   10997 						(__mmask16) -1,
   10998 						_MM_FROUND_CUR_DIRECTION);
   10999 }
   11000 
   11001 extern __inline __m512
   11002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11003 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   11004 {
   11005   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
   11006 						(__v16sf) __B,
   11007 						(__v16sf) __W,
   11008 						(__mmask16) __U,
   11009 						_MM_FROUND_CUR_DIRECTION);
   11010 }
   11011 
   11012 extern __inline __m512
   11013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11014 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
   11015 {
   11016   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
   11017 						(__v16sf) __B,
   11018 						(__v16sf)
   11019 						_mm512_setzero_ps (),
   11020 						(__mmask16) __U,
   11021 						_MM_FROUND_CUR_DIRECTION);
   11022 }
   11023 
   11024 extern __inline __m512d
   11025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11026 _mm512_scalef_pd (__m512d __A, __m512d __B)
   11027 {
   11028   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
   11029 						    (__v8df) __B,
   11030 						    (__v8df)
   11031 						    _mm512_undefined_pd (),
   11032 						    (__mmask8) -1,
   11033 						    _MM_FROUND_CUR_DIRECTION);
   11034 }
   11035 
   11036 extern __inline __m512d
   11037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11038 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
   11039 {
   11040   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
   11041 						    (__v8df) __B,
   11042 						    (__v8df) __W,
   11043 						    (__mmask8) __U,
   11044 						    _MM_FROUND_CUR_DIRECTION);
   11045 }
   11046 
   11047 extern __inline __m512d
   11048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11049 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
   11050 {
   11051   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
   11052 						    (__v8df) __B,
   11053 						    (__v8df)
   11054 						    _mm512_setzero_pd (),
   11055 						    (__mmask8) __U,
   11056 						    _MM_FROUND_CUR_DIRECTION);
   11057 }
   11058 
   11059 extern __inline __m512
   11060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11061 _mm512_scalef_ps (__m512 __A, __m512 __B)
   11062 {
   11063   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
   11064 						   (__v16sf) __B,
   11065 						   (__v16sf)
   11066 						   _mm512_undefined_ps (),
   11067 						   (__mmask16) -1,
   11068 						   _MM_FROUND_CUR_DIRECTION);
   11069 }
   11070 
   11071 extern __inline __m512
   11072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11073 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
   11074 {
   11075   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
   11076 						   (__v16sf) __B,
   11077 						   (__v16sf) __W,
   11078 						   (__mmask16) __U,
   11079 						   _MM_FROUND_CUR_DIRECTION);
   11080 }
   11081 
   11082 extern __inline __m512
   11083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11084 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
   11085 {
   11086   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
   11087 						   (__v16sf) __B,
   11088 						   (__v16sf)
   11089 						   _mm512_setzero_ps (),
   11090 						   (__mmask16) __U,
   11091 						   _MM_FROUND_CUR_DIRECTION);
   11092 }
   11093 
   11094 extern __inline __m128d
   11095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11096 _mm_scalef_sd (__m128d __A, __m128d __B)
   11097 {
   11098   return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
   11099 						  (__v2df) __B,
   11100 						  _MM_FROUND_CUR_DIRECTION);
   11101 }
   11102 
   11103 extern __inline __m128
   11104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11105 _mm_scalef_ss (__m128 __A, __m128 __B)
   11106 {
   11107   return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
   11108 						 (__v4sf) __B,
   11109 						 _MM_FROUND_CUR_DIRECTION);
   11110 }
   11111 
   11112 extern __inline __m512d
   11113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11114 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
   11115 {
   11116   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   11117 						    (__v8df) __B,
   11118 						    (__v8df) __C,
   11119 						    (__mmask8) -1,
   11120 						    _MM_FROUND_CUR_DIRECTION);
   11121 }
   11122 
   11123 extern __inline __m512d
   11124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11125 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   11126 {
   11127   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   11128 						    (__v8df) __B,
   11129 						    (__v8df) __C,
   11130 						    (__mmask8) __U,
   11131 						    _MM_FROUND_CUR_DIRECTION);
   11132 }
   11133 
   11134 extern __inline __m512d
   11135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11136 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   11137 {
   11138   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
   11139 						     (__v8df) __B,
   11140 						     (__v8df) __C,
   11141 						     (__mmask8) __U,
   11142 						     _MM_FROUND_CUR_DIRECTION);
   11143 }
   11144 
   11145 extern __inline __m512d
   11146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11147 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   11148 {
   11149   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
   11150 						     (__v8df) __B,
   11151 						     (__v8df) __C,
   11152 						     (__mmask8) __U,
   11153 						     _MM_FROUND_CUR_DIRECTION);
   11154 }
   11155 
   11156 extern __inline __m512
   11157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11158 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
   11159 {
   11160   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   11161 						   (__v16sf) __B,
   11162 						   (__v16sf) __C,
   11163 						   (__mmask16) -1,
   11164 						   _MM_FROUND_CUR_DIRECTION);
   11165 }
   11166 
   11167 extern __inline __m512
   11168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11169 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   11170 {
   11171   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   11172 						   (__v16sf) __B,
   11173 						   (__v16sf) __C,
   11174 						   (__mmask16) __U,
   11175 						   _MM_FROUND_CUR_DIRECTION);
   11176 }
   11177 
   11178 extern __inline __m512
   11179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11180 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   11181 {
   11182   return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
   11183 						    (__v16sf) __B,
   11184 						    (__v16sf) __C,
   11185 						    (__mmask16) __U,
   11186 						    _MM_FROUND_CUR_DIRECTION);
   11187 }
   11188 
   11189 extern __inline __m512
   11190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11191 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   11192 {
   11193   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
   11194 						    (__v16sf) __B,
   11195 						    (__v16sf) __C,
   11196 						    (__mmask16) __U,
   11197 						    _MM_FROUND_CUR_DIRECTION);
   11198 }
   11199 
   11200 extern __inline __m512d
   11201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11202 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
   11203 {
   11204   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   11205 						    (__v8df) __B,
   11206 						    -(__v8df) __C,
   11207 						    (__mmask8) -1,
   11208 						    _MM_FROUND_CUR_DIRECTION);
   11209 }
   11210 
   11211 extern __inline __m512d
   11212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11213 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   11214 {
   11215   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
   11216 						    (__v8df) __B,
   11217 						    -(__v8df) __C,
   11218 						    (__mmask8) __U,
   11219 						    _MM_FROUND_CUR_DIRECTION);
   11220 }
   11221 
   11222 extern __inline __m512d
   11223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11224 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   11225 {
   11226   return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
   11227 						     (__v8df) __B,
   11228 						     (__v8df) __C,
   11229 						     (__mmask8) __U,
   11230 						     _MM_FROUND_CUR_DIRECTION);
   11231 }
   11232 
   11233 extern __inline __m512d
   11234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11235 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   11236 {
   11237   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
   11238 						     (__v8df) __B,
   11239 						     -(__v8df) __C,
   11240 						     (__mmask8) __U,
   11241 						     _MM_FROUND_CUR_DIRECTION);
   11242 }
   11243 
   11244 extern __inline __m512
   11245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11246 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
   11247 {
   11248   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   11249 						   (__v16sf) __B,
   11250 						   -(__v16sf) __C,
   11251 						   (__mmask16) -1,
   11252 						   _MM_FROUND_CUR_DIRECTION);
   11253 }
   11254 
   11255 extern __inline __m512
   11256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11257 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   11258 {
   11259   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
   11260 						   (__v16sf) __B,
   11261 						   -(__v16sf) __C,
   11262 						   (__mmask16) __U,
   11263 						   _MM_FROUND_CUR_DIRECTION);
   11264 }
   11265 
   11266 extern __inline __m512
   11267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11268 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   11269 {
   11270   return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
   11271 						    (__v16sf) __B,
   11272 						    (__v16sf) __C,
   11273 						    (__mmask16) __U,
   11274 						    _MM_FROUND_CUR_DIRECTION);
   11275 }
   11276 
   11277 extern __inline __m512
   11278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11279 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   11280 {
   11281   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
   11282 						    (__v16sf) __B,
   11283 						    -(__v16sf) __C,
   11284 						    (__mmask16) __U,
   11285 						    _MM_FROUND_CUR_DIRECTION);
   11286 }
   11287 
   11288 extern __inline __m512d
   11289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11290 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
   11291 {
   11292   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   11293 						       (__v8df) __B,
   11294 						       (__v8df) __C,
   11295 						       (__mmask8) -1,
   11296 						       _MM_FROUND_CUR_DIRECTION);
   11297 }
   11298 
   11299 extern __inline __m512d
   11300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11301 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   11302 {
   11303   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   11304 						       (__v8df) __B,
   11305 						       (__v8df) __C,
   11306 						       (__mmask8) __U,
   11307 						       _MM_FROUND_CUR_DIRECTION);
   11308 }
   11309 
   11310 extern __inline __m512d
   11311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11312 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   11313 {
   11314   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
   11315 							(__v8df) __B,
   11316 							(__v8df) __C,
   11317 							(__mmask8) __U,
   11318 							_MM_FROUND_CUR_DIRECTION);
   11319 }
   11320 
   11321 extern __inline __m512d
   11322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11323 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   11324 {
   11325   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
   11326 							(__v8df) __B,
   11327 							(__v8df) __C,
   11328 							(__mmask8) __U,
   11329 							_MM_FROUND_CUR_DIRECTION);
   11330 }
   11331 
   11332 extern __inline __m512
   11333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11334 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
   11335 {
   11336   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   11337 						      (__v16sf) __B,
   11338 						      (__v16sf) __C,
   11339 						      (__mmask16) -1,
   11340 						      _MM_FROUND_CUR_DIRECTION);
   11341 }
   11342 
   11343 extern __inline __m512
   11344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11345 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   11346 {
   11347   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   11348 						      (__v16sf) __B,
   11349 						      (__v16sf) __C,
   11350 						      (__mmask16) __U,
   11351 						      _MM_FROUND_CUR_DIRECTION);
   11352 }
   11353 
   11354 extern __inline __m512
   11355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11356 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   11357 {
   11358   return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
   11359 						       (__v16sf) __B,
   11360 						       (__v16sf) __C,
   11361 						       (__mmask16) __U,
   11362 						       _MM_FROUND_CUR_DIRECTION);
   11363 }
   11364 
   11365 extern __inline __m512
   11366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11367 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   11368 {
   11369   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
   11370 						       (__v16sf) __B,
   11371 						       (__v16sf) __C,
   11372 						       (__mmask16) __U,
   11373 						       _MM_FROUND_CUR_DIRECTION);
   11374 }
   11375 
   11376 extern __inline __m512d
   11377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11378 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
   11379 {
   11380   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   11381 						       (__v8df) __B,
   11382 						       -(__v8df) __C,
   11383 						       (__mmask8) -1,
   11384 						       _MM_FROUND_CUR_DIRECTION);
   11385 }
   11386 
   11387 extern __inline __m512d
   11388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11389 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   11390 {
   11391   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
   11392 						       (__v8df) __B,
   11393 						       -(__v8df) __C,
   11394 						       (__mmask8) __U,
   11395 						       _MM_FROUND_CUR_DIRECTION);
   11396 }
   11397 
   11398 extern __inline __m512d
   11399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11400 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   11401 {
   11402   return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
   11403 							(__v8df) __B,
   11404 							(__v8df) __C,
   11405 							(__mmask8) __U,
   11406 							_MM_FROUND_CUR_DIRECTION);
   11407 }
   11408 
   11409 extern __inline __m512d
   11410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11411 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   11412 {
   11413   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
   11414 							(__v8df) __B,
   11415 							-(__v8df) __C,
   11416 							(__mmask8) __U,
   11417 							_MM_FROUND_CUR_DIRECTION);
   11418 }
   11419 
   11420 extern __inline __m512
   11421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11422 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
   11423 {
   11424   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   11425 						      (__v16sf) __B,
   11426 						      -(__v16sf) __C,
   11427 						      (__mmask16) -1,
   11428 						      _MM_FROUND_CUR_DIRECTION);
   11429 }
   11430 
   11431 extern __inline __m512
   11432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11433 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   11434 {
   11435   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
   11436 						      (__v16sf) __B,
   11437 						      -(__v16sf) __C,
   11438 						      (__mmask16) __U,
   11439 						      _MM_FROUND_CUR_DIRECTION);
   11440 }
   11441 
   11442 extern __inline __m512
   11443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11444 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   11445 {
   11446   return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
   11447 						       (__v16sf) __B,
   11448 						       (__v16sf) __C,
   11449 						       (__mmask16) __U,
   11450 						       _MM_FROUND_CUR_DIRECTION);
   11451 }
   11452 
   11453 extern __inline __m512
   11454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11455 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   11456 {
   11457   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
   11458 						       (__v16sf) __B,
   11459 						       -(__v16sf) __C,
   11460 						       (__mmask16) __U,
   11461 						       _MM_FROUND_CUR_DIRECTION);
   11462 }
   11463 
   11464 extern __inline __m512d
   11465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11466 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
   11467 {
   11468   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
   11469 						    (__v8df) __B,
   11470 						    (__v8df) __C,
   11471 						    (__mmask8) -1,
   11472 						    _MM_FROUND_CUR_DIRECTION);
   11473 }
   11474 
   11475 extern __inline __m512d
   11476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11477 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   11478 {
   11479   return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
   11480 						     (__v8df) __B,
   11481 						     (__v8df) __C,
   11482 						     (__mmask8) __U,
   11483 						     _MM_FROUND_CUR_DIRECTION);
   11484 }
   11485 
   11486 extern __inline __m512d
   11487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11488 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   11489 {
   11490   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
   11491 						     (__v8df) __B,
   11492 						     (__v8df) __C,
   11493 						     (__mmask8) __U,
   11494 						     _MM_FROUND_CUR_DIRECTION);
   11495 }
   11496 
   11497 extern __inline __m512d
   11498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11499 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   11500 {
   11501   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
   11502 						     (__v8df) __B,
   11503 						     (__v8df) __C,
   11504 						     (__mmask8) __U,
   11505 						     _MM_FROUND_CUR_DIRECTION);
   11506 }
   11507 
   11508 extern __inline __m512
   11509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11510 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
   11511 {
   11512   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
   11513 						   (__v16sf) __B,
   11514 						   (__v16sf) __C,
   11515 						   (__mmask16) -1,
   11516 						   _MM_FROUND_CUR_DIRECTION);
   11517 }
   11518 
   11519 extern __inline __m512
   11520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11521 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   11522 {
   11523   return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
   11524 						    (__v16sf) __B,
   11525 						    (__v16sf) __C,
   11526 						    (__mmask16) __U,
   11527 						    _MM_FROUND_CUR_DIRECTION);
   11528 }
   11529 
   11530 extern __inline __m512
   11531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11532 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   11533 {
   11534   return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
   11535 						    (__v16sf) __B,
   11536 						    (__v16sf) __C,
   11537 						    (__mmask16) __U,
   11538 						    _MM_FROUND_CUR_DIRECTION);
   11539 }
   11540 
   11541 extern __inline __m512
   11542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11543 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   11544 {
   11545   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
   11546 						    (__v16sf) __B,
   11547 						    (__v16sf) __C,
   11548 						    (__mmask16) __U,
   11549 						    _MM_FROUND_CUR_DIRECTION);
   11550 }
   11551 
   11552 extern __inline __m512d
   11553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11554 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
   11555 {
   11556   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
   11557 						    (__v8df) __B,
   11558 						    -(__v8df) __C,
   11559 						    (__mmask8) -1,
   11560 						    _MM_FROUND_CUR_DIRECTION);
   11561 }
   11562 
   11563 extern __inline __m512d
   11564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11565 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
   11566 {
   11567   return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
   11568 						     (__v8df) __B,
   11569 						     (__v8df) __C,
   11570 						     (__mmask8) __U,
   11571 						     _MM_FROUND_CUR_DIRECTION);
   11572 }
   11573 
   11574 extern __inline __m512d
   11575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11576 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
   11577 {
   11578   return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
   11579 						      (__v8df) __B,
   11580 						      (__v8df) __C,
   11581 						      (__mmask8) __U,
   11582 						      _MM_FROUND_CUR_DIRECTION);
   11583 }
   11584 
   11585 extern __inline __m512d
   11586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11587 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
   11588 {
   11589   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
   11590 						     (__v8df) __B,
   11591 						     -(__v8df) __C,
   11592 						     (__mmask8) __U,
   11593 						     _MM_FROUND_CUR_DIRECTION);
   11594 }
   11595 
   11596 extern __inline __m512
   11597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11598 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
   11599 {
   11600   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
   11601 						   (__v16sf) __B,
   11602 						   -(__v16sf) __C,
   11603 						   (__mmask16) -1,
   11604 						   _MM_FROUND_CUR_DIRECTION);
   11605 }
   11606 
   11607 extern __inline __m512
   11608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11609 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
   11610 {
   11611   return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
   11612 						    (__v16sf) __B,
   11613 						    (__v16sf) __C,
   11614 						    (__mmask16) __U,
   11615 						    _MM_FROUND_CUR_DIRECTION);
   11616 }
   11617 
   11618 extern __inline __m512
   11619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11620 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
   11621 {
   11622   return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
   11623 						     (__v16sf) __B,
   11624 						     (__v16sf) __C,
   11625 						     (__mmask16) __U,
   11626 						     _MM_FROUND_CUR_DIRECTION);
   11627 }
   11628 
   11629 extern __inline __m512
   11630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11631 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
   11632 {
   11633   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
   11634 						    (__v16sf) __B,
   11635 						    -(__v16sf) __C,
   11636 						    (__mmask16) __U,
   11637 						    _MM_FROUND_CUR_DIRECTION);
   11638 }
   11639 
   11640 extern __inline __m256i
   11641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11642 _mm512_cvttpd_epi32 (__m512d __A)
   11643 {
   11644   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
   11645 						     (__v8si)
   11646 						     _mm256_undefined_si256 (),
   11647 						     (__mmask8) -1,
   11648 						     _MM_FROUND_CUR_DIRECTION);
   11649 }
   11650 
   11651 extern __inline __m256i
   11652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11653 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
   11654 {
   11655   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
   11656 						     (__v8si) __W,
   11657 						     (__mmask8) __U,
   11658 						     _MM_FROUND_CUR_DIRECTION);
   11659 }
   11660 
   11661 extern __inline __m256i
   11662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11663 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
   11664 {
   11665   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
   11666 						     (__v8si)
   11667 						     _mm256_setzero_si256 (),
   11668 						     (__mmask8) __U,
   11669 						     _MM_FROUND_CUR_DIRECTION);
   11670 }
   11671 
   11672 extern __inline __m256i
   11673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11674 _mm512_cvttpd_epu32 (__m512d __A)
   11675 {
   11676   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
   11677 						      (__v8si)
   11678 						      _mm256_undefined_si256 (),
   11679 						      (__mmask8) -1,
   11680 						      _MM_FROUND_CUR_DIRECTION);
   11681 }
   11682 
   11683 extern __inline __m256i
   11684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11685 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
   11686 {
   11687   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
   11688 						      (__v8si) __W,
   11689 						      (__mmask8) __U,
   11690 						      _MM_FROUND_CUR_DIRECTION);
   11691 }
   11692 
   11693 extern __inline __m256i
   11694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11695 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
   11696 {
   11697   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
   11698 						      (__v8si)
   11699 						      _mm256_setzero_si256 (),
   11700 						      (__mmask8) __U,
   11701 						      _MM_FROUND_CUR_DIRECTION);
   11702 }
   11703 
   11704 extern __inline __m256i
   11705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11706 _mm512_cvtpd_epi32 (__m512d __A)
   11707 {
   11708   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
   11709 						    (__v8si)
   11710 						    _mm256_undefined_si256 (),
   11711 						    (__mmask8) -1,
   11712 						    _MM_FROUND_CUR_DIRECTION);
   11713 }
   11714 
   11715 extern __inline __m256i
   11716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11717 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
   11718 {
   11719   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
   11720 						    (__v8si) __W,
   11721 						    (__mmask8) __U,
   11722 						    _MM_FROUND_CUR_DIRECTION);
   11723 }
   11724 
   11725 extern __inline __m256i
   11726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11727 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
   11728 {
   11729   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
   11730 						    (__v8si)
   11731 						    _mm256_setzero_si256 (),
   11732 						    (__mmask8) __U,
   11733 						    _MM_FROUND_CUR_DIRECTION);
   11734 }
   11735 
   11736 extern __inline __m256i
   11737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11738 _mm512_cvtpd_epu32 (__m512d __A)
   11739 {
   11740   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
   11741 						     (__v8si)
   11742 						     _mm256_undefined_si256 (),
   11743 						     (__mmask8) -1,
   11744 						     _MM_FROUND_CUR_DIRECTION);
   11745 }
   11746 
   11747 extern __inline __m256i
   11748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11749 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
   11750 {
   11751   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
   11752 						     (__v8si) __W,
   11753 						     (__mmask8) __U,
   11754 						     _MM_FROUND_CUR_DIRECTION);
   11755 }
   11756 
   11757 extern __inline __m256i
   11758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11759 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
   11760 {
   11761   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
   11762 						     (__v8si)
   11763 						     _mm256_setzero_si256 (),
   11764 						     (__mmask8) __U,
   11765 						     _MM_FROUND_CUR_DIRECTION);
   11766 }
   11767 
   11768 extern __inline __m512i
   11769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11770 _mm512_cvttps_epi32 (__m512 __A)
   11771 {
   11772   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
   11773 						     (__v16si)
   11774 						     _mm512_undefined_si512 (),
   11775 						     (__mmask16) -1,
   11776 						     _MM_FROUND_CUR_DIRECTION);
   11777 }
   11778 
   11779 extern __inline __m512i
   11780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11781 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
   11782 {
   11783   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
   11784 						     (__v16si) __W,
   11785 						     (__mmask16) __U,
   11786 						     _MM_FROUND_CUR_DIRECTION);
   11787 }
   11788 
   11789 extern __inline __m512i
   11790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11791 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
   11792 {
   11793   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
   11794 						     (__v16si)
   11795 						     _mm512_setzero_si512 (),
   11796 						     (__mmask16) __U,
   11797 						     _MM_FROUND_CUR_DIRECTION);
   11798 }
   11799 
   11800 extern __inline __m512i
   11801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11802 _mm512_cvttps_epu32 (__m512 __A)
   11803 {
   11804   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   11805 						      (__v16si)
   11806 						      _mm512_undefined_si512 (),
   11807 						      (__mmask16) -1,
   11808 						      _MM_FROUND_CUR_DIRECTION);
   11809 }
   11810 
   11811 extern __inline __m512i
   11812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11813 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
   11814 {
   11815   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   11816 						      (__v16si) __W,
   11817 						      (__mmask16) __U,
   11818 						      _MM_FROUND_CUR_DIRECTION);
   11819 }
   11820 
   11821 extern __inline __m512i
   11822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11823 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
   11824 {
   11825   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
   11826 						      (__v16si)
   11827 						      _mm512_setzero_si512 (),
   11828 						      (__mmask16) __U,
   11829 						      _MM_FROUND_CUR_DIRECTION);
   11830 }
   11831 
   11832 extern __inline __m512i
   11833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11834 _mm512_cvtps_epi32 (__m512 __A)
   11835 {
   11836   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
   11837 						    (__v16si)
   11838 						    _mm512_undefined_si512 (),
   11839 						    (__mmask16) -1,
   11840 						    _MM_FROUND_CUR_DIRECTION);
   11841 }
   11842 
   11843 extern __inline __m512i
   11844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11845 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
   11846 {
   11847   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
   11848 						    (__v16si) __W,
   11849 						    (__mmask16) __U,
   11850 						    _MM_FROUND_CUR_DIRECTION);
   11851 }
   11852 
   11853 extern __inline __m512i
   11854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11855 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
   11856 {
   11857   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
   11858 						    (__v16si)
   11859 						    _mm512_setzero_si512 (),
   11860 						    (__mmask16) __U,
   11861 						    _MM_FROUND_CUR_DIRECTION);
   11862 }
   11863 
   11864 extern __inline __m512i
   11865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11866 _mm512_cvtps_epu32 (__m512 __A)
   11867 {
   11868   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
   11869 						     (__v16si)
   11870 						     _mm512_undefined_si512 (),
   11871 						     (__mmask16) -1,
   11872 						     _MM_FROUND_CUR_DIRECTION);
   11873 }
   11874 
   11875 extern __inline __m512i
   11876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11877 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
   11878 {
   11879   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
   11880 						     (__v16si) __W,
   11881 						     (__mmask16) __U,
   11882 						     _MM_FROUND_CUR_DIRECTION);
   11883 }
   11884 
   11885 extern __inline __m512i
   11886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11887 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
   11888 {
   11889   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
   11890 						     (__v16si)
   11891 						     _mm512_setzero_si512 (),
   11892 						     (__mmask16) __U,
   11893 						     _MM_FROUND_CUR_DIRECTION);
   11894 }
   11895 
   11896 #ifdef __x86_64__
   11897 extern __inline __m128
   11898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11899 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
   11900 {
   11901   return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
   11902 					      _MM_FROUND_CUR_DIRECTION);
   11903 }
   11904 
   11905 extern __inline __m128d
   11906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11907 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
   11908 {
   11909   return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
   11910 					       _MM_FROUND_CUR_DIRECTION);
   11911 }
   11912 #endif
   11913 
   11914 extern __inline __m128
   11915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11916 _mm_cvtu32_ss (__m128 __A, unsigned __B)
   11917 {
   11918   return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
   11919 					      _MM_FROUND_CUR_DIRECTION);
   11920 }
   11921 
   11922 extern __inline __m512
   11923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11924 _mm512_cvtepi32_ps (__m512i __A)
   11925 {
   11926   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
   11927 						   (__v16sf)
   11928 						   _mm512_undefined_ps (),
   11929 						   (__mmask16) -1,
   11930 						   _MM_FROUND_CUR_DIRECTION);
   11931 }
   11932 
   11933 extern __inline __m512
   11934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11935 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
   11936 {
   11937   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
   11938 						   (__v16sf) __W,
   11939 						   (__mmask16) __U,
   11940 						   _MM_FROUND_CUR_DIRECTION);
   11941 }
   11942 
   11943 extern __inline __m512
   11944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11945 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
   11946 {
   11947   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
   11948 						   (__v16sf)
   11949 						   _mm512_setzero_ps (),
   11950 						   (__mmask16) __U,
   11951 						   _MM_FROUND_CUR_DIRECTION);
   11952 }
   11953 
   11954 extern __inline __m512
   11955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11956 _mm512_cvtepu32_ps (__m512i __A)
   11957 {
   11958   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
   11959 						    (__v16sf)
   11960 						    _mm512_undefined_ps (),
   11961 						    (__mmask16) -1,
   11962 						    _MM_FROUND_CUR_DIRECTION);
   11963 }
   11964 
   11965 extern __inline __m512
   11966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11967 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
   11968 {
   11969   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
   11970 						    (__v16sf) __W,
   11971 						    (__mmask16) __U,
   11972 						    _MM_FROUND_CUR_DIRECTION);
   11973 }
   11974 
   11975 extern __inline __m512
   11976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11977 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
   11978 {
   11979   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
   11980 						    (__v16sf)
   11981 						    _mm512_setzero_ps (),
   11982 						    (__mmask16) __U,
   11983 						    _MM_FROUND_CUR_DIRECTION);
   11984 }
   11985 
   11986 #ifdef __OPTIMIZE__
   11987 extern __inline __m512d
   11988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   11989 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
   11990 {
   11991   return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
   11992 						      (__v8df) __B,
   11993 						      (__v8di) __C,
   11994 						      __imm,
   11995 						      (__mmask8) -1,
   11996 						      _MM_FROUND_CUR_DIRECTION);
   11997 }
   11998 
   11999 extern __inline __m512d
   12000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12001 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
   12002 			 __m512i __C, const int __imm)
   12003 {
   12004   return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
   12005 						      (__v8df) __B,
   12006 						      (__v8di) __C,
   12007 						      __imm,
   12008 						      (__mmask8) __U,
   12009 						      _MM_FROUND_CUR_DIRECTION);
   12010 }
   12011 
   12012 extern __inline __m512d
   12013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12014 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
   12015 			  __m512i __C, const int __imm)
   12016 {
   12017   return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
   12018 						       (__v8df) __B,
   12019 						       (__v8di) __C,
   12020 						       __imm,
   12021 						       (__mmask8) __U,
   12022 						       _MM_FROUND_CUR_DIRECTION);
   12023 }
   12024 
   12025 extern __inline __m512
   12026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12027 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
   12028 {
   12029   return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
   12030 						     (__v16sf) __B,
   12031 						     (__v16si) __C,
   12032 						     __imm,
   12033 						     (__mmask16) -1,
   12034 						     _MM_FROUND_CUR_DIRECTION);
   12035 }
   12036 
   12037 extern __inline __m512
   12038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12039 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
   12040 			 __m512i __C, const int __imm)
   12041 {
   12042   return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
   12043 						     (__v16sf) __B,
   12044 						     (__v16si) __C,
   12045 						     __imm,
   12046 						     (__mmask16) __U,
   12047 						     _MM_FROUND_CUR_DIRECTION);
   12048 }
   12049 
   12050 extern __inline __m512
   12051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12052 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
   12053 			  __m512i __C, const int __imm)
   12054 {
   12055   return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
   12056 						      (__v16sf) __B,
   12057 						      (__v16si) __C,
   12058 						      __imm,
   12059 						      (__mmask16) __U,
   12060 						      _MM_FROUND_CUR_DIRECTION);
   12061 }
   12062 
   12063 extern __inline __m128d
   12064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12065 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
   12066 {
   12067   return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
   12068 						   (__v2df) __B,
   12069 						   (__v2di) __C, __imm,
   12070 						   (__mmask8) -1,
   12071 						   _MM_FROUND_CUR_DIRECTION);
   12072 }
   12073 
   12074 extern __inline __m128d
   12075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12076 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
   12077 		      __m128i __C, const int __imm)
   12078 {
   12079   return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
   12080 						   (__v2df) __B,
   12081 						   (__v2di) __C, __imm,
   12082 						   (__mmask8) __U,
   12083 						   _MM_FROUND_CUR_DIRECTION);
   12084 }
   12085 
   12086 extern __inline __m128d
   12087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12088 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
   12089 		       __m128i __C, const int __imm)
   12090 {
   12091   return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
   12092 						    (__v2df) __B,
   12093 						    (__v2di) __C,
   12094 						    __imm,
   12095 						    (__mmask8) __U,
   12096 						    _MM_FROUND_CUR_DIRECTION);
   12097 }
   12098 
   12099 extern __inline __m128
   12100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12101 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
   12102 {
   12103   return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
   12104 						  (__v4sf) __B,
   12105 						  (__v4si) __C, __imm,
   12106 						  (__mmask8) -1,
   12107 						  _MM_FROUND_CUR_DIRECTION);
   12108 }
   12109 
   12110 extern __inline __m128
   12111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12112 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
   12113 		      __m128i __C, const int __imm)
   12114 {
   12115   return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
   12116 						  (__v4sf) __B,
   12117 						  (__v4si) __C, __imm,
   12118 						  (__mmask8) __U,
   12119 						  _MM_FROUND_CUR_DIRECTION);
   12120 }
   12121 
   12122 extern __inline __m128
   12123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12124 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
   12125 		       __m128i __C, const int __imm)
   12126 {
   12127   return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
   12128 						   (__v4sf) __B,
   12129 						   (__v4si) __C, __imm,
   12130 						   (__mmask8) __U,
   12131 						   _MM_FROUND_CUR_DIRECTION);
   12132 }
   12133 #else
   12134 #define _mm512_fixupimm_pd(X, Y, Z, C)					\
   12135   ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X),	\
   12136       (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C),		\
   12137       (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
   12138 
   12139 #define _mm512_mask_fixupimm_pd(X, U, Y, Z, C)                          \
   12140   ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X),    \
   12141       (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C),             \
   12142       (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
   12143 
   12144 #define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C)                         \
   12145   ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X),   \
   12146       (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C),             \
   12147       (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
   12148 
   12149 #define _mm512_fixupimm_ps(X, Y, Z, C)					\
   12150   ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X),	\
   12151     (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C),		\
   12152     (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
   12153 
   12154 #define _mm512_mask_fixupimm_ps(X, U, Y, Z, C)                          \
   12155   ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X),     \
   12156     (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C),              \
   12157     (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
   12158 
   12159 #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C)                         \
   12160   ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X),    \
   12161     (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C),              \
   12162     (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
   12163 
   12164 #define _mm_fixupimm_sd(X, Y, Z, C)					\
   12165     ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X),	\
   12166       (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C),		\
   12167       (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
   12168 
   12169 #define _mm_mask_fixupimm_sd(X, U, Y, Z, C)				\
   12170     ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X),	\
   12171       (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C),		\
   12172       (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
   12173 
   12174 #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C)				\
   12175     ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X),	\
   12176       (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C),		\
   12177       (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
   12178 
   12179 #define _mm_fixupimm_ss(X, Y, Z, C)					\
   12180     ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X),	\
   12181       (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C),		\
   12182       (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
   12183 
   12184 #define _mm_mask_fixupimm_ss(X, U, Y, Z, C)				\
   12185     ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X),	\
   12186       (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C),		\
   12187       (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
   12188 
   12189 #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C)				\
   12190     ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X),	\
   12191       (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C),		\
   12192       (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
   12193 #endif
   12194 
   12195 #ifdef __x86_64__
   12196 extern __inline unsigned long long
   12197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12198 _mm_cvtss_u64 (__m128 __A)
   12199 {
   12200   return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
   12201 							   __A,
   12202 							   _MM_FROUND_CUR_DIRECTION);
   12203 }
   12204 
   12205 extern __inline unsigned long long
   12206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12207 _mm_cvttss_u64 (__m128 __A)
   12208 {
   12209   return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
   12210 							    __A,
   12211 							    _MM_FROUND_CUR_DIRECTION);
   12212 }
   12213 
   12214 extern __inline long long
   12215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12216 _mm_cvttss_i64 (__m128 __A)
   12217 {
   12218   return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
   12219 						  _MM_FROUND_CUR_DIRECTION);
   12220 }
   12221 #endif /* __x86_64__ */
   12222 
   12223 extern __inline unsigned
   12224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12225 _mm_cvtss_u32 (__m128 __A)
   12226 {
   12227   return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
   12228 						 _MM_FROUND_CUR_DIRECTION);
   12229 }
   12230 
   12231 extern __inline unsigned
   12232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12233 _mm_cvttss_u32 (__m128 __A)
   12234 {
   12235   return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
   12236 						  _MM_FROUND_CUR_DIRECTION);
   12237 }
   12238 
   12239 extern __inline int
   12240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12241 _mm_cvttss_i32 (__m128 __A)
   12242 {
   12243   return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
   12244 					    _MM_FROUND_CUR_DIRECTION);
   12245 }
   12246 
   12247 #ifdef __x86_64__
   12248 extern __inline unsigned long long
   12249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12250 _mm_cvtsd_u64 (__m128d __A)
   12251 {
   12252   return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
   12253 							   __A,
   12254 							   _MM_FROUND_CUR_DIRECTION);
   12255 }
   12256 
   12257 extern __inline unsigned long long
   12258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12259 _mm_cvttsd_u64 (__m128d __A)
   12260 {
   12261   return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
   12262 							    __A,
   12263 							    _MM_FROUND_CUR_DIRECTION);
   12264 }
   12265 
   12266 extern __inline long long
   12267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12268 _mm_cvttsd_i64 (__m128d __A)
   12269 {
   12270   return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
   12271 						  _MM_FROUND_CUR_DIRECTION);
   12272 }
   12273 #endif /* __x86_64__ */
   12274 
   12275 extern __inline unsigned
   12276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12277 _mm_cvtsd_u32 (__m128d __A)
   12278 {
   12279   return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
   12280 						 _MM_FROUND_CUR_DIRECTION);
   12281 }
   12282 
   12283 extern __inline unsigned
   12284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12285 _mm_cvttsd_u32 (__m128d __A)
   12286 {
   12287   return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
   12288 						  _MM_FROUND_CUR_DIRECTION);
   12289 }
   12290 
   12291 extern __inline int
   12292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12293 _mm_cvttsd_i32 (__m128d __A)
   12294 {
   12295   return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
   12296 					    _MM_FROUND_CUR_DIRECTION);
   12297 }
   12298 
   12299 extern __inline __m512d
   12300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12301 _mm512_cvtps_pd (__m256 __A)
   12302 {
   12303   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
   12304 						    (__v8df)
   12305 						    _mm512_undefined_pd (),
   12306 						    (__mmask8) -1,
   12307 						    _MM_FROUND_CUR_DIRECTION);
   12308 }
   12309 
   12310 extern __inline __m512d
   12311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12312 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
   12313 {
   12314   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
   12315 						    (__v8df) __W,
   12316 						    (__mmask8) __U,
   12317 						    _MM_FROUND_CUR_DIRECTION);
   12318 }
   12319 
   12320 extern __inline __m512d
   12321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12322 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
   12323 {
   12324   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
   12325 						    (__v8df)
   12326 						    _mm512_setzero_pd (),
   12327 						    (__mmask8) __U,
   12328 						    _MM_FROUND_CUR_DIRECTION);
   12329 }
   12330 
   12331 extern __inline __m512
   12332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12333 _mm512_cvtph_ps (__m256i __A)
   12334 {
   12335   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
   12336 						    (__v16sf)
   12337 						    _mm512_undefined_ps (),
   12338 						    (__mmask16) -1,
   12339 						    _MM_FROUND_CUR_DIRECTION);
   12340 }
   12341 
   12342 extern __inline __m512
   12343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12344 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
   12345 {
   12346   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
   12347 						    (__v16sf) __W,
   12348 						    (__mmask16) __U,
   12349 						    _MM_FROUND_CUR_DIRECTION);
   12350 }
   12351 
   12352 extern __inline __m512
   12353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12354 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
   12355 {
   12356   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
   12357 						    (__v16sf)
   12358 						    _mm512_setzero_ps (),
   12359 						    (__mmask16) __U,
   12360 						    _MM_FROUND_CUR_DIRECTION);
   12361 }
   12362 
   12363 extern __inline __m256
   12364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12365 _mm512_cvtpd_ps (__m512d __A)
   12366 {
   12367   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
   12368 						   (__v8sf)
   12369 						   _mm256_undefined_ps (),
   12370 						   (__mmask8) -1,
   12371 						   _MM_FROUND_CUR_DIRECTION);
   12372 }
   12373 
   12374 extern __inline __m256
   12375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12376 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
   12377 {
   12378   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
   12379 						   (__v8sf) __W,
   12380 						   (__mmask8) __U,
   12381 						   _MM_FROUND_CUR_DIRECTION);
   12382 }
   12383 
   12384 extern __inline __m256
   12385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12386 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
   12387 {
   12388   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
   12389 						   (__v8sf)
   12390 						   _mm256_setzero_ps (),
   12391 						   (__mmask8) __U,
   12392 						   _MM_FROUND_CUR_DIRECTION);
   12393 }
   12394 
   12395 #ifdef __OPTIMIZE__
   12396 extern __inline __m512
   12397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12398 _mm512_getexp_ps (__m512 __A)
   12399 {
   12400   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
   12401 						   (__v16sf)
   12402 						   _mm512_undefined_ps (),
   12403 						   (__mmask16) -1,
   12404 						   _MM_FROUND_CUR_DIRECTION);
   12405 }
   12406 
   12407 extern __inline __m512
   12408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12409 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
   12410 {
   12411   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
   12412 						   (__v16sf) __W,
   12413 						   (__mmask16) __U,
   12414 						   _MM_FROUND_CUR_DIRECTION);
   12415 }
   12416 
   12417 extern __inline __m512
   12418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12419 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
   12420 {
   12421   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
   12422 						   (__v16sf)
   12423 						   _mm512_setzero_ps (),
   12424 						   (__mmask16) __U,
   12425 						   _MM_FROUND_CUR_DIRECTION);
   12426 }
   12427 
   12428 extern __inline __m512d
   12429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12430 _mm512_getexp_pd (__m512d __A)
   12431 {
   12432   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
   12433 						    (__v8df)
   12434 						    _mm512_undefined_pd (),
   12435 						    (__mmask8) -1,
   12436 						    _MM_FROUND_CUR_DIRECTION);
   12437 }
   12438 
   12439 extern __inline __m512d
   12440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12441 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
   12442 {
   12443   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
   12444 						    (__v8df) __W,
   12445 						    (__mmask8) __U,
   12446 						    _MM_FROUND_CUR_DIRECTION);
   12447 }
   12448 
   12449 extern __inline __m512d
   12450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12451 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
   12452 {
   12453   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
   12454 						    (__v8df)
   12455 						    _mm512_setzero_pd (),
   12456 						    (__mmask8) __U,
   12457 						    _MM_FROUND_CUR_DIRECTION);
   12458 }
   12459 
   12460 extern __inline __m128
   12461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12462 _mm_getexp_ss (__m128 __A, __m128 __B)
   12463 {
   12464   return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
   12465 						    (__v4sf) __B,
   12466 						    _MM_FROUND_CUR_DIRECTION);
   12467 }
   12468 
   12469 extern __inline __m128d
   12470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12471 _mm_getexp_sd (__m128d __A, __m128d __B)
   12472 {
   12473   return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
   12474 						     (__v2df) __B,
   12475 						     _MM_FROUND_CUR_DIRECTION);
   12476 }
   12477 
   12478 extern __inline __m512d
   12479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12480 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
   12481 		   _MM_MANTISSA_SIGN_ENUM __C)
   12482 {
   12483   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
   12484 						     (__C << 2) | __B,
   12485 						     _mm512_undefined_pd (),
   12486 						     (__mmask8) -1,
   12487 						     _MM_FROUND_CUR_DIRECTION);
   12488 }
   12489 
   12490 extern __inline __m512d
   12491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12492 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
   12493 			_MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
   12494 {
   12495   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
   12496 						     (__C << 2) | __B,
   12497 						     (__v8df) __W, __U,
   12498 						     _MM_FROUND_CUR_DIRECTION);
   12499 }
   12500 
   12501 extern __inline __m512d
   12502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12503 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
   12504 			 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
   12505 {
   12506   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
   12507 						     (__C << 2) | __B,
   12508 						     (__v8df)
   12509 						     _mm512_setzero_pd (),
   12510 						     __U,
   12511 						     _MM_FROUND_CUR_DIRECTION);
   12512 }
   12513 
   12514 extern __inline __m512
   12515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12516 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
   12517 		   _MM_MANTISSA_SIGN_ENUM __C)
   12518 {
   12519   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
   12520 						    (__C << 2) | __B,
   12521 						    _mm512_undefined_ps (),
   12522 						    (__mmask16) -1,
   12523 						    _MM_FROUND_CUR_DIRECTION);
   12524 }
   12525 
   12526 extern __inline __m512
   12527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12528 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
   12529 			_MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
   12530 {
   12531   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
   12532 						    (__C << 2) | __B,
   12533 						    (__v16sf) __W, __U,
   12534 						    _MM_FROUND_CUR_DIRECTION);
   12535 }
   12536 
   12537 extern __inline __m512
   12538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12539 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
   12540 			 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
   12541 {
   12542   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
   12543 						    (__C << 2) | __B,
   12544 						    (__v16sf)
   12545 						    _mm512_setzero_ps (),
   12546 						    __U,
   12547 						    _MM_FROUND_CUR_DIRECTION);
   12548 }
   12549 
   12550 extern __inline __m128d
   12551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12552 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
   12553 		_MM_MANTISSA_SIGN_ENUM __D)
   12554 {
   12555   return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
   12556 						   (__v2df) __B,
   12557 						   (__D << 2) | __C,
   12558 						   _MM_FROUND_CUR_DIRECTION);
   12559 }
   12560 
   12561 extern __inline __m128
   12562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12563 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
   12564 		_MM_MANTISSA_SIGN_ENUM __D)
   12565 {
   12566   return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
   12567 						  (__v4sf) __B,
   12568 						  (__D << 2) | __C,
   12569 						  _MM_FROUND_CUR_DIRECTION);
   12570 }
   12571 
   12572 #else
   12573 #define _mm512_getmant_pd(X, B, C)                                                  \
   12574   ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                 \
   12575                                               (int)(((C)<<2) | (B)),                \
   12576                                               (__v8df)_mm512_undefined_pd(),        \
   12577                                               (__mmask8)-1,\
   12578 					      _MM_FROUND_CUR_DIRECTION))
   12579 
   12580 #define _mm512_mask_getmant_pd(W, U, X, B, C)                                       \
   12581   ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                 \
   12582                                               (int)(((C)<<2) | (B)),                \
   12583                                               (__v8df)(__m512d)(W),                 \
   12584                                               (__mmask8)(U),\
   12585 					      _MM_FROUND_CUR_DIRECTION))
   12586 
   12587 #define _mm512_maskz_getmant_pd(U, X, B, C)                                         \
   12588   ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                 \
   12589                                               (int)(((C)<<2) | (B)),                \
   12590                                               (__v8df)_mm512_setzero_pd(),          \
   12591                                               (__mmask8)(U),\
   12592 					      _MM_FROUND_CUR_DIRECTION))
   12593 #define _mm512_getmant_ps(X, B, C)                                                  \
   12594   ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                  \
   12595                                              (int)(((C)<<2) | (B)),                 \
   12596                                              (__v16sf)_mm512_undefined_ps(),        \
   12597                                              (__mmask16)-1,\
   12598 					     _MM_FROUND_CUR_DIRECTION))
   12599 
   12600 #define _mm512_mask_getmant_ps(W, U, X, B, C)                                       \
   12601   ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                  \
   12602                                              (int)(((C)<<2) | (B)),                 \
   12603                                              (__v16sf)(__m512)(W),                  \
   12604                                              (__mmask16)(U),\
   12605 					     _MM_FROUND_CUR_DIRECTION))
   12606 
   12607 #define _mm512_maskz_getmant_ps(U, X, B, C)                                         \
   12608   ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                  \
   12609                                              (int)(((C)<<2) | (B)),                 \
   12610                                              (__v16sf)_mm512_setzero_ps(),          \
   12611                                              (__mmask16)(U),\
   12612 					     _MM_FROUND_CUR_DIRECTION))
   12613 #define _mm_getmant_sd(X, Y, C, D)                                                  \
   12614   ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X),                    \
   12615                                            (__v2df)(__m128d)(Y),                    \
   12616                                            (int)(((D)<<2) | (C)),                   \
   12617 					   _MM_FROUND_CUR_DIRECTION))
   12618 
   12619 #define _mm_getmant_ss(X, Y, C, D)                                                  \
   12620   ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X),                      \
   12621                                           (__v4sf)(__m128)(Y),                      \
   12622                                           (int)(((D)<<2) | (C)),                    \
   12623 					  _MM_FROUND_CUR_DIRECTION))
   12624 
   12625 #define _mm_getexp_ss(A, B)						      \
   12626   ((__m128)__builtin_ia32_getexpss128_mask((__v4sf)(__m128)(A), (__v4sf)(__m128)(B),  \
   12627 					   _MM_FROUND_CUR_DIRECTION))
   12628 
   12629 #define _mm_getexp_sd(A, B)						       \
   12630   ((__m128d)__builtin_ia32_getexpsd128_mask((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
   12631 					    _MM_FROUND_CUR_DIRECTION))
   12632 
   12633 #define _mm512_getexp_ps(A)						\
   12634   ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),		\
   12635   (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
   12636 
   12637 #define _mm512_mask_getexp_ps(W, U, A)					\
   12638   ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),		\
   12639   (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
   12640 
   12641 #define _mm512_maskz_getexp_ps(U, A)					\
   12642   ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),		\
   12643   (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
   12644 
   12645 #define _mm512_getexp_pd(A)						\
   12646   ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),		\
   12647   (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
   12648 
   12649 #define _mm512_mask_getexp_pd(W, U, A)					\
   12650   ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),		\
   12651   (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
   12652 
   12653 #define _mm512_maskz_getexp_pd(U, A)					\
   12654   ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),		\
   12655   (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
   12656 #endif
   12657 
   12658 #ifdef __OPTIMIZE__
   12659 extern __inline __m512
   12660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12661 _mm512_roundscale_ps (__m512 __A, const int __imm)
   12662 {
   12663   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
   12664 						  (__v16sf)
   12665 						  _mm512_undefined_ps (),
   12666 						  -1,
   12667 						  _MM_FROUND_CUR_DIRECTION);
   12668 }
   12669 
   12670 extern __inline __m512
   12671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12672 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
   12673 			   const int __imm)
   12674 {
   12675   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
   12676 						  (__v16sf) __A,
   12677 						  (__mmask16) __B,
   12678 						  _MM_FROUND_CUR_DIRECTION);
   12679 }
   12680 
   12681 extern __inline __m512
   12682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12683 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
   12684 {
   12685   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
   12686 						  __imm,
   12687 						  (__v16sf)
   12688 						  _mm512_setzero_ps (),
   12689 						  (__mmask16) __A,
   12690 						  _MM_FROUND_CUR_DIRECTION);
   12691 }
   12692 
   12693 extern __inline __m512d
   12694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12695 _mm512_roundscale_pd (__m512d __A, const int __imm)
   12696 {
   12697   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
   12698 						   (__v8df)
   12699 						   _mm512_undefined_pd (),
   12700 						   -1,
   12701 						   _MM_FROUND_CUR_DIRECTION);
   12702 }
   12703 
   12704 extern __inline __m512d
   12705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12706 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
   12707 			   const int __imm)
   12708 {
   12709   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
   12710 						   (__v8df) __A,
   12711 						   (__mmask8) __B,
   12712 						   _MM_FROUND_CUR_DIRECTION);
   12713 }
   12714 
   12715 extern __inline __m512d
   12716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12717 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
   12718 {
   12719   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
   12720 						   __imm,
   12721 						   (__v8df)
   12722 						   _mm512_setzero_pd (),
   12723 						   (__mmask8) __A,
   12724 						   _MM_FROUND_CUR_DIRECTION);
   12725 }
   12726 
   12727 extern __inline __m128
   12728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12729 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
   12730 {
   12731   return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
   12732 						   (__v4sf) __B, __imm,
   12733 						   _MM_FROUND_CUR_DIRECTION);
   12734 }
   12735 
   12736 extern __inline __m128d
   12737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12738 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
   12739 {
   12740   return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
   12741 						    (__v2df) __B, __imm,
   12742 						   _MM_FROUND_CUR_DIRECTION);
   12743 }
   12744 
   12745 #else
   12746 #define _mm512_roundscale_ps(A, B) \
   12747   ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
   12748     (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
   12749 #define _mm512_mask_roundscale_ps(A, B, C, D) /* Macro form: C is the input, D the immediate, A the third operand, B the mask.  */				\
   12750   ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),	\
   12751 					    (int)(D),			\
   12752 					    (__v16sf)(__m512)(A),	\
   12753 					    (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
   12754 #define _mm512_maskz_roundscale_ps(A, B, C) /* Macro form: B is the input, C the immediate, A the mask; third operand is a zero vector.  */				\
   12755   ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),	\
   12756 					    (int)(C),			\
   12757 					    (__v16sf)_mm512_setzero_ps(),\
   12758 					    (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
   12759 #define _mm512_roundscale_pd(A, B) /* Macro form: A is the input, B the immediate; third operand undefined, mask (__mmask8)(-1).  */ \
   12760   ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
   12761     (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
   12762 #define _mm512_mask_roundscale_pd(A, B, C, D) /* Macro form: C is the input, D the immediate, A the third operand, B the mask.  */				\
   12763   ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),	\
   12764 					     (int)(D),			\
   12765 					     (__v8df)(__m512d)(A),	\
   12766 					     (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
   12767 #define _mm512_maskz_roundscale_pd(A, B, C) /* Macro form: B is the input, C the immediate, A the mask; third operand is a zero vector.  */				\
   12768   ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),	\
   12769 					     (int)(C),			\
   12770 					     (__v8df)_mm512_setzero_pd(),\
   12771 					     (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
   12772 #define _mm_roundscale_ss(A, B, C) /* Macro form: A and B are the __m128 operands, C the immediate.  */					\
   12773   ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A),	\
   12774   (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
   12775 #define _mm_roundscale_sd(A, B, C) /* Macro form: A and B are the __m128d operands, C the immediate.  */					\
   12776   ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A),	\
   12777     (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
   12778 #endif
   12779 
   12780 #ifdef __OPTIMIZE__
   12781 extern __inline __mmask8 /* Wrapper over __builtin_ia32_cmppd512_mask with the mask operand fixed to (__mmask8) -1.  */
   12782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12783 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
   12784 {
   12785   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
   12786 						  (__v8df) __Y, __P, /* __P is forwarded as-is; presumably the comparison predicate -- confirm.  */
   12787 						  (__mmask8) -1, /* No caller-supplied mask.  */
   12788 						  _MM_FROUND_CUR_DIRECTION); /* Rounding argument is fixed.  */
   12789 }
   12790 
   12791 extern __inline __mmask16 /* Wrapper over __builtin_ia32_cmpps512_mask with the mask operand fixed to (__mmask16) -1.  */
   12792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12793 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
   12794 {
   12795   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
   12796 						   (__v16sf) __Y, __P, /* __P is forwarded as-is; presumably the comparison predicate -- confirm.  */
   12797 						   (__mmask16) -1, /* No caller-supplied mask.  */
   12798 						   _MM_FROUND_CUR_DIRECTION); /* Rounding argument is fixed.  */
   12799 }
   12800 
   12801 extern __inline __mmask16 /* Wrapper over __builtin_ia32_cmpps512_mask with the caller-supplied mask __U.  */
   12802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12803 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
   12804 {
   12805   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
   12806 						   (__v16sf) __Y, __P, /* __P is forwarded as-is; presumably the comparison predicate -- confirm.  */
   12807 						   (__mmask16) __U, /* Presumably result bits outside __U are cleared -- confirm.  */
   12808 						   _MM_FROUND_CUR_DIRECTION); /* Rounding argument is fixed.  */
   12809 }
   12810 
   12811 extern __inline __mmask8 /* Wrapper over __builtin_ia32_cmppd512_mask with the caller-supplied mask __U.  */
   12812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12813 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
   12814 {
   12815   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
   12816 						  (__v8df) __Y, __P, /* __P is forwarded as-is; presumably the comparison predicate -- confirm.  */
   12817 						  (__mmask8) __U, /* Presumably result bits outside __U are cleared -- confirm.  */
   12818 						  _MM_FROUND_CUR_DIRECTION); /* Rounding argument is fixed.  */
   12819 }
   12820 
   12821 extern __inline __mmask8 /* Wrapper over __builtin_ia32_cmpsd_mask with the mask operand fixed to (__mmask8) -1.  */
   12822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12823 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
   12824 {
   12825   return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
   12826 					       (__v2df) __Y, __P, /* __P is forwarded as-is; presumably the comparison predicate -- confirm.  */
   12827 					       (__mmask8) -1, /* No caller-supplied mask.  */
   12828 					       _MM_FROUND_CUR_DIRECTION); /* Rounding argument is fixed.  */
   12829 }
   12830 
   12831 extern __inline __mmask8 /* Wrapper over __builtin_ia32_cmpsd_mask with the caller-supplied mask __M.  */
   12832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12833 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
   12834 {
   12835   return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
   12836 					       (__v2df) __Y, __P, /* __P is forwarded as-is; presumably the comparison predicate -- confirm.  */
   12837 					       (__mmask8) __M, /* Presumably result bits outside __M are cleared -- confirm.  */
   12838 					       _MM_FROUND_CUR_DIRECTION); /* Rounding argument is fixed.  */
   12839 }
   12840 
   12841 extern __inline __mmask8 /* Wrapper over __builtin_ia32_cmpss_mask with the mask operand fixed to (__mmask8) -1.  */
   12842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12843 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
   12844 {
   12845   return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
   12846 					       (__v4sf) __Y, __P, /* __P is forwarded as-is; presumably the comparison predicate -- confirm.  */
   12847 					       (__mmask8) -1, /* No caller-supplied mask.  */
   12848 					       _MM_FROUND_CUR_DIRECTION); /* Rounding argument is fixed.  */
   12849 }
   12850 
   12851 extern __inline __mmask8 /* Wrapper over __builtin_ia32_cmpss_mask with the caller-supplied mask __M.  */
   12852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12853 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
   12854 {
   12855   return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
   12856 					       (__v4sf) __Y, __P, /* __P is forwarded as-is; presumably the comparison predicate -- confirm.  */
   12857 					       (__mmask8) __M, /* Presumably result bits outside __M are cleared -- confirm.  */
   12858 					       _MM_FROUND_CUR_DIRECTION); /* Rounding argument is fixed.  */
   12859 }
   12860 
   12861 #else
   12862 #define _mm512_cmp_pd_mask(X, Y, P) /* Macro form for builds without __OPTIMIZE__; mask fixed to (__mmask8)-1.  */					\
   12863   ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),	\
   12864 					    (__v8df)(__m512d)(Y), (int)(P),\
   12865 					    (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
   12866 
   12867 #define _mm512_cmp_ps_mask(X, Y, P) /* Macro form for builds without __OPTIMIZE__; mask fixed to (__mmask16)-1.  */					\
   12868   ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),	\
   12869 					     (__v16sf)(__m512)(Y), (int)(P),\
   12870 					     (__mmask16)-1,_MM_FROUND_CUR_DIRECTION))
   12871 
   12872 #define _mm512_mask_cmp_pd_mask(M, X, Y, P) /* Macro form for builds without __OPTIMIZE__.  M is parenthesized so an expression argument (e.g. m >> 8) is cast as a whole, matching the inline version.  */					\
   12873   ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),	\
   12874 					    (__v8df)(__m512d)(Y), (int)(P),\
   12875 					    (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
   12876 
   12877 #define _mm512_mask_cmp_ps_mask(M, X, Y, P) /* Macro form for builds without __OPTIMIZE__.  M is parenthesized so an expression argument (e.g. m >> 16) is cast as a whole, matching the inline version.  */					\
   12878   ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),	\
   12879 					     (__v16sf)(__m512)(Y), (int)(P),\
   12880 					     (__mmask16)(M), _MM_FROUND_CUR_DIRECTION))
   12881 
   12882 #define _mm_cmp_sd_mask(X, Y, P) /* Macro form for builds without __OPTIMIZE__; mask fixed to (__mmask8)-1.  */					\
   12883   ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),		\
   12884 					 (__v2df)(__m128d)(Y), (int)(P),\
   12885 					 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
   12886 
   12887 #define _mm_mask_cmp_sd_mask(M, X, Y, P) /* Macro form for builds without __OPTIMIZE__.  M is parenthesized and cast to __mmask8, as the inline version does with __M.  */					\
   12888   ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),		\
   12889 					 (__v2df)(__m128d)(Y), (int)(P),\
   12890 					 (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
   12891 
   12892 #define _mm_cmp_ss_mask(X, Y, P) /* Macro form for builds without __OPTIMIZE__; mask fixed to (__mmask8)-1.  */					\
   12893   ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),		\
   12894 					 (__v4sf)(__m128)(Y), (int)(P), \
   12895 					 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
   12896 
   12897 #define _mm_mask_cmp_ss_mask(M, X, Y, P) /* Macro form for builds without __OPTIMIZE__.  M is parenthesized and cast to __mmask8, as the inline version does with __M.  */					\
   12898   ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),		\
   12899 					 (__v4sf)(__m128)(Y), (int)(P), \
   12900 					 (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
   12901 #endif
   12902 
   12903 extern __inline __mmask16 /* Returns the result of __builtin_ia32_kmov16 applied to __A.  */
   12904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   12905 _mm512_kmov (__mmask16 __A)
   12906 {
   12907   return __builtin_ia32_kmov16 (__A); /* Defined in both optimized and non-optimized builds; it sits outside the __OPTIMIZE__ conditional.  */
   12908 }
   12909 
   12910 #ifdef __DISABLE_AVX512F__ /* Set at the top of this header when it had to push options and enable avx512f itself.  */
   12911 #undef __DISABLE_AVX512F__
   12912 #pragma GCC pop_options
   12913 #endif /* __DISABLE_AVX512F__ */
   12914 
   12915 #endif /* _AVX512FINTRIN_H_INCLUDED */
   12916