Home | History | Annotate | Download | only in include
      1 /*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 
     24 #ifndef __IMMINTRIN_H
     25 #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
     26 #endif
     27 
     28 #ifndef __AVX512DQINTRIN_H
     29 #define __AVX512DQINTRIN_H
     30 
     31 /* Define the default attributes for the functions in this file. */
     32 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
     33 
     34 static __inline__ __m512i __DEFAULT_FN_ATTRS
     35 _mm512_mullo_epi64 (__m512i __A, __m512i __B) {
     36   return (__m512i) ((__v8du) __A * (__v8du) __B);
     37 }
     38 
     39 static __inline__ __m512i __DEFAULT_FN_ATTRS
     40 _mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
     41   return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
     42               (__v8di) __B,
     43               (__v8di) __W,
     44               (__mmask8) __U);
     45 }
     46 
     47 static __inline__ __m512i __DEFAULT_FN_ATTRS
     48 _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) {
     49   return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
     50               (__v8di) __B,
     51               (__v8di)
     52               _mm512_setzero_si512 (),
     53               (__mmask8) __U);
     54 }
     55 
     56 static __inline__ __m512d __DEFAULT_FN_ATTRS
     57 _mm512_xor_pd (__m512d __A, __m512d __B) {
     58   return (__m512d) ((__v8du) __A ^ (__v8du) __B);
     59 }
     60 
     61 static __inline__ __m512d __DEFAULT_FN_ATTRS
     62 _mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
     63   return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
     64              (__v8df) __B,
     65              (__v8df) __W,
     66              (__mmask8) __U);
     67 }
     68 
     69 static __inline__ __m512d __DEFAULT_FN_ATTRS
     70 _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
     71   return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
     72              (__v8df) __B,
     73              (__v8df)
     74              _mm512_setzero_pd (),
     75              (__mmask8) __U);
     76 }
     77 
     78 static __inline__ __m512 __DEFAULT_FN_ATTRS
     79 _mm512_xor_ps (__m512 __A, __m512 __B) {
     80   return (__m512) ((__v16su) __A ^ (__v16su) __B);
     81 }
     82 
     83 static __inline__ __m512 __DEFAULT_FN_ATTRS
     84 _mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
     85   return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
     86             (__v16sf) __B,
     87             (__v16sf) __W,
     88             (__mmask16) __U);
     89 }
     90 
     91 static __inline__ __m512 __DEFAULT_FN_ATTRS
     92 _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
     93   return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
     94             (__v16sf) __B,
     95             (__v16sf)
     96             _mm512_setzero_ps (),
     97             (__mmask16) __U);
     98 }
     99 
    100 static __inline__ __m512d __DEFAULT_FN_ATTRS
    101 _mm512_or_pd (__m512d __A, __m512d __B) {
    102   return (__m512d) ((__v8du) __A | (__v8du) __B);
    103 }
    104 
    105 static __inline__ __m512d __DEFAULT_FN_ATTRS
    106 _mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
    107   return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
    108             (__v8df) __B,
    109             (__v8df) __W,
    110             (__mmask8) __U);
    111 }
    112 
    113 static __inline__ __m512d __DEFAULT_FN_ATTRS
    114 _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
    115   return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
    116             (__v8df) __B,
    117             (__v8df)
    118             _mm512_setzero_pd (),
    119             (__mmask8) __U);
    120 }
    121 
    122 static __inline__ __m512 __DEFAULT_FN_ATTRS
    123 _mm512_or_ps (__m512 __A, __m512 __B) {
    124   return (__m512) ((__v16su) __A | (__v16su) __B);
    125 }
    126 
    127 static __inline__ __m512 __DEFAULT_FN_ATTRS
    128 _mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
    129   return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
    130                  (__v16sf) __B,
    131                  (__v16sf) __W,
    132                  (__mmask16) __U);
    133 }
    134 
    135 static __inline__ __m512 __DEFAULT_FN_ATTRS
    136 _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
    137   return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
    138                  (__v16sf) __B,
    139                  (__v16sf)
    140                  _mm512_setzero_ps (),
    141                  (__mmask16) __U);
    142 }
    143 
    144 static __inline__ __m512d __DEFAULT_FN_ATTRS
    145 _mm512_and_pd (__m512d __A, __m512d __B) {
    146   return (__m512d) ((__v8du) __A & (__v8du) __B);
    147 }
    148 
    149 static __inline__ __m512d __DEFAULT_FN_ATTRS
    150 _mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
    151   return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
    152              (__v8df) __B,
    153              (__v8df) __W,
    154              (__mmask8) __U);
    155 }
    156 
    157 static __inline__ __m512d __DEFAULT_FN_ATTRS
    158 _mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
    159   return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
    160              (__v8df) __B,
    161              (__v8df)
    162              _mm512_setzero_pd (),
    163              (__mmask8) __U);
    164 }
    165 
    166 static __inline__ __m512 __DEFAULT_FN_ATTRS
    167 _mm512_and_ps (__m512 __A, __m512 __B) {
    168   return (__m512) ((__v16su) __A & (__v16su) __B);
    169 }
    170 
    171 static __inline__ __m512 __DEFAULT_FN_ATTRS
    172 _mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
    173   return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
    174             (__v16sf) __B,
    175             (__v16sf) __W,
    176             (__mmask16) __U);
    177 }
    178 
    179 static __inline__ __m512 __DEFAULT_FN_ATTRS
    180 _mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {
    181   return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
    182             (__v16sf) __B,
    183             (__v16sf)
    184             _mm512_setzero_ps (),
    185             (__mmask16) __U);
    186 }
    187 
    188 static __inline__ __m512d __DEFAULT_FN_ATTRS
    189 _mm512_andnot_pd (__m512d __A, __m512d __B) {
    190   return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
    191               (__v8df) __B,
    192               (__v8df)
    193               _mm512_setzero_pd (),
    194               (__mmask8) -1);
    195 }
    196 
    197 static __inline__ __m512d __DEFAULT_FN_ATTRS
    198 _mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
    199   return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
    200               (__v8df) __B,
    201               (__v8df) __W,
    202               (__mmask8) __U);
    203 }
    204 
    205 static __inline__ __m512d __DEFAULT_FN_ATTRS
    206 _mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
    207   return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
    208               (__v8df) __B,
    209               (__v8df)
    210               _mm512_setzero_pd (),
    211               (__mmask8) __U);
    212 }
    213 
    214 static __inline__ __m512 __DEFAULT_FN_ATTRS
    215 _mm512_andnot_ps (__m512 __A, __m512 __B) {
    216   return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
    217              (__v16sf) __B,
    218              (__v16sf)
    219              _mm512_setzero_ps (),
    220              (__mmask16) -1);
    221 }
    222 
    223 static __inline__ __m512 __DEFAULT_FN_ATTRS
    224 _mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
    225   return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
    226              (__v16sf) __B,
    227              (__v16sf) __W,
    228              (__mmask16) __U);
    229 }
    230 
    231 static __inline__ __m512 __DEFAULT_FN_ATTRS
    232 _mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
    233   return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
    234              (__v16sf) __B,
    235              (__v16sf)
    236              _mm512_setzero_ps (),
    237              (__mmask16) __U);
    238 }
    239 
    240 static __inline__ __m512i __DEFAULT_FN_ATTRS
    241 _mm512_cvtpd_epi64 (__m512d __A) {
    242   return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
    243                 (__v8di) _mm512_setzero_si512(),
    244                 (__mmask8) -1,
    245                 _MM_FROUND_CUR_DIRECTION);
    246 }
    247 
    248 static __inline__ __m512i __DEFAULT_FN_ATTRS
    249 _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
    250   return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
    251                 (__v8di) __W,
    252                 (__mmask8) __U,
    253                 _MM_FROUND_CUR_DIRECTION);
    254 }
    255 
    256 static __inline__ __m512i __DEFAULT_FN_ATTRS
    257 _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
    258   return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
    259                 (__v8di) _mm512_setzero_si512(),
    260                 (__mmask8) __U,
    261                 _MM_FROUND_CUR_DIRECTION);
    262 }
    263 
    264 #define _mm512_cvt_roundpd_epi64(A, R) __extension__ ({              \
    265   (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
    266                                            (__v8di)_mm512_setzero_si512(), \
    267                                            (__mmask8)-1, (int)(R)); })
    268 
    269 #define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) __extension__ ({ \
    270   (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
    271                                            (__v8di)(__m512i)(W), \
    272                                            (__mmask8)(U), (int)(R)); })
    273 
    274 #define _mm512_maskz_cvt_roundpd_epi64(U, A, R) __extension__ ({   \
    275   (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
    276                                            (__v8di)_mm512_setzero_si512(), \
    277                                            (__mmask8)(U), (int)(R)); })
    278 
    279 static __inline__ __m512i __DEFAULT_FN_ATTRS
    280 _mm512_cvtpd_epu64 (__m512d __A) {
    281   return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
    282                  (__v8di) _mm512_setzero_si512(),
    283                  (__mmask8) -1,
    284                  _MM_FROUND_CUR_DIRECTION);
    285 }
    286 
    287 static __inline__ __m512i __DEFAULT_FN_ATTRS
    288 _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
    289   return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
    290                  (__v8di) __W,
    291                  (__mmask8) __U,
    292                  _MM_FROUND_CUR_DIRECTION);
    293 }
    294 
    295 static __inline__ __m512i __DEFAULT_FN_ATTRS
    296 _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
    297   return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
    298                  (__v8di) _mm512_setzero_si512(),
    299                  (__mmask8) __U,
    300                  _MM_FROUND_CUR_DIRECTION);
    301 }
    302 
    303 #define _mm512_cvt_roundpd_epu64(A, R) __extension__ ({               \
    304   (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
    305                                             (__v8di)_mm512_setzero_si512(), \
    306                                             (__mmask8)-1, (int)(R)); })
    307 
    308 #define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) __extension__ ({ \
    309   (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
    310                                             (__v8di)(__m512i)(W), \
    311                                             (__mmask8)(U), (int)(R)); })
    312 
    313 #define _mm512_maskz_cvt_roundpd_epu64(U, A, R) __extension__ ({     \
    314   (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
    315                                             (__v8di)_mm512_setzero_si512(), \
    316                                             (__mmask8)(U), (int)(R)); })
    317 
    318 static __inline__ __m512i __DEFAULT_FN_ATTRS
    319 _mm512_cvtps_epi64 (__m256 __A) {
    320   return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
    321                 (__v8di) _mm512_setzero_si512(),
    322                 (__mmask8) -1,
    323                 _MM_FROUND_CUR_DIRECTION);
    324 }
    325 
    326 static __inline__ __m512i __DEFAULT_FN_ATTRS
    327 _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
    328   return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
    329                 (__v8di) __W,
    330                 (__mmask8) __U,
    331                 _MM_FROUND_CUR_DIRECTION);
    332 }
    333 
    334 static __inline__ __m512i __DEFAULT_FN_ATTRS
    335 _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
    336   return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
    337                 (__v8di) _mm512_setzero_si512(),
    338                 (__mmask8) __U,
    339                 _MM_FROUND_CUR_DIRECTION);
    340 }
    341 
    342 #define _mm512_cvt_roundps_epi64(A, R) __extension__ ({             \
    343   (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
    344                                            (__v8di)_mm512_setzero_si512(), \
    345                                            (__mmask8)-1, (int)(R)); })
    346 
    347 #define _mm512_mask_cvt_roundps_epi64(W, U, A, R) __extension__ ({ \
    348   (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
    349                                            (__v8di)(__m512i)(W), \
    350                                            (__mmask8)(U), (int)(R)); })
    351 
    352 #define _mm512_maskz_cvt_roundps_epi64(U, A, R) __extension__ ({   \
    353   (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
    354                                            (__v8di)_mm512_setzero_si512(), \
    355                                            (__mmask8)(U), (int)(R)); })
    356 
    357 static __inline__ __m512i __DEFAULT_FN_ATTRS
    358 _mm512_cvtps_epu64 (__m256 __A) {
    359   return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
    360                  (__v8di) _mm512_setzero_si512(),
    361                  (__mmask8) -1,
    362                  _MM_FROUND_CUR_DIRECTION);
    363 }
    364 
    365 static __inline__ __m512i __DEFAULT_FN_ATTRS
    366 _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
    367   return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
    368                  (__v8di) __W,
    369                  (__mmask8) __U,
    370                  _MM_FROUND_CUR_DIRECTION);
    371 }
    372 
    373 static __inline__ __m512i __DEFAULT_FN_ATTRS
    374 _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
    375   return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
    376                  (__v8di) _mm512_setzero_si512(),
    377                  (__mmask8) __U,
    378                  _MM_FROUND_CUR_DIRECTION);
    379 }
    380 
    381 #define _mm512_cvt_roundps_epu64(A, R) __extension__ ({              \
    382   (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
    383                                             (__v8di)_mm512_setzero_si512(), \
    384                                             (__mmask8)-1, (int)(R)); })
    385 
    386 #define _mm512_mask_cvt_roundps_epu64(W, U, A, R) __extension__ ({ \
    387   (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
    388                                             (__v8di)(__m512i)(W), \
    389                                             (__mmask8)(U), (int)(R)); })
    390 
    391 #define _mm512_maskz_cvt_roundps_epu64(U, A, R) __extension__ ({   \
    392   (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
    393                                             (__v8di)_mm512_setzero_si512(), \
    394                                             (__mmask8)(U), (int)(R)); })
    395 
    396 
    397 static __inline__ __m512d __DEFAULT_FN_ATTRS
    398 _mm512_cvtepi64_pd (__m512i __A) {
    399   return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
    400                 (__v8df) _mm512_setzero_pd(),
    401                 (__mmask8) -1,
    402                 _MM_FROUND_CUR_DIRECTION);
    403 }
    404 
    405 static __inline__ __m512d __DEFAULT_FN_ATTRS
    406 _mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
    407   return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
    408                 (__v8df) __W,
    409                 (__mmask8) __U,
    410                 _MM_FROUND_CUR_DIRECTION);
    411 }
    412 
    413 static __inline__ __m512d __DEFAULT_FN_ATTRS
    414 _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
    415   return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
    416                 (__v8df) _mm512_setzero_pd(),
    417                 (__mmask8) __U,
    418                 _MM_FROUND_CUR_DIRECTION);
    419 }
    420 
    421 #define _mm512_cvt_roundepi64_pd(A, R) __extension__ ({          \
    422   (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
    423                                            (__v8df)_mm512_setzero_pd(), \
    424                                            (__mmask8)-1, (int)(R)); })
    425 
    426 #define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) __extension__ ({ \
    427   (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
    428                                            (__v8df)(__m512d)(W), \
    429                                            (__mmask8)(U), (int)(R)); })
    430 
    431 #define _mm512_maskz_cvt_roundepi64_pd(U, A, R) __extension__ ({ \
    432   (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
    433                                            (__v8df)_mm512_setzero_pd(), \
    434                                            (__mmask8)(U), (int)(R)); })
    435 
    436 static __inline__ __m256 __DEFAULT_FN_ATTRS
    437 _mm512_cvtepi64_ps (__m512i __A) {
    438   return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
    439                (__v8sf) _mm256_setzero_ps(),
    440                (__mmask8) -1,
    441                _MM_FROUND_CUR_DIRECTION);
    442 }
    443 
    444 static __inline__ __m256 __DEFAULT_FN_ATTRS
    445 _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
    446   return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
    447                (__v8sf) __W,
    448                (__mmask8) __U,
    449                _MM_FROUND_CUR_DIRECTION);
    450 }
    451 
    452 static __inline__ __m256 __DEFAULT_FN_ATTRS
    453 _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
    454   return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
    455                (__v8sf) _mm256_setzero_ps(),
    456                (__mmask8) __U,
    457                _MM_FROUND_CUR_DIRECTION);
    458 }
    459 
    460 #define _mm512_cvt_roundepi64_ps(A, R) __extension__ ({        \
    461   (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
    462                                           (__v8sf)_mm256_setzero_ps(), \
    463                                           (__mmask8)-1, (int)(R)); })
    464 
    465 #define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) __extension__ ({ \
    466   (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
    467                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
    468                                           (int)(R)); })
    469 
    470 #define _mm512_maskz_cvt_roundepi64_ps(U, A, R) __extension__ ({ \
    471   (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
    472                                           (__v8sf)_mm256_setzero_ps(), \
    473                                           (__mmask8)(U), (int)(R)); })
    474 
    475 
    476 static __inline__ __m512i __DEFAULT_FN_ATTRS
    477 _mm512_cvttpd_epi64 (__m512d __A) {
    478   return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
    479                  (__v8di) _mm512_setzero_si512(),
    480                  (__mmask8) -1,
    481                  _MM_FROUND_CUR_DIRECTION);
    482 }
    483 
    484 static __inline__ __m512i __DEFAULT_FN_ATTRS
    485 _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
    486   return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
    487                  (__v8di) __W,
    488                  (__mmask8) __U,
    489                  _MM_FROUND_CUR_DIRECTION);
    490 }
    491 
    492 static __inline__ __m512i __DEFAULT_FN_ATTRS
    493 _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
    494   return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
    495                  (__v8di) _mm512_setzero_si512(),
    496                  (__mmask8) __U,
    497                  _MM_FROUND_CUR_DIRECTION);
    498 }
    499 
    500 #define _mm512_cvtt_roundpd_epi64(A, R) __extension__ ({             \
    501   (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
    502                                             (__v8di)_mm512_setzero_si512(), \
    503                                             (__mmask8)-1, (int)(R)); })
    504 
    505 #define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) __extension__ ({ \
    506   (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
    507                                             (__v8di)(__m512i)(W), \
    508                                             (__mmask8)(U), (int)(R)); })
    509 
    510 #define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) __extension__ ({ \
    511   (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
    512                                             (__v8di)_mm512_setzero_si512(), \
    513                                             (__mmask8)(U), (int)(R)); })
    514 
    515 static __inline__ __m512i __DEFAULT_FN_ATTRS
    516 _mm512_cvttpd_epu64 (__m512d __A) {
    517   return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
    518                   (__v8di) _mm512_setzero_si512(),
    519                   (__mmask8) -1,
    520                   _MM_FROUND_CUR_DIRECTION);
    521 }
    522 
    523 static __inline__ __m512i __DEFAULT_FN_ATTRS
    524 _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
    525   return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
    526                   (__v8di) __W,
    527                   (__mmask8) __U,
    528                   _MM_FROUND_CUR_DIRECTION);
    529 }
    530 
    531 static __inline__ __m512i __DEFAULT_FN_ATTRS
    532 _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
    533   return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
    534                   (__v8di) _mm512_setzero_si512(),
    535                   (__mmask8) __U,
    536                   _MM_FROUND_CUR_DIRECTION);
    537 }
    538 
    539 #define _mm512_cvtt_roundpd_epu64(A, R) __extension__ ({              \
    540   (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
    541                                              (__v8di)_mm512_setzero_si512(), \
    542                                              (__mmask8)-1, (int)(R)); })
    543 
    544 #define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) __extension__ ({ \
    545   (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
    546                                              (__v8di)(__m512i)(W), \
    547                                              (__mmask8)(U), (int)(R)); })
    548 
    549 #define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) __extension__ ({   \
    550   (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
    551                                              (__v8di)_mm512_setzero_si512(), \
    552                                              (__mmask8)(U), (int)(R)); })
    553 
    554 static __inline__ __m512i __DEFAULT_FN_ATTRS
    555 _mm512_cvttps_epi64 (__m256 __A) {
    556   return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
    557                  (__v8di) _mm512_setzero_si512(),
    558                  (__mmask8) -1,
    559                  _MM_FROUND_CUR_DIRECTION);
    560 }
    561 
    562 static __inline__ __m512i __DEFAULT_FN_ATTRS
    563 _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
    564   return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
    565                  (__v8di) __W,
    566                  (__mmask8) __U,
    567                  _MM_FROUND_CUR_DIRECTION);
    568 }
    569 
    570 static __inline__ __m512i __DEFAULT_FN_ATTRS
    571 _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
    572   return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
    573                  (__v8di) _mm512_setzero_si512(),
    574                  (__mmask8) __U,
    575                  _MM_FROUND_CUR_DIRECTION);
    576 }
    577 
    578 #define _mm512_cvtt_roundps_epi64(A, R) __extension__ ({            \
    579   (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
    580                                             (__v8di)_mm512_setzero_si512(), \
    581                                             (__mmask8)-1, (int)(R)); })
    582 
    583 #define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) __extension__ ({ \
    584   (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
    585                                             (__v8di)(__m512i)(W), \
    586                                             (__mmask8)(U), (int)(R)); })
    587 
    588 #define _mm512_maskz_cvtt_roundps_epi64(U, A, R) __extension__ ({  \
    589   (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
    590                                             (__v8di)_mm512_setzero_si512(), \
    591                                             (__mmask8)(U), (int)(R)); })
    592 
    593 static __inline__ __m512i __DEFAULT_FN_ATTRS
    594 _mm512_cvttps_epu64 (__m256 __A) {
    595   return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
    596                   (__v8di) _mm512_setzero_si512(),
    597                   (__mmask8) -1,
    598                   _MM_FROUND_CUR_DIRECTION);
    599 }
    600 
    601 static __inline__ __m512i __DEFAULT_FN_ATTRS
    602 _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
    603   return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
    604                   (__v8di) __W,
    605                   (__mmask8) __U,
    606                   _MM_FROUND_CUR_DIRECTION);
    607 }
    608 
    609 static __inline__ __m512i __DEFAULT_FN_ATTRS
    610 _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
    611   return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
    612                   (__v8di) _mm512_setzero_si512(),
    613                   (__mmask8) __U,
    614                   _MM_FROUND_CUR_DIRECTION);
    615 }
    616 
    617 #define _mm512_cvtt_roundps_epu64(A, R) __extension__ ({            \
    618   (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
    619                                              (__v8di)_mm512_setzero_si512(), \
    620                                              (__mmask8)-1, (int)(R)); })
    621 
    622 #define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) __extension__ ({ \
    623   (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
    624                                              (__v8di)(__m512i)(W), \
    625                                              (__mmask8)(U), (int)(R)); })
    626 
    627 #define _mm512_maskz_cvtt_roundps_epu64(U, A, R) __extension__ ({  \
    628   (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
    629                                              (__v8di)_mm512_setzero_si512(), \
    630                                              (__mmask8)(U), (int)(R)); })
    631 
    632 static __inline__ __m512d __DEFAULT_FN_ATTRS
    633 _mm512_cvtepu64_pd (__m512i __A) {
    634   return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
    635                  (__v8df) _mm512_setzero_pd(),
    636                  (__mmask8) -1,
    637                  _MM_FROUND_CUR_DIRECTION);
    638 }
    639 
    640 static __inline__ __m512d __DEFAULT_FN_ATTRS
    641 _mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
    642   return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
    643                  (__v8df) __W,
    644                  (__mmask8) __U,
    645                  _MM_FROUND_CUR_DIRECTION);
    646 }
    647 
    648 static __inline__ __m512d __DEFAULT_FN_ATTRS
    649 _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
    650   return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
    651                  (__v8df) _mm512_setzero_pd(),
    652                  (__mmask8) __U,
    653                  _MM_FROUND_CUR_DIRECTION);
    654 }
    655 
    656 #define _mm512_cvt_roundepu64_pd(A, R) __extension__ ({          \
    657   (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
    658                                             (__v8df)_mm512_setzero_pd(), \
    659                                             (__mmask8)-1, (int)(R)); })
    660 
    661 #define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) __extension__ ({ \
    662   (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
    663                                             (__v8df)(__m512d)(W), \
    664                                             (__mmask8)(U), (int)(R)); })
    665 
    666 
    667 #define _mm512_maskz_cvt_roundepu64_pd(U, A, R) __extension__ ({ \
    668   (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
    669                                             (__v8df)_mm512_setzero_pd(), \
    670                                             (__mmask8)(U), (int)(R)); })
    671 
    672 
    673 static __inline__ __m256 __DEFAULT_FN_ATTRS
    674 _mm512_cvtepu64_ps (__m512i __A) {
    675   return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
    676                 (__v8sf) _mm256_setzero_ps(),
    677                 (__mmask8) -1,
    678                 _MM_FROUND_CUR_DIRECTION);
    679 }
    680 
    681 static __inline__ __m256 __DEFAULT_FN_ATTRS
    682 _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
    683   return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
    684                 (__v8sf) __W,
    685                 (__mmask8) __U,
    686                 _MM_FROUND_CUR_DIRECTION);
    687 }
    688 
    689 static __inline__ __m256 __DEFAULT_FN_ATTRS
    690 _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
    691   return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
    692                 (__v8sf) _mm256_setzero_ps(),
    693                 (__mmask8) __U,
    694                 _MM_FROUND_CUR_DIRECTION);
    695 }
    696 
    697 #define _mm512_cvt_roundepu64_ps(A, R) __extension__ ({         \
    698   (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
    699                                            (__v8sf)_mm256_setzero_ps(), \
    700                                            (__mmask8)-1, (int)(R)); })
    701 
    702 #define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) __extension__ ({ \
    703   (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
    704                                            (__v8sf)(__m256)(W), (__mmask8)(U), \
    705                                            (int)(R)); })
    706 
    707 #define _mm512_maskz_cvt_roundepu64_ps(U, A, R) __extension__ ({ \
    708   (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
    709                                            (__v8sf)_mm256_setzero_ps(), \
    710                                            (__mmask8)(U), (int)(R)); })
    711 
    712 #define _mm512_range_pd(A, B, C) __extension__ ({                     \
    713   (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
    714                                           (__v8df)(__m512d)(B), (int)(C), \
    715                                           (__v8df)_mm512_setzero_pd(), \
    716                                           (__mmask8)-1, \
    717                                           _MM_FROUND_CUR_DIRECTION); })
    718 
    719 #define _mm512_mask_range_pd(W, U, A, B, C) __extension__ ({      \
    720   (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
    721                                           (__v8df)(__m512d)(B), (int)(C), \
    722                                           (__v8df)(__m512d)(W), (__mmask8)(U), \
    723                                           _MM_FROUND_CUR_DIRECTION); })
    724 
    725 #define _mm512_maskz_range_pd(U, A, B, C) __extension__ ({           \
    726   (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
    727                                           (__v8df)(__m512d)(B), (int)(C), \
    728                                           (__v8df)_mm512_setzero_pd(), \
    729                                           (__mmask8)(U), \
    730                                           _MM_FROUND_CUR_DIRECTION); })
    731 
    732 #define _mm512_range_round_pd(A, B, C, R) __extension__ ({           \
    733   (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
    734                                           (__v8df)(__m512d)(B), (int)(C), \
    735                                           (__v8df)_mm512_setzero_pd(), \
    736                                           (__mmask8)-1, (int)(R)); })
    737 
    738 #define _mm512_mask_range_round_pd(W, U, A, B, C, R) __extension__ ({ \
    739   (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
    740                                           (__v8df)(__m512d)(B), (int)(C), \
    741                                           (__v8df)(__m512d)(W), (__mmask8)(U), \
    742                                           (int)(R)); })
    743 
    744 #define _mm512_maskz_range_round_pd(U, A, B, C, R) __extension__ ({ \
    745   (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
    746                                           (__v8df)(__m512d)(B), (int)(C), \
    747                                           (__v8df)_mm512_setzero_pd(), \
    748                                           (__mmask8)(U), (int)(R)); })
    749 
    750 #define _mm512_range_ps(A, B, C) __extension__ ({                       \
    751   (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
    752                                          (__v16sf)(__m512)(B), (int)(C), \
    753                                          (__v16sf)_mm512_setzero_ps(), \
    754                                          (__mmask16)-1, \
    755                                          _MM_FROUND_CUR_DIRECTION); })
    756 
    757 #define _mm512_mask_range_ps(W, U, A, B, C) __extension__ ({         \
    758   (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
    759                                          (__v16sf)(__m512)(B), (int)(C), \
    760                                          (__v16sf)(__m512)(W), (__mmask16)(U), \
    761                                          _MM_FROUND_CUR_DIRECTION); })
    762 
    763 #define _mm512_maskz_range_ps(U, A, B, C) __extension__ ({      \
    764   (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
    765                                          (__v16sf)(__m512)(B), (int)(C), \
    766                                          (__v16sf)_mm512_setzero_ps(), \
    767                                          (__mmask16)(U), \
    768                                          _MM_FROUND_CUR_DIRECTION); })
    769 
    770 #define _mm512_range_round_ps(A, B, C, R) __extension__ ({         \
    771   (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
    772                                          (__v16sf)(__m512)(B), (int)(C), \
    773                                          (__v16sf)_mm512_setzero_ps(), \
    774                                          (__mmask16)-1, (int)(R)); })
    775 
    776 #define _mm512_mask_range_round_ps(W, U, A, B, C, R) __extension__ ({ \
    777   (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
    778                                          (__v16sf)(__m512)(B), (int)(C), \
    779                                          (__v16sf)(__m512)(W), (__mmask16)(U), \
    780                                          (int)(R)); })
    781 
    782 #define _mm512_maskz_range_round_ps(U, A, B, C, R) __extension__ ({ \
    783   (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
    784                                          (__v16sf)(__m512)(B), (int)(C), \
    785                                          (__v16sf)_mm512_setzero_ps(), \
    786                                          (__mmask16)(U), (int)(R)); })
    787 
    788 #define _mm_range_round_ss(A, B, C, R) __extension__ ({           \
    789   (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
    790                                                (__v4sf)(__m128)(B), \
    791                                                (__v4sf)_mm_setzero_ps(), \
    792                                                (__mmask8) -1, (int)(C),\
    793                                                (int)(R)); })
    794 
    795 #define _mm_range_ss(A ,B , C) _mm_range_round_ss(A, B, C ,_MM_FROUND_CUR_DIRECTION)
    796 
    797 #define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \
    798   (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
    799                                                (__v4sf)(__m128)(B), \
    800                                                (__v4sf)(__m128)(W),\
    801                                                (__mmask8)(U), (int)(C),\
    802                                                (int)(R)); })
    803 
    804 #define _mm_mask_range_ss(W , U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C , _MM_FROUND_CUR_DIRECTION)
    805 
    806 #define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \
    807   (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
    808                                                (__v4sf)(__m128)(B), \
    809                                                (__v4sf)_mm_setzero_ps(), \
    810                                                (__mmask8)(U), (int)(C),\
    811                                                (int)(R)); })
    812 
    813 #define _mm_maskz_range_ss(U, A ,B , C) _mm_maskz_range_round_ss(U, A, B, C ,_MM_FROUND_CUR_DIRECTION)
    814 
    815 #define _mm_range_round_sd(A, B, C, R) __extension__ ({           \
    816   (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
    817                                                 (__v2df)(__m128d)(B), \
    818                                                 (__v2df)_mm_setzero_pd(), \
    819                                                 (__mmask8) -1, (int)(C),\
    820                                                 (int)(R)); })
    821 
    822 #define _mm_range_sd(A ,B , C) _mm_range_round_sd(A, B, C ,_MM_FROUND_CUR_DIRECTION)
    823 
    824 #define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \
    825   (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
    826                                                 (__v2df)(__m128d)(B), \
    827                                                 (__v2df)(__m128d)(W),\
    828                                                 (__mmask8)(U), (int)(C),\
    829                                                 (int)(R)); })
    830 
    831 #define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C ,_MM_FROUND_CUR_DIRECTION)
    832 
    833 #define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \
    834   (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
    835                                                 (__v2df)(__m128d)(B), \
    836                                                 (__v2df)_mm_setzero_pd(), \
    837                                                 (__mmask8)(U), (int)(C),\
    838                                                 (int)(R)); })
    839 
    840 #define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C ,_MM_FROUND_CUR_DIRECTION)
    841 
    842 #define _mm512_reduce_pd(A, B) __extension__ ({             \
    843   (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
    844                                            (__v8df)_mm512_setzero_pd(), \
    845                                            (__mmask8)-1, \
    846                                            _MM_FROUND_CUR_DIRECTION); })
    847 
    848 #define _mm512_mask_reduce_pd(W, U, A, B) __extension__ ({ \
    849   (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
    850                                            (__v8df)(__m512d)(W), \
    851                                            (__mmask8)(U), \
    852                                            _MM_FROUND_CUR_DIRECTION); })
    853 
    854 #define _mm512_maskz_reduce_pd(U, A, B) __extension__ ({  \
    855   (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
    856                                            (__v8df)_mm512_setzero_pd(), \
    857                                            (__mmask8)(U), \
    858                                            _MM_FROUND_CUR_DIRECTION); })
    859 
    860 #define _mm512_reduce_ps(A, B) __extension__ ({              \
    861   (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
    862                                           (__v16sf)_mm512_setzero_ps(), \
    863                                           (__mmask16)-1, \
    864                                           _MM_FROUND_CUR_DIRECTION); })
    865 
    866 #define _mm512_mask_reduce_ps(W, U, A, B) __extension__ ({   \
    867   (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
    868                                           (__v16sf)(__m512)(W), \
    869                                           (__mmask16)(U), \
    870                                           _MM_FROUND_CUR_DIRECTION); })
    871 
    872 #define _mm512_maskz_reduce_ps(U, A, B) __extension__ ({       \
    873   (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
    874                                           (__v16sf)_mm512_setzero_ps(), \
    875                                           (__mmask16)(U), \
    876                                           _MM_FROUND_CUR_DIRECTION); })
    877 
    878 #define _mm512_reduce_round_pd(A, B, R) __extension__ ({\
    879   (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
    880                                            (__v8df)_mm512_setzero_pd(), \
    881                                            (__mmask8)-1, (int)(R)); })
    882 
    883 #define _mm512_mask_reduce_round_pd(W, U, A, B, R) __extension__ ({\
    884   (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
    885                                            (__v8df)(__m512d)(W), \
    886                                            (__mmask8)(U), (int)(R)); })
    887 
    888 #define _mm512_maskz_reduce_round_pd(U, A, B, R) __extension__ ({\
    889   (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
    890                                            (__v8df)_mm512_setzero_pd(), \
    891                                            (__mmask8)(U), (int)(R)); })
    892 
    893 #define _mm512_reduce_round_ps(A, B, R) __extension__ ({\
    894   (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
    895                                           (__v16sf)_mm512_setzero_ps(), \
    896                                           (__mmask16)-1, (int)(R)); })
    897 
    898 #define _mm512_mask_reduce_round_ps(W, U, A, B, R) __extension__ ({\
    899   (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
    900                                           (__v16sf)(__m512)(W), \
    901                                           (__mmask16)(U), (int)(R)); })
    902 
    903 #define _mm512_maskz_reduce_round_ps(U, A, B, R) __extension__ ({\
    904   (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
    905                                           (__v16sf)_mm512_setzero_ps(), \
    906                                           (__mmask16)(U), (int)(R)); })
    907 
    908 #define _mm_reduce_ss(A, B, C) __extension__ ({              \
    909   (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
    910                                        (__v4sf)(__m128)(B), \
    911                                        (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
    912                                        (int)(C), _MM_FROUND_CUR_DIRECTION); })
    913 
    914 #define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({   \
    915   (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
    916                                        (__v4sf)(__m128)(B), \
    917                                        (__v4sf)(__m128)(W), (__mmask8)(U), \
    918                                        (int)(C), _MM_FROUND_CUR_DIRECTION); })
    919 
    920 #define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({       \
    921   (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
    922                                        (__v4sf)(__m128)(B), \
    923                                        (__v4sf)_mm_setzero_ps(), \
    924                                        (__mmask8)(U), (int)(C), \
    925                                        _MM_FROUND_CUR_DIRECTION); })
    926 
    927 #define _mm_reduce_round_ss(A, B, C, R) __extension__ ({              \
    928   (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
    929                                        (__v4sf)(__m128)(B), \
    930                                        (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
    931                                        (int)(C), (int)(R)); })
    932 
    933 #define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({   \
    934   (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
    935                                        (__v4sf)(__m128)(B), \
    936                                        (__v4sf)(__m128)(W), (__mmask8)(U), \
    937                                        (int)(C), (int)(R)); })
    938 
    939 #define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({       \
    940   (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
    941                                        (__v4sf)(__m128)(B), \
    942                                        (__v4sf)_mm_setzero_ps(), \
    943                                        (__mmask8)(U), (int)(C), (int)(R)); })
    944 
    945 #define _mm_reduce_sd(A, B, C) __extension__ ({              \
    946   (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
    947                                         (__v2df)(__m128d)(B), \
    948                                         (__v2df)_mm_setzero_pd(), \
    949                                         (__mmask8)-1, (int)(C), \
    950                                         _MM_FROUND_CUR_DIRECTION); })
    951 
    952 #define _mm_mask_reduce_sd(W, U, A, B, C) __extension__ ({   \
    953   (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
    954                                         (__v2df)(__m128d)(B), \
    955                                         (__v2df)(__m128d)(W), (__mmask8)(U), \
    956                                         (int)(C), _MM_FROUND_CUR_DIRECTION); })
    957 
    958 #define _mm_maskz_reduce_sd(U, A, B, C) __extension__ ({       \
    959   (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
    960                                         (__v2df)(__m128d)(B), \
    961                                         (__v2df)_mm_setzero_pd(), \
    962                                         (__mmask8)(U), (int)(C), \
    963                                         _MM_FROUND_CUR_DIRECTION); })
    964 
    965 #define _mm_reduce_round_sd(A, B, C, R) __extension__ ({              \
    966   (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
    967                                         (__v2df)(__m128d)(B), \
    968                                         (__v2df)_mm_setzero_pd(), \
    969                                         (__mmask8)-1, (int)(C), (int)(R)); })
    970 
    971 #define _mm_mask_reduce_round_sd(W, U, A, B, C, R) __extension__ ({   \
    972   (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
    973                                         (__v2df)(__m128d)(B), \
    974                                         (__v2df)(__m128d)(W), (__mmask8)(U), \
    975                                         (int)(C), (int)(R)); })
    976 
    977 #define _mm_maskz_reduce_round_sd(U, A, B, C, R) __extension__ ({       \
    978   (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
    979                                         (__v2df)(__m128d)(B), \
    980                                         (__v2df)_mm_setzero_pd(), \
    981                                         (__mmask8)(U), (int)(C), (int)(R)); })
    982 
    983 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
    984 _mm512_movepi32_mask (__m512i __A)
    985 {
    986   return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
    987 }
    988 
    989 static __inline__ __m512i __DEFAULT_FN_ATTRS
    990 _mm512_movm_epi32 (__mmask16 __A)
    991 {
    992   return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
    993 }
    994 
    995 static __inline__ __m512i __DEFAULT_FN_ATTRS
    996 _mm512_movm_epi64 (__mmask8 __A)
    997 {
    998   return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
    999 }
   1000 
   1001 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   1002 _mm512_movepi64_mask (__m512i __A)
   1003 {
   1004   return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
   1005 }
   1006 
   1007 
   1008 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1009 _mm512_broadcast_f32x2 (__m128 __A)
   1010 {
   1011   return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
   1012                 (__v16sf)_mm512_undefined_ps(),
   1013                 (__mmask16) -1);
   1014 }
   1015 
   1016 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1017 _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
   1018 {
   1019   return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
   1020                 (__v16sf)
   1021                 __O, __M);
   1022 }
   1023 
   1024 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1025 _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
   1026 {
   1027   return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
   1028                 (__v16sf)_mm512_setzero_ps (),
   1029                 __M);
   1030 }
   1031 
   1032 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1033 _mm512_broadcast_f32x8 (__m256 __A)
   1034 {
   1035   return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
   1036                 _mm512_undefined_ps(),
   1037                 (__mmask16) -1);
   1038 }
   1039 
   1040 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1041 _mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A)
   1042 {
   1043   return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
   1044                 (__v16sf)__O,
   1045                 __M);
   1046 }
   1047 
   1048 static __inline__ __m512 __DEFAULT_FN_ATTRS
   1049 _mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A)
   1050 {
   1051   return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
   1052                 (__v16sf)_mm512_setzero_ps (),
   1053                 __M);
   1054 }
   1055 
   1056 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1057 _mm512_broadcast_f64x2 (__m128d __A)
   1058 {
   1059   return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
   1060                  (__v8df)_mm512_undefined_pd(),
   1061                  (__mmask8) -1);
   1062 }
   1063 
   1064 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1065 _mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
   1066 {
   1067   return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
   1068                  (__v8df)
   1069                  __O, __M);
   1070 }
   1071 
   1072 static __inline__ __m512d __DEFAULT_FN_ATTRS
   1073 _mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
   1074 {
   1075   return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
   1076                  (__v8df)_mm512_setzero_ps (),
   1077                  __M);
   1078 }
   1079 
   1080 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1081 _mm512_broadcast_i32x2 (__m128i __A)
   1082 {
   1083   return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
   1084                  (__v16si)_mm512_setzero_si512(),
   1085                  (__mmask16) -1);
   1086 }
   1087 
   1088 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1089 _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
   1090 {
   1091   return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
   1092                  (__v16si)
   1093                  __O, __M);
   1094 }
   1095 
   1096 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1097 _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
   1098 {
   1099   return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
   1100                  (__v16si)_mm512_setzero_si512 (),
   1101                  __M);
   1102 }
   1103 
   1104 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1105 _mm512_broadcast_i32x8 (__m256i __A)
   1106 {
   1107   return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
   1108                  (__v16si)_mm512_setzero_si512(),
   1109                  (__mmask16) -1);
   1110 }
   1111 
   1112 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1113 _mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A)
   1114 {
   1115   return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
   1116                  (__v16si)__O,
   1117                  __M);
   1118 }
   1119 
   1120 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1121 _mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A)
   1122 {
   1123   return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
   1124                  (__v16si)
   1125                  _mm512_setzero_si512 (),
   1126                  __M);
   1127 }
   1128 
   1129 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1130 _mm512_broadcast_i64x2 (__m128i __A)
   1131 {
   1132   return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
   1133                  (__v8di)_mm512_setzero_si512(),
   1134                  (__mmask8) -1);
   1135 }
   1136 
   1137 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1138 _mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A)
   1139 {
   1140   return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
   1141                  (__v8di)
   1142                  __O, __M);
   1143 }
   1144 
   1145 static __inline__ __m512i __DEFAULT_FN_ATTRS
   1146 _mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
   1147 {
   1148   return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
   1149                  (__v8di)_mm512_setzero_si512 (),
   1150                  __M);
   1151 }
   1152 
   1153 #define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \
   1154   (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
   1155                                            (__v8sf)_mm256_setzero_ps(), \
   1156                                            (__mmask8)-1); })
   1157 
   1158 #define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \
   1159   (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
   1160                                            (__v8sf)(__m256)(W), \
   1161                                            (__mmask8)(U)); })
   1162 
   1163 #define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \
   1164   (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
   1165                                            (__v8sf)_mm256_setzero_ps(), \
   1166                                            (__mmask8)(U)); })
   1167 
   1168 #define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \
   1169   (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
   1170                                                 (int)(imm), \
   1171                                                 (__v2df)_mm_setzero_pd(), \
   1172                                                 (__mmask8)-1); })
   1173 
   1174 #define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
   1175   (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
   1176                                                 (int)(imm), \
   1177                                                 (__v2df)(__m128d)(W), \
   1178                                                 (__mmask8)(U)); })
   1179 
   1180 #define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
   1181   (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
   1182                                                 (int)(imm), \
   1183                                                 (__v2df)_mm_setzero_pd(), \
   1184                                                 (__mmask8)(U)); })
   1185 
   1186 #define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \
   1187   (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
   1188                                             (__v8si)_mm256_setzero_si256(), \
   1189                                             (__mmask8)-1); })
   1190 
   1191 #define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \
   1192   (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
   1193                                             (__v8si)(__m256i)(W), \
   1194                                             (__mmask8)(U)); })
   1195 
   1196 #define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \
   1197   (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
   1198                                             (__v8si)_mm256_setzero_si256(), \
   1199                                             (__mmask8)(U)); })
   1200 
   1201 #define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \
   1202   (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
   1203                                                 (int)(imm), \
   1204                                                 (__v2di)_mm_setzero_di(), \
   1205                                                 (__mmask8)-1); })
   1206 
   1207 #define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
   1208   (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
   1209                                                 (int)(imm), \
   1210                                                 (__v2di)(__m128i)(W), \
   1211                                                 (__mmask8)(U)); })
   1212 
   1213 #define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
   1214   (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
   1215                                                 (int)(imm), \
   1216                                                 (__v2di)_mm_setzero_di(), \
   1217                                                 (__mmask8)(U)); })
   1218 
   1219 #define _mm512_insertf32x8(A, B, imm) __extension__ ({ \
   1220   (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
   1221                                           (__v8sf)(__m256)(B), (int)(imm), \
   1222                                           (__v16sf)_mm512_setzero_ps(), \
   1223                                           (__mmask16)-1); })
   1224 
   1225 #define _mm512_mask_insertf32x8(W, U, A, B, imm) __extension__ ({ \
   1226   (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
   1227                                           (__v8sf)(__m256)(B), (int)(imm), \
   1228                                           (__v16sf)(__m512)(W), \
   1229                                           (__mmask16)(U)); })
   1230 
   1231 #define _mm512_maskz_insertf32x8(U, A, B, imm) __extension__ ({ \
   1232   (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
   1233                                           (__v8sf)(__m256)(B), (int)(imm), \
   1234                                           (__v16sf)_mm512_setzero_ps(), \
   1235                                           (__mmask16)(U)); })
   1236 
   /* Unmasked form: forwards to __builtin_ia32_insertf64x2_512_mask with A
    * viewed as __v8df, B viewed as __v2df, imm converted to int, the value of
    * _mm512_setzero_pd() as the fourth operand, and the constant mask
    * (__mmask8)-1 (every mask bit set).  The result is cast to __m512d.
    * NOTE(review): imm presumably selects which 128-bit lane of A is
    * replaced by B -- confirm against the Intel Intrinsics Guide. */
   1237 #define _mm512_insertf64x2(A, B, imm) __extension__ ({ \
   1238   (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \
   1239                                                (__v2df)(__m128d)(B), \
   1240                                                (int)(imm), \
   1241                                                (__v8df)_mm512_setzero_pd(), \
   1242                                                (__mmask8)-1); })
   1243 
   /* Forwards to __builtin_ia32_insertf64x2_512_mask with A viewed as
    * __v8df, B viewed as __v2df, imm converted to int, W viewed as __v8df as
    * the fourth operand, and U converted to __mmask8.  The result is cast to
    * __m512d.
    * NOTE(review): W presumably supplies the result elements whose bit in U
    * is clear -- confirm against the Intel Intrinsics Guide. */
   1244 #define _mm512_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \
   1245   (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \
   1246                                                (__v2df)(__m128d)(B), \
   1247                                                (int)(imm), \
   1248                                                (__v8df)(__m512d)(W), \
   1249                                                (__mmask8)(U)); })
   1250 
   /* Forwards to __builtin_ia32_insertf64x2_512_mask with A viewed as
    * __v8df, B viewed as __v2df, imm converted to int, the value of
    * _mm512_setzero_pd() as the fourth operand, and U converted to __mmask8.
    * The result is cast to __m512d.
    * NOTE(review): presumably result elements whose bit in U is clear come
    * out as zero -- confirm against the Intel Intrinsics Guide. */
   1251 #define _mm512_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \
   1252   (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \
   1253                                                (__v2df)(__m128d)(B), \
   1254                                                (int)(imm), \
   1255                                                (__v8df)_mm512_setzero_pd(), \
   1256                                                (__mmask8)(U)); })
   1257 
   /* Unmasked form: forwards to __builtin_ia32_inserti32x8_mask with A viewed
    * as __v16si, B viewed as __v8si, imm converted to int, the value of
    * _mm512_setzero_si512() as the fourth operand, and the constant mask
    * (__mmask16)-1 (every mask bit set).  The result is cast to __m512i.
    * NOTE(review): imm presumably selects which 256-bit half of A is
    * replaced by B -- confirm against the Intel Intrinsics Guide. */
   1258 #define _mm512_inserti32x8(A, B, imm) __extension__ ({ \
   1259   (__m512i)__builtin_ia32_inserti32x8_mask((__v16si)(__m512i)(A), \
   1260                                            (__v8si)(__m256i)(B), (int)(imm), \
   1261                                            (__v16si)_mm512_setzero_si512(), \
   1262                                            (__mmask16)-1); })
   1263 
   /* Forwards to __builtin_ia32_inserti32x8_mask with A viewed as __v16si,
    * B viewed as __v8si, imm converted to int, W viewed as __v16si as the
    * fourth operand, and U converted to __mmask16.  The result is cast to
    * __m512i.
    * NOTE(review): W presumably supplies the result elements whose bit in U
    * is clear -- confirm against the Intel Intrinsics Guide. */
   1264 #define _mm512_mask_inserti32x8(W, U, A, B, imm) __extension__ ({ \
   1265   (__m512i)__builtin_ia32_inserti32x8_mask((__v16si)(__m512i)(A), \
   1266                                            (__v8si)(__m256i)(B), (int)(imm), \
   1267                                            (__v16si)(__m512i)(W), \
   1268                                            (__mmask16)(U)); })
   1269 
   /* Forwards to __builtin_ia32_inserti32x8_mask with A viewed as __v16si,
    * B viewed as __v8si, imm converted to int, the value of
    * _mm512_setzero_si512() as the fourth operand, and U converted to
    * __mmask16.  The result is cast to __m512i.
    * NOTE(review): presumably result elements whose bit in U is clear come
    * out as zero -- confirm against the Intel Intrinsics Guide. */
   1270 #define _mm512_maskz_inserti32x8(U, A, B, imm) __extension__ ({ \
   1271   (__m512i)__builtin_ia32_inserti32x8_mask((__v16si)(__m512i)(A), \
   1272                                            (__v8si)(__m256i)(B), (int)(imm), \
   1273                                            (__v16si)_mm512_setzero_si512(), \
   1274                                            (__mmask16)(U)); })
   1275 
   /* Unmasked form: forwards to __builtin_ia32_inserti64x2_512_mask with A
    * viewed as __v8di, B viewed as __v2di, imm converted to int, the value of
    * _mm512_setzero_si512() (viewed as __v8di) as the fourth operand, and the
    * constant mask (__mmask8)-1 (every mask bit set).  The result is cast to
    * __m512i.
    * NOTE(review): imm presumably selects which 128-bit lane of A is
    * replaced by B -- confirm against the Intel Intrinsics Guide. */
   1276 #define _mm512_inserti64x2(A, B, imm) __extension__ ({ \
   1277   (__m512i)__builtin_ia32_inserti64x2_512_mask((__v8di)(__m512i)(A), \
   1278                                                (__v2di)(__m128i)(B), \
   1279                                                (int)(imm), \
   1280                                                (__v8di)_mm512_setzero_si512(), \
   1281                                                (__mmask8)-1); })
   1282 
   /* Forwards to __builtin_ia32_inserti64x2_512_mask with A viewed as
    * __v8di, B viewed as __v2di, imm converted to int, W viewed as __v8di as
    * the fourth operand, and U converted to __mmask8.  The result is cast to
    * __m512i.
    * NOTE(review): W presumably supplies the result elements whose bit in U
    * is clear -- confirm against the Intel Intrinsics Guide. */
   1283 #define _mm512_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \
   1284   (__m512i)__builtin_ia32_inserti64x2_512_mask((__v8di)(__m512i)(A), \
   1285                                                (__v2di)(__m128i)(B), \
   1286                                                (int)(imm), \
   1287                                                (__v8di)(__m512i)(W), \
   1288                                                (__mmask8)(U)); })
   1289 
   /* Forwards to __builtin_ia32_inserti64x2_512_mask with A viewed as
    * __v8di, B viewed as __v2di, imm converted to int, the value of
    * _mm512_setzero_si512() (viewed as __v8di) as the fourth operand, and U
    * converted to __mmask8.  The result is cast to __m512i.
    * NOTE(review): presumably result elements whose bit in U is clear come
    * out as zero -- confirm against the Intel Intrinsics Guide. */
   1290 #define _mm512_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \
   1291   (__m512i)__builtin_ia32_inserti64x2_512_mask((__v8di)(__m512i)(A), \
   1292                                                (__v2di)(__m128i)(B), \
   1293                                                (int)(imm), \
   1294                                                (__v8di)_mm512_setzero_si512(), \
   1295                                                (__mmask8)(U)); })
   1296 
   /* Forwards to __builtin_ia32_fpclassps512_mask with A viewed as __v16sf,
    * imm converted to int, and U converted to __mmask16; the builtin's
    * result is cast to __mmask16.
    * NOTE(review): imm presumably selects which floating-point categories
    * are tested, and U presumably limits which result bits may be set --
    * confirm against Intel's VFPCLASSPS description. */
   1297 #define _mm512_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
   1298   (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
   1299                                               (int)(imm), (__mmask16)(U)); })
   1300 
   /* Unmasked form: forwards to __builtin_ia32_fpclassps512_mask with A
    * viewed as __v16sf, imm converted to int, and the constant mask
    * (__mmask16)-1 (every mask bit set); the result is cast to __mmask16.
    * NOTE(review): imm presumably selects which floating-point categories
    * are tested -- confirm against Intel's VFPCLASSPS description. */
   1301 #define _mm512_fpclass_ps_mask(A, imm) __extension__ ({ \
   1302   (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
   1303                                               (int)(imm), (__mmask16)-1); })
   1304 
   /* Forwards to __builtin_ia32_fpclasspd512_mask with A viewed as __v8df,
    * imm converted to int, and U converted to __mmask8; the builtin's result
    * is cast to __mmask8.
    * NOTE(review): imm presumably selects which floating-point categories
    * are tested, and U presumably limits which result bits may be set --
    * confirm against Intel's VFPCLASSPD description. */
   1305 #define _mm512_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
   1306   (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
   1307                                              (__mmask8)(U)); })
   1308 
   /* Unmasked form: forwards to __builtin_ia32_fpclasspd512_mask with A
    * viewed as __v8df, imm converted to int, and the constant mask
    * (__mmask8)-1 (every mask bit set); the result is cast to __mmask8.
    * NOTE(review): imm presumably selects which floating-point categories
    * are tested -- confirm against Intel's VFPCLASSPD description. */
   1309 #define _mm512_fpclass_pd_mask(A, imm) __extension__ ({ \
   1310   (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
   1311                                              (__mmask8)-1); })
   1312 
   /* Unmasked form: forwards to __builtin_ia32_fpclasssd_mask with A viewed
    * as __v2df, imm converted to int, and the constant mask (__mmask8)-1
    * (every mask bit set); the result is cast to __mmask8.
    * NOTE(review): presumably only the lowest double of A is classified and
    * only bit 0 of the result is meaningful -- confirm against Intel's
    * VFPCLASSSD description. */
   1313 #define _mm_fpclass_sd_mask(A, imm) __extension__ ({ \
   1314   (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
   1315                                           (__mmask8)-1); })
   1316 
   /* Forwards to __builtin_ia32_fpclasssd_mask with A viewed as __v2df, imm
    * converted to int, and U converted to __mmask8; the result is cast to
    * __mmask8.
    * NOTE(review): presumably only the lowest double of A is classified and
    * bit 0 of U gates the result -- confirm against Intel's VFPCLASSSD
    * description. */
   1317 #define _mm_mask_fpclass_sd_mask(U, A, imm) __extension__ ({ \
   1318   (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
   1319                                           (__mmask8)(U)); })
   1320 
   /* Unmasked form: forwards to __builtin_ia32_fpclassss_mask with A viewed
    * as __v4sf, imm converted to int, and the constant mask (__mmask8)-1
    * (every mask bit set); the result is cast to __mmask8.
    * NOTE(review): presumably only the lowest float of A is classified and
    * only bit 0 of the result is meaningful -- confirm against Intel's
    * VFPCLASSSS description. */
   1321 #define _mm_fpclass_ss_mask(A, imm) __extension__ ({ \
   1322   (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
   1323                                           (__mmask8)-1); })
   1324 
   /* Forwards to __builtin_ia32_fpclassss_mask with A viewed as __v4sf, imm
    * converted to int, and U converted to __mmask8; the result is cast to
    * __mmask8.
    * NOTE(review): presumably only the lowest float of A is classified and
    * bit 0 of U gates the result -- confirm against Intel's VFPCLASSSS
    * description. */
   1325 #define _mm_mask_fpclass_ss_mask(U, A, imm) __extension__ ({ \
   1326   (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
   1327                                           (__mmask8)(U)); })
   1328 
   /* __DEFAULT_FN_ATTRS is defined near the top of this header for the
    * functions in this file; undefine it here so the name does not remain
    * defined for whatever is processed after this header. */
   1329 #undef __DEFAULT_FN_ATTRS
   1330 
   1331 #endif /* __AVX512DQINTRIN_H */
   1332