Home | History | Annotate | Download | only in clang-include
      1 /*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 
     24 #ifndef __IMMINTRIN_H
     25 #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
     26 #endif
     27 
     28 #ifndef __AVX512VLDQINTRIN_H
     29 #define __AVX512VLDQINTRIN_H
     30 
/* Define the default attributes for the functions in this file:
   always_inline and nodebug, plus a target attribute naming the
   "avx512vl,avx512dq" feature string. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq")))
     33 
     34 static __inline__ __m256i __DEFAULT_FN_ATTRS
     35 _mm256_mullo_epi64 (__m256i __A, __m256i __B) {
     36   return (__m256i) ((__v4du) __A * (__v4du) __B);
     37 }
     38 
     39 static __inline__ __m256i __DEFAULT_FN_ATTRS
     40 _mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
     41   return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
     42               (__v4di) __B,
     43               (__v4di) __W,
     44               (__mmask8) __U);
     45 }
     46 
     47 static __inline__ __m256i __DEFAULT_FN_ATTRS
     48 _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
     49   return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
     50               (__v4di) __B,
     51               (__v4di)
     52               _mm256_setzero_si256 (),
     53               (__mmask8) __U);
     54 }
     55 
     56 static __inline__ __m128i __DEFAULT_FN_ATTRS
     57 _mm_mullo_epi64 (__m128i __A, __m128i __B) {
     58   return (__m128i) ((__v2du) __A * (__v2du) __B);
     59 }
     60 
     61 static __inline__ __m128i __DEFAULT_FN_ATTRS
     62 _mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
     63   return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
     64               (__v2di) __B,
     65               (__v2di) __W,
     66               (__mmask8) __U);
     67 }
     68 
     69 static __inline__ __m128i __DEFAULT_FN_ATTRS
     70 _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
     71   return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
     72               (__v2di) __B,
     73               (__v2di)
     74               _mm_setzero_si128 (),
     75               (__mmask8) __U);
     76 }
     77 
     78 static __inline__ __m256d __DEFAULT_FN_ATTRS
     79 _mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
     80   return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
     81               (__v4df) __B,
     82               (__v4df) __W,
     83               (__mmask8) __U);
     84 }
     85 
     86 static __inline__ __m256d __DEFAULT_FN_ATTRS
     87 _mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
     88   return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
     89               (__v4df) __B,
     90               (__v4df)
     91               _mm256_setzero_pd (),
     92               (__mmask8) __U);
     93 }
     94 
     95 static __inline__ __m128d __DEFAULT_FN_ATTRS
     96 _mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
     97   return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
     98               (__v2df) __B,
     99               (__v2df) __W,
    100               (__mmask8) __U);
    101 }
    102 
    103 static __inline__ __m128d __DEFAULT_FN_ATTRS
    104 _mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
    105   return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
    106               (__v2df) __B,
    107               (__v2df)
    108               _mm_setzero_pd (),
    109               (__mmask8) __U);
    110 }
    111 
    112 static __inline__ __m256 __DEFAULT_FN_ATTRS
    113 _mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
    114   return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
    115              (__v8sf) __B,
    116              (__v8sf) __W,
    117              (__mmask8) __U);
    118 }
    119 
    120 static __inline__ __m256 __DEFAULT_FN_ATTRS
    121 _mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
    122   return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
    123              (__v8sf) __B,
    124              (__v8sf)
    125              _mm256_setzero_ps (),
    126              (__mmask8) __U);
    127 }
    128 
    129 static __inline__ __m128 __DEFAULT_FN_ATTRS
    130 _mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
    131   return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
    132              (__v4sf) __B,
    133              (__v4sf) __W,
    134              (__mmask8) __U);
    135 }
    136 
    137 static __inline__ __m128 __DEFAULT_FN_ATTRS
    138 _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
    139   return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
    140              (__v4sf) __B,
    141              (__v4sf)
    142              _mm_setzero_ps (),
    143              (__mmask8) __U);
    144 }
    145 
    146 static __inline__ __m256d __DEFAULT_FN_ATTRS
    147 _mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
    148   return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
    149              (__v4df) __B,
    150              (__v4df) __W,
    151              (__mmask8) __U);
    152 }
    153 
    154 static __inline__ __m256d __DEFAULT_FN_ATTRS
    155 _mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
    156   return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
    157              (__v4df) __B,
    158              (__v4df)
    159              _mm256_setzero_pd (),
    160              (__mmask8) __U);
    161 }
    162 
    163 static __inline__ __m128d __DEFAULT_FN_ATTRS
    164 _mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
    165   return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
    166              (__v2df) __B,
    167              (__v2df) __W,
    168              (__mmask8) __U);
    169 }
    170 
    171 static __inline__ __m128d __DEFAULT_FN_ATTRS
    172 _mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
    173   return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
    174              (__v2df) __B,
    175              (__v2df)
    176              _mm_setzero_pd (),
    177              (__mmask8) __U);
    178 }
    179 
    180 static __inline__ __m256 __DEFAULT_FN_ATTRS
    181 _mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
    182   return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
    183             (__v8sf) __B,
    184             (__v8sf) __W,
    185             (__mmask8) __U);
    186 }
    187 
    188 static __inline__ __m256 __DEFAULT_FN_ATTRS
    189 _mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
    190   return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
    191             (__v8sf) __B,
    192             (__v8sf)
    193             _mm256_setzero_ps (),
    194             (__mmask8) __U);
    195 }
    196 
    197 static __inline__ __m128 __DEFAULT_FN_ATTRS
    198 _mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
    199   return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
    200             (__v4sf) __B,
    201             (__v4sf) __W,
    202             (__mmask8) __U);
    203 }
    204 
    205 static __inline__ __m128 __DEFAULT_FN_ATTRS
    206 _mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
    207   return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
    208             (__v4sf) __B,
    209             (__v4sf)
    210             _mm_setzero_ps (),
    211             (__mmask8) __U);
    212 }
    213 
    214 static __inline__ __m256d __DEFAULT_FN_ATTRS
    215 _mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
    216         __m256d __B) {
    217   return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
    218              (__v4df) __B,
    219              (__v4df) __W,
    220              (__mmask8) __U);
    221 }
    222 
    223 static __inline__ __m256d __DEFAULT_FN_ATTRS
    224 _mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
    225   return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
    226              (__v4df) __B,
    227              (__v4df)
    228              _mm256_setzero_pd (),
    229              (__mmask8) __U);
    230 }
    231 
    232 static __inline__ __m128d __DEFAULT_FN_ATTRS
    233 _mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
    234   return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
    235              (__v2df) __B,
    236              (__v2df) __W,
    237              (__mmask8) __U);
    238 }
    239 
    240 static __inline__ __m128d __DEFAULT_FN_ATTRS
    241 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
    242   return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
    243              (__v2df) __B,
    244              (__v2df)
    245              _mm_setzero_pd (),
    246              (__mmask8) __U);
    247 }
    248 
    249 static __inline__ __m256 __DEFAULT_FN_ATTRS
    250 _mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
    251   return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
    252             (__v8sf) __B,
    253             (__v8sf) __W,
    254             (__mmask8) __U);
    255 }
    256 
    257 static __inline__ __m256 __DEFAULT_FN_ATTRS
    258 _mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
    259   return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
    260             (__v8sf) __B,
    261             (__v8sf)
    262             _mm256_setzero_ps (),
    263             (__mmask8) __U);
    264 }
    265 
    266 static __inline__ __m128 __DEFAULT_FN_ATTRS
    267 _mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
    268   return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
    269             (__v4sf) __B,
    270             (__v4sf) __W,
    271             (__mmask8) __U);
    272 }
    273 
    274 static __inline__ __m128 __DEFAULT_FN_ATTRS
    275 _mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
    276   return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
    277             (__v4sf) __B,
    278             (__v4sf)
    279             _mm_setzero_ps (),
    280             (__mmask8) __U);
    281 }
    282 
    283 static __inline__ __m256d __DEFAULT_FN_ATTRS
    284 _mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
    285   return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
    286             (__v4df) __B,
    287             (__v4df) __W,
    288             (__mmask8) __U);
    289 }
    290 
    291 static __inline__ __m256d __DEFAULT_FN_ATTRS
    292 _mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
    293   return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
    294             (__v4df) __B,
    295             (__v4df)
    296             _mm256_setzero_pd (),
    297             (__mmask8) __U);
    298 }
    299 
    300 static __inline__ __m128d __DEFAULT_FN_ATTRS
    301 _mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
    302   return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
    303             (__v2df) __B,
    304             (__v2df) __W,
    305             (__mmask8) __U);
    306 }
    307 
    308 static __inline__ __m128d __DEFAULT_FN_ATTRS
    309 _mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
    310   return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
    311             (__v2df) __B,
    312             (__v2df)
    313             _mm_setzero_pd (),
    314             (__mmask8) __U);
    315 }
    316 
    317 static __inline__ __m256 __DEFAULT_FN_ATTRS
    318 _mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
    319   return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
    320                  (__v8sf) __B,
    321                  (__v8sf) __W,
    322                  (__mmask8) __U);
    323 }
    324 
    325 static __inline__ __m256 __DEFAULT_FN_ATTRS
    326 _mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
    327   return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
    328                  (__v8sf) __B,
    329                  (__v8sf)
    330                  _mm256_setzero_ps (),
    331                  (__mmask8) __U);
    332 }
    333 
    334 static __inline__ __m128 __DEFAULT_FN_ATTRS
    335 _mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
    336   return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
    337                  (__v4sf) __B,
    338                  (__v4sf) __W,
    339                  (__mmask8) __U);
    340 }
    341 
    342 static __inline__ __m128 __DEFAULT_FN_ATTRS
    343 _mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
    344   return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
    345                  (__v4sf) __B,
    346                  (__v4sf)
    347                  _mm_setzero_ps (),
    348                  (__mmask8) __U);
    349 }
    350 
    351 static __inline__ __m128i __DEFAULT_FN_ATTRS
    352 _mm_cvtpd_epi64 (__m128d __A) {
    353   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
    354                 (__v2di) _mm_setzero_si128(),
    355                 (__mmask8) -1);
    356 }
    357 
    358 static __inline__ __m128i __DEFAULT_FN_ATTRS
    359 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
    360   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
    361                 (__v2di) __W,
    362                 (__mmask8) __U);
    363 }
    364 
    365 static __inline__ __m128i __DEFAULT_FN_ATTRS
    366 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) {
    367   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
    368                 (__v2di) _mm_setzero_si128(),
    369                 (__mmask8) __U);
    370 }
    371 
    372 static __inline__ __m256i __DEFAULT_FN_ATTRS
    373 _mm256_cvtpd_epi64 (__m256d __A) {
    374   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
    375                 (__v4di) _mm256_setzero_si256(),
    376                 (__mmask8) -1);
    377 }
    378 
    379 static __inline__ __m256i __DEFAULT_FN_ATTRS
    380 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
    381   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
    382                 (__v4di) __W,
    383                 (__mmask8) __U);
    384 }
    385 
    386 static __inline__ __m256i __DEFAULT_FN_ATTRS
    387 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) {
    388   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
    389                 (__v4di) _mm256_setzero_si256(),
    390                 (__mmask8) __U);
    391 }
    392 
    393 static __inline__ __m128i __DEFAULT_FN_ATTRS
    394 _mm_cvtpd_epu64 (__m128d __A) {
    395   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
    396                 (__v2di) _mm_setzero_si128(),
    397                 (__mmask8) -1);
    398 }
    399 
    400 static __inline__ __m128i __DEFAULT_FN_ATTRS
    401 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
    402   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
    403                 (__v2di) __W,
    404                 (__mmask8) __U);
    405 }
    406 
    407 static __inline__ __m128i __DEFAULT_FN_ATTRS
    408 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) {
    409   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
    410                 (__v2di) _mm_setzero_si128(),
    411                 (__mmask8) __U);
    412 }
    413 
    414 static __inline__ __m256i __DEFAULT_FN_ATTRS
    415 _mm256_cvtpd_epu64 (__m256d __A) {
    416   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
    417                 (__v4di) _mm256_setzero_si256(),
    418                 (__mmask8) -1);
    419 }
    420 
    421 static __inline__ __m256i __DEFAULT_FN_ATTRS
    422 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
    423   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
    424                 (__v4di) __W,
    425                 (__mmask8) __U);
    426 }
    427 
    428 static __inline__ __m256i __DEFAULT_FN_ATTRS
    429 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) {
    430   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
    431                 (__v4di) _mm256_setzero_si256(),
    432                 (__mmask8) __U);
    433 }
    434 
    435 static __inline__ __m128i __DEFAULT_FN_ATTRS
    436 _mm_cvtps_epi64 (__m128 __A) {
    437   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
    438                 (__v2di) _mm_setzero_si128(),
    439                 (__mmask8) -1);
    440 }
    441 
    442 static __inline__ __m128i __DEFAULT_FN_ATTRS
    443 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
    444   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
    445                 (__v2di) __W,
    446                 (__mmask8) __U);
    447 }
    448 
    449 static __inline__ __m128i __DEFAULT_FN_ATTRS
    450 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
    451   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
    452                 (__v2di) _mm_setzero_si128(),
    453                 (__mmask8) __U);
    454 }
    455 
    456 static __inline__ __m256i __DEFAULT_FN_ATTRS
    457 _mm256_cvtps_epi64 (__m128 __A) {
    458   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
    459                 (__v4di) _mm256_setzero_si256(),
    460                 (__mmask8) -1);
    461 }
    462 
    463 static __inline__ __m256i __DEFAULT_FN_ATTRS
    464 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
    465   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
    466                 (__v4di) __W,
    467                 (__mmask8) __U);
    468 }
    469 
    470 static __inline__ __m256i __DEFAULT_FN_ATTRS
    471 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
    472   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
    473                 (__v4di) _mm256_setzero_si256(),
    474                 (__mmask8) __U);
    475 }
    476 
    477 static __inline__ __m128i __DEFAULT_FN_ATTRS
    478 _mm_cvtps_epu64 (__m128 __A) {
    479   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
    480                 (__v2di) _mm_setzero_si128(),
    481                 (__mmask8) -1);
    482 }
    483 
    484 static __inline__ __m128i __DEFAULT_FN_ATTRS
    485 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
    486   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
    487                 (__v2di) __W,
    488                 (__mmask8) __U);
    489 }
    490 
    491 static __inline__ __m128i __DEFAULT_FN_ATTRS
    492 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
    493   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
    494                 (__v2di) _mm_setzero_si128(),
    495                 (__mmask8) __U);
    496 }
    497 
    498 static __inline__ __m256i __DEFAULT_FN_ATTRS
    499 _mm256_cvtps_epu64 (__m128 __A) {
    500   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
    501                 (__v4di) _mm256_setzero_si256(),
    502                 (__mmask8) -1);
    503 }
    504 
    505 static __inline__ __m256i __DEFAULT_FN_ATTRS
    506 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
    507   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
    508                 (__v4di) __W,
    509                 (__mmask8) __U);
    510 }
    511 
    512 static __inline__ __m256i __DEFAULT_FN_ATTRS
    513 _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
    514   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
    515                 (__v4di) _mm256_setzero_si256(),
    516                 (__mmask8) __U);
    517 }
    518 
    519 static __inline__ __m128d __DEFAULT_FN_ATTRS
    520 _mm_cvtepi64_pd (__m128i __A) {
    521   return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
    522                 (__v2df) _mm_setzero_pd(),
    523                 (__mmask8) -1);
    524 }
    525 
    526 static __inline__ __m128d __DEFAULT_FN_ATTRS
    527 _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
    528   return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
    529                 (__v2df) __W,
    530                 (__mmask8) __U);
    531 }
    532 
    533 static __inline__ __m128d __DEFAULT_FN_ATTRS
    534 _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
    535   return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
    536                 (__v2df) _mm_setzero_pd(),
    537                 (__mmask8) __U);
    538 }
    539 
    540 static __inline__ __m256d __DEFAULT_FN_ATTRS
    541 _mm256_cvtepi64_pd (__m256i __A) {
    542   return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
    543                 (__v4df) _mm256_setzero_pd(),
    544                 (__mmask8) -1);
    545 }
    546 
    547 static __inline__ __m256d __DEFAULT_FN_ATTRS
    548 _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
    549   return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
    550                 (__v4df) __W,
    551                 (__mmask8) __U);
    552 }
    553 
    554 static __inline__ __m256d __DEFAULT_FN_ATTRS
    555 _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
    556   return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
    557                 (__v4df) _mm256_setzero_pd(),
    558                 (__mmask8) __U);
    559 }
    560 
    561 static __inline__ __m128 __DEFAULT_FN_ATTRS
    562 _mm_cvtepi64_ps (__m128i __A) {
    563   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
    564                 (__v4sf) _mm_setzero_ps(),
    565                 (__mmask8) -1);
    566 }
    567 
    568 static __inline__ __m128 __DEFAULT_FN_ATTRS
    569 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
    570   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
    571                 (__v4sf) __W,
    572                 (__mmask8) __U);
    573 }
    574 
    575 static __inline__ __m128 __DEFAULT_FN_ATTRS
    576 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
    577   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
    578                 (__v4sf) _mm_setzero_ps(),
    579                 (__mmask8) __U);
    580 }
    581 
    582 static __inline__ __m128 __DEFAULT_FN_ATTRS
    583 _mm256_cvtepi64_ps (__m256i __A) {
    584   return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
    585                 (__v4sf) _mm_setzero_ps(),
    586                 (__mmask8) -1);
    587 }
    588 
    589 static __inline__ __m128 __DEFAULT_FN_ATTRS
    590 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
    591   return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
    592                 (__v4sf) __W,
    593                 (__mmask8) __U);
    594 }
    595 
    596 static __inline__ __m128 __DEFAULT_FN_ATTRS
    597 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
    598   return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
    599                 (__v4sf) _mm_setzero_ps(),
    600                 (__mmask8) __U);
    601 }
    602 
    603 static __inline__ __m128i __DEFAULT_FN_ATTRS
    604 _mm_cvttpd_epi64 (__m128d __A) {
    605   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
    606                 (__v2di) _mm_setzero_si128(),
    607                 (__mmask8) -1);
    608 }
    609 
    610 static __inline__ __m128i __DEFAULT_FN_ATTRS
    611 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
    612   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
    613                 (__v2di) __W,
    614                 (__mmask8) __U);
    615 }
    616 
    617 static __inline__ __m128i __DEFAULT_FN_ATTRS
    618 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) {
    619   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
    620                 (__v2di) _mm_setzero_si128(),
    621                 (__mmask8) __U);
    622 }
    623 
    624 static __inline__ __m256i __DEFAULT_FN_ATTRS
    625 _mm256_cvttpd_epi64 (__m256d __A) {
    626   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
    627                 (__v4di) _mm256_setzero_si256(),
    628                 (__mmask8) -1);
    629 }
    630 
    631 static __inline__ __m256i __DEFAULT_FN_ATTRS
    632 _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
    633   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
    634                 (__v4di) __W,
    635                 (__mmask8) __U);
    636 }
    637 
    638 static __inline__ __m256i __DEFAULT_FN_ATTRS
    639 _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) {
    640   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
    641                 (__v4di) _mm256_setzero_si256(),
    642                 (__mmask8) __U);
    643 }
    644 
    645 static __inline__ __m128i __DEFAULT_FN_ATTRS
    646 _mm_cvttpd_epu64 (__m128d __A) {
    647   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
    648                 (__v2di) _mm_setzero_si128(),
    649                 (__mmask8) -1);
    650 }
    651 
    652 static __inline__ __m128i __DEFAULT_FN_ATTRS
    653 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
    654   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
    655                 (__v2di) __W,
    656                 (__mmask8) __U);
    657 }
    658 
    659 static __inline__ __m128i __DEFAULT_FN_ATTRS
    660 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) {
    661   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
    662                 (__v2di) _mm_setzero_si128(),
    663                 (__mmask8) __U);
    664 }
    665 
    666 static __inline__ __m256i __DEFAULT_FN_ATTRS
    667 _mm256_cvttpd_epu64 (__m256d __A) {
    668   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
    669                 (__v4di) _mm256_setzero_si256(),
    670                 (__mmask8) -1);
    671 }
    672 
    673 static __inline__ __m256i __DEFAULT_FN_ATTRS
    674 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
    675   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
    676                 (__v4di) __W,
    677                 (__mmask8) __U);
    678 }
    679 
    680 static __inline__ __m256i __DEFAULT_FN_ATTRS
    681 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) {
    682   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
    683                 (__v4di) _mm256_setzero_si256(),
    684                 (__mmask8) __U);
    685 }
    686 
    687 static __inline__ __m128i __DEFAULT_FN_ATTRS
    688 _mm_cvttps_epi64 (__m128 __A) {
    689   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
    690                 (__v2di) _mm_setzero_si128(),
    691                 (__mmask8) -1);
    692 }
    693 
    694 static __inline__ __m128i __DEFAULT_FN_ATTRS
    695 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
    696   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
    697                 (__v2di) __W,
    698                 (__mmask8) __U);
    699 }
    700 
    701 static __inline__ __m128i __DEFAULT_FN_ATTRS
    702 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
    703   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
    704                 (__v2di) _mm_setzero_si128(),
    705                 (__mmask8) __U);
    706 }
    707 
    708 static __inline__ __m256i __DEFAULT_FN_ATTRS
    709 _mm256_cvttps_epi64 (__m128 __A) {
    710   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
    711                 (__v4di) _mm256_setzero_si256(),
    712                 (__mmask8) -1);
    713 }
    714 
    715 static __inline__ __m256i __DEFAULT_FN_ATTRS
    716 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
    717   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
    718                 (__v4di) __W,
    719                 (__mmask8) __U);
    720 }
    721 
    722 static __inline__ __m256i __DEFAULT_FN_ATTRS
    723 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
    724   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
    725                 (__v4di) _mm256_setzero_si256(),
    726                 (__mmask8) __U);
    727 }
    728 
    729 static __inline__ __m128i __DEFAULT_FN_ATTRS
    730 _mm_cvttps_epu64 (__m128 __A) {
    731   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
    732                 (__v2di) _mm_setzero_si128(),
    733                 (__mmask8) -1);
    734 }
    735 
    736 static __inline__ __m128i __DEFAULT_FN_ATTRS
    737 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
    738   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
    739                 (__v2di) __W,
    740                 (__mmask8) __U);
    741 }
    742 
    743 static __inline__ __m128i __DEFAULT_FN_ATTRS
    744 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
    745   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
    746                 (__v2di) _mm_setzero_si128(),
    747                 (__mmask8) __U);
    748 }
    749 
    750 static __inline__ __m256i __DEFAULT_FN_ATTRS
    751 _mm256_cvttps_epu64 (__m128 __A) {
    752   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
    753                 (__v4di) _mm256_setzero_si256(),
    754                 (__mmask8) -1);
    755 }
    756 
    757 static __inline__ __m256i __DEFAULT_FN_ATTRS
    758 _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
    759   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
    760                 (__v4di) __W,
    761                 (__mmask8) __U);
    762 }
    763 
    764 static __inline__ __m256i __DEFAULT_FN_ATTRS
    765 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
    766   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
    767                 (__v4di) _mm256_setzero_si256(),
    768                 (__mmask8) __U);
    769 }
    770 
    771 static __inline__ __m128d __DEFAULT_FN_ATTRS
    772 _mm_cvtepu64_pd (__m128i __A) {
    773   return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
    774                 (__v2df) _mm_setzero_pd(),
    775                 (__mmask8) -1);
    776 }
    777 
    778 static __inline__ __m128d __DEFAULT_FN_ATTRS
    779 _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
    780   return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
    781                 (__v2df) __W,
    782                 (__mmask8) __U);
    783 }
    784 
    785 static __inline__ __m128d __DEFAULT_FN_ATTRS
    786 _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
    787   return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
    788                 (__v2df) _mm_setzero_pd(),
    789                 (__mmask8) __U);
    790 }
    791 
    792 static __inline__ __m256d __DEFAULT_FN_ATTRS
    793 _mm256_cvtepu64_pd (__m256i __A) {
    794   return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
    795                 (__v4df) _mm256_setzero_pd(),
    796                 (__mmask8) -1);
    797 }
    798 
    799 static __inline__ __m256d __DEFAULT_FN_ATTRS
    800 _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
    801   return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
    802                 (__v4df) __W,
    803                 (__mmask8) __U);
    804 }
    805 
    806 static __inline__ __m256d __DEFAULT_FN_ATTRS
    807 _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
    808   return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
    809                 (__v4df) _mm256_setzero_pd(),
    810                 (__mmask8) __U);
    811 }
    812 
    813 static __inline__ __m128 __DEFAULT_FN_ATTRS
    814 _mm_cvtepu64_ps (__m128i __A) {
    815   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
    816                 (__v4sf) _mm_setzero_ps(),
    817                 (__mmask8) -1);
    818 }
    819 
    820 static __inline__ __m128 __DEFAULT_FN_ATTRS
    821 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
    822   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
    823                 (__v4sf) __W,
    824                 (__mmask8) __U);
    825 }
    826 
    827 static __inline__ __m128 __DEFAULT_FN_ATTRS
    828 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
    829   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
    830                 (__v4sf) _mm_setzero_ps(),
    831                 (__mmask8) __U);
    832 }
    833 
    834 static __inline__ __m128 __DEFAULT_FN_ATTRS
    835 _mm256_cvtepu64_ps (__m256i __A) {
    836   return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
    837                 (__v4sf) _mm_setzero_ps(),
    838                 (__mmask8) -1);
    839 }
    840 
    841 static __inline__ __m128 __DEFAULT_FN_ATTRS
    842 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
    843   return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
    844                 (__v4sf) __W,
    845                 (__mmask8) __U);
    846 }
    847 
    848 static __inline__ __m128 __DEFAULT_FN_ATTRS
    849 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
    850   return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
    851                 (__v4sf) _mm_setzero_ps(),
    852                 (__mmask8) __U);
    853 }
    854 
    855 #define _mm_range_pd(A, B, C) __extension__ ({                         \
    856   (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
    857                                           (__v2df)(__m128d)(B), (int)(C), \
    858                                           (__v2df)_mm_setzero_pd(), \
    859                                           (__mmask8)-1); })
    860 
    861 #define _mm_mask_range_pd(W, U, A, B, C) __extension__ ({          \
    862   (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
    863                                           (__v2df)(__m128d)(B), (int)(C), \
    864                                           (__v2df)(__m128d)(W), \
    865                                           (__mmask8)(U)); })
    866 
    867 #define _mm_maskz_range_pd(U, A, B, C) __extension__ ({              \
    868   (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
    869                                           (__v2df)(__m128d)(B), (int)(C), \
    870                                           (__v2df)_mm_setzero_pd(), \
    871                                           (__mmask8)(U)); })
    872 
    873 #define _mm256_range_pd(A, B, C) __extension__ ({                      \
    874   (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
    875                                           (__v4df)(__m256d)(B), (int)(C), \
    876                                           (__v4df)_mm256_setzero_pd(), \
    877                                           (__mmask8)-1); })
    878 
    879 #define _mm256_mask_range_pd(W, U, A, B, C) __extension__ ({       \
    880   (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
    881                                           (__v4df)(__m256d)(B), (int)(C), \
    882                                           (__v4df)(__m256d)(W), \
    883                                           (__mmask8)(U)); })
    884 
    885 #define _mm256_maskz_range_pd(U, A, B, C) __extension__ ({           \
    886   (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
    887                                           (__v4df)(__m256d)(B), (int)(C), \
    888                                           (__v4df)_mm256_setzero_pd(), \
    889                                           (__mmask8)(U)); })
    890 
    891 #define _mm_range_ps(A, B, C) __extension__ ({                         \
    892   (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
    893                                          (__v4sf)(__m128)(B), (int)(C), \
    894                                          (__v4sf)_mm_setzero_ps(), \
    895                                          (__mmask8)-1); })
    896 
    897 #define _mm_mask_range_ps(W, U, A, B, C) __extension__ ({          \
    898   (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
    899                                          (__v4sf)(__m128)(B), (int)(C), \
    900                                          (__v4sf)(__m128)(W), (__mmask8)(U)); })
    901 
    902 #define _mm_maskz_range_ps(U, A, B, C) __extension__ ({              \
    903   (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
    904                                          (__v4sf)(__m128)(B), (int)(C), \
    905                                          (__v4sf)_mm_setzero_ps(), \
    906                                          (__mmask8)(U)); })
    907 
    908 #define _mm256_range_ps(A, B, C) __extension__ ({                      \
    909   (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
    910                                          (__v8sf)(__m256)(B), (int)(C), \
    911                                          (__v8sf)_mm256_setzero_ps(), \
    912                                          (__mmask8)-1); })
    913 
    914 #define _mm256_mask_range_ps(W, U, A, B, C) __extension__ ({       \
    915   (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
    916                                          (__v8sf)(__m256)(B), (int)(C), \
    917                                          (__v8sf)(__m256)(W), (__mmask8)(U)); })
    918 
    919 #define _mm256_maskz_range_ps(U, A, B, C) __extension__ ({           \
    920   (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
    921                                          (__v8sf)(__m256)(B), (int)(C), \
    922                                          (__v8sf)_mm256_setzero_ps(), \
    923                                          (__mmask8)(U)); })
    924 
    925 #define _mm_reduce_pd(A, B) __extension__ ({                \
    926   (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
    927                                            (__v2df)_mm_setzero_pd(), \
    928                                            (__mmask8)-1); })
    929 
    930 #define _mm_mask_reduce_pd(W, U, A, B) __extension__ ({ \
    931   (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
    932                                            (__v2df)(__m128d)(W), \
    933                                            (__mmask8)(U)); })
    934 
    935 #define _mm_maskz_reduce_pd(U, A, B) __extension__ ({     \
    936   (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
    937                                            (__v2df)_mm_setzero_pd(), \
    938                                            (__mmask8)(U)); })
    939 
    940 #define _mm256_reduce_pd(A, B) __extension__ ({                \
    941   (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
    942                                            (__v4df)_mm256_setzero_pd(), \
    943                                            (__mmask8)-1); })
    944 
    945 #define _mm256_mask_reduce_pd(W, U, A, B) __extension__ ({ \
    946   (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
    947                                            (__v4df)(__m256d)(W), \
    948                                            (__mmask8)(U)); })
    949 
    950 #define _mm256_maskz_reduce_pd(U, A, B) __extension__ ({     \
    951   (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
    952                                            (__v4df)_mm256_setzero_pd(), \
    953                                            (__mmask8)(U)); })
    954 
    955 #define _mm_reduce_ps(A, B) __extension__ ({                   \
    956   (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
    957                                           (__v4sf)_mm_setzero_ps(), \
    958                                           (__mmask8)-1); })
    959 
    960 #define _mm_mask_reduce_ps(W, U, A, B) __extension__ ({    \
    961   (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
    962                                           (__v4sf)(__m128)(W), \
    963                                           (__mmask8)(U)); })
    964 
    965 #define _mm_maskz_reduce_ps(U, A, B) __extension__ ({        \
    966   (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
    967                                           (__v4sf)_mm_setzero_ps(), \
    968                                           (__mmask8)(U)); })
    969 
    970 #define _mm256_reduce_ps(A, B) __extension__ ({                \
    971   (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
    972                                           (__v8sf)_mm256_setzero_ps(), \
    973                                           (__mmask8)-1); })
    974 
    975 #define _mm256_mask_reduce_ps(W, U, A, B) __extension__ ({ \
    976   (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
    977                                           (__v8sf)(__m256)(W), \
    978                                           (__mmask8)(U)); })
    979 
    980 #define _mm256_maskz_reduce_ps(U, A, B) __extension__ ({     \
    981   (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
    982                                           (__v8sf)_mm256_setzero_ps(), \
    983                                           (__mmask8)(U)); })
    984 
    985 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    986 _mm_movepi32_mask (__m128i __A)
    987 {
    988   return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
    989 }
    990 
    991 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    992 _mm256_movepi32_mask (__m256i __A)
    993 {
    994   return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
    995 }
    996 
    997 static __inline__ __m128i __DEFAULT_FN_ATTRS
    998 _mm_movm_epi32 (__mmask8 __A)
    999 {
   1000   return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
   1001 }
   1002 
   1003 static __inline__ __m256i __DEFAULT_FN_ATTRS
   1004 _mm256_movm_epi32 (__mmask8 __A)
   1005 {
   1006   return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
   1007 }
   1008 
   1009 static __inline__ __m128i __DEFAULT_FN_ATTRS
   1010 _mm_movm_epi64 (__mmask8 __A)
   1011 {
   1012   return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
   1013 }
   1014 
   1015 static __inline__ __m256i __DEFAULT_FN_ATTRS
   1016 _mm256_movm_epi64 (__mmask8 __A)
   1017 {
   1018   return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
   1019 }
   1020 
   1021 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   1022 _mm_movepi64_mask (__m128i __A)
   1023 {
   1024   return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
   1025 }
   1026 
   1027 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   1028 _mm256_movepi64_mask (__m256i __A)
   1029 {
   1030   return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
   1031 }
   1032 
   1033 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1034 _mm256_broadcast_f32x2 (__m128 __A)
   1035 {
   1036   return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
   1037                 (__v8sf)_mm256_undefined_ps(),
   1038                 (__mmask8) -1);
   1039 }
   1040 
   1041 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1042 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
   1043 {
   1044   return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
   1045                 (__v8sf) __O,
   1046                 __M);
   1047 }
   1048 
   1049 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1050 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
   1051 {
   1052   return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
   1053                 (__v8sf) _mm256_setzero_ps (),
   1054                 __M);
   1055 }
   1056 
   1057 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1058 _mm256_broadcast_f64x2 (__m128d __A)
   1059 {
   1060   return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
   1061                  (__v4df)_mm256_undefined_pd(),
   1062                  (__mmask8) -1);
   1063 }
   1064 
   1065 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1066 _mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
   1067 {
   1068   return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
   1069                  (__v4df) __O,
   1070                  __M);
   1071 }
   1072 
   1073 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1074 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
   1075 {
   1076   return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
   1077                  (__v4df) _mm256_setzero_ps (),
   1078                  __M);
   1079 }
   1080 
   1081 static __inline__ __m128i __DEFAULT_FN_ATTRS
   1082 _mm_broadcast_i32x2 (__m128i __A)
   1083 {
   1084   return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
   1085                  (__v4si)_mm_undefined_si128(),
   1086                  (__mmask8) -1);
   1087 }
   1088 
   1089 static __inline__ __m128i __DEFAULT_FN_ATTRS
   1090 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
   1091 {
   1092   return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
   1093                  (__v4si) __O,
   1094                  __M);
   1095 }
   1096 
   1097 static __inline__ __m128i __DEFAULT_FN_ATTRS
   1098 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
   1099 {
   1100   return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
   1101                  (__v4si) _mm_setzero_si128 (),
   1102                  __M);
   1103 }
   1104 
   1105 static __inline__ __m256i __DEFAULT_FN_ATTRS
   1106 _mm256_broadcast_i32x2 (__m128i __A)
   1107 {
   1108   return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
   1109                  (__v8si)_mm256_undefined_si256(),
   1110                  (__mmask8) -1);
   1111 }
   1112 
   1113 static __inline__ __m256i __DEFAULT_FN_ATTRS
   1114 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
   1115 {
   1116   return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
   1117                  (__v8si) __O,
   1118                  __M);
   1119 }
   1120 
   1121 static __inline__ __m256i __DEFAULT_FN_ATTRS
   1122 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
   1123 {
   1124   return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
   1125                  (__v8si) _mm256_setzero_si256 (),
   1126                  __M);
   1127 }
   1128 
   1129 static __inline__ __m256i __DEFAULT_FN_ATTRS
   1130 _mm256_broadcast_i64x2 (__m128i __A)
   1131 {
   1132   return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
   1133                  (__v4di)_mm256_undefined_si256(),
   1134                  (__mmask8) -1);
   1135 }
   1136 
   1137 static __inline__ __m256i __DEFAULT_FN_ATTRS
   1138 _mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
   1139 {
   1140   return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
   1141                  (__v4di) __O,
   1142                  __M);
   1143 }
   1144 
   1145 static __inline__ __m256i __DEFAULT_FN_ATTRS
   1146 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
   1147 {
   1148   return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
   1149                  (__v4di) _mm256_setzero_si256 (),
   1150                  __M);
   1151 }
   1152 
   1153 #define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \
   1154   (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
   1155                                                 (int)(imm), \
   1156                                                 (__v2df)_mm_setzero_pd(), \
   1157                                                 (__mmask8)-1); })
   1158 
   1159 #define _mm256_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
   1160   (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
   1161                                                 (int)(imm), \
   1162                                                 (__v2df)(__m128d)(W), \
   1163                                                 (__mmask8)(U)); })
   1164 
   1165 #define _mm256_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
   1166   (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
   1167                                                 (int)(imm), \
   1168                                                 (__v2df)_mm_setzero_pd(), \
   1169                                                 (__mmask8)(U)); })
   1170 
   1171 #define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \
   1172   (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
   1173                                                 (int)(imm), \
   1174                                                 (__v2di)_mm_setzero_di(), \
   1175                                                 (__mmask8)-1); })
   1176 
   1177 #define _mm256_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
   1178   (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
   1179                                                 (int)(imm), \
   1180                                                 (__v2di)(__m128i)(W), \
   1181                                                 (__mmask8)(U)); })
   1182 
   1183 #define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
   1184   (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
   1185                                                 (int)(imm), \
   1186                                                 (__v2di)_mm_setzero_di(), \
   1187                                                 (__mmask8)(U)); })
   1188 
   1189 #define _mm256_insertf64x2(A, B, imm) __extension__ ({ \
   1190   (__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \
   1191                                                (__v2df)(__m128d)(B), \
   1192                                                (int)(imm), \
   1193                                                (__v4df)_mm256_setzero_pd(), \
   1194                                                (__mmask8)-1); })
   1195 
   1196 #define _mm256_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \
   1197   (__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \
   1198                                                (__v2df)(__m128d)(B), \
   1199                                                (int)(imm), \
   1200                                                (__v4df)(__m256d)(W), \
   1201                                                (__mmask8)(U)); })
   1202 
   1203 #define _mm256_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \
   1204   (__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \
   1205                                                (__v2df)(__m128d)(B), \
   1206                                                (int)(imm), \
   1207                                                (__v4df)_mm256_setzero_pd(), \
   1208                                                (__mmask8)(U)); })
   1209 
   1210 #define _mm256_inserti64x2(A, B, imm) __extension__ ({ \
   1211   (__m256i)__builtin_ia32_inserti64x2_256_mask((__v4di)(__m256i)(A), \
   1212                                                (__v2di)(__m128i)(B), \
   1213                                                (int)(imm), \
   1214                                                (__v4di)_mm256_setzero_si256(), \
   1215                                                (__mmask8)-1); })
   1216 
   1217 #define _mm256_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \
   1218   (__m256i)__builtin_ia32_inserti64x2_256_mask((__v4di)(__m256i)(A), \
   1219                                                (__v2di)(__m128i)(B), \
   1220                                                (int)(imm), \
   1221                                                (__v4di)(__m256i)(W), \
   1222                                                (__mmask8)(U)); })
   1223 
   1224 #define _mm256_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \
   1225   (__m256i)__builtin_ia32_inserti64x2_256_mask((__v4di)(__m256i)(A), \
   1226                                                (__v2di)(__m128i)(B), \
   1227                                                (int)(imm), \
   1228                                                (__v4di)_mm256_setzero_si256(), \
   1229                                                (__mmask8)(U)); })
   1230 
   1231 #define _mm_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
   1232   (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
   1233                                              (__mmask8)(U)); })
   1234 
   1235 #define _mm_fpclass_pd_mask(A, imm) __extension__ ({ \
   1236   (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
   1237                                              (__mmask8)-1); })
   1238 
   1239 #define _mm256_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
   1240   (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
   1241                                              (__mmask8)(U)); })
   1242 
   1243 #define _mm256_fpclass_pd_mask(A, imm) __extension__ ({ \
   1244   (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
   1245                                              (__mmask8)-1); })
   1246 
   1247 #define _mm_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
   1248   (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
   1249                                              (__mmask8)(U)); })
   1250 
   1251 #define _mm_fpclass_ps_mask(A, imm) __extension__ ({ \
   1252   (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
   1253                                              (__mmask8)-1); })
   1254 
   1255 #define _mm256_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
   1256   (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
   1257                                              (__mmask8)(U)); })
   1258 
   1259 #define _mm256_fpclass_ps_mask(A, imm) __extension__ ({ \
   1260   (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
   1261                                              (__mmask8)-1); })
   1262 
   1263 #undef __DEFAULT_FN_ATTRS
   1264 
   1265 #endif
   1266