Home | History | Annotate | Download | only in include
      1 /*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 
     24 #ifndef __IMMINTRIN_H
     25 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
     26 #endif
     27 
     28 #ifndef __AVX512VLINTRIN_H
     29 #define __AVX512VLINTRIN_H
     30 
     31 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
     32 
     33 /* Doesn't require avx512vl, used in avx512dqintrin.h */
     34 static  __inline __m128i __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
     35 _mm_setzero_di(void) {
     36   return (__m128i)(__v2di){ 0LL, 0LL};
     37 }
     38 
     39 /* Integer compare */
     40 
     41 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
     42 _mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) {
     43   return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
     44                                                   (__mmask8)-1);
     45 }
     46 
     47 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
     48 _mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
     49   return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
     50                                                   __u);
     51 }
     52 
     53 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
     54 _mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) {
     55   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
     56                                                 (__mmask8)-1);
     57 }
     58 
     59 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
     60 _mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
     61   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
     62                                                 __u);
     63 }
     64 
     65 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
     66 _mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) {
     67   return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
     68                                                   (__mmask8)-1);
     69 }
     70 
     71 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
     72 _mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
     73   return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
     74                                                   __u);
     75 }
     76 
     77 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
     78 _mm256_cmpeq_epu32_mask(__m256i __a, __m256i __b) {
     79   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
     80                                                 (__mmask8)-1);
     81 }
     82 
     83 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
     84 _mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
     85   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
     86                                                 __u);
     87 }
     88 
     89 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
     90 _mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) {
     91   return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
     92                                                   (__mmask8)-1);
     93 }
     94 
     95 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
     96 _mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
     97   return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
     98                                                   __u);
     99 }
    100 
    101 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    102 _mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) {
    103   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
    104                                                 (__mmask8)-1);
    105 }
    106 
    107 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    108 _mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    109   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
    110                                                 __u);
    111 }
    112 
    113 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    114 _mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) {
    115   return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
    116                                                   (__mmask8)-1);
    117 }
    118 
    119 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    120 _mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    121   return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
    122                                                   __u);
    123 }
    124 
    125 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    126 _mm256_cmpeq_epu64_mask(__m256i __a, __m256i __b) {
    127   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
    128                                                 (__mmask8)-1);
    129 }
    130 
    131 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    132 _mm256_mask_cmpeq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    133   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
    134                                                 __u);
    135 }
    136 
    137 
    138 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    139 _mm_cmpge_epi32_mask(__m128i __a, __m128i __b) {
    140   return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
    141                                                (__mmask8)-1);
    142 }
    143 
    144 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    145 _mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    146   return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
    147                                                __u);
    148 }
    149 
    150 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    151 _mm_cmpge_epu32_mask(__m128i __a, __m128i __b) {
    152   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
    153                                                 (__mmask8)-1);
    154 }
    155 
    156 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    157 _mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    158   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
    159                                                 __u);
    160 }
    161 
    162 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    163 _mm256_cmpge_epi32_mask(__m256i __a, __m256i __b) {
    164   return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
    165                                                (__mmask8)-1);
    166 }
    167 
    168 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    169 _mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    170   return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
    171                                                __u);
    172 }
    173 
    174 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    175 _mm256_cmpge_epu32_mask(__m256i __a, __m256i __b) {
    176   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
    177                                                 (__mmask8)-1);
    178 }
    179 
    180 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    181 _mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    182   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
    183                                                 __u);
    184 }
    185 
    186 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    187 _mm_cmpge_epi64_mask(__m128i __a, __m128i __b) {
    188   return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
    189                                                (__mmask8)-1);
    190 }
    191 
    192 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    193 _mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    194   return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
    195                                                __u);
    196 }
    197 
    198 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    199 _mm_cmpge_epu64_mask(__m128i __a, __m128i __b) {
    200   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
    201                                                 (__mmask8)-1);
    202 }
    203 
    204 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    205 _mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    206   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
    207                                                 __u);
    208 }
    209 
    210 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    211 _mm256_cmpge_epi64_mask(__m256i __a, __m256i __b) {
    212   return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
    213                                                (__mmask8)-1);
    214 }
    215 
    216 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    217 _mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    218   return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
    219                                                __u);
    220 }
    221 
    222 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    223 _mm256_cmpge_epu64_mask(__m256i __a, __m256i __b) {
    224   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
    225                                                 (__mmask8)-1);
    226 }
    227 
    228 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    229 _mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    230   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
    231                                                 __u);
    232 }
    233 
    234 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    235 _mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) {
    236   return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
    237                                                   (__mmask8)-1);
    238 }
    239 
    240 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    241 _mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    242   return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
    243                                                   __u);
    244 }
    245 
    246 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    247 _mm_cmpgt_epu32_mask(__m128i __a, __m128i __b) {
    248   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
    249                                                 (__mmask8)-1);
    250 }
    251 
    252 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    253 _mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    254   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
    255                                                 __u);
    256 }
    257 
    258 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    259 _mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) {
    260   return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
    261                                                   (__mmask8)-1);
    262 }
    263 
    264 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    265 _mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    266   return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
    267                                                   __u);
    268 }
    269 
    270 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    271 _mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b) {
    272   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
    273                                                 (__mmask8)-1);
    274 }
    275 
    276 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    277 _mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    278   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
    279                                                 __u);
    280 }
    281 
    282 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    283 _mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) {
    284   return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
    285                                                   (__mmask8)-1);
    286 }
    287 
    288 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    289 _mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    290   return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
    291                                                   __u);
    292 }
    293 
    294 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    295 _mm_cmpgt_epu64_mask(__m128i __a, __m128i __b) {
    296   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
    297                                                 (__mmask8)-1);
    298 }
    299 
    300 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    301 _mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    302   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
    303                                                 __u);
    304 }
    305 
    306 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    307 _mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) {
    308   return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
    309                                                   (__mmask8)-1);
    310 }
    311 
    312 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    313 _mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    314   return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
    315                                                   __u);
    316 }
    317 
    318 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    319 _mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b) {
    320   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
    321                                                 (__mmask8)-1);
    322 }
    323 
    324 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    325 _mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    326   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
    327                                                 __u);
    328 }
    329 
    330 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    331 _mm_cmple_epi32_mask(__m128i __a, __m128i __b) {
    332   return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
    333                                                (__mmask8)-1);
    334 }
    335 
    336 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    337 _mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    338   return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
    339                                                __u);
    340 }
    341 
    342 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    343 _mm_cmple_epu32_mask(__m128i __a, __m128i __b) {
    344   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
    345                                                 (__mmask8)-1);
    346 }
    347 
    348 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    349 _mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    350   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
    351                                                 __u);
    352 }
    353 
    354 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    355 _mm256_cmple_epi32_mask(__m256i __a, __m256i __b) {
    356   return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
    357                                                (__mmask8)-1);
    358 }
    359 
    360 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    361 _mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    362   return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
    363                                                __u);
    364 }
    365 
    366 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    367 _mm256_cmple_epu32_mask(__m256i __a, __m256i __b) {
    368   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
    369                                                 (__mmask8)-1);
    370 }
    371 
    372 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    373 _mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    374   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
    375                                                 __u);
    376 }
    377 
    378 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    379 _mm_cmple_epi64_mask(__m128i __a, __m128i __b) {
    380   return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
    381                                                (__mmask8)-1);
    382 }
    383 
    384 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    385 _mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    386   return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
    387                                                __u);
    388 }
    389 
    390 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    391 _mm_cmple_epu64_mask(__m128i __a, __m128i __b) {
    392   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
    393                                                 (__mmask8)-1);
    394 }
    395 
    396 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    397 _mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    398   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
    399                                                 __u);
    400 }
    401 
    402 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    403 _mm256_cmple_epi64_mask(__m256i __a, __m256i __b) {
    404   return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
    405                                                (__mmask8)-1);
    406 }
    407 
    408 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    409 _mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    410   return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
    411                                                __u);
    412 }
    413 
    414 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    415 _mm256_cmple_epu64_mask(__m256i __a, __m256i __b) {
    416   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
    417                                                 (__mmask8)-1);
    418 }
    419 
    420 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    421 _mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    422   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
    423                                                 __u);
    424 }
    425 
    426 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    427 _mm_cmplt_epi32_mask(__m128i __a, __m128i __b) {
    428   return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
    429                                                (__mmask8)-1);
    430 }
    431 
    432 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    433 _mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    434   return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
    435                                                __u);
    436 }
    437 
    438 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    439 _mm_cmplt_epu32_mask(__m128i __a, __m128i __b) {
    440   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
    441                                                 (__mmask8)-1);
    442 }
    443 
    444 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    445 _mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    446   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
    447                                                 __u);
    448 }
    449 
    450 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    451 _mm256_cmplt_epi32_mask(__m256i __a, __m256i __b) {
    452   return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
    453                                                (__mmask8)-1);
    454 }
    455 
    456 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    457 _mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    458   return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
    459                                                __u);
    460 }
    461 
    462 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    463 _mm256_cmplt_epu32_mask(__m256i __a, __m256i __b) {
    464   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
    465                                                 (__mmask8)-1);
    466 }
    467 
    468 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    469 _mm256_mask_cmplt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    470   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
    471                                                 __u);
    472 }
    473 
    474 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    475 _mm_cmplt_epi64_mask(__m128i __a, __m128i __b) {
    476   return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
    477                                                (__mmask8)-1);
    478 }
    479 
    480 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    481 _mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    482   return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
    483                                                __u);
    484 }
    485 
    486 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    487 _mm_cmplt_epu64_mask(__m128i __a, __m128i __b) {
    488   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
    489                                                 (__mmask8)-1);
    490 }
    491 
    492 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    493 _mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    494   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
    495                                                 __u);
    496 }
    497 
    498 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    499 _mm256_cmplt_epi64_mask(__m256i __a, __m256i __b) {
    500   return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
    501                                                (__mmask8)-1);
    502 }
    503 
    504 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    505 _mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    506   return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
    507                                                __u);
    508 }
    509 
    510 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    511 _mm256_cmplt_epu64_mask(__m256i __a, __m256i __b) {
    512   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
    513                                                 (__mmask8)-1);
    514 }
    515 
    516 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    517 _mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    518   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
    519                                                 __u);
    520 }
    521 
    522 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    523 _mm_cmpneq_epi32_mask(__m128i __a, __m128i __b) {
    524   return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
    525                                                (__mmask8)-1);
    526 }
    527 
    528 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    529 _mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    530   return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
    531                                                __u);
    532 }
    533 
    534 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    535 _mm_cmpneq_epu32_mask(__m128i __a, __m128i __b) {
    536   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
    537                                                 (__mmask8)-1);
    538 }
    539 
    540 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    541 _mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    542   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
    543                                                 __u);
    544 }
    545 
    546 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    547 _mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b) {
    548   return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
    549                                                (__mmask8)-1);
    550 }
    551 
    552 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    553 _mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    554   return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
    555                                                __u);
    556 }
    557 
    558 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    559 _mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b) {
    560   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
    561                                                 (__mmask8)-1);
    562 }
    563 
    564 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    565 _mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    566   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
    567                                                 __u);
    568 }
    569 
    570 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    571 _mm_cmpneq_epi64_mask(__m128i __a, __m128i __b) {
    572   return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
    573                                                (__mmask8)-1);
    574 }
    575 
    576 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    577 _mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    578   return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
    579                                                __u);
    580 }
    581 
    582 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    583 _mm_cmpneq_epu64_mask(__m128i __a, __m128i __b) {
    584   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
    585                                                 (__mmask8)-1);
    586 }
    587 
    588 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    589 _mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
    590   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
    591                                                 __u);
    592 }
    593 
    594 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    595 _mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b) {
    596   return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
    597                                                (__mmask8)-1);
    598 }
    599 
    600 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    601 _mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    602   return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
    603                                                __u);
    604 }
    605 
    606 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    607 _mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b) {
    608   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
    609                                                 (__mmask8)-1);
    610 }
    611 
    612 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
    613 _mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
    614   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
    615                                                 __u);
    616 }
    617 
    618 static __inline__ __m256i __DEFAULT_FN_ATTRS
    619 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    620 {
    621   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
    622                                              (__v8si)_mm256_add_epi32(__A, __B),
    623                                              (__v8si)__W);
    624 }
    625 
    626 static __inline__ __m256i __DEFAULT_FN_ATTRS
    627 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
    628 {
    629   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
    630                                              (__v8si)_mm256_add_epi32(__A, __B),
    631                                              (__v8si)_mm256_setzero_si256());
    632 }
    633 
    634 static __inline__ __m256i __DEFAULT_FN_ATTRS
    635 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    636 {
    637   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
    638                                              (__v4di)_mm256_add_epi64(__A, __B),
    639                                              (__v4di)__W);
    640 }
    641 
    642 static __inline__ __m256i __DEFAULT_FN_ATTRS
    643 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
    644 {
    645   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
    646                                              (__v4di)_mm256_add_epi64(__A, __B),
    647                                              (__v4di)_mm256_setzero_si256());
    648 }
    649 
    650 static __inline__ __m256i __DEFAULT_FN_ATTRS
    651 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    652 {
    653   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
    654                                              (__v8si)_mm256_sub_epi32(__A, __B),
    655                                              (__v8si)__W);
    656 }
    657 
    658 static __inline__ __m256i __DEFAULT_FN_ATTRS
    659 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
    660 {
    661   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
    662                                              (__v8si)_mm256_sub_epi32(__A, __B),
    663                                              (__v8si)_mm256_setzero_si256());
    664 }
    665 
    666 static __inline__ __m256i __DEFAULT_FN_ATTRS
    667 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    668 {
    669   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
    670                                              (__v4di)_mm256_sub_epi64(__A, __B),
    671                                              (__v4di)__W);
    672 }
    673 
    674 static __inline__ __m256i __DEFAULT_FN_ATTRS
    675 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
    676 {
    677   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
    678                                              (__v4di)_mm256_sub_epi64(__A, __B),
    679                                              (__v4di)_mm256_setzero_si256());
    680 }
    681 
    682 static __inline__ __m128i __DEFAULT_FN_ATTRS
    683 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    684 {
    685   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
    686                                              (__v4si)_mm_add_epi32(__A, __B),
    687                                              (__v4si)__W);
    688 }
    689 
    690 static __inline__ __m128i __DEFAULT_FN_ATTRS
    691 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
    692 {
    693   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
    694                                              (__v4si)_mm_add_epi32(__A, __B),
    695                                              (__v4si)_mm_setzero_si128());
    696 }
    697 
    698 static __inline__ __m128i __DEFAULT_FN_ATTRS
    699 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    700 {
    701   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
    702                                              (__v2di)_mm_add_epi64(__A, __B),
    703                                              (__v2di)__W);
    704 }
    705 
    706 static __inline__ __m128i __DEFAULT_FN_ATTRS
    707 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
    708 {
    709   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
    710                                              (__v2di)_mm_add_epi64(__A, __B),
    711                                              (__v2di)_mm_setzero_si128());
    712 }
    713 
    714 static __inline__ __m128i __DEFAULT_FN_ATTRS
    715 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    716 {
    717   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
    718                                              (__v4si)_mm_sub_epi32(__A, __B),
    719                                              (__v4si)__W);
    720 }
    721 
    722 static __inline__ __m128i __DEFAULT_FN_ATTRS
    723 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
    724 {
    725   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
    726                                              (__v4si)_mm_sub_epi32(__A, __B),
    727                                              (__v4si)_mm_setzero_si128());
    728 }
    729 
    730 static __inline__ __m128i __DEFAULT_FN_ATTRS
    731 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    732 {
    733   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
    734                                              (__v2di)_mm_sub_epi64(__A, __B),
    735                                              (__v2di)__W);
    736 }
    737 
    738 static __inline__ __m128i __DEFAULT_FN_ATTRS
    739 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
    740 {
    741   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
    742                                              (__v2di)_mm_sub_epi64(__A, __B),
    743                                              (__v2di)_mm_setzero_si128());
    744 }
    745 
    746 static __inline__ __m256i __DEFAULT_FN_ATTRS
    747 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
    748 {
    749   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
    750                                              (__v4di)_mm256_mul_epi32(__X, __Y),
    751                                              (__v4di)__W);
    752 }
    753 
    754 static __inline__ __m256i __DEFAULT_FN_ATTRS
    755 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
    756 {
    757   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
    758                                              (__v4di)_mm256_mul_epi32(__X, __Y),
    759                                              (__v4di)_mm256_setzero_si256());
    760 }
    761 
    762 static __inline__ __m128i __DEFAULT_FN_ATTRS
    763 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
    764 {
    765   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
    766                                              (__v2di)_mm_mul_epi32(__X, __Y),
    767                                              (__v2di)__W);
    768 }
    769 
    770 static __inline__ __m128i __DEFAULT_FN_ATTRS
    771 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
    772 {
    773   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
    774                                              (__v2di)_mm_mul_epi32(__X, __Y),
    775                                              (__v2di)_mm_setzero_si128());
    776 }
    777 
    778 static __inline__ __m256i __DEFAULT_FN_ATTRS
    779 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
    780 {
    781   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
    782                                              (__v4di)_mm256_mul_epu32(__X, __Y),
    783                                              (__v4di)__W);
    784 }
    785 
    786 static __inline__ __m256i __DEFAULT_FN_ATTRS
    787 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
    788 {
    789   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
    790                                              (__v4di)_mm256_mul_epu32(__X, __Y),
    791                                              (__v4di)_mm256_setzero_si256());
    792 }
    793 
    794 static __inline__ __m128i __DEFAULT_FN_ATTRS
    795 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
    796 {
    797   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
    798                                              (__v2di)_mm_mul_epu32(__X, __Y),
    799                                              (__v2di)__W);
    800 }
    801 
    802 static __inline__ __m128i __DEFAULT_FN_ATTRS
    803 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
    804 {
    805   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
    806                                              (__v2di)_mm_mul_epu32(__X, __Y),
    807                                              (__v2di)_mm_setzero_si128());
    808 }
    809 
    810 static __inline__ __m256i __DEFAULT_FN_ATTRS
    811 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
    812 {
    813   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
    814                                              (__v8si)_mm256_mullo_epi32(__A, __B),
    815                                              (__v8si)_mm256_setzero_si256());
    816 }
    817 
    818 static __inline__ __m256i __DEFAULT_FN_ATTRS
    819 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
    820 {
    821   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
    822                                              (__v8si)_mm256_mullo_epi32(__A, __B),
    823                                              (__v8si)__W);
    824 }
    825 
    826 static __inline__ __m128i __DEFAULT_FN_ATTRS
    827 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
    828 {
    829   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
    830                                              (__v4si)_mm_mullo_epi32(__A, __B),
    831                                              (__v4si)_mm_setzero_si128());
    832 }
    833 
    834 static __inline__ __m128i __DEFAULT_FN_ATTRS
    835 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
    836 {
    837   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
    838                                              (__v4si)_mm_mullo_epi32(__A, __B),
    839                                              (__v4si)__W);
    840 }
    841 
    842 static __inline__ __m256i __DEFAULT_FN_ATTRS
    843 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    844 {
    845   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
    846                                              (__v8si)_mm256_and_si256(__A, __B),
    847                                              (__v8si)__W);
    848 }
    849 
    850 static __inline__ __m256i __DEFAULT_FN_ATTRS
    851 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
    852 {
    853   return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
    854 }
    855 
    856 static __inline__ __m128i __DEFAULT_FN_ATTRS
    857 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    858 {
    859   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
    860                                              (__v4si)_mm_and_si128(__A, __B),
    861                                              (__v4si)__W);
    862 }
    863 
    864 static __inline__ __m128i __DEFAULT_FN_ATTRS
    865 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
    866 {
    867   return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
    868 }
    869 
    870 static __inline__ __m256i __DEFAULT_FN_ATTRS
    871 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    872 {
    873   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
    874                                           (__v8si)_mm256_andnot_si256(__A, __B),
    875                                           (__v8si)__W);
    876 }
    877 
    878 static __inline__ __m256i __DEFAULT_FN_ATTRS
    879 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
    880 {
    881   return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
    882                                            __U, __A, __B);
    883 }
    884 
    885 static __inline__ __m128i __DEFAULT_FN_ATTRS
    886 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    887 {
    888   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
    889                                              (__v4si)_mm_andnot_si128(__A, __B),
    890                                              (__v4si)__W);
    891 }
    892 
    893 static __inline__ __m128i __DEFAULT_FN_ATTRS
    894 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
    895 {
    896   return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
    897 }
    898 
    899 static __inline__ __m256i __DEFAULT_FN_ATTRS
    900 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    901 {
    902   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
    903                                              (__v8si)_mm256_or_si256(__A, __B),
    904                                              (__v8si)__W);
    905 }
    906 
    907 static __inline__ __m256i __DEFAULT_FN_ATTRS
    908 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
    909 {
    910   return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
    911 }
    912 
    913 static __inline__ __m128i __DEFAULT_FN_ATTRS
    914 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    915 {
    916   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
    917                                              (__v4si)_mm_or_si128(__A, __B),
    918                                              (__v4si)__W);
    919 }
    920 
    921 static __inline__ __m128i __DEFAULT_FN_ATTRS
    922 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
    923 {
    924   return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
    925 }
    926 
    927 static __inline__ __m256i __DEFAULT_FN_ATTRS
    928 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    929 {
    930   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
    931                                              (__v8si)_mm256_xor_si256(__A, __B),
    932                                              (__v8si)__W);
    933 }
    934 
    935 static __inline__ __m256i __DEFAULT_FN_ATTRS
    936 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
    937 {
    938   return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
    939 }
    940 
    941 static __inline__ __m128i __DEFAULT_FN_ATTRS
    942 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A,
    943         __m128i __B)
    944 {
    945   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
    946                                              (__v4si)_mm_xor_si128(__A, __B),
    947                                              (__v4si)__W);
    948 }
    949 
    950 static __inline__ __m128i __DEFAULT_FN_ATTRS
    951 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
    952 {
    953   return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
    954 }
    955 
    956 static __inline__ __m256i __DEFAULT_FN_ATTRS
    957 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    958 {
    959   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
    960                                              (__v4di)_mm256_and_si256(__A, __B),
    961                                              (__v4di)__W);
    962 }
    963 
    964 static __inline__ __m256i __DEFAULT_FN_ATTRS
    965 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
    966 {
    967   return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
    968 }
    969 
    970 static __inline__ __m128i __DEFAULT_FN_ATTRS
    971 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    972 {
    973   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
    974                                              (__v2di)_mm_and_si128(__A, __B),
    975                                              (__v2di)__W);
    976 }
    977 
    978 static __inline__ __m128i __DEFAULT_FN_ATTRS
    979 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
    980 {
    981   return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
    982 }
    983 
    984 static __inline__ __m256i __DEFAULT_FN_ATTRS
    985 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    986 {
    987   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
    988                                           (__v4di)_mm256_andnot_si256(__A, __B),
    989                                           (__v4di)__W);
    990 }
    991 
    992 static __inline__ __m256i __DEFAULT_FN_ATTRS
    993 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
    994 {
    995   return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
    996                                            __U, __A, __B);
    997 }
    998 
    999 static __inline__ __m128i __DEFAULT_FN_ATTRS
   1000 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   1001 {
   1002   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   1003                                              (__v2di)_mm_andnot_si128(__A, __B),
   1004                                              (__v2di)__W);
   1005 }
   1006 
   1007 static __inline__ __m128i __DEFAULT_FN_ATTRS
   1008 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
   1009 {
   1010   return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
   1011 }
   1012 
   1013 static __inline__ __m256i __DEFAULT_FN_ATTRS
   1014 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
   1015 {
   1016   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   1017                                              (__v4di)_mm256_or_si256(__A, __B),
   1018                                              (__v4di)__W);
   1019 }
   1020 
   1021 static __inline__ __m256i __DEFAULT_FN_ATTRS
   1022 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
   1023 {
   1024   return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
   1025 }
   1026 
   1027 static __inline__ __m128i __DEFAULT_FN_ATTRS
   1028 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   1029 {
   1030   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   1031                                              (__v2di)_mm_or_si128(__A, __B),
   1032                                              (__v2di)__W);
   1033 }
   1034 
   1035 static __inline__ __m128i __DEFAULT_FN_ATTRS
   1036 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
   1037 {
   1038   return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
   1039 }
   1040 
   1041 static __inline__ __m256i __DEFAULT_FN_ATTRS
   1042 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
   1043 {
   1044   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   1045                                              (__v4di)_mm256_xor_si256(__A, __B),
   1046                                              (__v4di)__W);
   1047 }
   1048 
   1049 static __inline__ __m256i __DEFAULT_FN_ATTRS
   1050 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
   1051 {
   1052   return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
   1053 }
   1054 
   1055 static __inline__ __m128i __DEFAULT_FN_ATTRS
   1056 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
   1057         __m128i __B)
   1058 {
   1059   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   1060                                              (__v2di)_mm_xor_si128(__A, __B),
   1061                                              (__v2di)__W);
   1062 }
   1063 
   1064 static __inline__ __m128i __DEFAULT_FN_ATTRS
   1065 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
   1066 {
   1067   return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
   1068 }
   1069 
   1070 #define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \
   1071   (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
   1072                                         (__v4si)(__m128i)(b), (int)(p), \
   1073                                         (__mmask8)-1); })
   1074 
   1075 #define _mm_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
   1076   (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
   1077                                         (__v4si)(__m128i)(b), (int)(p), \
   1078                                         (__mmask8)(m)); })
   1079 
   1080 #define _mm_cmp_epu32_mask(a, b, p) __extension__ ({ \
   1081   (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
   1082                                          (__v4si)(__m128i)(b), (int)(p), \
   1083                                          (__mmask8)-1); })
   1084 
   1085 #define _mm_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
   1086   (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
   1087                                          (__v4si)(__m128i)(b), (int)(p), \
   1088                                          (__mmask8)(m)); })
   1089 
   1090 #define _mm256_cmp_epi32_mask(a, b, p) __extension__ ({ \
   1091   (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
   1092                                         (__v8si)(__m256i)(b), (int)(p), \
   1093                                         (__mmask8)-1); })
   1094 
   1095 #define _mm256_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
   1096   (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
   1097                                         (__v8si)(__m256i)(b), (int)(p), \
   1098                                         (__mmask8)(m)); })
   1099 
   1100 #define _mm256_cmp_epu32_mask(a, b, p) __extension__ ({ \
   1101   (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
   1102                                          (__v8si)(__m256i)(b), (int)(p), \
   1103                                          (__mmask8)-1); })
   1104 
   1105 #define _mm256_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
   1106   (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
   1107                                          (__v8si)(__m256i)(b), (int)(p), \
   1108                                          (__mmask8)(m)); })
   1109 
   1110 #define _mm_cmp_epi64_mask(a, b, p) __extension__ ({ \
   1111   (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
   1112                                         (__v2di)(__m128i)(b), (int)(p), \
   1113                                         (__mmask8)-1); })
   1114 
   1115 #define _mm_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
   1116   (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
   1117                                         (__v2di)(__m128i)(b), (int)(p), \
   1118                                         (__mmask8)(m)); })
   1119 
   1120 #define _mm_cmp_epu64_mask(a, b, p) __extension__ ({ \
   1121   (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
   1122                                          (__v2di)(__m128i)(b), (int)(p), \
   1123                                          (__mmask8)-1); })
   1124 
   1125 #define _mm_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
   1126   (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
   1127                                          (__v2di)(__m128i)(b), (int)(p), \
   1128                                          (__mmask8)(m)); })
   1129 
   1130 #define _mm256_cmp_epi64_mask(a, b, p) __extension__ ({ \
   1131   (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
   1132                                         (__v4di)(__m256i)(b), (int)(p), \
   1133                                         (__mmask8)-1); })
   1134 
   1135 #define _mm256_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
   1136   (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
   1137                                         (__v4di)(__m256i)(b), (int)(p), \
   1138                                         (__mmask8)(m)); })
   1139 
   1140 #define _mm256_cmp_epu64_mask(a, b, p) __extension__ ({ \
   1141   (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
   1142                                          (__v4di)(__m256i)(b), (int)(p), \
   1143                                          (__mmask8)-1); })
   1144 
   1145 #define _mm256_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
   1146   (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
   1147                                          (__v4di)(__m256i)(b), (int)(p), \
   1148                                          (__mmask8)(m)); })
   1149 
   1150 #define _mm256_cmp_ps_mask(a, b, p)  __extension__ ({ \
   1151   (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
   1152                                          (__v8sf)(__m256)(b), (int)(p), \
   1153                                          (__mmask8)-1); })
   1154 
   1155 #define _mm256_mask_cmp_ps_mask(m, a, b, p)  __extension__ ({ \
   1156   (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
   1157                                          (__v8sf)(__m256)(b), (int)(p), \
   1158                                          (__mmask8)(m)); })
   1159 
   1160 #define _mm256_cmp_pd_mask(a, b, p)  __extension__ ({ \
   1161   (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
   1162                                          (__v4df)(__m256d)(b), (int)(p), \
   1163                                          (__mmask8)-1); })
   1164 
   1165 #define _mm256_mask_cmp_pd_mask(m, a, b, p)  __extension__ ({ \
   1166   (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
   1167                                          (__v4df)(__m256d)(b), (int)(p), \
   1168                                          (__mmask8)(m)); })
   1169 
   1170 #define _mm_cmp_ps_mask(a, b, p)  __extension__ ({ \
   1171   (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
   1172                                          (__v4sf)(__m128)(b), (int)(p), \
   1173                                          (__mmask8)-1); })
   1174 
   1175 #define _mm_mask_cmp_ps_mask(m, a, b, p)  __extension__ ({ \
   1176   (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
   1177                                          (__v4sf)(__m128)(b), (int)(p), \
   1178                                          (__mmask8)(m)); })
   1179 
   1180 #define _mm_cmp_pd_mask(a, b, p)  __extension__ ({ \
   1181   (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
   1182                                          (__v2df)(__m128d)(b), (int)(p), \
   1183                                          (__mmask8)-1); })
   1184 
   1185 #define _mm_mask_cmp_pd_mask(m, a, b, p)  __extension__ ({ \
   1186   (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
   1187                                          (__v2df)(__m128d)(b), (int)(p), \
   1188                                          (__mmask8)(m)); })
   1189 
   1190 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1191 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
   1192 {
   1193   return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
   1194                                                     (__v2df) __B,
   1195                                                     (__v2df) __C,
   1196                                                     (__mmask8) __U);
   1197 }
   1198 
   1199 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1200 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
   1201 {
   1202   return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
   1203                                                      (__v2df) __B,
   1204                                                      (__v2df) __C,
   1205                                                      (__mmask8) __U);
   1206 }
   1207 
   1208 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1209 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
   1210 {
   1211   return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
   1212                                                      (__v2df) __B,
   1213                                                      (__v2df) __C,
   1214                                                      (__mmask8) __U);
   1215 }
   1216 
   1217 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1218 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
   1219 {
   1220   return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
   1221                                                     (__v2df) __B,
   1222                                                     -(__v2df) __C,
   1223                                                     (__mmask8) __U);
   1224 }
   1225 
   1226 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1227 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
   1228 {
   1229   return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
   1230                                                      (__v2df) __B,
   1231                                                      -(__v2df) __C,
   1232                                                      (__mmask8) __U);
   1233 }
   1234 
   1235 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1236 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
   1237 {
   1238   return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
   1239                                                      (__v2df) __B,
   1240                                                      (__v2df) __C,
   1241                                                      (__mmask8) __U);
   1242 }
   1243 
   1244 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1245 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
   1246 {
   1247   return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
   1248                                                      (__v2df) __B,
   1249                                                      (__v2df) __C,
   1250                                                      (__mmask8) __U);
   1251 }
   1252 
   1253 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1254 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
   1255 {
   1256   return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
   1257                                                      (__v2df) __B,
   1258                                                      -(__v2df) __C,
   1259                                                      (__mmask8) __U);
   1260 }
   1261 
   1262 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1263 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
   1264 {
   1265   return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
   1266                                                     (__v4df) __B,
   1267                                                     (__v4df) __C,
   1268                                                     (__mmask8) __U);
   1269 }
   1270 
   1271 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1272 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
   1273 {
   1274   return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
   1275                                                      (__v4df) __B,
   1276                                                      (__v4df) __C,
   1277                                                      (__mmask8) __U);
   1278 }
   1279 
   1280 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1281 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
   1282 {
   1283   return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
   1284                                                      (__v4df) __B,
   1285                                                      (__v4df) __C,
   1286                                                      (__mmask8) __U);
   1287 }
   1288 
   1289 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1290 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
   1291 {
   1292   return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
   1293                                                     (__v4df) __B,
   1294                                                     -(__v4df) __C,
   1295                                                     (__mmask8) __U);
   1296 }
   1297 
   1298 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1299 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
   1300 {
   1301   return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
   1302                                                      (__v4df) __B,
   1303                                                      -(__v4df) __C,
   1304                                                      (__mmask8) __U);
   1305 }
   1306 
   1307 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1308 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
   1309 {
   1310   return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
   1311                                                      (__v4df) __B,
   1312                                                      (__v4df) __C,
   1313                                                      (__mmask8) __U);
   1314 }
   1315 
   1316 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1317 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
   1318 {
   1319   return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
   1320                                                      (__v4df) __B,
   1321                                                      (__v4df) __C,
   1322                                                      (__mmask8) __U);
   1323 }
   1324 
   1325 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1326 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
   1327 {
   1328   return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
   1329                                                      (__v4df) __B,
   1330                                                      -(__v4df) __C,
   1331                                                      (__mmask8) __U);
   1332 }
   1333 
   1334 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1335 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
   1336 {
   1337   return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
   1338                                                    (__v4sf) __B,
   1339                                                    (__v4sf) __C,
   1340                                                    (__mmask8) __U);
   1341 }
   1342 
   1343 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1344 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
   1345 {
   1346   return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
   1347                                                     (__v4sf) __B,
   1348                                                     (__v4sf) __C,
   1349                                                     (__mmask8) __U);
   1350 }
   1351 
   1352 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1353 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   1354 {
   1355   return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
   1356                                                     (__v4sf) __B,
   1357                                                     (__v4sf) __C,
   1358                                                     (__mmask8) __U);
   1359 }
   1360 
   1361 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1362 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
   1363 {
   1364   return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
   1365                                                    (__v4sf) __B,
   1366                                                    -(__v4sf) __C,
   1367                                                    (__mmask8) __U);
   1368 }
   1369 
   1370 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1371 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   1372 {
   1373   return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
   1374                                                     (__v4sf) __B,
   1375                                                     -(__v4sf) __C,
   1376                                                     (__mmask8) __U);
   1377 }
   1378 
   1379 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1380 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
   1381 {
   1382   return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
   1383                                                     (__v4sf) __B,
   1384                                                     (__v4sf) __C,
   1385                                                     (__mmask8) __U);
   1386 }
   1387 
   1388 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1389 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   1390 {
   1391   return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
   1392                                                     (__v4sf) __B,
   1393                                                     (__v4sf) __C,
   1394                                                     (__mmask8) __U);
   1395 }
   1396 
   1397 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1398 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   1399 {
   1400   return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
   1401                                                     (__v4sf) __B,
   1402                                                     -(__v4sf) __C,
   1403                                                     (__mmask8) __U);
   1404 }
   1405 
   1406 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1407 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
   1408 {
   1409   return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
   1410                                                    (__v8sf) __B,
   1411                                                    (__v8sf) __C,
   1412                                                    (__mmask8) __U);
   1413 }
   1414 
   1415 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1416 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
   1417 {
   1418   return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
   1419                                                     (__v8sf) __B,
   1420                                                     (__v8sf) __C,
   1421                                                     (__mmask8) __U);
   1422 }
   1423 
   1424 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1425 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
   1426 {
   1427   return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
   1428                                                     (__v8sf) __B,
   1429                                                     (__v8sf) __C,
   1430                                                     (__mmask8) __U);
   1431 }
   1432 
   1433 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1434 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
   1435 {
   1436   return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
   1437                                                    (__v8sf) __B,
   1438                                                    -(__v8sf) __C,
   1439                                                    (__mmask8) __U);
   1440 }
   1441 
   1442 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1443 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
   1444 {
   1445   return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
   1446                                                     (__v8sf) __B,
   1447                                                     -(__v8sf) __C,
   1448                                                     (__mmask8) __U);
   1449 }
   1450 
   1451 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1452 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
   1453 {
   1454   return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
   1455                                                     (__v8sf) __B,
   1456                                                     (__v8sf) __C,
   1457                                                     (__mmask8) __U);
   1458 }
   1459 
   1460 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1461 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
   1462 {
   1463   return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
   1464                                                     (__v8sf) __B,
   1465                                                     (__v8sf) __C,
   1466                                                     (__mmask8) __U);
   1467 }
   1468 
   1469 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1470 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
   1471 {
   1472   return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
   1473                                                     (__v8sf) __B,
   1474                                                     -(__v8sf) __C,
   1475                                                     (__mmask8) __U);
   1476 }
   1477 
   1478 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1479 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
   1480 {
   1481   return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
   1482                                                        (__v2df) __B,
   1483                                                        (__v2df) __C,
   1484                                                        (__mmask8) __U);
   1485 }
   1486 
   1487 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1488 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
   1489 {
   1490   return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
   1491                                                         (__v2df) __B,
   1492                                                         (__v2df) __C,
   1493                                                         (__mmask8)
   1494                                                         __U);
   1495 }
   1496 
   1497 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1498 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
   1499 {
   1500   return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
   1501                                                         (__v2df) __B,
   1502                                                         (__v2df) __C,
   1503                                                         (__mmask8)
   1504                                                         __U);
   1505 }
   1506 
   1507 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1508 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
   1509 {
   1510   return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
   1511                                                        (__v2df) __B,
   1512                                                        -(__v2df) __C,
   1513                                                        (__mmask8) __U);
   1514 }
   1515 
   1516 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1517 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
   1518 {
   1519   return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
   1520                                                         (__v2df) __B,
   1521                                                         -(__v2df) __C,
   1522                                                         (__mmask8)
   1523                                                         __U);
   1524 }
   1525 
   1526 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1527 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
   1528 {
   1529   return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
   1530                                                        (__v4df) __B,
   1531                                                        (__v4df) __C,
   1532                                                        (__mmask8) __U);
   1533 }
   1534 
   1535 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1536 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
   1537 {
   1538   return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
   1539                                                         (__v4df) __B,
   1540                                                         (__v4df) __C,
   1541                                                         (__mmask8)
   1542                                                         __U);
   1543 }
   1544 
   1545 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1546 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
   1547 {
   1548   return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
   1549                                                         (__v4df) __B,
   1550                                                         (__v4df) __C,
   1551                                                         (__mmask8)
   1552                                                         __U);
   1553 }
   1554 
   1555 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1556 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
   1557 {
   1558   return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
   1559                                                        (__v4df) __B,
   1560                                                        -(__v4df) __C,
   1561                                                        (__mmask8) __U);
   1562 }
   1563 
   1564 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1565 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
   1566 {
   1567   return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
   1568                                                         (__v4df) __B,
   1569                                                         -(__v4df) __C,
   1570                                                         (__mmask8)
   1571                                                         __U);
   1572 }
   1573 
   1574 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1575 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
   1576 {
   1577   return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
   1578                                                       (__v4sf) __B,
   1579                                                       (__v4sf) __C,
   1580                                                       (__mmask8) __U);
   1581 }
   1582 
   1583 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1584 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
   1585 {
   1586   return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
   1587                                                        (__v4sf) __B,
   1588                                                        (__v4sf) __C,
   1589                                                        (__mmask8) __U);
   1590 }
   1591 
   1592 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1593 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   1594 {
   1595   return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
   1596                                                        (__v4sf) __B,
   1597                                                        (__v4sf) __C,
   1598                                                        (__mmask8) __U);
   1599 }
   1600 
   1601 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1602 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
   1603 {
   1604   return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
   1605                                                       (__v4sf) __B,
   1606                                                       -(__v4sf) __C,
   1607                                                       (__mmask8) __U);
   1608 }
   1609 
   1610 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1611 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   1612 {
   1613   return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
   1614                                                        (__v4sf) __B,
   1615                                                        -(__v4sf) __C,
   1616                                                        (__mmask8) __U);
   1617 }
   1618 
   1619 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1620 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
   1621                          __m256 __C)
   1622 {
   1623   return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
   1624                                                       (__v8sf) __B,
   1625                                                       (__v8sf) __C,
   1626                                                       (__mmask8) __U);
   1627 }
   1628 
   1629 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1630 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
   1631 {
   1632   return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
   1633                                                        (__v8sf) __B,
   1634                                                        (__v8sf) __C,
   1635                                                        (__mmask8) __U);
   1636 }
   1637 
   1638 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1639 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
   1640 {
   1641   return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
   1642                                                        (__v8sf) __B,
   1643                                                        (__v8sf) __C,
   1644                                                        (__mmask8) __U);
   1645 }
   1646 
   1647 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1648 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
   1649 {
   1650   return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
   1651                                                       (__v8sf) __B,
   1652                                                       -(__v8sf) __C,
   1653                                                       (__mmask8) __U);
   1654 }
   1655 
   1656 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1657 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
   1658 {
   1659   return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
   1660                                                        (__v8sf) __B,
   1661                                                        -(__v8sf) __C,
   1662                                                        (__mmask8) __U);
   1663 }
   1664 
   1665 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1666 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
   1667 {
   1668   return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
   1669                                                      (__v2df) __B,
   1670                                                      (__v2df) __C,
   1671                                                      (__mmask8) __U);
   1672 }
   1673 
   1674 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1675 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
   1676 {
   1677   return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
   1678                                                      (__v4df) __B,
   1679                                                      (__v4df) __C,
   1680                                                      (__mmask8) __U);
   1681 }
   1682 
   1683 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1684 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
   1685 {
   1686   return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
   1687                                                     (__v4sf) __B,
   1688                                                     (__v4sf) __C,
   1689                                                     (__mmask8) __U);
   1690 }
   1691 
   1692 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1693 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
   1694 {
   1695   return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
   1696                                                     (__v8sf) __B,
   1697                                                     (__v8sf) __C,
   1698                                                     (__mmask8) __U);
   1699 }
   1700 
   1701 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1702 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
   1703 {
   1704   return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
   1705                                                         (__v2df) __B,
   1706                                                         (__v2df) __C,
   1707                                                         (__mmask8)
   1708                                                         __U);
   1709 }
   1710 
   1711 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1712 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
   1713 {
   1714   return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
   1715                                                         (__v4df) __B,
   1716                                                         (__v4df) __C,
   1717                                                         (__mmask8)
   1718                                                         __U);
   1719 }
   1720 
   1721 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1722 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
   1723 {
   1724   return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
   1725                                                        (__v4sf) __B,
   1726                                                        (__v4sf) __C,
   1727                                                        (__mmask8) __U);
   1728 }
   1729 
   1730 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1731 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
   1732 {
   1733   return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
   1734                                                        (__v8sf) __B,
   1735                                                        (__v8sf) __C,
   1736                                                        (__mmask8) __U);
   1737 }
   1738 
   1739 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1740 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
   1741 {
   1742   return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
   1743                                                      (__v2df) __B,
   1744                                                      (__v2df) __C,
   1745                                                      (__mmask8) __U);
   1746 }
   1747 
   1748 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1749 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
   1750 {
   1751   return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
   1752                                                      (__v4df) __B,
   1753                                                      (__v4df) __C,
   1754                                                      (__mmask8) __U);
   1755 }
   1756 
   1757 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1758 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
   1759 {
   1760   return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
   1761                                                     (__v4sf) __B,
   1762                                                     (__v4sf) __C,
   1763                                                     (__mmask8) __U);
   1764 }
   1765 
   1766 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1767 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
   1768 {
   1769   return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
   1770                                                     (__v8sf) __B,
   1771                                                     (__v8sf) __C,
   1772                                                     (__mmask8) __U);
   1773 }
   1774 
   1775 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1776 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
   1777 {
   1778   return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
   1779                                                      (__v2df) __B,
   1780                                                      (__v2df) __C,
   1781                                                      (__mmask8) __U);
   1782 }
   1783 
   1784 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1785 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
   1786 {
   1787   return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
   1788                                                       (__v2df) __B,
   1789                                                       (__v2df) __C,
   1790                                                       (__mmask8) __U);
   1791 }
   1792 
   1793 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1794 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
   1795 {
   1796   return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
   1797                                                      (__v4df) __B,
   1798                                                      (__v4df) __C,
   1799                                                      (__mmask8) __U);
   1800 }
   1801 
   1802 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1803 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
   1804 {
   1805   return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
   1806                                                       (__v4df) __B,
   1807                                                       (__v4df) __C,
   1808                                                       (__mmask8) __U);
   1809 }
   1810 
   1811 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1812 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
   1813 {
   1814   return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
   1815                                                     (__v4sf) __B,
   1816                                                     (__v4sf) __C,
   1817                                                     (__mmask8) __U);
   1818 }
   1819 
   1820 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1821 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
   1822 {
   1823   return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
   1824                                                      (__v4sf) __B,
   1825                                                      (__v4sf) __C,
   1826                                                      (__mmask8) __U);
   1827 }
   1828 
   1829 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1830 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
   1831 {
   1832   return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
   1833                                                     (__v8sf) __B,
   1834                                                     (__v8sf) __C,
   1835                                                     (__mmask8) __U);
   1836 }
   1837 
   1838 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1839 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
   1840 {
   1841   return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
   1842                                                      (__v8sf) __B,
   1843                                                      (__v8sf) __C,
   1844                                                      (__mmask8) __U);
   1845 }
   1846 
   1847 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1848 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   1849   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   1850                                               (__v2df)_mm_add_pd(__A, __B),
   1851                                               (__v2df)__W);
   1852 }
   1853 
   1854 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1855 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   1856   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   1857                                               (__v2df)_mm_add_pd(__A, __B),
   1858                                               (__v2df)_mm_setzero_pd());
   1859 }
   1860 
   1861 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1862 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   1863   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   1864                                               (__v4df)_mm256_add_pd(__A, __B),
   1865                                               (__v4df)__W);
   1866 }
   1867 
   1868 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1869 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   1870   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   1871                                               (__v4df)_mm256_add_pd(__A, __B),
   1872                                               (__v4df)_mm256_setzero_pd());
   1873 }
   1874 
   1875 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1876 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   1877   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   1878                                              (__v4sf)_mm_add_ps(__A, __B),
   1879                                              (__v4sf)__W);
   1880 }
   1881 
   1882 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1883 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   1884   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   1885                                              (__v4sf)_mm_add_ps(__A, __B),
   1886                                              (__v4sf)_mm_setzero_ps());
   1887 }
   1888 
   1889 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1890 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   1891   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   1892                                              (__v8sf)_mm256_add_ps(__A, __B),
   1893                                              (__v8sf)__W);
   1894 }
   1895 
   1896 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1897 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   1898   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   1899                                              (__v8sf)_mm256_add_ps(__A, __B),
   1900                                              (__v8sf)_mm256_setzero_ps());
   1901 }
   1902 
   1903 static __inline__ __m128i __DEFAULT_FN_ATTRS
   1904 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
   1905   return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
   1906                 (__v4si) __W,
   1907                 (__v4si) __A);
   1908 }
   1909 
   1910 static __inline__ __m256i __DEFAULT_FN_ATTRS
   1911 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
   1912   return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
   1913                 (__v8si) __W,
   1914                 (__v8si) __A);
   1915 }
   1916 
   1917 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1918 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
   1919   return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
   1920                  (__v2df) __W,
   1921                  (__v2df) __A);
   1922 }
   1923 
   1924 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1925 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
   1926   return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
   1927                  (__v4df) __W,
   1928                  (__v4df) __A);
   1929 }
   1930 
   1931 static __inline__ __m128 __DEFAULT_FN_ATTRS
   1932 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
   1933   return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
   1934                 (__v4sf) __W,
   1935                 (__v4sf) __A);
   1936 }
   1937 
   1938 static __inline__ __m256 __DEFAULT_FN_ATTRS
   1939 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
   1940   return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
   1941                 (__v8sf) __W,
   1942                 (__v8sf) __A);
   1943 }
   1944 
   1945 static __inline__ __m128i __DEFAULT_FN_ATTRS
   1946 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
   1947   return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
   1948                 (__v2di) __W,
   1949                 (__v2di) __A);
   1950 }
   1951 
   1952 static __inline__ __m256i __DEFAULT_FN_ATTRS
   1953 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
   1954   return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
   1955                 (__v4di) __W,
   1956                 (__v4di) __A);
   1957 }
   1958 
   1959 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1960 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
   1961   return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
   1962                   (__v2df) __W,
   1963                   (__mmask8) __U);
   1964 }
   1965 
   1966 static __inline__ __m128d __DEFAULT_FN_ATTRS
   1967 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) {
   1968   return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
   1969                   (__v2df)
   1970                   _mm_setzero_pd (),
   1971                   (__mmask8) __U);
   1972 }
   1973 
   1974 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1975 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
   1976   return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
   1977                   (__v4df) __W,
   1978                   (__mmask8) __U);
   1979 }
   1980 
   1981 static __inline__ __m256d __DEFAULT_FN_ATTRS
   1982 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) {
   1983   return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
   1984                   (__v4df)
   1985                   _mm256_setzero_pd (),
   1986                   (__mmask8) __U);
   1987 }
   1988 
   1989 static __inline__ __m128i __DEFAULT_FN_ATTRS
   1990 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
   1991   return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
   1992                   (__v2di) __W,
   1993                   (__mmask8) __U);
   1994 }
   1995 
   1996 static __inline__ __m128i __DEFAULT_FN_ATTRS
   1997 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) {
   1998   return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
   1999                   (__v2di)
   2000                   _mm_setzero_si128 (),
   2001                   (__mmask8) __U);
   2002 }
   2003 
   2004 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2005 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
   2006   return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
   2007                   (__v4di) __W,
   2008                   (__mmask8) __U);
   2009 }
   2010 
   2011 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2012 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) {
   2013   return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
   2014                   (__v4di)
   2015                   _mm256_setzero_si256 (),
   2016                   (__mmask8) __U);
   2017 }
   2018 
   2019 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2020 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
   2021   return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
   2022                  (__v4sf) __W,
   2023                  (__mmask8) __U);
   2024 }
   2025 
   2026 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2027 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) {
   2028   return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
   2029                  (__v4sf)
   2030                  _mm_setzero_ps (),
   2031                  (__mmask8) __U);
   2032 }
   2033 
   2034 static __inline__ __m256 __DEFAULT_FN_ATTRS
   2035 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
   2036   return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
   2037                  (__v8sf) __W,
   2038                  (__mmask8) __U);
   2039 }
   2040 
   2041 static __inline__ __m256 __DEFAULT_FN_ATTRS
   2042 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) {
   2043   return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
   2044                  (__v8sf)
   2045                  _mm256_setzero_ps (),
   2046                  (__mmask8) __U);
   2047 }
   2048 
   2049 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2050 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
   2051   return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
   2052                   (__v4si) __W,
   2053                   (__mmask8) __U);
   2054 }
   2055 
   2056 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2057 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) {
   2058   return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
   2059                   (__v4si)
   2060                   _mm_setzero_si128 (),
   2061                   (__mmask8) __U);
   2062 }
   2063 
   2064 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2065 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
   2066   return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
   2067                   (__v8si) __W,
   2068                   (__mmask8) __U);
   2069 }
   2070 
   2071 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2072 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) {
   2073   return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
   2074                   (__v8si)
   2075                   _mm256_setzero_si256 (),
   2076                   (__mmask8) __U);
   2077 }
   2078 
   2079 static __inline__ void __DEFAULT_FN_ATTRS
   2080 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
   2081   __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
   2082             (__v2df) __A,
   2083             (__mmask8) __U);
   2084 }
   2085 
   2086 static __inline__ void __DEFAULT_FN_ATTRS
   2087 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
   2088   __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
   2089             (__v4df) __A,
   2090             (__mmask8) __U);
   2091 }
   2092 
   2093 static __inline__ void __DEFAULT_FN_ATTRS
   2094 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
   2095   __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
   2096             (__v2di) __A,
   2097             (__mmask8) __U);
   2098 }
   2099 
   2100 static __inline__ void __DEFAULT_FN_ATTRS
   2101 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) {
   2102   __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
   2103             (__v4di) __A,
   2104             (__mmask8) __U);
   2105 }
   2106 
   2107 static __inline__ void __DEFAULT_FN_ATTRS
   2108 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
   2109   __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
   2110             (__v4sf) __A,
   2111             (__mmask8) __U);
   2112 }
   2113 
   2114 static __inline__ void __DEFAULT_FN_ATTRS
   2115 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) {
   2116   __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
   2117             (__v8sf) __A,
   2118             (__mmask8) __U);
   2119 }
   2120 
   2121 static __inline__ void __DEFAULT_FN_ATTRS
   2122 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
   2123   __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
   2124             (__v4si) __A,
   2125             (__mmask8) __U);
   2126 }
   2127 
   2128 static __inline__ void __DEFAULT_FN_ATTRS
   2129 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
   2130   __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
   2131             (__v8si) __A,
   2132             (__mmask8) __U);
   2133 }
   2134 
   2135 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2136 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
   2137   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
   2138                                               (__v2df)_mm_cvtepi32_pd(__A),
   2139                                               (__v2df)__W);
   2140 }
   2141 
   2142 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2143 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
   2144   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
   2145                                               (__v2df)_mm_cvtepi32_pd(__A),
   2146                                               (__v2df)_mm_setzero_pd());
   2147 }
   2148 
   2149 static __inline__ __m256d __DEFAULT_FN_ATTRS
   2150 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
   2151   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
   2152                                               (__v4df)_mm256_cvtepi32_pd(__A),
   2153                                               (__v4df)__W);
   2154 }
   2155 
   2156 static __inline__ __m256d __DEFAULT_FN_ATTRS
   2157 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
   2158   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
   2159                                               (__v4df)_mm256_cvtepi32_pd(__A),
   2160                                               (__v4df)_mm256_setzero_pd());
   2161 }
   2162 
   2163 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2164 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
   2165   return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
   2166                (__v4sf) __W,
   2167                (__mmask8) __U);
   2168 }
   2169 
   2170 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2171 _mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A) {
   2172   return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
   2173                (__v4sf)
   2174                _mm_setzero_ps (),
   2175                (__mmask8) __U);
   2176 }
   2177 
   2178 static __inline__ __m256 __DEFAULT_FN_ATTRS
   2179 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
   2180   return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
   2181                (__v8sf) __W,
   2182                (__mmask8) __U);
   2183 }
   2184 
   2185 static __inline__ __m256 __DEFAULT_FN_ATTRS
   2186 _mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A) {
   2187   return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
   2188                (__v8sf)
   2189                _mm256_setzero_ps (),
   2190                (__mmask8) __U);
   2191 }
   2192 
   2193 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2194 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
   2195   return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
   2196                 (__v4si) __W,
   2197                 (__mmask8) __U);
   2198 }
   2199 
   2200 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2201 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) {
   2202   return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
   2203                 (__v4si)
   2204                 _mm_setzero_si128 (),
   2205                 (__mmask8) __U);
   2206 }
   2207 
   2208 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2209 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
   2210   return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
   2211                 (__v4si) __W,
   2212                 (__mmask8) __U);
   2213 }
   2214 
   2215 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2216 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
   2217   return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
   2218                 (__v4si)
   2219                 _mm_setzero_si128 (),
   2220                 (__mmask8) __U);
   2221 }
   2222 
   2223 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2224 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
   2225   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
   2226             (__v4sf) __W,
   2227             (__mmask8) __U);
   2228 }
   2229 
   2230 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2231 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
   2232   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
   2233             (__v4sf)
   2234             _mm_setzero_ps (),
   2235             (__mmask8) __U);
   2236 }
   2237 
   2238 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2239 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
   2240   return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
   2241                (__v4sf) __W,
   2242                (__mmask8) __U);
   2243 }
   2244 
   2245 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2246 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
   2247   return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
   2248                (__v4sf)
   2249                _mm_setzero_ps (),
   2250                (__mmask8) __U);
   2251 }
   2252 
   2253 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2254 _mm_cvtpd_epu32 (__m128d __A) {
   2255   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
   2256                  (__v4si)
   2257                  _mm_setzero_si128 (),
   2258                  (__mmask8) -1);
   2259 }
   2260 
   2261 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2262 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
   2263   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
   2264                  (__v4si) __W,
   2265                  (__mmask8) __U);
   2266 }
   2267 
   2268 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2269 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) {
   2270   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
   2271                  (__v4si)
   2272                  _mm_setzero_si128 (),
   2273                  (__mmask8) __U);
   2274 }
   2275 
   2276 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2277 _mm256_cvtpd_epu32 (__m256d __A) {
   2278   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
   2279                  (__v4si)
   2280                  _mm_setzero_si128 (),
   2281                  (__mmask8) -1);
   2282 }
   2283 
   2284 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2285 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
   2286   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
   2287                  (__v4si) __W,
   2288                  (__mmask8) __U);
   2289 }
   2290 
   2291 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2292 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) {
   2293   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
   2294                  (__v4si)
   2295                  _mm_setzero_si128 (),
   2296                  (__mmask8) __U);
   2297 }
   2298 
   2299 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2300 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
   2301   return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
   2302                 (__v4si) __W,
   2303                 (__mmask8) __U);
   2304 }
   2305 
   2306 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2307 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) {
   2308   return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
   2309                 (__v4si)
   2310                 _mm_setzero_si128 (),
   2311                 (__mmask8) __U);
   2312 }
   2313 
   2314 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2315 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
   2316   return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
   2317                 (__v8si) __W,
   2318                 (__mmask8) __U);
   2319 }
   2320 
   2321 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2322 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) {
   2323   return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
   2324                 (__v8si)
   2325                 _mm256_setzero_si256 (),
   2326                 (__mmask8) __U);
   2327 }
   2328 
   2329 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2330 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
   2331   return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
   2332                 (__v2df) __W,
   2333                 (__mmask8) __U);
   2334 }
   2335 
   2336 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2337 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
   2338   return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
   2339                 (__v2df)
   2340                 _mm_setzero_pd (),
   2341                 (__mmask8) __U);
   2342 }
   2343 
   2344 static __inline__ __m256d __DEFAULT_FN_ATTRS
   2345 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
   2346   return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
   2347                 (__v4df) __W,
   2348                 (__mmask8) __U);
   2349 }
   2350 
   2351 static __inline__ __m256d __DEFAULT_FN_ATTRS
   2352 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
   2353   return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
   2354                 (__v4df)
   2355                 _mm256_setzero_pd (),
   2356                 (__mmask8) __U);
   2357 }
   2358 
   2359 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2360 _mm_cvtps_epu32 (__m128 __A) {
   2361   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
   2362                  (__v4si)
   2363                  _mm_setzero_si128 (),
   2364                  (__mmask8) -1);
   2365 }
   2366 
   2367 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2368 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
   2369   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
   2370                  (__v4si) __W,
   2371                  (__mmask8) __U);
   2372 }
   2373 
   2374 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2375 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) {
   2376   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
   2377                  (__v4si)
   2378                  _mm_setzero_si128 (),
   2379                  (__mmask8) __U);
   2380 }
   2381 
   2382 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2383 _mm256_cvtps_epu32 (__m256 __A) {
   2384   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
   2385                  (__v8si)
   2386                  _mm256_setzero_si256 (),
   2387                  (__mmask8) -1);
   2388 }
   2389 
   2390 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2391 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
   2392   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
   2393                  (__v8si) __W,
   2394                  (__mmask8) __U);
   2395 }
   2396 
   2397 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2398 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) {
   2399   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
   2400                  (__v8si)
   2401                  _mm256_setzero_si256 (),
   2402                  (__mmask8) __U);
   2403 }
   2404 
   2405 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2406 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
   2407   return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
   2408                  (__v4si) __W,
   2409                  (__mmask8) __U);
   2410 }
   2411 
   2412 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2413 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) {
   2414   return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
   2415                  (__v4si)
   2416                  _mm_setzero_si128 (),
   2417                  (__mmask8) __U);
   2418 }
   2419 
   2420 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2421 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
   2422   return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
   2423                  (__v4si) __W,
   2424                  (__mmask8) __U);
   2425 }
   2426 
   2427 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2428 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) {
   2429   return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
   2430                  (__v4si)
   2431                  _mm_setzero_si128 (),
   2432                  (__mmask8) __U);
   2433 }
   2434 
   2435 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2436 _mm_cvttpd_epu32 (__m128d __A) {
   2437   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
   2438                   (__v4si)
   2439                   _mm_setzero_si128 (),
   2440                   (__mmask8) -1);
   2441 }
   2442 
   2443 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2444 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
   2445   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
   2446                   (__v4si) __W,
   2447                   (__mmask8) __U);
   2448 }
   2449 
   2450 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2451 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) {
   2452   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
   2453                   (__v4si)
   2454                   _mm_setzero_si128 (),
   2455                   (__mmask8) __U);
   2456 }
   2457 
   2458 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2459 _mm256_cvttpd_epu32 (__m256d __A) {
   2460   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
   2461                   (__v4si)
   2462                   _mm_setzero_si128 (),
   2463                   (__mmask8) -1);
   2464 }
   2465 
   2466 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2467 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
   2468   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
   2469                   (__v4si) __W,
   2470                   (__mmask8) __U);
   2471 }
   2472 
   2473 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2474 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) {
   2475   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
   2476                   (__v4si)
   2477                   _mm_setzero_si128 (),
   2478                   (__mmask8) __U);
   2479 }
   2480 
   2481 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2482 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
   2483   return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
   2484                  (__v4si) __W,
   2485                  (__mmask8) __U);
   2486 }
   2487 
   2488 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2489 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) {
   2490   return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
   2491                  (__v4si)
   2492                  _mm_setzero_si128 (),
   2493                  (__mmask8) __U);
   2494 }
   2495 
   2496 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2497 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
   2498   return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
   2499                  (__v8si) __W,
   2500                  (__mmask8) __U);
   2501 }
   2502 
   2503 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2504 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) {
   2505   return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
   2506                  (__v8si)
   2507                  _mm256_setzero_si256 (),
   2508                  (__mmask8) __U);
   2509 }
   2510 
   2511 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2512 _mm_cvttps_epu32 (__m128 __A) {
   2513   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
   2514                   (__v4si)
   2515                   _mm_setzero_si128 (),
   2516                   (__mmask8) -1);
   2517 }
   2518 
   2519 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2520 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
   2521   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
   2522                   (__v4si) __W,
   2523                   (__mmask8) __U);
   2524 }
   2525 
   2526 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2527 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) {
   2528   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
   2529                   (__v4si)
   2530                   _mm_setzero_si128 (),
   2531                   (__mmask8) __U);
   2532 }
   2533 
   2534 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2535 _mm256_cvttps_epu32 (__m256 __A) {
   2536   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
   2537                   (__v8si)
   2538                   _mm256_setzero_si256 (),
   2539                   (__mmask8) -1);
   2540 }
   2541 
   2542 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2543 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
   2544   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
   2545                   (__v8si) __W,
   2546                   (__mmask8) __U);
   2547 }
   2548 
   2549 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2550 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
   2551   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
   2552                   (__v8si)
   2553                   _mm256_setzero_si256 (),
   2554                   (__mmask8) __U);
   2555 }
   2556 
   2557 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2558 _mm_cvtepu32_pd (__m128i __A) {
   2559   return (__m128d) __builtin_convertvector(
   2560       __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
   2561 }
   2562 
   2563 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2564 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
   2565   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
   2566                                               (__v2df)_mm_cvtepu32_pd(__A),
   2567                                               (__v2df)__W);
   2568 }
   2569 
   2570 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2571 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
   2572   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
   2573                                               (__v2df)_mm_cvtepu32_pd(__A),
   2574                                               (__v2df)_mm_setzero_pd());
   2575 }
   2576 
   2577 static __inline__ __m256d __DEFAULT_FN_ATTRS
   2578 _mm256_cvtepu32_pd (__m128i __A) {
   2579   return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
   2580 }
   2581 
   2582 static __inline__ __m256d __DEFAULT_FN_ATTRS
   2583 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
   2584   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
   2585                                               (__v4df)_mm256_cvtepu32_pd(__A),
   2586                                               (__v4df)__W);
   2587 }
   2588 
   2589 static __inline__ __m256d __DEFAULT_FN_ATTRS
   2590 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
   2591   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
   2592                                               (__v4df)_mm256_cvtepu32_pd(__A),
   2593                                               (__v4df)_mm256_setzero_pd());
   2594 }
   2595 
   2596 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2597 _mm_cvtepu32_ps (__m128i __A) {
   2598   return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
   2599                 (__v4sf)
   2600                 _mm_setzero_ps (),
   2601                 (__mmask8) -1);
   2602 }
   2603 
   2604 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2605 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
   2606   return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
   2607                 (__v4sf) __W,
   2608                 (__mmask8) __U);
   2609 }
   2610 
   2611 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2612 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
   2613   return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
   2614                 (__v4sf)
   2615                 _mm_setzero_ps (),
   2616                 (__mmask8) __U);
   2617 }
   2618 
   2619 static __inline__ __m256 __DEFAULT_FN_ATTRS
   2620 _mm256_cvtepu32_ps (__m256i __A) {
   2621   return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
   2622                 (__v8sf)
   2623                 _mm256_setzero_ps (),
   2624                 (__mmask8) -1);
   2625 }
   2626 
   2627 static __inline__ __m256 __DEFAULT_FN_ATTRS
   2628 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
   2629   return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
   2630                 (__v8sf) __W,
   2631                 (__mmask8) __U);
   2632 }
   2633 
   2634 static __inline__ __m256 __DEFAULT_FN_ATTRS
   2635 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
   2636   return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
   2637                 (__v8sf)
   2638                 _mm256_setzero_ps (),
   2639                 (__mmask8) __U);
   2640 }
   2641 
   2642 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2643 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   2644   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   2645                                               (__v2df)_mm_div_pd(__A, __B),
   2646                                               (__v2df)__W);
   2647 }
   2648 
   2649 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2650 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   2651   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   2652                                               (__v2df)_mm_div_pd(__A, __B),
   2653                                               (__v2df)_mm_setzero_pd());
   2654 }
   2655 
   2656 static __inline__ __m256d __DEFAULT_FN_ATTRS
   2657 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   2658   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   2659                                               (__v4df)_mm256_div_pd(__A, __B),
   2660                                               (__v4df)__W);
   2661 }
   2662 
   2663 static __inline__ __m256d __DEFAULT_FN_ATTRS
   2664 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   2665   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   2666                                               (__v4df)_mm256_div_pd(__A, __B),
   2667                                               (__v4df)_mm256_setzero_pd());
   2668 }
   2669 
   2670 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2671 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   2672   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   2673                                              (__v4sf)_mm_div_ps(__A, __B),
   2674                                              (__v4sf)__W);
   2675 }
   2676 
   2677 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2678 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   2679   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   2680                                              (__v4sf)_mm_div_ps(__A, __B),
   2681                                              (__v4sf)_mm_setzero_ps());
   2682 }
   2683 
   2684 static __inline__ __m256 __DEFAULT_FN_ATTRS
   2685 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   2686   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   2687                                              (__v8sf)_mm256_div_ps(__A, __B),
   2688                                              (__v8sf)__W);
   2689 }
   2690 
   2691 static __inline__ __m256 __DEFAULT_FN_ATTRS
   2692 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   2693   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   2694                                              (__v8sf)_mm256_div_ps(__A, __B),
   2695                                              (__v8sf)_mm256_setzero_ps());
   2696 }
   2697 
   2698 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2699 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
   2700   return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
   2701                 (__v2df) __W,
   2702                 (__mmask8) __U);
   2703 }
   2704 
   2705 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2706 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
   2707   return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
   2708                  (__v2df)
   2709                  _mm_setzero_pd (),
   2710                  (__mmask8) __U);
   2711 }
   2712 
   2713 static __inline__ __m256d __DEFAULT_FN_ATTRS
   2714 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
   2715   return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
   2716                 (__v4df) __W,
   2717                 (__mmask8) __U);
   2718 }
   2719 
   2720 static __inline__ __m256d __DEFAULT_FN_ATTRS
   2721 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) {
   2722   return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
   2723                  (__v4df)
   2724                  _mm256_setzero_pd (),
   2725                  (__mmask8) __U);
   2726 }
   2727 
   2728 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2729 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
   2730   return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
   2731                 (__v2di) __W,
   2732                 (__mmask8) __U);
   2733 }
   2734 
   2735 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2736 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) {
   2737   return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
   2738                  (__v2di)
   2739                  _mm_setzero_si128 (),
   2740                  (__mmask8) __U);
   2741 }
   2742 
   2743 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2744 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
   2745   return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
   2746                 (__v4di) __W,
   2747                 (__mmask8) __U);
   2748 }
   2749 
   2750 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2751 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) {
   2752   return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
   2753                  (__v4di)
   2754                  _mm256_setzero_si256 (),
   2755                  (__mmask8) __U);
   2756 }
   2757 
   2758 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2759 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
   2760   return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
   2761               (__v2df) __W,
   2762               (__mmask8)
   2763               __U);
   2764 }
   2765 
   2766 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2767 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
   2768   return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
   2769                (__v2df)
   2770                _mm_setzero_pd (),
   2771                (__mmask8)
   2772                __U);
   2773 }
   2774 
   2775 static __inline__ __m256d __DEFAULT_FN_ATTRS
   2776 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
   2777   return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
   2778               (__v4df) __W,
   2779               (__mmask8)
   2780               __U);
   2781 }
   2782 
   2783 static __inline__ __m256d __DEFAULT_FN_ATTRS
   2784 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
   2785   return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
   2786                (__v4df)
   2787                _mm256_setzero_pd (),
   2788                (__mmask8)
   2789                __U);
   2790 }
   2791 
   2792 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2793 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
   2794   return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
   2795               (__v2di) __W,
   2796               (__mmask8)
   2797               __U);
   2798 }
   2799 
   2800 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2801 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
   2802   return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
   2803                (__v2di)
   2804                _mm_setzero_si128 (),
   2805                (__mmask8)
   2806                __U);
   2807 }
   2808 
   2809 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2810 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
   2811              void const *__P) {
   2812   return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
   2813               (__v4di) __W,
   2814               (__mmask8)
   2815               __U);
   2816 }
   2817 
   2818 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2819 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
   2820   return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
   2821                (__v4di)
   2822                _mm256_setzero_si256 (),
   2823                (__mmask8)
   2824                __U);
   2825 }
   2826 
   2827 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2828 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
   2829   return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
   2830                    (__v4sf) __W,
   2831                    (__mmask8) __U);
   2832 }
   2833 
   2834 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2835 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
   2836   return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
   2837               (__v4sf)
   2838               _mm_setzero_ps (),
   2839               (__mmask8)
   2840               __U);
   2841 }
   2842 
   2843 static __inline__ __m256 __DEFAULT_FN_ATTRS
   2844 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
   2845   return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
   2846                    (__v8sf) __W,
   2847                    (__mmask8) __U);
   2848 }
   2849 
   2850 static __inline__ __m256 __DEFAULT_FN_ATTRS
   2851 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
   2852   return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
   2853               (__v8sf)
   2854               _mm256_setzero_ps (),
   2855               (__mmask8)
   2856               __U);
   2857 }
   2858 
   2859 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2860 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
   2861   return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
   2862               (__v4si) __W,
   2863               (__mmask8)
   2864               __U);
   2865 }
   2866 
   2867 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2868 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
   2869   return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
   2870                (__v4si)
   2871                _mm_setzero_si128 (),
   2872                (__mmask8)     __U);
   2873 }
   2874 
   2875 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2876 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
   2877              void const *__P) {
   2878   return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
   2879               (__v8si) __W,
   2880               (__mmask8)
   2881               __U);
   2882 }
   2883 
   2884 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2885 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
   2886   return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
   2887                (__v8si)
   2888                _mm256_setzero_si256 (),
   2889                (__mmask8)
   2890                __U);
   2891 }
   2892 
   2893 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2894 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
   2895   return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
   2896                (__v4sf) __W,
   2897                (__mmask8) __U);
   2898 }
   2899 
   2900 static __inline__ __m128 __DEFAULT_FN_ATTRS
   2901 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) {
   2902   return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
   2903                 (__v4sf)
   2904                 _mm_setzero_ps (),
   2905                 (__mmask8) __U);
   2906 }
   2907 
   2908 static __inline__ __m256 __DEFAULT_FN_ATTRS
   2909 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
   2910   return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
   2911                (__v8sf) __W,
   2912                (__mmask8) __U);
   2913 }
   2914 
   2915 static __inline__ __m256 __DEFAULT_FN_ATTRS
   2916 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) {
   2917   return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
   2918                 (__v8sf)
   2919                 _mm256_setzero_ps (),
   2920                 (__mmask8) __U);
   2921 }
   2922 
   2923 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2924 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
   2925   return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
   2926                 (__v4si) __W,
   2927                 (__mmask8) __U);
   2928 }
   2929 
   2930 static __inline__ __m128i __DEFAULT_FN_ATTRS
   2931 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) {
   2932   return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
   2933                  (__v4si)
   2934                  _mm_setzero_si128 (),
   2935                  (__mmask8) __U);
   2936 }
   2937 
   2938 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2939 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
   2940   return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
   2941                 (__v8si) __W,
   2942                 (__mmask8) __U);
   2943 }
   2944 
   2945 static __inline__ __m256i __DEFAULT_FN_ATTRS
   2946 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) {
   2947   return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
   2948                  (__v8si)
   2949                  _mm256_setzero_si256 (),
   2950                  (__mmask8) __U);
   2951 }
   2952 
   2953 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2954 _mm_getexp_pd (__m128d __A) {
   2955   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
   2956                 (__v2df)
   2957                 _mm_setzero_pd (),
   2958                 (__mmask8) -1);
   2959 }
   2960 
   2961 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2962 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
   2963   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
   2964                 (__v2df) __W,
   2965                 (__mmask8) __U);
   2966 }
   2967 
   2968 static __inline__ __m128d __DEFAULT_FN_ATTRS
   2969 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
   2970   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
   2971                 (__v2df)
   2972                 _mm_setzero_pd (),
   2973                 (__mmask8) __U);
   2974 }
   2975 
   2976 static __inline__ __m256d __DEFAULT_FN_ATTRS
   2977 _mm256_getexp_pd (__m256d __A) {
   2978   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
   2979                 (__v4df)
   2980                 _mm256_setzero_pd (),
   2981                 (__mmask8) -1);
   2982 }
   2983 
   2984 static __inline__ __m256d __DEFAULT_FN_ATTRS
   2985 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
   2986   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
   2987                 (__v4df) __W,
   2988                 (__mmask8) __U);
   2989 }
   2990 
   2991 static __inline__ __m256d __DEFAULT_FN_ATTRS
   2992 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) {
   2993   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
   2994                 (__v4df)
   2995                 _mm256_setzero_pd (),
   2996                 (__mmask8) __U);
   2997 }
   2998 
   2999 static __inline__ __m128 __DEFAULT_FN_ATTRS
   3000 _mm_getexp_ps (__m128 __A) {
   3001   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
   3002                (__v4sf)
   3003                _mm_setzero_ps (),
   3004                (__mmask8) -1);
   3005 }
   3006 
   3007 static __inline__ __m128 __DEFAULT_FN_ATTRS
   3008 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
   3009   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
   3010                (__v4sf) __W,
   3011                (__mmask8) __U);
   3012 }
   3013 
   3014 static __inline__ __m128 __DEFAULT_FN_ATTRS
   3015 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) {
   3016   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
   3017                (__v4sf)
   3018                _mm_setzero_ps (),
   3019                (__mmask8) __U);
   3020 }
   3021 
   3022 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3023 _mm256_getexp_ps (__m256 __A) {
   3024   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
   3025                (__v8sf)
   3026                _mm256_setzero_ps (),
   3027                (__mmask8) -1);
   3028 }
   3029 
   3030 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3031 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
   3032   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
   3033                (__v8sf) __W,
   3034                (__mmask8) __U);
   3035 }
   3036 
   3037 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3038 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) {
   3039   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
   3040                (__v8sf)
   3041                _mm256_setzero_ps (),
   3042                (__mmask8) __U);
   3043 }
   3044 
   3045 static __inline__ __m128d __DEFAULT_FN_ATTRS
   3046 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   3047   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   3048                                               (__v2df)_mm_max_pd(__A, __B),
   3049                                               (__v2df)__W);
   3050 }
   3051 
   3052 static __inline__ __m128d __DEFAULT_FN_ATTRS
   3053 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   3054   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   3055                                               (__v2df)_mm_max_pd(__A, __B),
   3056                                               (__v2df)_mm_setzero_pd());
   3057 }
   3058 
   3059 static __inline__ __m256d __DEFAULT_FN_ATTRS
   3060 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   3061   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   3062                                               (__v4df)_mm256_max_pd(__A, __B),
   3063                                               (__v4df)__W);
   3064 }
   3065 
   3066 static __inline__ __m256d __DEFAULT_FN_ATTRS
   3067 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   3068   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   3069                                               (__v4df)_mm256_max_pd(__A, __B),
   3070                                               (__v4df)_mm256_setzero_pd());
   3071 }
   3072 
   3073 static __inline__ __m128 __DEFAULT_FN_ATTRS
   3074 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   3075   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   3076                                              (__v4sf)_mm_max_ps(__A, __B),
   3077                                              (__v4sf)__W);
   3078 }
   3079 
   3080 static __inline__ __m128 __DEFAULT_FN_ATTRS
   3081 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   3082   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   3083                                              (__v4sf)_mm_max_ps(__A, __B),
   3084                                              (__v4sf)_mm_setzero_ps());
   3085 }
   3086 
   3087 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3088 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   3089   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   3090                                              (__v8sf)_mm256_max_ps(__A, __B),
   3091                                              (__v8sf)__W);
   3092 }
   3093 
   3094 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3095 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   3096   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   3097                                              (__v8sf)_mm256_max_ps(__A, __B),
   3098                                              (__v8sf)_mm256_setzero_ps());
   3099 }
   3100 
   3101 static __inline__ __m128d __DEFAULT_FN_ATTRS
   3102 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   3103   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   3104                                               (__v2df)_mm_min_pd(__A, __B),
   3105                                               (__v2df)__W);
   3106 }
   3107 
   3108 static __inline__ __m128d __DEFAULT_FN_ATTRS
   3109 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   3110   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   3111                                               (__v2df)_mm_min_pd(__A, __B),
   3112                                               (__v2df)_mm_setzero_pd());
   3113 }
   3114 
   3115 static __inline__ __m256d __DEFAULT_FN_ATTRS
   3116 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   3117   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   3118                                               (__v4df)_mm256_min_pd(__A, __B),
   3119                                               (__v4df)__W);
   3120 }
   3121 
   3122 static __inline__ __m256d __DEFAULT_FN_ATTRS
   3123 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   3124   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   3125                                               (__v4df)_mm256_min_pd(__A, __B),
   3126                                               (__v4df)_mm256_setzero_pd());
   3127 }
   3128 
   3129 static __inline__ __m128 __DEFAULT_FN_ATTRS
   3130 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   3131   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   3132                                              (__v4sf)_mm_min_ps(__A, __B),
   3133                                              (__v4sf)__W);
   3134 }
   3135 
   3136 static __inline__ __m128 __DEFAULT_FN_ATTRS
   3137 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   3138   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   3139                                              (__v4sf)_mm_min_ps(__A, __B),
   3140                                              (__v4sf)_mm_setzero_ps());
   3141 }
   3142 
   3143 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3144 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   3145   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   3146                                              (__v8sf)_mm256_min_ps(__A, __B),
   3147                                              (__v8sf)__W);
   3148 }
   3149 
   3150 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3151 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   3152   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   3153                                              (__v8sf)_mm256_min_ps(__A, __B),
   3154                                              (__v8sf)_mm256_setzero_ps());
   3155 }
   3156 
   3157 static __inline__ __m128d __DEFAULT_FN_ATTRS
   3158 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   3159   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   3160                                               (__v2df)_mm_mul_pd(__A, __B),
   3161                                               (__v2df)__W);
   3162 }
   3163 
   3164 static __inline__ __m128d __DEFAULT_FN_ATTRS
   3165 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   3166   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   3167                                               (__v2df)_mm_mul_pd(__A, __B),
   3168                                               (__v2df)_mm_setzero_pd());
   3169 }
   3170 
   3171 static __inline__ __m256d __DEFAULT_FN_ATTRS
   3172 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   3173   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   3174                                               (__v4df)_mm256_mul_pd(__A, __B),
   3175                                               (__v4df)__W);
   3176 }
   3177 
   3178 static __inline__ __m256d __DEFAULT_FN_ATTRS
   3179 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   3180   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   3181                                               (__v4df)_mm256_mul_pd(__A, __B),
   3182                                               (__v4df)_mm256_setzero_pd());
   3183 }
   3184 
   3185 static __inline__ __m128 __DEFAULT_FN_ATTRS
   3186 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   3187   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   3188                                              (__v4sf)_mm_mul_ps(__A, __B),
   3189                                              (__v4sf)__W);
   3190 }
   3191 
   3192 static __inline__ __m128 __DEFAULT_FN_ATTRS
   3193 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   3194   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   3195                                              (__v4sf)_mm_mul_ps(__A, __B),
   3196                                              (__v4sf)_mm_setzero_ps());
   3197 }
   3198 
   3199 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3200 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   3201   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   3202                                              (__v8sf)_mm256_mul_ps(__A, __B),
   3203                                              (__v8sf)__W);
   3204 }
   3205 
   3206 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3207 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   3208   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   3209                                              (__v8sf)_mm256_mul_ps(__A, __B),
   3210                                              (__v8sf)_mm256_setzero_ps());
   3211 }
   3212 
   3213 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3214 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
   3215   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   3216                                              (__v4si)_mm_abs_epi32(__A),
   3217                                              (__v4si)__W);
   3218 }
   3219 
   3220 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3221 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) {
   3222   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   3223                                              (__v4si)_mm_abs_epi32(__A),
   3224                                              (__v4si)_mm_setzero_si128());
   3225 }
   3226 
   3227 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3228 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
   3229   return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U,
   3230                                              (__v8si)_mm256_abs_epi32(__A),
   3231                                              (__v8si)__W);
   3232 }
   3233 
   3234 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3235 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
   3236   return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U,
   3237                                              (__v8si)_mm256_abs_epi32(__A),
   3238                                              (__v8si)_mm256_setzero_si256());
   3239 }
   3240 
   3241 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3242 _mm_abs_epi64 (__m128i __A) {
   3243   return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
   3244              (__v2di)
   3245              _mm_setzero_si128 (),
   3246              (__mmask8) -1);
   3247 }
   3248 
   3249 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3250 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
   3251   return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
   3252              (__v2di) __W,
   3253              (__mmask8) __U);
   3254 }
   3255 
   3256 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3257 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
   3258   return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
   3259              (__v2di)
   3260              _mm_setzero_si128 (),
   3261              (__mmask8) __U);
   3262 }
   3263 
   3264 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3265 _mm256_abs_epi64 (__m256i __A) {
   3266   return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
   3267              (__v4di)
   3268              _mm256_setzero_si256 (),
   3269              (__mmask8) -1);
   3270 }
   3271 
   3272 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3273 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
   3274   return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
   3275              (__v4di) __W,
   3276              (__mmask8) __U);
   3277 }
   3278 
   3279 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3280 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) {
   3281   return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
   3282              (__v4di)
   3283              _mm256_setzero_si256 (),
   3284              (__mmask8) __U);
   3285 }
   3286 
   3287 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3288 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
   3289   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   3290                                              (__v4si)_mm_max_epi32(__A, __B),
   3291                                              (__v4si)_mm_setzero_si128());
   3292 }
   3293 
   3294 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3295 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   3296   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   3297                                              (__v4si)_mm_max_epi32(__A, __B),
   3298                                              (__v4si)__W);
   3299 }
   3300 
   3301 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3302 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
   3303   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   3304                                              (__v8si)_mm256_max_epi32(__A, __B),
   3305                                              (__v8si)_mm256_setzero_si256());
   3306 }
   3307 
   3308 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3309 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   3310   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   3311                                              (__v8si)_mm256_max_epi32(__A, __B),
   3312                                              (__v8si)__W);
   3313 }
   3314 
   3315 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3316 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
   3317   return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
   3318               (__v2di) __B,
   3319               (__v2di)
   3320               _mm_setzero_si128 (),
   3321               __M);
   3322 }
   3323 
   3324 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3325 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
   3326         __m128i __B) {
   3327   return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
   3328               (__v2di) __B,
   3329               (__v2di) __W, __M);
   3330 }
   3331 
   3332 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3333 _mm_max_epi64 (__m128i __A, __m128i __B) {
   3334   return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
   3335               (__v2di) __B,
   3336               (__v2di)
   3337               _mm_setzero_si128 (),
   3338               (__mmask8) -1);
   3339 }
   3340 
   3341 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3342 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
   3343   return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
   3344               (__v4di) __B,
   3345               (__v4di)
   3346               _mm256_setzero_si256 (),
   3347               __M);
   3348 }
   3349 
   3350 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3351 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
   3352            __m256i __B) {
   3353   return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
   3354               (__v4di) __B,
   3355               (__v4di) __W, __M);
   3356 }
   3357 
   3358 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3359 _mm256_max_epi64 (__m256i __A, __m256i __B) {
   3360   return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
   3361               (__v4di) __B,
   3362               (__v4di)
   3363               _mm256_setzero_si256 (),
   3364               (__mmask8) -1);
   3365 }
   3366 
   3367 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3368 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
   3369   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   3370                                              (__v4si)_mm_max_epu32(__A, __B),
   3371                                              (__v4si)_mm_setzero_si128());
   3372 }
   3373 
   3374 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3375 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   3376   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   3377                                              (__v4si)_mm_max_epu32(__A, __B),
   3378                                              (__v4si)__W);
   3379 }
   3380 
   3381 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3382 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
   3383   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   3384                                              (__v8si)_mm256_max_epu32(__A, __B),
   3385                                              (__v8si)_mm256_setzero_si256());
   3386 }
   3387 
   3388 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3389 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   3390   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   3391                                              (__v8si)_mm256_max_epu32(__A, __B),
   3392                                              (__v8si)__W);
   3393 }
   3394 
   3395 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3396 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
   3397   return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
   3398               (__v2di) __B,
   3399               (__v2di)
   3400               _mm_setzero_si128 (),
   3401               __M);
   3402 }
   3403 
   3404 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3405 _mm_max_epu64 (__m128i __A, __m128i __B) {
   3406   return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
   3407               (__v2di) __B,
   3408               (__v2di)
   3409               _mm_setzero_si128 (),
   3410               (__mmask8) -1);
   3411 }
   3412 
   3413 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3414 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
   3415         __m128i __B) {
   3416   return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
   3417               (__v2di) __B,
   3418               (__v2di) __W, __M);
   3419 }
   3420 
   3421 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3422 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
   3423   return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
   3424               (__v4di) __B,
   3425               (__v4di)
   3426               _mm256_setzero_si256 (),
   3427               __M);
   3428 }
   3429 
   3430 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3431 _mm256_max_epu64 (__m256i __A, __m256i __B) {
   3432   return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
   3433               (__v4di) __B,
   3434               (__v4di)
   3435               _mm256_setzero_si256 (),
   3436               (__mmask8) -1);
   3437 }
   3438 
   3439 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3440 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
   3441            __m256i __B) {
   3442   return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
   3443               (__v4di) __B,
   3444               (__v4di) __W, __M);
   3445 }
   3446 
   3447 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3448 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
   3449   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   3450                                              (__v4si)_mm_min_epi32(__A, __B),
   3451                                              (__v4si)_mm_setzero_si128());
   3452 }
   3453 
   3454 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3455 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   3456   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   3457                                              (__v4si)_mm_min_epi32(__A, __B),
   3458                                              (__v4si)__W);
   3459 }
   3460 
   3461 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3462 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
   3463   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   3464                                              (__v8si)_mm256_min_epi32(__A, __B),
   3465                                              (__v8si)_mm256_setzero_si256());
   3466 }
   3467 
   3468 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3469 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   3470   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   3471                                              (__v8si)_mm256_min_epi32(__A, __B),
   3472                                              (__v8si)__W);
   3473 }
   3474 
   3475 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3476 _mm_min_epi64 (__m128i __A, __m128i __B) {
   3477   return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
   3478               (__v2di) __B,
   3479               (__v2di)
   3480               _mm_setzero_si128 (),
   3481               (__mmask8) -1);
   3482 }
   3483 
   3484 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3485 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
   3486         __m128i __B) {
   3487   return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
   3488               (__v2di) __B,
   3489               (__v2di) __W, __M);
   3490 }
   3491 
   3492 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3493 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
   3494   return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
   3495               (__v2di) __B,
   3496               (__v2di)
   3497               _mm_setzero_si128 (),
   3498               __M);
   3499 }
   3500 
   3501 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3502 _mm256_min_epi64 (__m256i __A, __m256i __B) {
   3503   return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
   3504               (__v4di) __B,
   3505               (__v4di)
   3506               _mm256_setzero_si256 (),
   3507               (__mmask8) -1);
   3508 }
   3509 
   3510 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3511 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
   3512            __m256i __B) {
   3513   return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
   3514               (__v4di) __B,
   3515               (__v4di) __W, __M);
   3516 }
   3517 
   3518 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3519 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
   3520   return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
   3521               (__v4di) __B,
   3522               (__v4di)
   3523               _mm256_setzero_si256 (),
   3524               __M);
   3525 }
   3526 
   3527 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3528 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
   3529   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   3530                                              (__v4si)_mm_min_epu32(__A, __B),
   3531                                              (__v4si)_mm_setzero_si128());
   3532 }
   3533 
   3534 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3535 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   3536   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   3537                                              (__v4si)_mm_min_epu32(__A, __B),
   3538                                              (__v4si)__W);
   3539 }
   3540 
   3541 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3542 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
   3543   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   3544                                              (__v8si)_mm256_min_epu32(__A, __B),
   3545                                              (__v8si)_mm256_setzero_si256());
   3546 }
   3547 
   3548 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3549 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   3550   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   3551                                              (__v8si)_mm256_min_epu32(__A, __B),
   3552                                              (__v8si)__W);
   3553 }
   3554 
   3555 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3556 _mm_min_epu64 (__m128i __A, __m128i __B) {
   3557   return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
   3558               (__v2di) __B,
   3559               (__v2di)
   3560               _mm_setzero_si128 (),
   3561               (__mmask8) -1);
   3562 }
   3563 
   3564 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3565 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
   3566         __m128i __B) {
   3567   return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
   3568               (__v2di) __B,
   3569               (__v2di) __W, __M);
   3570 }
   3571 
   3572 static __inline__ __m128i __DEFAULT_FN_ATTRS
   3573 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
   3574   return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
   3575               (__v2di) __B,
   3576               (__v2di)
   3577               _mm_setzero_si128 (),
   3578               __M);
   3579 }
   3580 
   3581 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3582 _mm256_min_epu64 (__m256i __A, __m256i __B) {
   3583   return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
   3584               (__v4di) __B,
   3585               (__v4di)
   3586               _mm256_setzero_si256 (),
   3587               (__mmask8) -1);
   3588 }
   3589 
   3590 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3591 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
   3592            __m256i __B) {
   3593   return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
   3594               (__v4di) __B,
   3595               (__v4di) __W, __M);
   3596 }
   3597 
   3598 static __inline__ __m256i __DEFAULT_FN_ATTRS
   3599 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
   3600   return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
   3601               (__v4di) __B,
   3602               (__v4di)
   3603               _mm256_setzero_si256 (),
   3604               __M);
   3605 }
   3606 
   3607 #define _mm_roundscale_pd(A, imm) __extension__ ({ \
   3608   (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
   3609                                               (int)(imm), \
   3610                                               (__v2df)_mm_setzero_pd(), \
   3611                                               (__mmask8)-1); })
   3612 
   3613 
   3614 #define _mm_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
   3615   (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
   3616                                               (int)(imm), \
   3617                                               (__v2df)(__m128d)(W), \
   3618                                               (__mmask8)(U)); })
   3619 
   3620 
   3621 #define _mm_maskz_roundscale_pd(U, A, imm) __extension__ ({ \
   3622   (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
   3623                                               (int)(imm), \
   3624                                               (__v2df)_mm_setzero_pd(), \
   3625                                               (__mmask8)(U)); })
   3626 
   3627 
   3628 #define _mm256_roundscale_pd(A, imm) __extension__ ({ \
   3629   (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
   3630                                               (int)(imm), \
   3631                                               (__v4df)_mm256_setzero_pd(), \
   3632                                               (__mmask8)-1); })
   3633 
   3634 
   3635 #define _mm256_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
   3636   (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
   3637                                               (int)(imm), \
   3638                                               (__v4df)(__m256d)(W), \
   3639                                               (__mmask8)(U)); })
   3640 
   3641 
   3642 #define _mm256_maskz_roundscale_pd(U, A, imm)  __extension__ ({ \
   3643   (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
   3644                                               (int)(imm), \
   3645                                               (__v4df)_mm256_setzero_pd(), \
   3646                                               (__mmask8)(U)); })
   3647 
   3648 #define _mm_roundscale_ps(A, imm)  __extension__ ({ \
   3649   (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
   3650                                              (__v4sf)_mm_setzero_ps(), \
   3651                                              (__mmask8)-1); })
   3652 
   3653 
   3654 #define _mm_mask_roundscale_ps(W, U, A, imm)  __extension__ ({ \
   3655   (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
   3656                                              (__v4sf)(__m128)(W), \
   3657                                              (__mmask8)(U)); })
   3658 
   3659 
   3660 #define _mm_maskz_roundscale_ps(U, A, imm)  __extension__ ({ \
   3661   (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
   3662                                              (__v4sf)_mm_setzero_ps(), \
   3663                                              (__mmask8)(U)); })
   3664 
   3665 #define _mm256_roundscale_ps(A, imm)  __extension__ ({ \
   3666   (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
   3667                                              (__v8sf)_mm256_setzero_ps(), \
   3668                                              (__mmask8)-1); })
   3669 
   3670 #define _mm256_mask_roundscale_ps(W, U, A, imm)  __extension__ ({ \
   3671   (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
   3672                                              (__v8sf)(__m256)(W), \
   3673                                              (__mmask8)(U)); })
   3674 
   3675 
   3676 #define _mm256_maskz_roundscale_ps(U, A, imm)  __extension__ ({ \
   3677   (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
   3678                                              (__v8sf)_mm256_setzero_ps(), \
   3679                                              (__mmask8)(U)); })
   3680 
   3681 static __inline__ __m128d __DEFAULT_FN_ATTRS
   3682 _mm_scalef_pd (__m128d __A, __m128d __B) {
   3683   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
   3684                 (__v2df) __B,
   3685                 (__v2df)
   3686                 _mm_setzero_pd (),
   3687                 (__mmask8) -1);
   3688 }
   3689 
   3690 static __inline__ __m128d __DEFAULT_FN_ATTRS
   3691 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
   3692         __m128d __B) {
   3693   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
   3694                 (__v2df) __B,
   3695                 (__v2df) __W,
   3696                 (__mmask8) __U);
   3697 }
   3698 
   3699 static __inline__ __m128d __DEFAULT_FN_ATTRS
   3700 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
   3701   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
   3702                 (__v2df) __B,
   3703                 (__v2df)
   3704                 _mm_setzero_pd (),
   3705                 (__mmask8) __U);
   3706 }
   3707 
   3708 static __inline__ __m256d __DEFAULT_FN_ATTRS
   3709 _mm256_scalef_pd (__m256d __A, __m256d __B) {
   3710   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
   3711                 (__v4df) __B,
   3712                 (__v4df)
   3713                 _mm256_setzero_pd (),
   3714                 (__mmask8) -1);
   3715 }
   3716 
   3717 static __inline__ __m256d __DEFAULT_FN_ATTRS
   3718 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
   3719            __m256d __B) {
   3720   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
   3721                 (__v4df) __B,
   3722                 (__v4df) __W,
   3723                 (__mmask8) __U);
   3724 }
   3725 
   3726 static __inline__ __m256d __DEFAULT_FN_ATTRS
   3727 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
   3728   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
   3729                 (__v4df) __B,
   3730                 (__v4df)
   3731                 _mm256_setzero_pd (),
   3732                 (__mmask8) __U);
   3733 }
   3734 
   3735 static __inline__ __m128 __DEFAULT_FN_ATTRS
   3736 _mm_scalef_ps (__m128 __A, __m128 __B) {
   3737   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
   3738                (__v4sf) __B,
   3739                (__v4sf)
   3740                _mm_setzero_ps (),
   3741                (__mmask8) -1);
   3742 }
   3743 
   3744 static __inline__ __m128 __DEFAULT_FN_ATTRS
   3745 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   3746   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
   3747                (__v4sf) __B,
   3748                (__v4sf) __W,
   3749                (__mmask8) __U);
   3750 }
   3751 
   3752 static __inline__ __m128 __DEFAULT_FN_ATTRS
   3753 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
   3754   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
   3755                (__v4sf) __B,
   3756                (__v4sf)
   3757                _mm_setzero_ps (),
   3758                (__mmask8) __U);
   3759 }
   3760 
   3761 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3762 _mm256_scalef_ps (__m256 __A, __m256 __B) {
   3763   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
   3764                (__v8sf) __B,
   3765                (__v8sf)
   3766                _mm256_setzero_ps (),
   3767                (__mmask8) -1);
   3768 }
   3769 
   3770 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3771 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
   3772            __m256 __B) {
   3773   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
   3774                (__v8sf) __B,
   3775                (__v8sf) __W,
   3776                (__mmask8) __U);
   3777 }
   3778 
   3779 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3780 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
   3781   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
   3782                (__v8sf) __B,
   3783                (__v8sf)
   3784                _mm256_setzero_ps (),
   3785                (__mmask8) __U);
   3786 }
   3787 
   3788 #define _mm_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \
   3789   __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)-1, \
   3790                                (__v2di)(__m128i)(index), \
   3791                                (__v2df)(__m128d)(v1), (int)(scale)); })
   3792 
   3793 #define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
   3794   __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)(mask), \
   3795                                (__v2di)(__m128i)(index), \
   3796                                (__v2df)(__m128d)(v1), (int)(scale)); })
   3797 
   3798 #define _mm_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \
   3799   __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)-1, \
   3800                                (__v2di)(__m128i)(index), \
   3801                                (__v2di)(__m128i)(v1), (int)(scale)); })
   3802 
   3803 #define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
   3804   __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)(mask), \
   3805                                (__v2di)(__m128i)(index), \
   3806                                (__v2di)(__m128i)(v1), (int)(scale)); })
   3807 
   3808 #define _mm256_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \
   3809   __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)-1, \
   3810                                (__v4di)(__m256i)(index), \
   3811                                (__v4df)(__m256d)(v1), (int)(scale)); })
   3812 
   3813 #define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
   3814   __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)(mask), \
   3815                                (__v4di)(__m256i)(index), \
   3816                                (__v4df)(__m256d)(v1), (int)(scale)); })
   3817 
   3818 #define _mm256_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \
   3819   __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)-1, \
   3820                                (__v4di)(__m256i)(index), \
   3821                                (__v4di)(__m256i)(v1), (int)(scale)); })
   3822 
   3823 #define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
   3824   __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)(mask), \
   3825                                (__v4di)(__m256i)(index), \
   3826                                (__v4di)(__m256i)(v1), (int)(scale)); })
   3827 
   3828 #define _mm_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
   3829   __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \
   3830                                (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
   3831                                (int)(scale)); })
   3832 
   3833 #define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
   3834   __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \
   3835                                (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
   3836                                (int)(scale)); })
   3837 
   3838 #define _mm_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \
   3839   __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)-1, \
   3840                                (__v2di)(__m128i)(index), \
   3841                                (__v4si)(__m128i)(v1), (int)(scale)); })
   3842 
   3843 #define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
   3844   __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)(mask), \
   3845                                (__v2di)(__m128i)(index), \
   3846                                (__v4si)(__m128i)(v1), (int)(scale)); })
   3847 
   3848 #define _mm256_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
   3849   __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \
   3850                                (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
   3851                                (int)(scale)); })
   3852 
   3853 #define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
   3854   __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \
   3855                                (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
   3856                                (int)(scale)); })
   3857 
   3858 #define _mm256_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \
   3859   __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)-1, \
   3860                                (__v4di)(__m256i)(index), \
   3861                                (__v4si)(__m128i)(v1), (int)(scale)); })
   3862 
   3863 #define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({  \
   3864   __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)(mask), \
   3865                                (__v4di)(__m256i)(index), \
   3866                                (__v4si)(__m128i)(v1), (int)(scale)); })
   3867 
   3868 #define _mm_i32scatter_pd(addr, index, v1, scale) __extension__ ({      \
   3869   __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)-1, \
   3870                                (__v4si)(__m128i)(index), \
   3871                                (__v2df)(__m128d)(v1), (int)(scale)); })
   3872 
   3873 #define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({        \
   3874   __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \
   3875                                (__v4si)(__m128i)(index), \
   3876                                (__v2df)(__m128d)(v1), (int)(scale)); })
   3877 
   3878 #define _mm_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \
   3879   __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \
   3880                                (__v4si)(__m128i)(index), \
   3881                                (__v2di)(__m128i)(v1), (int)(scale)); })
   3882 
   3883 #define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
   3884   __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \
   3885                                (__v4si)(__m128i)(index), \
   3886                                (__v2di)(__m128i)(v1), (int)(scale)); })
   3887 
   3888 #define _mm256_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \
   3889   __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \
   3890                                (__v4si)(__m128i)(index), \
   3891                                (__v4df)(__m256d)(v1), (int)(scale)); })
   3892 
   3893 #define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
   3894   __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \
   3895                                (__v4si)(__m128i)(index), \
   3896                                (__v4df)(__m256d)(v1), (int)(scale)); })
   3897 
   3898 #define _mm256_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \
   3899   __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \
   3900                                (__v4si)(__m128i)(index), \
   3901                                (__v4di)(__m256i)(v1), (int)(scale)); })
   3902 
   3903 #define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
   3904   __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \
   3905                                (__v4si)(__m128i)(index), \
   3906                                (__v4di)(__m256i)(v1), (int)(scale)); })
   3907 
   3908 #define _mm_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
   3909   __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \
   3910                                (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
   3911                                (int)(scale)); })
   3912 
   3913 #define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
   3914   __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \
   3915                                (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
   3916                                (int)(scale)); })
   3917 
   3918 #define _mm_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
   3919   __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \
   3920                                (__v4si)(__m128i)(index), \
   3921                                (__v4si)(__m128i)(v1), (int)(scale)); })
   3922 
   3923 #define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
   3924   __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \
   3925                                (__v4si)(__m128i)(index), \
   3926                                (__v4si)(__m128i)(v1), (int)(scale)); })
   3927 
   3928 #define _mm256_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
   3929   __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \
   3930                                (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
   3931                                (int)(scale)); })
   3932 
   3933 #define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
   3934   __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \
   3935                                (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
   3936                                (int)(scale)); })
   3937 
   3938 #define _mm256_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
   3939   __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \
   3940                                (__v8si)(__m256i)(index), \
   3941                                (__v8si)(__m256i)(v1), (int)(scale)); })
   3942 
   3943 #define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
   3944   __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \
   3945                                (__v8si)(__m256i)(index), \
   3946                                (__v8si)(__m256i)(v1), (int)(scale)); })
   3947 
   3948 static __inline__ __m128d __DEFAULT_FN_ATTRS
   3949 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
   3950   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   3951                                               (__v2df)_mm_sqrt_pd(__A),
   3952                                               (__v2df)__W);
   3953 }
   3954 
   3955 static __inline__ __m128d __DEFAULT_FN_ATTRS
   3956 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
   3957   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   3958                                               (__v2df)_mm_sqrt_pd(__A),
   3959                                               (__v2df)_mm_setzero_pd());
   3960 }
   3961 
   3962 static __inline__ __m256d __DEFAULT_FN_ATTRS
   3963 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
   3964   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   3965                                               (__v4df)_mm256_sqrt_pd(__A),
   3966                                               (__v4df)__W);
   3967 }
   3968 
   3969 static __inline__ __m256d __DEFAULT_FN_ATTRS
   3970 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
   3971   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   3972                                               (__v4df)_mm256_sqrt_pd(__A),
   3973                                               (__v4df)_mm256_setzero_pd());
   3974 }
   3975 
   3976 static __inline__ __m128 __DEFAULT_FN_ATTRS
   3977 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
   3978   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   3979                                              (__v4sf)_mm_sqrt_ps(__A),
   3980                                              (__v4sf)__W);
   3981 }
   3982 
   3983 static __inline__ __m128 __DEFAULT_FN_ATTRS
   3984 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
   3985   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   3986                                              (__v4sf)_mm_sqrt_ps(__A),
   3987                                              (__v4sf)_mm_setzero_pd());
   3988 }
   3989 
   3990 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3991 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
   3992   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   3993                                              (__v8sf)_mm256_sqrt_ps(__A),
   3994                                              (__v8sf)__W);
   3995 }
   3996 
   3997 static __inline__ __m256 __DEFAULT_FN_ATTRS
   3998 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
   3999   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   4000                                              (__v8sf)_mm256_sqrt_ps(__A),
   4001                                              (__v8sf)_mm256_setzero_ps());
   4002 }
   4003 
   4004 static __inline__ __m128d __DEFAULT_FN_ATTRS
   4005 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   4006   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   4007                                               (__v2df)_mm_sub_pd(__A, __B),
   4008                                               (__v2df)__W);
   4009 }
   4010 
   4011 static __inline__ __m128d __DEFAULT_FN_ATTRS
   4012 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   4013   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   4014                                               (__v2df)_mm_sub_pd(__A, __B),
   4015                                               (__v2df)_mm_setzero_pd());
   4016 }
   4017 
   4018 static __inline__ __m256d __DEFAULT_FN_ATTRS
   4019 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   4020   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   4021                                               (__v4df)_mm256_sub_pd(__A, __B),
   4022                                               (__v4df)__W);
   4023 }
   4024 
   4025 static __inline__ __m256d __DEFAULT_FN_ATTRS
   4026 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   4027   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   4028                                               (__v4df)_mm256_sub_pd(__A, __B),
   4029                                               (__v4df)_mm256_setzero_pd());
   4030 }
   4031 
   4032 static __inline__ __m128 __DEFAULT_FN_ATTRS
   4033 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   4034   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   4035                                              (__v4sf)_mm_sub_ps(__A, __B),
   4036                                              (__v4sf)__W);
   4037 }
   4038 
   4039 static __inline__ __m128 __DEFAULT_FN_ATTRS
   4040 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   4041   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   4042                                              (__v4sf)_mm_sub_ps(__A, __B),
   4043                                              (__v4sf)_mm_setzero_ps());
   4044 }
   4045 
   4046 static __inline__ __m256 __DEFAULT_FN_ATTRS
   4047 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   4048   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   4049                                              (__v8sf)_mm256_sub_ps(__A, __B),
   4050                                              (__v8sf)__W);
   4051 }
   4052 
   4053 static __inline__ __m256 __DEFAULT_FN_ATTRS
   4054 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   4055   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   4056                                              (__v8sf)_mm256_sub_ps(__A, __B),
   4057                                              (__v8sf)_mm256_setzero_ps());
   4058 }
   4059 
   4060 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4061 _mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
   4062             __m128i __B) {
   4063   return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
   4064                    (__v4si) __I
   4065                    /* idx */ ,
   4066                    (__v4si) __B,
   4067                    (__mmask8) __U);
   4068 }
   4069 
   4070 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4071 _mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
   4072          __mmask8 __U, __m256i __B) {
   4073   return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
   4074                    (__v8si) __I
   4075                    /* idx */ ,
   4076                    (__v8si) __B,
   4077                    (__mmask8) __U);
   4078 }
   4079 
   4080 static __inline__ __m128d __DEFAULT_FN_ATTRS
   4081 _mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
   4082          __m128d __B) {
   4083   return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
   4084               (__v2di) __I
   4085               /* idx */ ,
   4086               (__v2df) __B,
   4087               (__mmask8)
   4088               __U);
   4089 }
   4090 
   4091 static __inline__ __m256d __DEFAULT_FN_ATTRS
   4092 _mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
   4093             __m256d __B) {
   4094   return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
   4095               (__v4di) __I
   4096               /* idx */ ,
   4097               (__v4df) __B,
   4098               (__mmask8)
   4099               __U);
   4100 }
   4101 
   4102 static __inline__ __m128 __DEFAULT_FN_ATTRS
   4103 _mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
   4104          __m128 __B) {
   4105   return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
   4106                    (__v4si) __I
   4107                    /* idx */ ,
   4108                    (__v4sf) __B,
   4109                    (__mmask8) __U);
   4110 }
   4111 
   4112 static __inline__ __m256 __DEFAULT_FN_ATTRS
   4113 _mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
   4114             __m256 __B) {
   4115   return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
   4116                    (__v8si) __I
   4117                    /* idx */ ,
   4118                    (__v8sf) __B,
   4119                    (__mmask8) __U);
   4120 }
   4121 
   4122 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4123 _mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
   4124             __m128i __B) {
   4125   return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
   4126                    (__v2di) __I
   4127                    /* idx */ ,
   4128                    (__v2di) __B,
   4129                    (__mmask8) __U);
   4130 }
   4131 
   4132 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4133 _mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
   4134          __mmask8 __U, __m256i __B) {
   4135   return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
   4136                    (__v4di) __I
   4137                    /* idx */ ,
   4138                    (__v4di) __B,
   4139                    (__mmask8) __U);
   4140 }
   4141 
   4142 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4143 _mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B) {
   4144   return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
   4145                    /* idx */ ,
   4146                    (__v4si) __A,
   4147                    (__v4si) __B,
   4148                    (__mmask8) -1);
   4149 }
   4150 
   4151 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4152 _mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
   4153            __m128i __B) {
   4154   return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
   4155                    /* idx */ ,
   4156                    (__v4si) __A,
   4157                    (__v4si) __B,
   4158                    (__mmask8) __U);
   4159 }
   4160 
   4161 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4162 _mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
   4163             __m128i __B) {
   4164   return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
   4165               /* idx */ ,
   4166               (__v4si) __A,
   4167               (__v4si) __B,
   4168               (__mmask8)
   4169               __U);
   4170 }
   4171 
   4172 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4173 _mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B) {
   4174   return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
   4175                    /* idx */ ,
   4176                    (__v8si) __A,
   4177                    (__v8si) __B,
   4178                    (__mmask8) -1);
   4179 }
   4180 
   4181 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4182 _mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
   4183         __m256i __B) {
   4184   return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
   4185                    /* idx */ ,
   4186                    (__v8si) __A,
   4187                    (__v8si) __B,
   4188                    (__mmask8) __U);
   4189 }
   4190 
   4191 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4192 _mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
   4193          __m256i __I, __m256i __B) {
   4194   return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
   4195               /* idx */ ,
   4196               (__v8si) __A,
   4197               (__v8si) __B,
   4198               (__mmask8)
   4199               __U);
   4200 }
   4201 
   4202 static __inline__ __m128d __DEFAULT_FN_ATTRS
   4203 _mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B) {
   4204   return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
   4205               /* idx */ ,
   4206               (__v2df) __A,
   4207               (__v2df) __B,
   4208               (__mmask8) -
   4209               1);
   4210 }
   4211 
   4212 static __inline__ __m128d __DEFAULT_FN_ATTRS
   4213 _mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
   4214         __m128d __B) {
   4215   return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
   4216               /* idx */ ,
   4217               (__v2df) __A,
   4218               (__v2df) __B,
   4219               (__mmask8)
   4220               __U);
   4221 }
   4222 
   4223 static __inline__ __m128d __DEFAULT_FN_ATTRS
   4224 _mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
   4225          __m128d __B) {
   4226   return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
   4227                /* idx */ ,
   4228                (__v2df) __A,
   4229                (__v2df) __B,
   4230                (__mmask8)
   4231                __U);
   4232 }
   4233 
   4234 static __inline__ __m256d __DEFAULT_FN_ATTRS
   4235 _mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B) {
   4236   return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
   4237               /* idx */ ,
   4238               (__v4df) __A,
   4239               (__v4df) __B,
   4240               (__mmask8) -
   4241               1);
   4242 }
   4243 
   4244 static __inline__ __m256d __DEFAULT_FN_ATTRS
   4245 _mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
   4246            __m256d __B) {
   4247   return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
   4248               /* idx */ ,
   4249               (__v4df) __A,
   4250               (__v4df) __B,
   4251               (__mmask8)
   4252               __U);
   4253 }
   4254 
   4255 static __inline__ __m256d __DEFAULT_FN_ATTRS
   4256 _mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
   4257             __m256d __B) {
   4258   return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
   4259                /* idx */ ,
   4260                (__v4df) __A,
   4261                (__v4df) __B,
   4262                (__mmask8)
   4263                __U);
   4264 }
   4265 
   4266 static __inline__ __m128 __DEFAULT_FN_ATTRS
   4267 _mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B) {
   4268   return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
   4269                    /* idx */ ,
   4270                    (__v4sf) __A,
   4271                    (__v4sf) __B,
   4272                    (__mmask8) -1);
   4273 }
   4274 
   4275 static __inline__ __m128 __DEFAULT_FN_ATTRS
   4276 _mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
   4277         __m128 __B) {
   4278   return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
   4279                    /* idx */ ,
   4280                    (__v4sf) __A,
   4281                    (__v4sf) __B,
   4282                    (__mmask8) __U);
   4283 }
   4284 
   4285 static __inline__ __m128 __DEFAULT_FN_ATTRS
   4286 _mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
   4287          __m128 __B) {
   4288   return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
   4289               /* idx */ ,
   4290               (__v4sf) __A,
   4291               (__v4sf) __B,
   4292               (__mmask8)
   4293               __U);
   4294 }
   4295 
   4296 static __inline__ __m256 __DEFAULT_FN_ATTRS
   4297 _mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B) {
   4298   return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
   4299                    /* idx */ ,
   4300                    (__v8sf) __A,
   4301                    (__v8sf) __B,
   4302                    (__mmask8) -1);
   4303 }
   4304 
   4305 static __inline__ __m256 __DEFAULT_FN_ATTRS
   4306 _mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
   4307            __m256 __B) {
   4308   return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
   4309                    /* idx */ ,
   4310                    (__v8sf) __A,
   4311                    (__v8sf) __B,
   4312                    (__mmask8) __U);
   4313 }
   4314 
   4315 static __inline__ __m256 __DEFAULT_FN_ATTRS
   4316 _mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
   4317             __m256 __B) {
   4318   return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
   4319               /* idx */ ,
   4320               (__v8sf) __A,
   4321               (__v8sf) __B,
   4322               (__mmask8)
   4323               __U);
   4324 }
   4325 
   4326 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4327 _mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B) {
   4328   return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
   4329                    /* idx */ ,
   4330                    (__v2di) __A,
   4331                    (__v2di) __B,
   4332                    (__mmask8) -1);
   4333 }
   4334 
   4335 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4336 _mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
   4337            __m128i __B) {
   4338   return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
   4339                    /* idx */ ,
   4340                    (__v2di) __A,
   4341                    (__v2di) __B,
   4342                    (__mmask8) __U);
   4343 }
   4344 
   4345 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4346 _mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
   4347             __m128i __B) {
   4348   return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
   4349               /* idx */ ,
   4350               (__v2di) __A,
   4351               (__v2di) __B,
   4352               (__mmask8)
   4353               __U);
   4354 }
   4355 
   4356 
   4357 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4358 _mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B) {
   4359   return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
   4360                    /* idx */ ,
   4361                    (__v4di) __A,
   4362                    (__v4di) __B,
   4363                    (__mmask8) -1);
   4364 }
   4365 
   4366 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4367 _mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
   4368         __m256i __B) {
   4369   return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
   4370                    /* idx */ ,
   4371                    (__v4di) __A,
   4372                    (__v4di) __B,
   4373                    (__mmask8) __U);
   4374 }
   4375 
   4376 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4377 _mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
   4378          __m256i __I, __m256i __B) {
   4379   return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
   4380               /* idx */ ,
   4381               (__v4di) __A,
   4382               (__v4di) __B,
   4383               (__mmask8)
   4384               __U);
   4385 }
   4386 
   4387 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4388 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
   4389 {
   4390   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4391                                              (__v4si)_mm_cvtepi8_epi32(__A),
   4392                                              (__v4si)__W);
   4393 }
   4394 
   4395 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4396 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
   4397 {
   4398   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4399                                              (__v4si)_mm_cvtepi8_epi32(__A),
   4400                                              (__v4si)_mm_setzero_si128());
   4401 }
   4402 
   4403 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4404 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
   4405 {
   4406   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4407                                              (__v8si)_mm256_cvtepi8_epi32(__A),
   4408                                              (__v8si)__W);
   4409 }
   4410 
   4411 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4412 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
   4413 {
   4414   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4415                                              (__v8si)_mm256_cvtepi8_epi32(__A),
   4416                                              (__v8si)_mm256_setzero_si256());
   4417 }
   4418 
   4419 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4420 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
   4421 {
   4422   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4423                                              (__v2di)_mm_cvtepi8_epi64(__A),
   4424                                              (__v2di)__W);
   4425 }
   4426 
   4427 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4428 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
   4429 {
   4430   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4431                                              (__v2di)_mm_cvtepi8_epi64(__A),
   4432                                              (__v2di)_mm_setzero_si128());
   4433 }
   4434 
   4435 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4436 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
   4437 {
   4438   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4439                                              (__v4di)_mm256_cvtepi8_epi64(__A),
   4440                                              (__v4di)__W);
   4441 }
   4442 
   4443 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4444 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
   4445 {
   4446   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4447                                              (__v4di)_mm256_cvtepi8_epi64(__A),
   4448                                              (__v4di)_mm256_setzero_si256());
   4449 }
   4450 
   4451 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4452 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
   4453 {
   4454   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4455                                              (__v2di)_mm_cvtepi32_epi64(__X),
   4456                                              (__v2di)__W);
   4457 }
   4458 
   4459 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4460 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
   4461 {
   4462   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4463                                              (__v2di)_mm_cvtepi32_epi64(__X),
   4464                                              (__v2di)_mm_setzero_si128());
   4465 }
   4466 
   4467 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4468 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
   4469 {
   4470   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4471                                              (__v4di)_mm256_cvtepi32_epi64(__X),
   4472                                              (__v4di)__W);
   4473 }
   4474 
   4475 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4476 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
   4477 {
   4478   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4479                                              (__v4di)_mm256_cvtepi32_epi64(__X),
   4480                                              (__v4di)_mm256_setzero_si256());
   4481 }
   4482 
   4483 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4484 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
   4485 {
   4486   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4487                                              (__v4si)_mm_cvtepi16_epi32(__A),
   4488                                              (__v4si)__W);
   4489 }
   4490 
   4491 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4492 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
   4493 {
   4494   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4495                                              (__v4si)_mm_cvtepi16_epi32(__A),
   4496                                              (__v4si)_mm_setzero_si128());
   4497 }
   4498 
   4499 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4500 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
   4501 {
   4502   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4503                                              (__v8si)_mm256_cvtepi16_epi32(__A),
   4504                                              (__v8si)__W);
   4505 }
   4506 
   4507 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4508 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
   4509 {
   4510   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4511                                              (__v8si)_mm256_cvtepi16_epi32(__A),
   4512                                              (__v8si)_mm256_setzero_si256());
   4513 }
   4514 
   4515 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4516 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
   4517 {
   4518   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4519                                              (__v2di)_mm_cvtepi16_epi64(__A),
   4520                                              (__v2di)__W);
   4521 }
   4522 
   4523 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4524 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
   4525 {
   4526   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4527                                              (__v2di)_mm_cvtepi16_epi64(__A),
   4528                                              (__v2di)_mm_setzero_si128());
   4529 }
   4530 
   4531 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4532 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
   4533 {
   4534   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4535                                              (__v4di)_mm256_cvtepi16_epi64(__A),
   4536                                              (__v4di)__W);
   4537 }
   4538 
   4539 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4540 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
   4541 {
   4542   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4543                                              (__v4di)_mm256_cvtepi16_epi64(__A),
   4544                                              (__v4di)_mm256_setzero_si256());
   4545 }
   4546 
   4547 
   4548 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4549 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
   4550 {
   4551   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4552                                              (__v4si)_mm_cvtepu8_epi32(__A),
   4553                                              (__v4si)__W);
   4554 }
   4555 
   4556 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4557 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
   4558 {
   4559   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4560                                              (__v4si)_mm_cvtepu8_epi32(__A),
   4561                                              (__v4si)_mm_setzero_si128());
   4562 }
   4563 
   4564 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4565 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
   4566 {
   4567   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4568                                              (__v8si)_mm256_cvtepu8_epi32(__A),
   4569                                              (__v8si)__W);
   4570 }
   4571 
   4572 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4573 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
   4574 {
   4575   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4576                                              (__v8si)_mm256_cvtepu8_epi32(__A),
   4577                                              (__v8si)_mm256_setzero_si256());
   4578 }
   4579 
   4580 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4581 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
   4582 {
   4583   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4584                                              (__v2di)_mm_cvtepu8_epi64(__A),
   4585                                              (__v2di)__W);
   4586 }
   4587 
   4588 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4589 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
   4590 {
   4591   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4592                                              (__v2di)_mm_cvtepu8_epi64(__A),
   4593                                              (__v2di)_mm_setzero_si128());
   4594 }
   4595 
   4596 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4597 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
   4598 {
   4599   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4600                                              (__v4di)_mm256_cvtepu8_epi64(__A),
   4601                                              (__v4di)__W);
   4602 }
   4603 
   4604 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4605 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
   4606 {
   4607   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4608                                              (__v4di)_mm256_cvtepu8_epi64(__A),
   4609                                              (__v4di)_mm256_setzero_si256());
   4610 }
   4611 
   4612 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4613 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
   4614 {
   4615   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4616                                              (__v2di)_mm_cvtepu32_epi64(__X),
   4617                                              (__v2di)__W);
   4618 }
   4619 
   4620 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4621 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
   4622 {
   4623   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4624                                              (__v2di)_mm_cvtepu32_epi64(__X),
   4625                                              (__v2di)_mm_setzero_si128());
   4626 }
   4627 
   4628 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4629 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
   4630 {
   4631   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4632                                              (__v4di)_mm256_cvtepu32_epi64(__X),
   4633                                              (__v4di)__W);
   4634 }
   4635 
   4636 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4637 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
   4638 {
   4639   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4640                                              (__v4di)_mm256_cvtepu32_epi64(__X),
   4641                                              (__v4di)_mm256_setzero_si256());
   4642 }
   4643 
   4644 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4645 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
   4646 {
   4647   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4648                                              (__v4si)_mm_cvtepu16_epi32(__A),
   4649                                              (__v4si)__W);
   4650 }
   4651 
   4652 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4653 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
   4654 {
   4655   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4656                                              (__v4si)_mm_cvtepu16_epi32(__A),
   4657                                              (__v4si)_mm_setzero_si128());
   4658 }
   4659 
   4660 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4661 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
   4662 {
   4663   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4664                                              (__v8si)_mm256_cvtepu16_epi32(__A),
   4665                                              (__v8si)__W);
   4666 }
   4667 
   4668 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4669 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
   4670 {
   4671   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4672                                              (__v8si)_mm256_cvtepu16_epi32(__A),
   4673                                              (__v8si)_mm256_setzero_si256());
   4674 }
   4675 
   4676 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4677 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
   4678 {
   4679   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4680                                              (__v2di)_mm_cvtepu16_epi64(__A),
   4681                                              (__v2di)__W);
   4682 }
   4683 
   4684 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4685 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
   4686 {
   4687   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4688                                              (__v2di)_mm_cvtepu16_epi64(__A),
   4689                                              (__v2di)_mm_setzero_si128());
   4690 }
   4691 
   4692 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4693 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
   4694 {
   4695   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4696                                              (__v4di)_mm256_cvtepu16_epi64(__A),
   4697                                              (__v4di)__W);
   4698 }
   4699 
   4700 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4701 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
   4702 {
   4703   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4704                                              (__v4di)_mm256_cvtepu16_epi64(__A),
   4705                                              (__v4di)_mm256_setzero_si256());
   4706 }
   4707 
   4708 
   4709 #define _mm_rol_epi32(a, b) __extension__ ({\
   4710   (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
   4711                                         (__v4si)_mm_setzero_si128(), \
   4712                                         (__mmask8)-1); })
   4713 
   4714 #define _mm_mask_rol_epi32(w, u, a, b) __extension__ ({\
   4715   (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
   4716                                         (__v4si)(__m128i)(w), (__mmask8)(u)); })
   4717 
   4718 #define _mm_maskz_rol_epi32(u, a, b) __extension__ ({\
   4719   (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
   4720                                         (__v4si)_mm_setzero_si128(), \
   4721                                         (__mmask8)(u)); })
   4722 
   4723 #define _mm256_rol_epi32(a, b) __extension__ ({\
   4724   (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
   4725                                         (__v8si)_mm256_setzero_si256(), \
   4726                                         (__mmask8)-1); })
   4727 
   4728 #define _mm256_mask_rol_epi32(w, u, a, b) __extension__ ({\
   4729   (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
   4730                                         (__v8si)(__m256i)(w), (__mmask8)(u)); })
   4731 
   4732 #define _mm256_maskz_rol_epi32(u, a, b) __extension__ ({\
   4733   (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
   4734                                         (__v8si)_mm256_setzero_si256(), \
   4735                                         (__mmask8)(u)); })
   4736 
   4737 #define _mm_rol_epi64(a, b) __extension__ ({\
   4738   (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
   4739                                         (__v2di)_mm_setzero_di(), \
   4740                                         (__mmask8)-1); })
   4741 
   4742 #define _mm_mask_rol_epi64(w, u, a, b) __extension__ ({\
   4743   (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
   4744                                         (__v2di)(__m128i)(w), (__mmask8)(u)); })
   4745 
   4746 #define _mm_maskz_rol_epi64(u, a, b) __extension__ ({\
   4747   (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
   4748                                         (__v2di)_mm_setzero_di(), \
   4749                                         (__mmask8)(u)); })
   4750 
   4751 #define _mm256_rol_epi64(a, b) __extension__ ({\
   4752   (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
   4753                                         (__v4di)_mm256_setzero_si256(), \
   4754                                         (__mmask8)-1); })
   4755 
   4756 #define _mm256_mask_rol_epi64(w, u, a, b) __extension__ ({\
   4757   (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
   4758                                         (__v4di)(__m256i)(w), (__mmask8)(u)); })
   4759 
   4760 #define _mm256_maskz_rol_epi64(u, a, b) __extension__ ({\
   4761   (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
   4762                                         (__v4di)_mm256_setzero_si256(), \
   4763                                         (__mmask8)(u)); })
   4764 
   4765 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4766 _mm_rolv_epi32 (__m128i __A, __m128i __B)
   4767 {
   4768   return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
   4769               (__v4si) __B,
   4770               (__v4si)
   4771               _mm_setzero_si128 (),
   4772               (__mmask8) -1);
   4773 }
   4774 
   4775 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4776 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
   4777          __m128i __B)
   4778 {
   4779   return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
   4780               (__v4si) __B,
   4781               (__v4si) __W,
   4782               (__mmask8) __U);
   4783 }
   4784 
   4785 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4786 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
   4787 {
   4788   return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
   4789               (__v4si) __B,
   4790               (__v4si)
   4791               _mm_setzero_si128 (),
   4792               (__mmask8) __U);
   4793 }
   4794 
   4795 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4796 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
   4797 {
   4798   return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
   4799               (__v8si) __B,
   4800               (__v8si)
   4801               _mm256_setzero_si256 (),
   4802               (__mmask8) -1);
   4803 }
   4804 
   4805 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4806 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
   4807       __m256i __B)
   4808 {
   4809   return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
   4810               (__v8si) __B,
   4811               (__v8si) __W,
   4812               (__mmask8) __U);
   4813 }
   4814 
   4815 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4816 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
   4817 {
   4818   return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
   4819               (__v8si) __B,
   4820               (__v8si)
   4821               _mm256_setzero_si256 (),
   4822               (__mmask8) __U);
   4823 }
   4824 
   4825 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4826 _mm_rolv_epi64 (__m128i __A, __m128i __B)
   4827 {
   4828   return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
   4829               (__v2di) __B,
   4830               (__v2di)
   4831               _mm_setzero_di (),
   4832               (__mmask8) -1);
   4833 }
   4834 
   4835 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4836 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
   4837          __m128i __B)
   4838 {
   4839   return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
   4840               (__v2di) __B,
   4841               (__v2di) __W,
   4842               (__mmask8) __U);
   4843 }
   4844 
   4845 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4846 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   4847 {
   4848   return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
   4849               (__v2di) __B,
   4850               (__v2di)
   4851               _mm_setzero_di (),
   4852               (__mmask8) __U);
   4853 }
   4854 
   4855 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4856 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
   4857 {
   4858   return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
   4859               (__v4di) __B,
   4860               (__v4di)
   4861               _mm256_setzero_si256 (),
   4862               (__mmask8) -1);
   4863 }
   4864 
   4865 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4866 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
   4867       __m256i __B)
   4868 {
   4869   return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
   4870               (__v4di) __B,
   4871               (__v4di) __W,
   4872               (__mmask8) __U);
   4873 }
   4874 
   4875 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4876 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
   4877 {
   4878   return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
   4879               (__v4di) __B,
   4880               (__v4di)
   4881               _mm256_setzero_si256 (),
   4882               (__mmask8) __U);
   4883 }
   4884 
   4885 #define _mm_ror_epi32(A, B) __extension__ ({ \
   4886   (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
   4887                                         (__v4si)_mm_setzero_si128(), \
   4888                                         (__mmask8)-1); })
   4889 
   4890 #define _mm_mask_ror_epi32(W, U, A, B) __extension__ ({ \
   4891   (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
   4892                                         (__v4si)(__m128i)(W), (__mmask8)(U)); })
   4893 
   4894 #define _mm_maskz_ror_epi32(U, A, B) __extension__ ({ \
   4895   (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
   4896                                         (__v4si)_mm_setzero_si128(), \
   4897                                         (__mmask8)(U)); })
   4898 
   4899 #define _mm256_ror_epi32(A, B) __extension__ ({ \
   4900   (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
   4901                                         (__v8si)_mm256_setzero_si256(), \
   4902                                         (__mmask8)-1); })
   4903 
   4904 #define _mm256_mask_ror_epi32(W, U, A, B) __extension__ ({ \
   4905   (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
   4906                                         (__v8si)(__m256i)(W), (__mmask8)(U)); })
   4907 
   4908 #define _mm256_maskz_ror_epi32(U, A, B) __extension__ ({ \
   4909   (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
   4910                                         (__v8si)_mm256_setzero_si256(), \
   4911                                         (__mmask8)(U)); })
   4912 
   4913 #define _mm_ror_epi64(A, B) __extension__ ({ \
   4914   (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
   4915                                         (__v2di)_mm_setzero_di(), \
   4916                                         (__mmask8)-1); })
   4917 
   4918 #define _mm_mask_ror_epi64(W, U, A, B) __extension__ ({ \
   4919   (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
   4920                                         (__v2di)(__m128i)(W), (__mmask8)(U)); })
   4921 
   4922 #define _mm_maskz_ror_epi64(U, A, B) __extension__ ({ \
   4923   (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
   4924                                         (__v2di)_mm_setzero_di(), \
   4925                                         (__mmask8)(U)); })
   4926 
   4927 #define _mm256_ror_epi64(A, B) __extension__ ({ \
   4928   (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
   4929                                         (__v4di)_mm256_setzero_si256(), \
   4930                                         (__mmask8)-1); })
   4931 
   4932 #define _mm256_mask_ror_epi64(W, U, A, B) __extension__ ({ \
   4933   (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
   4934                                         (__v4di)(__m256i)(W), (__mmask8)(U)); })
   4935 
   4936 #define _mm256_maskz_ror_epi64(U, A, B) __extension__ ({ \
   4937   (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
   4938                                         (__v4di)_mm256_setzero_si256(), \
   4939                                         (__mmask8)(U)); })
   4940 
   4941 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4942 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   4943 {
   4944   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4945                                              (__v4si)_mm_sll_epi32(__A, __B),
   4946                                              (__v4si)__W);
   4947 }
   4948 
   4949 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4950 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
   4951 {
   4952   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4953                                              (__v4si)_mm_sll_epi32(__A, __B),
   4954                                              (__v4si)_mm_setzero_si128());
   4955 }
   4956 
   4957 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4958 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
   4959 {
   4960   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4961                                              (__v8si)_mm256_sll_epi32(__A, __B),
   4962                                              (__v8si)__W);
   4963 }
   4964 
   4965 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4966 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
   4967 {
   4968   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4969                                              (__v8si)_mm256_sll_epi32(__A, __B),
   4970                                              (__v8si)_mm256_setzero_si256());
   4971 }
   4972 
   4973 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4974 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
   4975 {
   4976   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4977                                              (__v4si)_mm_slli_epi32(__A, __B),
   4978                                              (__v4si)__W);
   4979 }
   4980 
   4981 static __inline__ __m128i __DEFAULT_FN_ATTRS
   4982 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B)
   4983 {
   4984   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4985                                              (__v4si)_mm_slli_epi32(__A, __B),
   4986                                              (__v4si)_mm_setzero_si128());
   4987 }
   4988 
   4989 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4990 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
   4991 {
   4992   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4993                                              (__v8si)_mm256_slli_epi32(__A, __B),
   4994                                              (__v8si)__W);
   4995 }
   4996 
   4997 static __inline__ __m256i __DEFAULT_FN_ATTRS
   4998 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B)
   4999 {
   5000   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   5001                                              (__v8si)_mm256_slli_epi32(__A, __B),
   5002                                              (__v8si)_mm256_setzero_si256());
   5003 }
   5004 
   5005 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5006 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   5007 {
   5008   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   5009                                              (__v2di)_mm_sll_epi64(__A, __B),
   5010                                              (__v2di)__W);
   5011 }
   5012 
   5013 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5014 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
   5015 {
   5016   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   5017                                              (__v2di)_mm_sll_epi64(__A, __B),
   5018                                              (__v2di)_mm_setzero_di());
   5019 }
   5020 
   5021 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5022 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
   5023 {
   5024   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   5025                                              (__v4di)_mm256_sll_epi64(__A, __B),
   5026                                              (__v4di)__W);
   5027 }
   5028 
   5029 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5030 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
   5031 {
   5032   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   5033                                              (__v4di)_mm256_sll_epi64(__A, __B),
   5034                                              (__v4di)_mm256_setzero_si256());
   5035 }
   5036 
   5037 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5038 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
   5039 {
   5040   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   5041                                              (__v2di)_mm_slli_epi64(__A, __B),
   5042                                              (__v2di)__W);
   5043 }
   5044 
   5045 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5046 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B)
   5047 {
   5048   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   5049                                              (__v2di)_mm_slli_epi64(__A, __B),
   5050                                              (__v2di)_mm_setzero_di());
   5051 }
   5052 
   5053 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5054 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
   5055 {
   5056   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   5057                                              (__v4di)_mm256_slli_epi64(__A, __B),
   5058                                              (__v4di)__W);
   5059 }
   5060 
   5061 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5062 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B)
   5063 {
   5064   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   5065                                              (__v4di)_mm256_slli_epi64(__A, __B),
   5066                                              (__v4di)_mm256_setzero_si256());
   5067 }
   5068 
   5069 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5070 _mm_rorv_epi32 (__m128i __A, __m128i __B)
   5071 {
   5072   return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
   5073               (__v4si) __B,
   5074               (__v4si)
   5075               _mm_setzero_si128 (),
   5076               (__mmask8) -1);
   5077 }
   5078 
   5079 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5080 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
   5081          __m128i __B)
   5082 {
   5083   return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
   5084               (__v4si) __B,
   5085               (__v4si) __W,
   5086               (__mmask8) __U);
   5087 }
   5088 
   5089 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5090 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
   5091 {
   5092   return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
   5093               (__v4si) __B,
   5094               (__v4si)
   5095               _mm_setzero_si128 (),
   5096               (__mmask8) __U);
   5097 }
   5098 
   5099 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5100 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
   5101 {
   5102   return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
   5103               (__v8si) __B,
   5104               (__v8si)
   5105               _mm256_setzero_si256 (),
   5106               (__mmask8) -1);
   5107 }
   5108 
   5109 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5110 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
   5111       __m256i __B)
   5112 {
   5113   return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
   5114               (__v8si) __B,
   5115               (__v8si) __W,
   5116               (__mmask8) __U);
   5117 }
   5118 
   5119 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5120 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
   5121 {
   5122   return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
   5123               (__v8si) __B,
   5124               (__v8si)
   5125               _mm256_setzero_si256 (),
   5126               (__mmask8) __U);
   5127 }
   5128 
   5129 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5130 _mm_rorv_epi64 (__m128i __A, __m128i __B)
   5131 {
   5132   return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
   5133               (__v2di) __B,
   5134               (__v2di)
   5135               _mm_setzero_di (),
   5136               (__mmask8) -1);
   5137 }
   5138 
   5139 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5140 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
   5141          __m128i __B)
   5142 {
   5143   return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
   5144               (__v2di) __B,
   5145               (__v2di) __W,
   5146               (__mmask8) __U);
   5147 }
   5148 
   5149 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5150 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   5151 {
   5152   return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
   5153               (__v2di) __B,
   5154               (__v2di)
   5155               _mm_setzero_di (),
   5156               (__mmask8) __U);
   5157 }
   5158 
   5159 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5160 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
   5161 {
   5162   return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
   5163               (__v4di) __B,
   5164               (__v4di)
   5165               _mm256_setzero_si256 (),
   5166               (__mmask8) -1);
   5167 }
   5168 
   5169 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5170 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
   5171       __m256i __B)
   5172 {
   5173   return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
   5174               (__v4di) __B,
   5175               (__v4di) __W,
   5176               (__mmask8) __U);
   5177 }
   5178 
   5179 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5180 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
   5181 {
   5182   return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
   5183               (__v4di) __B,
   5184               (__v4di)
   5185               _mm256_setzero_si256 (),
   5186               (__mmask8) __U);
   5187 }
   5188 
   5189 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5190 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
   5191 {
   5192   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   5193                                              (__v2di)_mm_sllv_epi64(__X, __Y),
   5194                                              (__v2di)__W);
   5195 }
   5196 
   5197 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5198 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
   5199 {
   5200   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   5201                                              (__v2di)_mm_sllv_epi64(__X, __Y),
   5202                                              (__v2di)_mm_setzero_di());
   5203 }
   5204 
   5205 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5206 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
   5207 {
   5208   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   5209                                             (__v4di)_mm256_sllv_epi64(__X, __Y),
   5210                                             (__v4di)__W);
   5211 }
   5212 
   5213 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5214 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
   5215 {
   5216   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   5217                                             (__v4di)_mm256_sllv_epi64(__X, __Y),
   5218                                             (__v4di)_mm256_setzero_si256());
   5219 }
   5220 
   5221 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5222 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
   5223 {
   5224   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   5225                                              (__v4si)_mm_sllv_epi32(__X, __Y),
   5226                                              (__v4si)__W);
   5227 }
   5228 
   5229 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5230 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
   5231 {
   5232   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   5233                                              (__v4si)_mm_sllv_epi32(__X, __Y),
   5234                                              (__v4si)_mm_setzero_si128());
   5235 }
   5236 
   5237 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5238 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
   5239 {
   5240   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   5241                                             (__v8si)_mm256_sllv_epi32(__X, __Y),
   5242                                             (__v8si)__W);
   5243 }
   5244 
   5245 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5246 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
   5247 {
   5248   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   5249                                             (__v8si)_mm256_sllv_epi32(__X, __Y),
   5250                                             (__v8si)_mm256_setzero_si256());
   5251 }
   5252 
   5253 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5254 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
   5255 {
   5256   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   5257                                              (__v2di)_mm_srlv_epi64(__X, __Y),
   5258                                              (__v2di)__W);
   5259 }
   5260 
   5261 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5262 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
   5263 {
   5264   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   5265                                              (__v2di)_mm_srlv_epi64(__X, __Y),
   5266                                              (__v2di)_mm_setzero_di());
   5267 }
   5268 
   5269 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5270 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
   5271 {
   5272   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   5273                                             (__v4di)_mm256_srlv_epi64(__X, __Y),
   5274                                             (__v4di)__W);
   5275 }
   5276 
   5277 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5278 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
   5279 {
   5280   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   5281                                             (__v4di)_mm256_srlv_epi64(__X, __Y),
   5282                                             (__v4di)_mm256_setzero_si256());
   5283 }
   5284 
   5285 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5286 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
   5287 {
   5288   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   5289                                             (__v4si)_mm_srlv_epi32(__X, __Y),
   5290                                             (__v4si)__W);
   5291 }
   5292 
   5293 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5294 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
   5295 {
   5296   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   5297                                             (__v4si)_mm_srlv_epi32(__X, __Y),
   5298                                             (__v4si)_mm_setzero_si128());
   5299 }
   5300 
   5301 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5302 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
   5303 {
   5304   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   5305                                             (__v8si)_mm256_srlv_epi32(__X, __Y),
   5306                                             (__v8si)__W);
   5307 }
   5308 
   5309 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5310 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
   5311 {
   5312   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   5313                                             (__v8si)_mm256_srlv_epi32(__X, __Y),
   5314                                             (__v8si)_mm256_setzero_si256());
   5315 }
   5316 
   5317 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5318 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   5319 {
   5320   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   5321                                              (__v4si)_mm_srl_epi32(__A, __B),
   5322                                              (__v4si)__W);
   5323 }
   5324 
   5325 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5326 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
   5327 {
   5328   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   5329                                              (__v4si)_mm_srl_epi32(__A, __B),
   5330                                              (__v4si)_mm_setzero_si128());
   5331 }
   5332 
   5333 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5334 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
   5335 {
   5336   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   5337                                              (__v8si)_mm256_srl_epi32(__A, __B),
   5338                                              (__v8si)__W);
   5339 }
   5340 
   5341 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5342 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
   5343 {
   5344   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   5345                                              (__v8si)_mm256_srl_epi32(__A, __B),
   5346                                              (__v8si)_mm256_setzero_si256());
   5347 }
   5348 
   5349 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5350 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
   5351 {
   5352   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   5353                                              (__v4si)_mm_srli_epi32(__A, __B),
   5354                                              (__v4si)__W);
   5355 }
   5356 
   5357 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5358 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B)
   5359 {
   5360   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   5361                                              (__v4si)_mm_srli_epi32(__A, __B),
   5362                                              (__v4si)_mm_setzero_si128());
   5363 }
   5364 
   5365 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5366 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
   5367 {
   5368   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   5369                                              (__v8si)_mm256_srli_epi32(__A, __B),
   5370                                              (__v8si)__W);
   5371 }
   5372 
   5373 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5374 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B)
   5375 {
   5376   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   5377                                              (__v8si)_mm256_srli_epi32(__A, __B),
   5378                                              (__v8si)_mm256_setzero_si256());
   5379 }
   5380 
   5381 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5382 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   5383 {
   5384   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   5385                                              (__v2di)_mm_srl_epi64(__A, __B),
   5386                                              (__v2di)__W);
   5387 }
   5388 
   5389 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5390 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
   5391 {
   5392   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   5393                                              (__v2di)_mm_srl_epi64(__A, __B),
   5394                                              (__v2di)_mm_setzero_di());
   5395 }
   5396 
   5397 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5398 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
   5399 {
   5400   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   5401                                              (__v4di)_mm256_srl_epi64(__A, __B),
   5402                                              (__v4di)__W);
   5403 }
   5404 
   5405 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5406 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
   5407 {
   5408   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   5409                                              (__v4di)_mm256_srl_epi64(__A, __B),
   5410                                              (__v4di)_mm256_setzero_si256());
   5411 }
   5412 
   5413 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5414 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
   5415 {
   5416   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   5417                                              (__v2di)_mm_srli_epi64(__A, __B),
   5418                                              (__v2di)__W);
   5419 }
   5420 
   5421 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5422 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B)
   5423 {
   5424   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   5425                                              (__v2di)_mm_srli_epi64(__A, __B),
   5426                                              (__v2di)_mm_setzero_di());
   5427 }
   5428 
   5429 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5430 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
   5431 {
   5432   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   5433                                              (__v4di)_mm256_srli_epi64(__A, __B),
   5434                                              (__v4di)__W);
   5435 }
   5436 
   5437 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5438 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B)
   5439 {
   5440   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   5441                                              (__v4di)_mm256_srli_epi64(__A, __B),
   5442                                              (__v4di)_mm256_setzero_si256());
   5443 }
   5444 
   5445 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5446 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
   5447 {
   5448   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   5449                                             (__v4si)_mm_srav_epi32(__X, __Y),
   5450                                             (__v4si)__W);
   5451 }
   5452 
   5453 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5454 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
   5455 {
   5456   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   5457                                             (__v4si)_mm_srav_epi32(__X, __Y),
   5458                                             (__v4si)_mm_setzero_si128());
   5459 }
   5460 
   5461 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5462 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
   5463 {
   5464   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   5465                                             (__v8si)_mm256_srav_epi32(__X, __Y),
   5466                                             (__v8si)__W);
   5467 }
   5468 
   5469 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5470 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
   5471 {
   5472   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   5473                                             (__v8si)_mm256_srav_epi32(__X, __Y),
   5474                                             (__v8si)_mm256_setzero_si256());
   5475 }
   5476 
   5477 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5478 _mm_srav_epi64(__m128i __X, __m128i __Y)
   5479 {
   5480   return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
   5481 }
   5482 
   5483 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5484 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
   5485 {
   5486   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   5487                                              (__v2di)_mm_srav_epi64(__X, __Y),
   5488                                              (__v2di)__W);
   5489 }
   5490 
   5491 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5492 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
   5493 {
   5494   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   5495                                              (__v2di)_mm_srav_epi64(__X, __Y),
   5496                                              (__v2di)_mm_setzero_di());
   5497 }
   5498 
   5499 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5500 _mm256_srav_epi64(__m256i __X, __m256i __Y)
   5501 {
   5502   return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
   5503 }
   5504 
   5505 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5506 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
   5507 {
   5508   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   5509                                              (__v4di)_mm256_srav_epi64(__X, __Y),
   5510                                              (__v4di)__W);
   5511 }
   5512 
   5513 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5514 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
   5515 {
   5516   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   5517                                              (__v4di)_mm256_srav_epi64(__X, __Y),
   5518                                              (__v4di)_mm256_setzero_si256());
   5519 }
   5520 
   5521 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5522 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
   5523 {
   5524   return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
   5525                  (__v4si) __A,
   5526                  (__v4si) __W);
   5527 }
   5528 
   5529 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5530 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
   5531 {
   5532   return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
   5533                  (__v4si) __A,
   5534                  (__v4si) _mm_setzero_si128 ());
   5535 }
   5536 
   5537 
   5538 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5539 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
   5540 {
   5541   return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
   5542                  (__v8si) __A,
   5543                  (__v8si) __W);
   5544 }
   5545 
   5546 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5547 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
   5548 {
   5549   return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
   5550                  (__v8si) __A,
   5551                  (__v8si) _mm256_setzero_si256 ());
   5552 }
   5553 
   5554 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5555 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
   5556 {
   5557   return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
   5558               (__v4si) __W,
   5559               (__mmask8)
   5560               __U);
   5561 }
   5562 
   5563 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5564 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
   5565 {
   5566   return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
   5567               (__v4si)
   5568               _mm_setzero_si128 (),
   5569               (__mmask8)
   5570               __U);
   5571 }
   5572 
   5573 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5574 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
   5575 {
   5576   return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
   5577               (__v8si) __W,
   5578               (__mmask8)
   5579               __U);
   5580 }
   5581 
   5582 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5583 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
   5584 {
   5585   return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
   5586               (__v8si)
   5587               _mm256_setzero_si256 (),
   5588               (__mmask8)
   5589               __U);
   5590 }
   5591 
   5592 static __inline__ void __DEFAULT_FN_ATTRS
   5593 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
   5594 {
   5595   __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
   5596           (__v4si) __A,
   5597           (__mmask8) __U);
   5598 }
   5599 
   5600 static __inline__ void __DEFAULT_FN_ATTRS
   5601 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
   5602 {
   5603   __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
   5604           (__v8si) __A,
   5605           (__mmask8) __U);
   5606 }
   5607 
   5608 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5609 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
   5610 {
   5611   return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
   5612                  (__v2di) __A,
   5613                  (__v2di) __W);
   5614 }
   5615 
   5616 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5617 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
   5618 {
   5619   return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
   5620                  (__v2di) __A,
   5621                  (__v2di) _mm_setzero_di ());
   5622 }
   5623 
   5624 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5625 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
   5626 {
   5627   return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
   5628                  (__v4di) __A,
   5629                  (__v4di) __W);
   5630 }
   5631 
   5632 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5633 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
   5634 {
   5635   return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
   5636                  (__v4di) __A,
   5637                  (__v4di) _mm256_setzero_si256 ());
   5638 }
   5639 
   5640 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5641 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
   5642 {
   5643   return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
   5644               (__v2di) __W,
   5645               (__mmask8)
   5646               __U);
   5647 }
   5648 
   5649 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5650 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
   5651 {
   5652   return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
   5653               (__v2di)
   5654               _mm_setzero_di (),
   5655               (__mmask8)
   5656               __U);
   5657 }
   5658 
   5659 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5660 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
   5661 {
   5662   return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
   5663               (__v4di) __W,
   5664               (__mmask8)
   5665               __U);
   5666 }
   5667 
   5668 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5669 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
   5670 {
   5671   return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
   5672               (__v4di)
   5673               _mm256_setzero_si256 (),
   5674               (__mmask8)
   5675               __U);
   5676 }
   5677 
   5678 static __inline__ void __DEFAULT_FN_ATTRS
   5679 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
   5680 {
   5681   __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
   5682           (__v2di) __A,
   5683           (__mmask8) __U);
   5684 }
   5685 
   5686 static __inline__ void __DEFAULT_FN_ATTRS
   5687 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
   5688 {
   5689   __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
   5690           (__v4di) __A,
   5691           (__mmask8) __U);
   5692 }
   5693 
   5694 static __inline__ __m128d __DEFAULT_FN_ATTRS
   5695 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
   5696 {
   5697   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   5698                                               (__v2df)_mm_movedup_pd(__A),
   5699                                               (__v2df)__W);
   5700 }
   5701 
   5702 static __inline__ __m128d __DEFAULT_FN_ATTRS
   5703 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
   5704 {
   5705   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   5706                                               (__v2df)_mm_movedup_pd(__A),
   5707                                               (__v2df)_mm_setzero_pd());
   5708 }
   5709 
   5710 static __inline__ __m256d __DEFAULT_FN_ATTRS
   5711 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
   5712 {
   5713   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   5714                                               (__v4df)_mm256_movedup_pd(__A),
   5715                                               (__v4df)__W);
   5716 }
   5717 
   5718 static __inline__ __m256d __DEFAULT_FN_ATTRS
   5719 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
   5720 {
   5721   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   5722                                               (__v4df)_mm256_movedup_pd(__A),
   5723                                               (__v4df)_mm256_setzero_pd());
   5724 }
   5725 
   5726 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5727 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
   5728 {
   5729    return (__m128i)__builtin_ia32_selectd_128(__M,
   5730                                               (__v4si) _mm_set1_epi32(__A),
   5731                                               (__v4si)__O);
   5732 }
   5733 
   5734 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5735 _mm_maskz_set1_epi32( __mmask8 __M, int __A)
   5736 {
   5737    return (__m128i)__builtin_ia32_selectd_128(__M,
   5738                                               (__v4si) _mm_set1_epi32(__A),
   5739                                               (__v4si)_mm_setzero_si128());
   5740 }
   5741 
   5742 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5743 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
   5744 {
   5745    return (__m256i)__builtin_ia32_selectd_256(__M,
   5746                                               (__v8si) _mm256_set1_epi32(__A),
   5747                                               (__v8si)__O);
   5748 }
   5749 
   5750 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5751 _mm256_maskz_set1_epi32( __mmask8 __M, int __A)
   5752 {
   5753    return (__m256i)__builtin_ia32_selectd_256(__M,
   5754                                               (__v8si) _mm256_set1_epi32(__A),
   5755                                               (__v8si)_mm256_setzero_si256());
   5756 }
   5757 
   5758 
   5759 #ifdef __x86_64__
   5760 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5761 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
   5762 {
   5763   return (__m128i) __builtin_ia32_selectq_128(__M,
   5764                                               (__v2di) _mm_set1_epi64x(__A),
   5765                                               (__v2di) __O);
   5766 }
   5767 
   5768 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5769 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
   5770 {
   5771   return (__m128i) __builtin_ia32_selectq_128(__M,
   5772                                               (__v2di) _mm_set1_epi64x(__A),
   5773                                               (__v2di) _mm_setzero_si128());
   5774 }
   5775 
   5776 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5777 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
   5778 {
   5779   return (__m256i) __builtin_ia32_selectq_256(__M,
   5780                                               (__v4di) _mm256_set1_epi64x(__A),
   5781                                               (__v4di) __O) ;
   5782 }
   5783 
   5784 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5785 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
   5786 {
   5787    return (__m256i) __builtin_ia32_selectq_256(__M,
   5788                                                (__v4di) _mm256_set1_epi64x(__A),
   5789                                                (__v4di) _mm256_setzero_si256());
   5790 }
   5791 
   5792 #endif
   5793 
   5794 #define _mm_fixupimm_pd(A, B, C, imm) __extension__ ({ \
   5795   (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
   5796                                              (__v2df)(__m128d)(B), \
   5797                                              (__v2di)(__m128i)(C), (int)(imm), \
   5798                                              (__mmask8)-1); })
   5799 
   5800 #define _mm_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
   5801   (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
   5802                                              (__v2df)(__m128d)(B), \
   5803                                              (__v2di)(__m128i)(C), (int)(imm), \
   5804                                              (__mmask8)(U)); })
   5805 
   5806 #define _mm_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
   5807   (__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
   5808                                               (__v2df)(__m128d)(B), \
   5809                                               (__v2di)(__m128i)(C), \
   5810                                               (int)(imm), (__mmask8)(U)); })
   5811 
   5812 #define _mm256_fixupimm_pd(A, B, C, imm) __extension__ ({ \
   5813   (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
   5814                                              (__v4df)(__m256d)(B), \
   5815                                              (__v4di)(__m256i)(C), (int)(imm), \
   5816                                              (__mmask8)-1); })
   5817 
   5818 #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
   5819   (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
   5820                                              (__v4df)(__m256d)(B), \
   5821                                              (__v4di)(__m256i)(C), (int)(imm), \
   5822                                              (__mmask8)(U)); })
   5823 
   5824 #define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
   5825   (__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
   5826                                               (__v4df)(__m256d)(B), \
   5827                                               (__v4di)(__m256i)(C), \
   5828                                               (int)(imm), (__mmask8)(U)); })
   5829 
   5830 #define _mm_fixupimm_ps(A, B, C, imm) __extension__ ({ \
   5831   (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
   5832                                             (__v4sf)(__m128)(B), \
   5833                                             (__v4si)(__m128i)(C), (int)(imm), \
   5834                                             (__mmask8)-1); })
   5835 
   5836 #define _mm_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
   5837   (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
   5838                                             (__v4sf)(__m128)(B), \
   5839                                             (__v4si)(__m128i)(C), (int)(imm), \
   5840                                             (__mmask8)(U)); })
   5841 
   5842 #define _mm_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
   5843   (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
   5844                                              (__v4sf)(__m128)(B), \
   5845                                              (__v4si)(__m128i)(C), (int)(imm), \
   5846                                              (__mmask8)(U)); })
   5847 
   5848 #define _mm256_fixupimm_ps(A, B, C, imm) __extension__ ({ \
   5849   (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
   5850                                             (__v8sf)(__m256)(B), \
   5851                                             (__v8si)(__m256i)(C), (int)(imm), \
   5852                                             (__mmask8)-1); })
   5853 
   5854 #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
   5855   (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
   5856                                             (__v8sf)(__m256)(B), \
   5857                                             (__v8si)(__m256i)(C), (int)(imm), \
   5858                                             (__mmask8)(U)); })
   5859 
   5860 #define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
   5861   (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
   5862                                              (__v8sf)(__m256)(B), \
   5863                                              (__v8si)(__m256i)(C), (int)(imm), \
   5864                                              (__mmask8)(U)); })
   5865 
   5866 static __inline__ __m128d __DEFAULT_FN_ATTRS
   5867 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
   5868 {
   5869   return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
   5870                (__v2df) __W,
   5871                (__mmask8) __U);
   5872 }
   5873 
   5874 static __inline__ __m128d __DEFAULT_FN_ATTRS
   5875 _mm_maskz_load_pd (__mmask8 __U, void const *__P)
   5876 {
   5877   return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
   5878                (__v2df)
   5879                _mm_setzero_pd (),
   5880                (__mmask8) __U);
   5881 }
   5882 
   5883 static __inline__ __m256d __DEFAULT_FN_ATTRS
   5884 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
   5885 {
   5886   return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
   5887                (__v4df) __W,
   5888                (__mmask8) __U);
   5889 }
   5890 
   5891 static __inline__ __m256d __DEFAULT_FN_ATTRS
   5892 _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
   5893 {
   5894   return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
   5895                (__v4df)
   5896                _mm256_setzero_pd (),
   5897                (__mmask8) __U);
   5898 }
   5899 
   5900 static __inline__ __m128 __DEFAULT_FN_ATTRS
   5901 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
   5902 {
   5903   return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
   5904               (__v4sf) __W,
   5905               (__mmask8) __U);
   5906 }
   5907 
   5908 static __inline__ __m128 __DEFAULT_FN_ATTRS
   5909 _mm_maskz_load_ps (__mmask8 __U, void const *__P)
   5910 {
   5911   return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
   5912               (__v4sf)
   5913               _mm_setzero_ps (),
   5914               (__mmask8) __U);
   5915 }
   5916 
   5917 static __inline__ __m256 __DEFAULT_FN_ATTRS
   5918 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
   5919 {
   5920   return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
   5921               (__v8sf) __W,
   5922               (__mmask8) __U);
   5923 }
   5924 
   5925 static __inline__ __m256 __DEFAULT_FN_ATTRS
   5926 _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
   5927 {
   5928   return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
   5929               (__v8sf)
   5930               _mm256_setzero_ps (),
   5931               (__mmask8) __U);
   5932 }
   5933 
   5934 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5935 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
   5936 {
   5937   return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
   5938                  (__v2di) __W,
   5939                  (__mmask8) __U);
   5940 }
   5941 
   5942 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5943 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
   5944 {
   5945   return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
   5946                  (__v2di)
   5947                  _mm_setzero_si128 (),
   5948                  (__mmask8) __U);
   5949 }
   5950 
   5951 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5952 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
   5953 {
   5954   return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
   5955                  (__v4di) __W,
   5956                  (__mmask8) __U);
   5957 }
   5958 
   5959 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5960 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
   5961 {
   5962   return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
   5963                  (__v4di)
   5964                  _mm256_setzero_si256 (),
   5965                  (__mmask8) __U);
   5966 }
   5967 
   5968 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5969 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
   5970 {
   5971   return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
   5972                  (__v4si) __W,
   5973                  (__mmask8) __U);
   5974 }
   5975 
   5976 static __inline__ __m128i __DEFAULT_FN_ATTRS
   5977 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
   5978 {
   5979   return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
   5980                  (__v4si)
   5981                  _mm_setzero_si128 (),
   5982                  (__mmask8) __U);
   5983 }
   5984 
   5985 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5986 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
   5987 {
   5988   return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
   5989                  (__v8si) __W,
   5990                  (__mmask8) __U);
   5991 }
   5992 
   5993 static __inline__ __m256i __DEFAULT_FN_ATTRS
   5994 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
   5995 {
   5996   return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
   5997                  (__v8si)
   5998                  _mm256_setzero_si256 (),
   5999                  (__mmask8) __U);
   6000 }
   6001 
   6002 static __inline__ __m128d __DEFAULT_FN_ATTRS
   6003 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
   6004 {
   6005   return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
   6006                (__v2df) __W,
   6007                (__mmask8) __U);
   6008 }
   6009 
   6010 static __inline__ __m128d __DEFAULT_FN_ATTRS
   6011 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
   6012 {
   6013   return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
   6014                (__v2df)
   6015                _mm_setzero_pd (),
   6016                (__mmask8) __U);
   6017 }
   6018 
   6019 static __inline__ __m256d __DEFAULT_FN_ATTRS
   6020 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
   6021 {
   6022   return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
   6023                (__v4df) __W,
   6024                (__mmask8) __U);
   6025 }
   6026 
   6027 static __inline__ __m256d __DEFAULT_FN_ATTRS
   6028 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
   6029 {
   6030   return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
   6031                (__v4df)
   6032                _mm256_setzero_pd (),
   6033                (__mmask8) __U);
   6034 }
   6035 
   6036 static __inline__ __m128 __DEFAULT_FN_ATTRS
   6037 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
   6038 {
   6039   return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
   6040               (__v4sf) __W,
   6041               (__mmask8) __U);
   6042 }
   6043 
   6044 static __inline__ __m128 __DEFAULT_FN_ATTRS
   6045 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
   6046 {
   6047   return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
   6048               (__v4sf)
   6049               _mm_setzero_ps (),
   6050               (__mmask8) __U);
   6051 }
   6052 
   6053 static __inline__ __m256 __DEFAULT_FN_ATTRS
   6054 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
   6055 {
   6056   return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
   6057               (__v8sf) __W,
   6058               (__mmask8) __U);
   6059 }
   6060 
   6061 static __inline__ __m256 __DEFAULT_FN_ATTRS
   6062 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
   6063 {
   6064   return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
   6065               (__v8sf)
   6066               _mm256_setzero_ps (),
   6067               (__mmask8) __U);
   6068 }
   6069 
   6070 static __inline__ void __DEFAULT_FN_ATTRS
   6071 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
   6072 {
   6073   __builtin_ia32_storeapd128_mask ((__v2df *) __P,
   6074            (__v2df) __A,
   6075            (__mmask8) __U);
   6076 }
   6077 
   6078 static __inline__ void __DEFAULT_FN_ATTRS
   6079 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
   6080 {
   6081   __builtin_ia32_storeapd256_mask ((__v4df *) __P,
   6082            (__v4df) __A,
   6083            (__mmask8) __U);
   6084 }
   6085 
   6086 static __inline__ void __DEFAULT_FN_ATTRS
   6087 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
   6088 {
   6089   __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
   6090            (__v4sf) __A,
   6091            (__mmask8) __U);
   6092 }
   6093 
   6094 static __inline__ void __DEFAULT_FN_ATTRS
   6095 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
   6096 {
   6097   __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
   6098            (__v8sf) __A,
   6099            (__mmask8) __U);
   6100 }
   6101 
   6102 static __inline__ void __DEFAULT_FN_ATTRS
   6103 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
   6104 {
   6105   __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
   6106              (__v2di) __A,
   6107              (__mmask8) __U);
   6108 }
   6109 
   6110 static __inline__ void __DEFAULT_FN_ATTRS
   6111 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
   6112 {
   6113   __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
   6114              (__v4di) __A,
   6115              (__mmask8) __U);
   6116 }
   6117 
   6118 static __inline__ void __DEFAULT_FN_ATTRS
   6119 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
   6120 {
   6121   __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
   6122              (__v4si) __A,
   6123              (__mmask8) __U);
   6124 }
   6125 
   6126 static __inline__ void __DEFAULT_FN_ATTRS
   6127 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
   6128 {
   6129   __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
   6130              (__v8si) __A,
   6131              (__mmask8) __U);
   6132 }
   6133 
   6134 static __inline__ void __DEFAULT_FN_ATTRS
   6135 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
   6136 {
   6137   __builtin_ia32_storeupd128_mask ((__v2df *) __P,
   6138            (__v2df) __A,
   6139            (__mmask8) __U);
   6140 }
   6141 
   6142 static __inline__ void __DEFAULT_FN_ATTRS
   6143 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
   6144 {
   6145   __builtin_ia32_storeupd256_mask ((__v4df *) __P,
   6146            (__v4df) __A,
   6147            (__mmask8) __U);
   6148 }
   6149 
   6150 static __inline__ void __DEFAULT_FN_ATTRS
   6151 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
   6152 {
   6153   __builtin_ia32_storeups128_mask ((__v4sf *) __P,
   6154            (__v4sf) __A,
   6155            (__mmask8) __U);
   6156 }
   6157 
   6158 static __inline__ void __DEFAULT_FN_ATTRS
   6159 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
   6160 {
   6161   __builtin_ia32_storeups256_mask ((__v8sf *) __P,
   6162            (__v8sf) __A,
   6163            (__mmask8) __U);
   6164 }
   6165 
   6166 
   6167 static __inline__ __m128d __DEFAULT_FN_ATTRS
   6168 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   6169 {
   6170   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   6171                                               (__v2df)_mm_unpackhi_pd(__A, __B),
   6172                                               (__v2df)__W);
   6173 }
   6174 
   6175 static __inline__ __m128d __DEFAULT_FN_ATTRS
   6176 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
   6177 {
   6178   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   6179                                               (__v2df)_mm_unpackhi_pd(__A, __B),
   6180                                               (__v2df)_mm_setzero_pd());
   6181 }
   6182 
   6183 static __inline__ __m256d __DEFAULT_FN_ATTRS
   6184 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
   6185 {
   6186   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   6187                                            (__v4df)_mm256_unpackhi_pd(__A, __B),
   6188                                            (__v4df)__W);
   6189 }
   6190 
   6191 static __inline__ __m256d __DEFAULT_FN_ATTRS
   6192 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
   6193 {
   6194   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   6195                                            (__v4df)_mm256_unpackhi_pd(__A, __B),
   6196                                            (__v4df)_mm256_setzero_pd());
   6197 }
   6198 
   6199 static __inline__ __m128 __DEFAULT_FN_ATTRS
   6200 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   6201 {
   6202   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   6203                                              (__v4sf)_mm_unpackhi_ps(__A, __B),
   6204                                              (__v4sf)__W);
   6205 }
   6206 
   6207 static __inline__ __m128 __DEFAULT_FN_ATTRS
   6208 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
   6209 {
   6210   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   6211                                              (__v4sf)_mm_unpackhi_ps(__A, __B),
   6212                                              (__v4sf)_mm_setzero_ps());
   6213 }
   6214 
   6215 static __inline__ __m256 __DEFAULT_FN_ATTRS
   6216 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
   6217 {
   6218   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   6219                                            (__v8sf)_mm256_unpackhi_ps(__A, __B),
   6220                                            (__v8sf)__W);
   6221 }
   6222 
   6223 static __inline__ __m256 __DEFAULT_FN_ATTRS
   6224 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
   6225 {
   6226   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   6227                                            (__v8sf)_mm256_unpackhi_ps(__A, __B),
   6228                                            (__v8sf)_mm256_setzero_ps());
   6229 }
   6230 
   6231 static __inline__ __m128d __DEFAULT_FN_ATTRS
   6232 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   6233 {
   6234   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   6235                                               (__v2df)_mm_unpacklo_pd(__A, __B),
   6236                                               (__v2df)__W);
   6237 }
   6238 
   6239 static __inline__ __m128d __DEFAULT_FN_ATTRS
   6240 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
   6241 {
   6242   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   6243                                               (__v2df)_mm_unpacklo_pd(__A, __B),
   6244                                               (__v2df)_mm_setzero_pd());
   6245 }
   6246 
   6247 static __inline__ __m256d __DEFAULT_FN_ATTRS
   6248 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
   6249 {
   6250   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   6251                                            (__v4df)_mm256_unpacklo_pd(__A, __B),
   6252                                            (__v4df)__W);
   6253 }
   6254 
   6255 static __inline__ __m256d __DEFAULT_FN_ATTRS
   6256 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
   6257 {
   6258   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   6259                                            (__v4df)_mm256_unpacklo_pd(__A, __B),
   6260                                            (__v4df)_mm256_setzero_pd());
   6261 }
   6262 
   6263 static __inline__ __m128 __DEFAULT_FN_ATTRS
   6264 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   6265 {
   6266   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   6267                                              (__v4sf)_mm_unpacklo_ps(__A, __B),
   6268                                              (__v4sf)__W);
   6269 }
   6270 
   6271 static __inline__ __m128 __DEFAULT_FN_ATTRS
   6272 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
   6273 {
   6274   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   6275                                              (__v4sf)_mm_unpacklo_ps(__A, __B),
   6276                                              (__v4sf)_mm_setzero_ps());
   6277 }
   6278 
   6279 static __inline__ __m256 __DEFAULT_FN_ATTRS
   6280 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
   6281 {
   6282   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   6283                                            (__v8sf)_mm256_unpacklo_ps(__A, __B),
   6284                                            (__v8sf)__W);
   6285 }
   6286 
   6287 static __inline__ __m256 __DEFAULT_FN_ATTRS
   6288 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
   6289 {
   6290   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   6291                                            (__v8sf)_mm256_unpacklo_ps(__A, __B),
   6292                                            (__v8sf)_mm256_setzero_ps());
   6293 }
   6294 
   6295 static __inline__ __m128d __DEFAULT_FN_ATTRS
   6296 _mm_rcp14_pd (__m128d __A)
   6297 {
   6298   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
   6299                 (__v2df)
   6300                 _mm_setzero_pd (),
   6301                 (__mmask8) -1);
   6302 }
   6303 
   6304 static __inline__ __m128d __DEFAULT_FN_ATTRS
   6305 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
   6306 {
   6307   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
   6308                 (__v2df) __W,
   6309                 (__mmask8) __U);
   6310 }
   6311 
   6312 static __inline__ __m128d __DEFAULT_FN_ATTRS
   6313 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
   6314 {
   6315   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
   6316                 (__v2df)
   6317                 _mm_setzero_pd (),
   6318                 (__mmask8) __U);
   6319 }
   6320 
   6321 static __inline__ __m256d __DEFAULT_FN_ATTRS
   6322 _mm256_rcp14_pd (__m256d __A)
   6323 {
   6324   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
   6325                 (__v4df)
   6326                 _mm256_setzero_pd (),
   6327                 (__mmask8) -1);
   6328 }
   6329 
   6330 static __inline__ __m256d __DEFAULT_FN_ATTRS
   6331 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
   6332 {
   6333   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
   6334                 (__v4df) __W,
   6335                 (__mmask8) __U);
   6336 }
   6337 
   6338 static __inline__ __m256d __DEFAULT_FN_ATTRS
   6339 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
   6340 {
   6341   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
   6342                 (__v4df)
   6343                 _mm256_setzero_pd (),
   6344                 (__mmask8) __U);
   6345 }
   6346 
   6347 static __inline__ __m128 __DEFAULT_FN_ATTRS
   6348 _mm_rcp14_ps (__m128 __A)
   6349 {
   6350   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
   6351                (__v4sf)
   6352                _mm_setzero_ps (),
   6353                (__mmask8) -1);
   6354 }
   6355 
   6356 static __inline__ __m128 __DEFAULT_FN_ATTRS
   6357 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
   6358 {
   6359   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
   6360                (__v4sf) __W,
   6361                (__mmask8) __U);
   6362 }
   6363 
   6364 static __inline__ __m128 __DEFAULT_FN_ATTRS
   6365 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
   6366 {
   6367   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
   6368                (__v4sf)
   6369                _mm_setzero_ps (),
   6370                (__mmask8) __U);
   6371 }
   6372 
   6373 static __inline__ __m256 __DEFAULT_FN_ATTRS
   6374 _mm256_rcp14_ps (__m256 __A)
   6375 {
   6376   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
   6377                (__v8sf)
   6378                _mm256_setzero_ps (),
   6379                (__mmask8) -1);
   6380 }
   6381 
   6382 static __inline__ __m256 __DEFAULT_FN_ATTRS
   6383 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
   6384 {
   6385   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
   6386                (__v8sf) __W,
   6387                (__mmask8) __U);
   6388 }
   6389 
   6390 static __inline__ __m256 __DEFAULT_FN_ATTRS
   6391 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
   6392 {
   6393   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
   6394                (__v8sf)
   6395                _mm256_setzero_ps (),
   6396                (__mmask8) __U);
   6397 }
   6398 
   6399 #define _mm_mask_permute_pd(W, U, X, C) __extension__ ({ \
   6400   (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
   6401                                        (__v2df)_mm_permute_pd((X), (C)), \
   6402                                        (__v2df)(__m128d)(W)); })
   6403 
   6404 #define _mm_maskz_permute_pd(U, X, C) __extension__ ({ \
   6405   (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
   6406                                        (__v2df)_mm_permute_pd((X), (C)), \
   6407                                        (__v2df)_mm_setzero_pd()); })
   6408 
   6409 #define _mm256_mask_permute_pd(W, U, X, C) __extension__ ({ \
   6410   (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
   6411                                        (__v4df)_mm256_permute_pd((X), (C)), \
   6412                                        (__v4df)(__m256d)(W)); })
   6413 
   6414 #define _mm256_maskz_permute_pd(U, X, C) __extension__ ({ \
   6415   (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
   6416                                        (__v4df)_mm256_permute_pd((X), (C)), \
   6417                                        (__v4df)_mm256_setzero_pd()); })
   6418 
   6419 #define _mm_mask_permute_ps(W, U, X, C) __extension__ ({ \
   6420   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
   6421                                       (__v4sf)_mm_permute_ps((X), (C)), \
   6422                                       (__v4sf)(__m128)(W)); })
   6423 
   6424 #define _mm_maskz_permute_ps(U, X, C) __extension__ ({ \
   6425   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
   6426                                       (__v4sf)_mm_permute_ps((X), (C)), \
   6427                                       (__v4sf)_mm_setzero_ps()); })
   6428 
   6429 #define _mm256_mask_permute_ps(W, U, X, C) __extension__ ({ \
   6430   (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
   6431                                       (__v8sf)_mm256_permute_ps((X), (C)), \
   6432                                       (__v8sf)(__m256)(W)); })
   6433 
   6434 #define _mm256_maskz_permute_ps(U, X, C) __extension__ ({ \
   6435   (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
   6436                                       (__v8sf)_mm256_permute_ps((X), (C)), \
   6437                                       (__v8sf)_mm256_setzero_ps()); })
   6438 
   6439 static __inline__ __m128d __DEFAULT_FN_ATTRS
   6440 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
   6441 {
   6442   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   6443                                             (__v2df)_mm_permutevar_pd(__A, __C),
   6444                                             (__v2df)__W);
   6445 }
   6446 
   6447 static __inline__ __m128d __DEFAULT_FN_ATTRS
   6448 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
   6449 {
   6450   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   6451                                             (__v2df)_mm_permutevar_pd(__A, __C),
   6452                                             (__v2df)_mm_setzero_pd());
   6453 }
   6454 
   6455 static __inline__ __m256d __DEFAULT_FN_ATTRS
   6456 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
   6457 {
   6458   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   6459                                          (__v4df)_mm256_permutevar_pd(__A, __C),
   6460                                          (__v4df)__W);
   6461 }
   6462 
   6463 static __inline__ __m256d __DEFAULT_FN_ATTRS
   6464 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
   6465 {
   6466   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   6467                                          (__v4df)_mm256_permutevar_pd(__A, __C),
   6468                                          (__v4df)_mm256_setzero_pd());
   6469 }
   6470 
   6471 static __inline__ __m128 __DEFAULT_FN_ATTRS
   6472 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
   6473 {
   6474   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   6475                                             (__v4sf)_mm_permutevar_ps(__A, __C),
   6476                                             (__v4sf)__W);
   6477 }
   6478 
   6479 static __inline__ __m128 __DEFAULT_FN_ATTRS
   6480 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
   6481 {
   6482   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   6483                                             (__v4sf)_mm_permutevar_ps(__A, __C),
   6484                                             (__v4sf)_mm_setzero_ps());
   6485 }
   6486 
   6487 static __inline__ __m256 __DEFAULT_FN_ATTRS
   6488 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
   6489 {
   6490   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   6491                                           (__v8sf)_mm256_permutevar_ps(__A, __C),
   6492                                           (__v8sf)__W);
   6493 }
   6494 
   6495 static __inline__ __m256 __DEFAULT_FN_ATTRS
   6496 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
   6497 {
   6498   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   6499                                           (__v8sf)_mm256_permutevar_ps(__A, __C),
   6500                                           (__v8sf)_mm256_setzero_ps());
   6501 }
   6502 
   6503 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6504 _mm_test_epi32_mask (__m128i __A, __m128i __B)
   6505 {
   6506   return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
   6507                  (__v4si) __B,
   6508                  (__mmask8) -1);
   6509 }
   6510 
   6511 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6512 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
   6513 {
   6514   return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
   6515                  (__v4si) __B, __U);
   6516 }
   6517 
   6518 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6519 _mm256_test_epi32_mask (__m256i __A, __m256i __B)
   6520 {
   6521   return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
   6522                  (__v8si) __B,
   6523                  (__mmask8) -1);
   6524 }
   6525 
   6526 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6527 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
   6528 {
   6529   return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
   6530                  (__v8si) __B, __U);
   6531 }
   6532 
   6533 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6534 _mm_test_epi64_mask (__m128i __A, __m128i __B)
   6535 {
   6536   return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
   6537                  (__v2di) __B,
   6538                  (__mmask8) -1);
   6539 }
   6540 
   6541 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6542 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
   6543 {
   6544   return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
   6545                  (__v2di) __B, __U);
   6546 }
   6547 
   6548 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6549 _mm256_test_epi64_mask (__m256i __A, __m256i __B)
   6550 {
   6551   return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
   6552                  (__v4di) __B,
   6553                  (__mmask8) -1);
   6554 }
   6555 
   6556 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6557 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
   6558 {
   6559   return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
   6560                  (__v4di) __B, __U);
   6561 }
   6562 
   6563 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6564 _mm_testn_epi32_mask (__m128i __A, __m128i __B)
   6565 {
   6566   return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
   6567             (__v4si) __B,
   6568             (__mmask8) -1);
   6569 }
   6570 
   6571 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6572 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
   6573 {
   6574   return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
   6575             (__v4si) __B, __U);
   6576 }
   6577 
   6578 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6579 _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
   6580 {
   6581   return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
   6582             (__v8si) __B,
   6583             (__mmask8) -1);
   6584 }
   6585 
   6586 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6587 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
   6588 {
   6589   return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
   6590             (__v8si) __B, __U);
   6591 }
   6592 
   6593 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6594 _mm_testn_epi64_mask (__m128i __A, __m128i __B)
   6595 {
   6596   return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
   6597             (__v2di) __B,
   6598             (__mmask8) -1);
   6599 }
   6600 
   6601 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6602 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
   6603 {
   6604   return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
   6605             (__v2di) __B, __U);
   6606 }
   6607 
   6608 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6609 _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
   6610 {
   6611   return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
   6612             (__v4di) __B,
   6613             (__mmask8) -1);
   6614 }
   6615 
   6616 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
   6617 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
   6618 {
   6619   return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
   6620             (__v4di) __B, __U);
   6621 }
   6622 
   6623 
   6624 
   6625 static __inline__ __m128i __DEFAULT_FN_ATTRS
   6626 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   6627 {
   6628   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   6629                                            (__v4si)_mm_unpackhi_epi32(__A, __B),
   6630                                            (__v4si)__W);
   6631 }
   6632 
   6633 static __inline__ __m128i __DEFAULT_FN_ATTRS
   6634 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
   6635 {
   6636   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   6637                                            (__v4si)_mm_unpackhi_epi32(__A, __B),
   6638                                            (__v4si)_mm_setzero_si128());
   6639 }
   6640 
   6641 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6642 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
   6643 {
   6644   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   6645                                         (__v8si)_mm256_unpackhi_epi32(__A, __B),
   6646                                         (__v8si)__W);
   6647 }
   6648 
   6649 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6650 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
   6651 {
   6652   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   6653                                         (__v8si)_mm256_unpackhi_epi32(__A, __B),
   6654                                         (__v8si)_mm256_setzero_si256());
   6655 }
   6656 
   6657 static __inline__ __m128i __DEFAULT_FN_ATTRS
   6658 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   6659 {
   6660   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   6661                                            (__v2di)_mm_unpackhi_epi64(__A, __B),
   6662                                            (__v2di)__W);
   6663 }
   6664 
   6665 static __inline__ __m128i __DEFAULT_FN_ATTRS
   6666 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
   6667 {
   6668   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   6669                                            (__v2di)_mm_unpackhi_epi64(__A, __B),
   6670                                            (__v2di)_mm_setzero_di());
   6671 }
   6672 
   6673 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6674 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
   6675 {
   6676   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   6677                                         (__v4di)_mm256_unpackhi_epi64(__A, __B),
   6678                                         (__v4di)__W);
   6679 }
   6680 
   6681 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6682 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
   6683 {
   6684   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   6685                                         (__v4di)_mm256_unpackhi_epi64(__A, __B),
   6686                                         (__v4di)_mm256_setzero_si256());
   6687 }
   6688 
   6689 static __inline__ __m128i __DEFAULT_FN_ATTRS
   6690 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   6691 {
   6692   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   6693                                            (__v4si)_mm_unpacklo_epi32(__A, __B),
   6694                                            (__v4si)__W);
   6695 }
   6696 
   6697 static __inline__ __m128i __DEFAULT_FN_ATTRS
   6698 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
   6699 {
   6700   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   6701                                            (__v4si)_mm_unpacklo_epi32(__A, __B),
   6702                                            (__v4si)_mm_setzero_si128());
   6703 }
   6704 
   6705 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6706 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
   6707 {
   6708   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   6709                                         (__v8si)_mm256_unpacklo_epi32(__A, __B),
   6710                                         (__v8si)__W);
   6711 }
   6712 
   6713 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6714 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
   6715 {
   6716   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   6717                                         (__v8si)_mm256_unpacklo_epi32(__A, __B),
   6718                                         (__v8si)_mm256_setzero_si256());
   6719 }
   6720 
   6721 static __inline__ __m128i __DEFAULT_FN_ATTRS
   6722 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   6723 {
   6724   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   6725                                            (__v2di)_mm_unpacklo_epi64(__A, __B),
   6726                                            (__v2di)__W);
   6727 }
   6728 
   6729 static __inline__ __m128i __DEFAULT_FN_ATTRS
   6730 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
   6731 {
   6732   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   6733                                            (__v2di)_mm_unpacklo_epi64(__A, __B),
   6734                                            (__v2di)_mm_setzero_di());
   6735 }
   6736 
   6737 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6738 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
   6739 {
   6740   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   6741                                         (__v4di)_mm256_unpacklo_epi64(__A, __B),
   6742                                         (__v4di)__W);
   6743 }
   6744 
   6745 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6746 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
   6747 {
   6748   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   6749                                         (__v4di)_mm256_unpacklo_epi64(__A, __B),
   6750                                         (__v4di)_mm256_setzero_si256());
   6751 }
   6752 
   6753 static __inline__ __m128i __DEFAULT_FN_ATTRS
   6754 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   6755 {
   6756   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   6757                                              (__v4si)_mm_sra_epi32(__A, __B),
   6758                                              (__v4si)__W);
   6759 }
   6760 
   6761 static __inline__ __m128i __DEFAULT_FN_ATTRS
   6762 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
   6763 {
   6764   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   6765                                              (__v4si)_mm_sra_epi32(__A, __B),
   6766                                              (__v4si)_mm_setzero_si128());
   6767 }
   6768 
   6769 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6770 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
   6771 {
   6772   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   6773                                              (__v8si)_mm256_sra_epi32(__A, __B),
   6774                                              (__v8si)__W);
   6775 }
   6776 
   6777 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6778 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
   6779 {
   6780   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   6781                                              (__v8si)_mm256_sra_epi32(__A, __B),
   6782                                              (__v8si)_mm256_setzero_si256());
   6783 }
   6784 
   6785 static __inline__ __m128i __DEFAULT_FN_ATTRS
   6786 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
   6787 {
   6788   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   6789                                              (__v4si)_mm_srai_epi32(__A, __B),
   6790                                              (__v4si)__W);
   6791 }
   6792 
   6793 static __inline__ __m128i __DEFAULT_FN_ATTRS
   6794 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B)
   6795 {
   6796   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   6797                                              (__v4si)_mm_srai_epi32(__A, __B),
   6798                                              (__v4si)_mm_setzero_si128());
   6799 }
   6800 
   6801 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6802 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
   6803 {
   6804   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   6805                                              (__v8si)_mm256_srai_epi32(__A, __B),
   6806                                              (__v8si)__W);
   6807 }
   6808 
   6809 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6810 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B)
   6811 {
   6812   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   6813                                              (__v8si)_mm256_srai_epi32(__A, __B),
   6814                                              (__v8si)_mm256_setzero_si256());
   6815 }
   6816 
   6817 static __inline__ __m128i __DEFAULT_FN_ATTRS
   6818 _mm_sra_epi64(__m128i __A, __m128i __B)
   6819 {
   6820   return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
   6821 }
   6822 
   6823 static __inline__ __m128i __DEFAULT_FN_ATTRS
   6824 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   6825 {
   6826   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
   6827                                              (__v2di)_mm_sra_epi64(__A, __B), \
   6828                                              (__v2di)__W);
   6829 }
   6830 
   6831 static __inline__ __m128i __DEFAULT_FN_ATTRS
   6832 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
   6833 {
   6834   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
   6835                                              (__v2di)_mm_sra_epi64(__A, __B), \
   6836                                              (__v2di)_mm_setzero_di());
   6837 }
   6838 
   6839 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6840 _mm256_sra_epi64(__m256i __A, __m128i __B)
   6841 {
   6842   return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
   6843 }
   6844 
   6845 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6846 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
   6847 {
   6848   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
   6849                                            (__v4di)_mm256_sra_epi64(__A, __B), \
   6850                                            (__v4di)__W);
   6851 }
   6852 
   6853 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6854 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
   6855 {
   6856   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
   6857                                            (__v4di)_mm256_sra_epi64(__A, __B), \
   6858                                            (__v4di)_mm256_setzero_si256());
   6859 }
   6860 
   6861 static __inline__ __m128i __DEFAULT_FN_ATTRS
   6862 _mm_srai_epi64(__m128i __A, int __imm)
   6863 {
   6864   return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm);
   6865 }
   6866 
   6867 static __inline__ __m128i __DEFAULT_FN_ATTRS
   6868 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm)
   6869 {
   6870   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
   6871                                            (__v2di)_mm_srai_epi64(__A, __imm), \
   6872                                            (__v2di)__W);
   6873 }
   6874 
   6875 static __inline__ __m128i __DEFAULT_FN_ATTRS
   6876 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm)
   6877 {
   6878   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
   6879                                            (__v2di)_mm_srai_epi64(__A, __imm), \
   6880                                            (__v2di)_mm_setzero_di());
   6881 }
   6882 
   6883 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6884 _mm256_srai_epi64(__m256i __A, int __imm)
   6885 {
   6886   return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm);
   6887 }
   6888 
   6889 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6890 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm)
   6891 {
   6892   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
   6893                                         (__v4di)_mm256_srai_epi64(__A, __imm), \
   6894                                         (__v4di)__W);
   6895 }
   6896 
   6897 static __inline__ __m256i __DEFAULT_FN_ATTRS
   6898 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm)
   6899 {
   6900   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
   6901                                         (__v4di)_mm256_srai_epi64(__A, __imm), \
   6902                                         (__v4di)_mm256_setzero_si256());
   6903 }
   6904 
   6905 #define _mm_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
   6906   (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
   6907                                             (__v4si)(__m128i)(B), \
   6908                                             (__v4si)(__m128i)(C), (int)(imm), \
   6909                                             (__mmask8)-1); })
   6910 
   6911 #define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
   6912   (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
   6913                                             (__v4si)(__m128i)(B), \
   6914                                             (__v4si)(__m128i)(C), (int)(imm), \
   6915                                             (__mmask8)(U)); })
   6916 
   6917 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
   6918   (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
   6919                                              (__v4si)(__m128i)(B), \
   6920                                              (__v4si)(__m128i)(C), (int)(imm), \
   6921                                              (__mmask8)(U)); })
   6922 
   6923 #define _mm256_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
   6924   (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
   6925                                             (__v8si)(__m256i)(B), \
   6926                                             (__v8si)(__m256i)(C), (int)(imm), \
   6927                                             (__mmask8)-1); })
   6928 
   6929 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
   6930   (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
   6931                                             (__v8si)(__m256i)(B), \
   6932                                             (__v8si)(__m256i)(C), (int)(imm), \
   6933                                             (__mmask8)(U)); })
   6934 
   6935 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
   6936   (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
   6937                                              (__v8si)(__m256i)(B), \
   6938                                              (__v8si)(__m256i)(C), (int)(imm), \
   6939                                              (__mmask8)(U)); })
   6940 
   6941 #define _mm_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
   6942   (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
   6943                                             (__v2di)(__m128i)(B), \
   6944                                             (__v2di)(__m128i)(C), (int)(imm), \
   6945                                             (__mmask8)-1); })
   6946 
   6947 #define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
   6948   (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
   6949                                             (__v2di)(__m128i)(B), \
   6950                                             (__v2di)(__m128i)(C), (int)(imm), \
   6951                                             (__mmask8)(U)); })
   6952 
   6953 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
   6954   (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
   6955                                              (__v2di)(__m128i)(B), \
   6956                                              (__v2di)(__m128i)(C), (int)(imm), \
   6957                                              (__mmask8)(U)); })
   6958 
   6959 #define _mm256_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
   6960   (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
   6961                                             (__v4di)(__m256i)(B), \
   6962                                             (__v4di)(__m256i)(C), (int)(imm), \
   6963                                             (__mmask8)-1); })
   6964 
   6965 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
   6966   (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
   6967                                             (__v4di)(__m256i)(B), \
   6968                                             (__v4di)(__m256i)(C), (int)(imm), \
   6969                                             (__mmask8)(U)); })
   6970 
   6971 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
   6972   (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
   6973                                              (__v4di)(__m256i)(B), \
   6974                                              (__v4di)(__m256i)(C), (int)(imm), \
   6975                                              (__mmask8)(U)); })
   6976 
   6977 
   6978 
   6979 #define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \
   6980   (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
   6981                                              (__v8sf)(__m256)(B), (int)(imm), \
   6982                                              (__v8sf)_mm256_setzero_ps(), \
   6983                                              (__mmask8)-1); })
   6984 
   6985 #define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
   6986   (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
   6987                                              (__v8sf)(__m256)(B), (int)(imm), \
   6988                                              (__v8sf)(__m256)(W), \
   6989                                              (__mmask8)(U)); })
   6990 
   6991 #define _mm256_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
   6992   (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
   6993                                              (__v8sf)(__m256)(B), (int)(imm), \
   6994                                              (__v8sf)_mm256_setzero_ps(), \
   6995                                              (__mmask8)(U)); })
   6996 
   6997 #define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \
   6998   (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
   6999                                               (__v4df)(__m256d)(B), \
   7000                                               (int)(imm), \
   7001                                               (__v4df)_mm256_setzero_pd(), \
   7002                                               (__mmask8)-1); })
   7003 
   7004 #define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
   7005   (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
   7006                                               (__v4df)(__m256d)(B), \
   7007                                               (int)(imm), \
   7008                                               (__v4df)(__m256d)(W), \
   7009                                               (__mmask8)(U)); })
   7010 
   7011 #define _mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
   7012   (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
   7013                                               (__v4df)(__m256d)(B), \
   7014                                               (int)(imm), \
   7015                                               (__v4df)_mm256_setzero_pd(), \
   7016                                               (__mmask8)(U)); })
   7017 
   7018 #define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \
   7019   (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
   7020                                               (__v8si)(__m256i)(B), \
   7021                                               (int)(imm), \
   7022                                               (__v8si)_mm256_setzero_si256(), \
   7023                                               (__mmask8)-1); })
   7024 
   7025 #define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
   7026   (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
   7027                                               (__v8si)(__m256i)(B), \
   7028                                               (int)(imm), \
   7029                                               (__v8si)(__m256i)(W), \
   7030                                               (__mmask8)(U)); })
   7031 
   7032 #define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
   7033   (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
   7034                                               (__v8si)(__m256i)(B), \
   7035                                               (int)(imm), \
   7036                                               (__v8si)_mm256_setzero_si256(), \
   7037                                               (__mmask8)(U)); })
   7038 
   7039 #define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \
   7040   (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
   7041                                               (__v4di)(__m256i)(B), \
   7042                                               (int)(imm), \
   7043                                               (__v4di)_mm256_setzero_si256(), \
   7044                                               (__mmask8)-1); })
   7045 
   7046 #define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
   7047   (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
   7048                                               (__v4di)(__m256i)(B), \
   7049                                               (int)(imm), \
   7050                                               (__v4di)(__m256i)(W), \
   7051                                               (__mmask8)(U)); })
   7052 
   7053 #define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
   7054   (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
   7055                                               (__v4di)(__m256i)(B), \
   7056                                               (int)(imm), \
   7057                                               (__v4di)_mm256_setzero_si256(), \
   7058                                               (__mmask8)(U)); })
   7059 
   7060 #define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
   7061   (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
   7062                                        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
   7063                                        (__v2df)(__m128d)(W)); })
   7064 
   7065 #define _mm_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
   7066   (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
   7067                                        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
   7068                                        (__v2df)_mm_setzero_pd()); })
   7069 
   7070 #define _mm256_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
   7071   (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
   7072                                        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
   7073                                        (__v4df)(__m256d)(W)); })
   7074 
   7075 #define _mm256_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
   7076   (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
   7077                                        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
   7078                                        (__v4df)_mm256_setzero_pd()); })
   7079 
   7080 #define _mm_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
   7081   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
   7082                                       (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
   7083                                       (__v4sf)(__m128)(W)); })
   7084 
   7085 #define _mm_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
   7086   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
   7087                                       (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
   7088                                       (__v4sf)_mm_setzero_ps()); })
   7089 
   7090 #define _mm256_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
   7091   (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
   7092                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
   7093                                       (__v8sf)(__m256)(W)); })
   7094 
   7095 #define _mm256_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
   7096   (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
   7097                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
   7098                                       (__v8sf)_mm256_setzero_ps()); })
   7099 
   7100 static __inline__ __m128d __DEFAULT_FN_ATTRS
   7101 _mm_rsqrt14_pd (__m128d __A)
   7102 {
   7103   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
   7104                  (__v2df)
   7105                  _mm_setzero_pd (),
   7106                  (__mmask8) -1);
   7107 }
   7108 
   7109 static __inline__ __m128d __DEFAULT_FN_ATTRS
   7110 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
   7111 {
   7112   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
   7113                  (__v2df) __W,
   7114                  (__mmask8) __U);
   7115 }
   7116 
   7117 static __inline__ __m128d __DEFAULT_FN_ATTRS
   7118 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
   7119 {
   7120   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
   7121                  (__v2df)
   7122                  _mm_setzero_pd (),
   7123                  (__mmask8) __U);
   7124 }
   7125 
   7126 static __inline__ __m256d __DEFAULT_FN_ATTRS
   7127 _mm256_rsqrt14_pd (__m256d __A)
   7128 {
   7129   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
   7130                  (__v4df)
   7131                  _mm256_setzero_pd (),
   7132                  (__mmask8) -1);
   7133 }
   7134 
   7135 static __inline__ __m256d __DEFAULT_FN_ATTRS
   7136 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
   7137 {
   7138   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
   7139                  (__v4df) __W,
   7140                  (__mmask8) __U);
   7141 }
   7142 
   7143 static __inline__ __m256d __DEFAULT_FN_ATTRS
   7144 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
   7145 {
   7146   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
   7147                  (__v4df)
   7148                  _mm256_setzero_pd (),
   7149                  (__mmask8) __U);
   7150 }
   7151 
   7152 static __inline__ __m128 __DEFAULT_FN_ATTRS
   7153 _mm_rsqrt14_ps (__m128 __A)
   7154 {
   7155   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
   7156                 (__v4sf)
   7157                 _mm_setzero_ps (),
   7158                 (__mmask8) -1);
   7159 }
   7160 
   7161 static __inline__ __m128 __DEFAULT_FN_ATTRS
   7162 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
   7163 {
   7164   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
   7165                 (__v4sf) __W,
   7166                 (__mmask8) __U);
   7167 }
   7168 
   7169 static __inline__ __m128 __DEFAULT_FN_ATTRS
   7170 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
   7171 {
   7172   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
   7173                 (__v4sf)
   7174                 _mm_setzero_ps (),
   7175                 (__mmask8) __U);
   7176 }
   7177 
   7178 static __inline__ __m256 __DEFAULT_FN_ATTRS
   7179 _mm256_rsqrt14_ps (__m256 __A)
   7180 {
   7181   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
   7182                 (__v8sf)
   7183                 _mm256_setzero_ps (),
   7184                 (__mmask8) -1);
   7185 }
   7186 
   7187 static __inline__ __m256 __DEFAULT_FN_ATTRS
   7188 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
   7189 {
   7190   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
   7191                 (__v8sf) __W,
   7192                 (__mmask8) __U);
   7193 }
   7194 
   7195 static __inline__ __m256 __DEFAULT_FN_ATTRS
   7196 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
   7197 {
   7198   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
   7199                 (__v8sf)
   7200                 _mm256_setzero_ps (),
   7201                 (__mmask8) __U);
   7202 }
   7203 
   7204 static __inline__ __m256 __DEFAULT_FN_ATTRS
   7205 _mm256_broadcast_f32x4(__m128 __A)
   7206 {
   7207   return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
   7208                                          0, 1, 2, 3, 0, 1, 2, 3);
   7209 }
   7210 
   7211 static __inline__ __m256 __DEFAULT_FN_ATTRS
   7212 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
   7213 {
   7214   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
   7215                                             (__v8sf)_mm256_broadcast_f32x4(__A),
   7216                                             (__v8sf)__O);
   7217 }
   7218 
   7219 static __inline__ __m256 __DEFAULT_FN_ATTRS
   7220 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
   7221 {
   7222   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
   7223                                             (__v8sf)_mm256_broadcast_f32x4(__A),
   7224                                             (__v8sf)_mm256_setzero_ps());
   7225 }
   7226 
   7227 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7228 _mm256_broadcast_i32x4(__m128i __A)
   7229 {
   7230   return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
   7231                                           0, 1, 2, 3, 0, 1, 2, 3);
   7232 }
   7233 
   7234 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7235 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
   7236 {
   7237   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   7238                                             (__v8si)_mm256_broadcast_i32x4(__A),
   7239                                             (__v8si)__O);
   7240 }
   7241 
   7242 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7243 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
   7244 {
   7245   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   7246                                             (__v8si)_mm256_broadcast_i32x4(__A),
   7247                                             (__v8si)_mm256_setzero_si256());
   7248 }
   7249 
   7250 static __inline__ __m256d __DEFAULT_FN_ATTRS
   7251 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
   7252 {
   7253   return (__m256d)__builtin_ia32_selectpd_256(__M,
   7254                                               (__v4df) _mm256_broadcastsd_pd(__A),
   7255                                               (__v4df) __O);
   7256 }
   7257 
   7258 static __inline__ __m256d __DEFAULT_FN_ATTRS
   7259 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
   7260 {
   7261   return (__m256d)__builtin_ia32_selectpd_256(__M,
   7262                                               (__v4df) _mm256_broadcastsd_pd(__A),
   7263                                               (__v4df) _mm256_setzero_pd());
   7264 }
   7265 
   7266 static __inline__ __m128 __DEFAULT_FN_ATTRS
   7267 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
   7268 {
   7269   return (__m128)__builtin_ia32_selectps_128(__M,
   7270                                              (__v4sf) _mm_broadcastss_ps(__A),
   7271                                              (__v4sf) __O);
   7272 }
   7273 
   7274 static __inline__ __m128 __DEFAULT_FN_ATTRS
   7275 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
   7276 {
   7277   return (__m128)__builtin_ia32_selectps_128(__M,
   7278                                              (__v4sf) _mm_broadcastss_ps(__A),
   7279                                              (__v4sf) _mm_setzero_ps());
   7280 }
   7281 
   7282 static __inline__ __m256 __DEFAULT_FN_ATTRS
   7283 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
   7284 {
   7285   return (__m256)__builtin_ia32_selectps_256(__M,
   7286                                              (__v8sf) _mm256_broadcastss_ps(__A),
   7287                                              (__v8sf) __O);
   7288 }
   7289 
   7290 static __inline__ __m256 __DEFAULT_FN_ATTRS
   7291 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
   7292 {
   7293   return (__m256)__builtin_ia32_selectps_256(__M,
   7294                                              (__v8sf) _mm256_broadcastss_ps(__A),
   7295                                              (__v8sf) _mm256_setzero_ps());
   7296 }
   7297 
   7298 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7299 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
   7300 {
   7301   return (__m128i)__builtin_ia32_selectd_128(__M,
   7302                                              (__v4si) _mm_broadcastd_epi32(__A),
   7303                                              (__v4si) __O);
   7304 }
   7305 
   7306 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7307 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
   7308 {
   7309   return (__m128i)__builtin_ia32_selectd_128(__M,
   7310                                              (__v4si) _mm_broadcastd_epi32(__A),
   7311                                              (__v4si) _mm_setzero_si128());
   7312 }
   7313 
   7314 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7315 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
   7316 {
   7317   return (__m256i)__builtin_ia32_selectd_256(__M,
   7318                                              (__v8si) _mm256_broadcastd_epi32(__A),
   7319                                              (__v8si) __O);
   7320 }
   7321 
   7322 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7323 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
   7324 {
   7325   return (__m256i)__builtin_ia32_selectd_256(__M,
   7326                                              (__v8si) _mm256_broadcastd_epi32(__A),
   7327                                              (__v8si) _mm256_setzero_si256());
   7328 }
   7329 
   7330 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7331 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
   7332 {
   7333   return (__m128i)__builtin_ia32_selectq_128(__M,
   7334                                              (__v2di) _mm_broadcastq_epi64(__A),
   7335                                              (__v2di) __O);
   7336 }
   7337 
   7338 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7339 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
   7340 {
   7341   return (__m128i)__builtin_ia32_selectq_128(__M,
   7342                                              (__v2di) _mm_broadcastq_epi64(__A),
   7343                                              (__v2di) _mm_setzero_si128());
   7344 }
   7345 
   7346 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7347 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
   7348 {
   7349   return (__m256i)__builtin_ia32_selectq_256(__M,
   7350                                              (__v4di) _mm256_broadcastq_epi64(__A),
   7351                                              (__v4di) __O);
   7352 }
   7353 
   7354 static __inline__ __m256i __DEFAULT_FN_ATTRS
   7355 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
   7356 {
   7357   return (__m256i)__builtin_ia32_selectq_256(__M,
   7358                                              (__v4di) _mm256_broadcastq_epi64(__A),
   7359                                              (__v4di) _mm256_setzero_si256());
   7360 }
   7361 
   7362 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7363 _mm_cvtsepi32_epi8 (__m128i __A)
   7364 {
   7365   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
   7366                (__v16qi)_mm_undefined_si128(),
   7367                (__mmask8) -1);
   7368 }
   7369 
   7370 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7371 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
   7372 {
   7373   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
   7374                (__v16qi) __O, __M);
   7375 }
   7376 
   7377 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7378 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
   7379 {
   7380   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
   7381                (__v16qi) _mm_setzero_si128 (),
   7382                __M);
   7383 }
   7384 
   7385 static __inline__ void __DEFAULT_FN_ATTRS
   7386 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
   7387 {
   7388   __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
   7389 }
   7390 
   7391 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7392 _mm256_cvtsepi32_epi8 (__m256i __A)
   7393 {
   7394   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
   7395                (__v16qi)_mm_undefined_si128(),
   7396                (__mmask8) -1);
   7397 }
   7398 
   7399 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7400 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
   7401 {
   7402   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
   7403                (__v16qi) __O, __M);
   7404 }
   7405 
   7406 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7407 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
   7408 {
   7409   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
   7410                (__v16qi) _mm_setzero_si128 (),
   7411                __M);
   7412 }
   7413 
   7414 static __inline__ void __DEFAULT_FN_ATTRS
   7415 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
   7416 {
   7417   __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
   7418 }
   7419 
   7420 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7421 _mm_cvtsepi32_epi16 (__m128i __A)
   7422 {
   7423   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
   7424                (__v8hi)_mm_setzero_si128 (),
   7425                (__mmask8) -1);
   7426 }
   7427 
   7428 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7429 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
   7430 {
   7431   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
   7432                (__v8hi)__O,
   7433                __M);
   7434 }
   7435 
   7436 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7437 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
   7438 {
   7439   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
   7440                (__v8hi) _mm_setzero_si128 (),
   7441                __M);
   7442 }
   7443 
   7444 static __inline__ void __DEFAULT_FN_ATTRS
   7445 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
   7446 {
   7447   __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
   7448 }
   7449 
   7450 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7451 _mm256_cvtsepi32_epi16 (__m256i __A)
   7452 {
   7453   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
   7454                (__v8hi)_mm_undefined_si128(),
   7455                (__mmask8) -1);
   7456 }
   7457 
   7458 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7459 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
   7460 {
   7461   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
   7462                (__v8hi) __O, __M);
   7463 }
   7464 
   7465 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7466 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
   7467 {
   7468   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
   7469                (__v8hi) _mm_setzero_si128 (),
   7470                __M);
   7471 }
   7472 
   7473 static __inline__ void __DEFAULT_FN_ATTRS
   7474 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
   7475 {
   7476   __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
   7477 }
   7478 
   7479 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7480 _mm_cvtsepi64_epi8 (__m128i __A)
   7481 {
   7482   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
   7483                (__v16qi)_mm_undefined_si128(),
   7484                (__mmask8) -1);
   7485 }
   7486 
   7487 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7488 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
   7489 {
   7490   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
   7491                (__v16qi) __O, __M);
   7492 }
   7493 
   7494 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7495 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
   7496 {
   7497   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
   7498                (__v16qi) _mm_setzero_si128 (),
   7499                __M);
   7500 }
   7501 
   7502 static __inline__ void __DEFAULT_FN_ATTRS
   7503 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
   7504 {
   7505   __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
   7506 }
   7507 
   7508 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7509 _mm256_cvtsepi64_epi8 (__m256i __A)
   7510 {
   7511   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
   7512                (__v16qi)_mm_undefined_si128(),
   7513                (__mmask8) -1);
   7514 }
   7515 
   7516 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7517 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
   7518 {
   7519   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
   7520                (__v16qi) __O, __M);
   7521 }
   7522 
   7523 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7524 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
   7525 {
   7526   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
   7527                (__v16qi) _mm_setzero_si128 (),
   7528                __M);
   7529 }
   7530 
   7531 static __inline__ void __DEFAULT_FN_ATTRS
   7532 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
   7533 {
   7534   __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
   7535 }
   7536 
   7537 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7538 _mm_cvtsepi64_epi32 (__m128i __A)
   7539 {
   7540   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
   7541                (__v4si)_mm_undefined_si128(),
   7542                (__mmask8) -1);
   7543 }
   7544 
   7545 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7546 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
   7547 {
   7548   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
   7549                (__v4si) __O, __M);
   7550 }
   7551 
   7552 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7553 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
   7554 {
   7555   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
   7556                (__v4si) _mm_setzero_si128 (),
   7557                __M);
   7558 }
   7559 
   7560 static __inline__ void __DEFAULT_FN_ATTRS
   7561 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
   7562 {
   7563   __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
   7564 }
   7565 
   7566 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7567 _mm256_cvtsepi64_epi32 (__m256i __A)
   7568 {
   7569   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
   7570                (__v4si)_mm_undefined_si128(),
   7571                (__mmask8) -1);
   7572 }
   7573 
   7574 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7575 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
   7576 {
   7577   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
   7578                (__v4si)__O,
   7579                __M);
   7580 }
   7581 
   7582 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7583 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
   7584 {
   7585   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
   7586                (__v4si) _mm_setzero_si128 (),
   7587                __M);
   7588 }
   7589 
   7590 static __inline__ void __DEFAULT_FN_ATTRS
   7591 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
   7592 {
   7593   __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
   7594 }
   7595 
   7596 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7597 _mm_cvtsepi64_epi16 (__m128i __A)
   7598 {
   7599   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
   7600                (__v8hi)_mm_undefined_si128(),
   7601                (__mmask8) -1);
   7602 }
   7603 
   7604 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7605 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
   7606 {
   7607   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
   7608                (__v8hi) __O, __M);
   7609 }
   7610 
   7611 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7612 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
   7613 {
   7614   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
   7615                (__v8hi) _mm_setzero_si128 (),
   7616                __M);
   7617 }
   7618 
   7619 static __inline__ void __DEFAULT_FN_ATTRS
   7620 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
   7621 {
   7622   __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
   7623 }
   7624 
   7625 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7626 _mm256_cvtsepi64_epi16 (__m256i __A)
   7627 {
   7628   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
   7629                (__v8hi)_mm_undefined_si128(),
   7630                (__mmask8) -1);
   7631 }
   7632 
   7633 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7634 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
   7635 {
   7636   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
   7637                (__v8hi) __O, __M);
   7638 }
   7639 
   7640 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7641 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
   7642 {
   7643   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
   7644                (__v8hi) _mm_setzero_si128 (),
   7645                __M);
   7646 }
   7647 
   7648 static __inline__ void __DEFAULT_FN_ATTRS
   7649 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
   7650 {
   7651   __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
   7652 }
   7653 
   7654 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7655 _mm_cvtusepi32_epi8 (__m128i __A)
   7656 {
   7657   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
   7658                 (__v16qi)_mm_undefined_si128(),
   7659                 (__mmask8) -1);
   7660 }
   7661 
   7662 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7663 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
   7664 {
   7665   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
   7666                 (__v16qi) __O,
   7667                 __M);
   7668 }
   7669 
   7670 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7671 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
   7672 {
   7673   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
   7674                 (__v16qi) _mm_setzero_si128 (),
   7675                 __M);
   7676 }
   7677 
   7678 static __inline__ void __DEFAULT_FN_ATTRS
   7679 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
   7680 {
   7681   __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
   7682 }
   7683 
   7684 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7685 _mm256_cvtusepi32_epi8 (__m256i __A)
   7686 {
   7687   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
   7688                 (__v16qi)_mm_undefined_si128(),
   7689                 (__mmask8) -1);
   7690 }
   7691 
   7692 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7693 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
   7694 {
   7695   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
   7696                 (__v16qi) __O,
   7697                 __M);
   7698 }
   7699 
   7700 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7701 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
   7702 {
   7703   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
   7704                 (__v16qi) _mm_setzero_si128 (),
   7705                 __M);
   7706 }
   7707 
   7708 static __inline__ void __DEFAULT_FN_ATTRS
   7709 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
   7710 {
   7711   __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
   7712 }
   7713 
   7714 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7715 _mm_cvtusepi32_epi16 (__m128i __A)
   7716 {
   7717   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
   7718                 (__v8hi)_mm_undefined_si128(),
   7719                 (__mmask8) -1);
   7720 }
   7721 
   7722 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7723 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
   7724 {
   7725   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
   7726                 (__v8hi) __O, __M);
   7727 }
   7728 
   7729 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7730 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
   7731 {
   7732   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
   7733                 (__v8hi) _mm_setzero_si128 (),
   7734                 __M);
   7735 }
   7736 
   7737 static __inline__ void __DEFAULT_FN_ATTRS
   7738 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
   7739 {
   7740   __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
   7741 }
   7742 
   7743 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7744 _mm256_cvtusepi32_epi16 (__m256i __A)
   7745 {
   7746   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
   7747                 (__v8hi) _mm_undefined_si128(),
   7748                 (__mmask8) -1);
   7749 }
   7750 
   7751 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7752 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
   7753 {
   7754   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
   7755                 (__v8hi) __O, __M);
   7756 }
   7757 
   7758 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7759 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
   7760 {
   7761   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
   7762                 (__v8hi) _mm_setzero_si128 (),
   7763                 __M);
   7764 }
   7765 
   7766 static __inline__ void __DEFAULT_FN_ATTRS
   7767 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
   7768 {
   7769   __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
   7770 }
   7771 
   7772 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7773 _mm_cvtusepi64_epi8 (__m128i __A)
   7774 {
   7775   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
   7776                 (__v16qi)_mm_undefined_si128(),
   7777                 (__mmask8) -1);
   7778 }
   7779 
   7780 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7781 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
   7782 {
   7783   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
   7784                 (__v16qi) __O,
   7785                 __M);
   7786 }
   7787 
   7788 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7789 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
   7790 {
   7791   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
   7792                 (__v16qi) _mm_setzero_si128 (),
   7793                 __M);
   7794 }
   7795 
   7796 static __inline__ void __DEFAULT_FN_ATTRS
   7797 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
   7798 {
   7799   __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
   7800 }
   7801 
   7802 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7803 _mm256_cvtusepi64_epi8 (__m256i __A)
   7804 {
   7805   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
   7806                 (__v16qi)_mm_undefined_si128(),
   7807                 (__mmask8) -1);
   7808 }
   7809 
   7810 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7811 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
   7812 {
   7813   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
   7814                 (__v16qi) __O,
   7815                 __M);
   7816 }
   7817 
   7818 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7819 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
   7820 {
   7821   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
   7822                 (__v16qi) _mm_setzero_si128 (),
   7823                 __M);
   7824 }
   7825 
   7826 static __inline__ void __DEFAULT_FN_ATTRS
   7827 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
   7828 {
   7829   __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
   7830 }
   7831 
   7832 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7833 _mm_cvtusepi64_epi32 (__m128i __A)
   7834 {
   7835   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
   7836                 (__v4si)_mm_undefined_si128(),
   7837                 (__mmask8) -1);
   7838 }
   7839 
   7840 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7841 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
   7842 {
   7843   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
   7844                 (__v4si) __O, __M);
   7845 }
   7846 
   7847 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7848 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
   7849 {
   7850   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
   7851                 (__v4si) _mm_setzero_si128 (),
   7852                 __M);
   7853 }
   7854 
   7855 static __inline__ void __DEFAULT_FN_ATTRS
   7856 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
   7857 {
   7858   __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
   7859 }
   7860 
   7861 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7862 _mm256_cvtusepi64_epi32 (__m256i __A)
   7863 {
   7864   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
   7865                 (__v4si)_mm_undefined_si128(),
   7866                 (__mmask8) -1);
   7867 }
   7868 
   7869 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7870 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
   7871 {
   7872   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
   7873                 (__v4si) __O, __M);
   7874 }
   7875 
   7876 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7877 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
   7878 {
   7879   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
   7880                 (__v4si) _mm_setzero_si128 (),
   7881                 __M);
   7882 }
   7883 
   7884 static __inline__ void __DEFAULT_FN_ATTRS
   7885 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
   7886 {
   7887   __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
   7888 }
   7889 
   7890 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7891 _mm_cvtusepi64_epi16 (__m128i __A)
   7892 {
   7893   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
   7894                 (__v8hi)_mm_undefined_si128(),
   7895                 (__mmask8) -1);
   7896 }
   7897 
   7898 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7899 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
   7900 {
   7901   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
   7902                 (__v8hi) __O, __M);
   7903 }
   7904 
   7905 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7906 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
   7907 {
   7908   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
   7909                 (__v8hi) _mm_setzero_si128 (),
   7910                 __M);
   7911 }
   7912 
   7913 static __inline__ void __DEFAULT_FN_ATTRS
   7914 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
   7915 {
   7916   __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
   7917 }
   7918 
   7919 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7920 _mm256_cvtusepi64_epi16 (__m256i __A)
   7921 {
   7922   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
   7923                 (__v8hi)_mm_undefined_si128(),
   7924                 (__mmask8) -1);
   7925 }
   7926 
   7927 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7928 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
   7929 {
   7930   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
   7931                 (__v8hi) __O, __M);
   7932 }
   7933 
   7934 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7935 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
   7936 {
   7937   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
   7938                 (__v8hi) _mm_setzero_si128 (),
   7939                 __M);
   7940 }
   7941 
   7942 static __inline__ void __DEFAULT_FN_ATTRS
   7943 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
   7944 {
   7945   return __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
   7946 }
   7947 
   7948 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7949 _mm_cvtepi32_epi8 (__m128i __A)
   7950 {
   7951   return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
   7952               (__v16qi)_mm_undefined_si128(),
   7953               (__mmask8) -1);
   7954 }
   7955 
   7956 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7957 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
   7958 {
   7959   return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
   7960               (__v16qi) __O, __M);
   7961 }
   7962 
   7963 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7964 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
   7965 {
   7966   return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
   7967               (__v16qi)
   7968               _mm_setzero_si128 (),
   7969               __M);
   7970 }
   7971 
   7972 static __inline__ void __DEFAULT_FN_ATTRS
   7973 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
   7974 {
   7975   __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
   7976 }
   7977 
   7978 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7979 _mm256_cvtepi32_epi8 (__m256i __A)
   7980 {
   7981   return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
   7982               (__v16qi)_mm_undefined_si128(),
   7983               (__mmask8) -1);
   7984 }
   7985 
   7986 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7987 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
   7988 {
   7989   return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
   7990               (__v16qi) __O, __M);
   7991 }
   7992 
   7993 static __inline__ __m128i __DEFAULT_FN_ATTRS
   7994 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
   7995 {
   7996   return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
   7997               (__v16qi) _mm_setzero_si128 (),
   7998               __M);
   7999 }
   8000 
   8001 static __inline__ void __DEFAULT_FN_ATTRS
   8002 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
   8003 {
   8004   __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
   8005 }
   8006 
   8007 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8008 _mm_cvtepi32_epi16 (__m128i __A)
   8009 {
   8010   return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
   8011               (__v8hi) _mm_setzero_si128 (),
   8012               (__mmask8) -1);
   8013 }
   8014 
   8015 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8016 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
   8017 {
   8018   return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
   8019               (__v8hi) __O, __M);
   8020 }
   8021 
   8022 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8023 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
   8024 {
   8025   return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
   8026               (__v8hi) _mm_setzero_si128 (),
   8027               __M);
   8028 }
   8029 
   8030 static __inline__ void __DEFAULT_FN_ATTRS
   8031 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
   8032 {
   8033   __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
   8034 }
   8035 
   8036 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8037 _mm256_cvtepi32_epi16 (__m256i __A)
   8038 {
   8039   return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
   8040               (__v8hi)_mm_setzero_si128 (),
   8041               (__mmask8) -1);
   8042 }
   8043 
   8044 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8045 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
   8046 {
   8047   return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
   8048               (__v8hi) __O, __M);
   8049 }
   8050 
   8051 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8052 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
   8053 {
   8054   return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
   8055               (__v8hi) _mm_setzero_si128 (),
   8056               __M);
   8057 }
   8058 
   8059 static __inline__ void __DEFAULT_FN_ATTRS
   8060 _mm256_mask_cvtepi32_storeu_epi16 (void *  __P, __mmask8 __M, __m256i __A)
   8061 {
   8062   __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
   8063 }
   8064 
   8065 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8066 _mm_cvtepi64_epi8 (__m128i __A)
   8067 {
   8068   return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
   8069               (__v16qi) _mm_undefined_si128(),
   8070               (__mmask8) -1);
   8071 }
   8072 
   8073 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8074 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
   8075 {
   8076   return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
   8077               (__v16qi) __O, __M);
   8078 }
   8079 
   8080 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8081 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
   8082 {
   8083   return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
   8084               (__v16qi) _mm_setzero_si128 (),
   8085               __M);
   8086 }
   8087 
   8088 static __inline__ void __DEFAULT_FN_ATTRS
   8089 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
   8090 {
   8091   __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
   8092 }
   8093 
   8094 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8095 _mm256_cvtepi64_epi8 (__m256i __A)
   8096 {
   8097   return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
   8098               (__v16qi) _mm_undefined_si128(),
   8099               (__mmask8) -1);
   8100 }
   8101 
   8102 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8103 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
   8104 {
   8105   return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
   8106               (__v16qi) __O, __M);
   8107 }
   8108 
   8109 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8110 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
   8111 {
   8112   return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
   8113               (__v16qi) _mm_setzero_si128 (),
   8114               __M);
   8115 }
   8116 
   8117 static __inline__ void __DEFAULT_FN_ATTRS
   8118 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
   8119 {
   8120   __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
   8121 }
   8122 
   8123 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8124 _mm_cvtepi64_epi32 (__m128i __A)
   8125 {
   8126   return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
   8127               (__v4si)_mm_undefined_si128(),
   8128               (__mmask8) -1);
   8129 }
   8130 
   8131 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8132 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
   8133 {
   8134   return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
   8135               (__v4si) __O, __M);
   8136 }
   8137 
   8138 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8139 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
   8140 {
   8141   return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
   8142               (__v4si) _mm_setzero_si128 (),
   8143               __M);
   8144 }
   8145 
   8146 static __inline__ void __DEFAULT_FN_ATTRS
   8147 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
   8148 {
   8149   __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
   8150 }
   8151 
   8152 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8153 _mm256_cvtepi64_epi32 (__m256i __A)
   8154 {
   8155   return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
   8156               (__v4si) _mm_undefined_si128(),
   8157               (__mmask8) -1);
   8158 }
   8159 
   8160 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8161 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
   8162 {
   8163   return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
   8164               (__v4si) __O, __M);
   8165 }
   8166 
   8167 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8168 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
   8169 {
   8170   return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
   8171               (__v4si) _mm_setzero_si128 (),
   8172               __M);
   8173 }
   8174 
   8175 static __inline__ void __DEFAULT_FN_ATTRS
   8176 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
   8177 {
   8178   __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
   8179 }
   8180 
   8181 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8182 _mm_cvtepi64_epi16 (__m128i __A)
   8183 {
   8184   return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
   8185               (__v8hi) _mm_undefined_si128(),
   8186               (__mmask8) -1);
   8187 }
   8188 
   8189 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8190 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
   8191 {
   8192   return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
   8193               (__v8hi)__O,
   8194               __M);
   8195 }
   8196 
   8197 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8198 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
   8199 {
   8200   return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
   8201               (__v8hi) _mm_setzero_si128 (),
   8202               __M);
   8203 }
   8204 
   8205 static __inline__ void __DEFAULT_FN_ATTRS
   8206 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
   8207 {
   8208   __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
   8209 }
   8210 
   8211 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8212 _mm256_cvtepi64_epi16 (__m256i __A)
   8213 {
   8214   return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
   8215               (__v8hi)_mm_undefined_si128(),
   8216               (__mmask8) -1);
   8217 }
   8218 
   8219 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8220 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
   8221 {
   8222   return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
   8223               (__v8hi) __O, __M);
   8224 }
   8225 
   8226 static __inline__ __m128i __DEFAULT_FN_ATTRS
   8227 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
   8228 {
   8229   return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
   8230               (__v8hi) _mm_setzero_si128 (),
   8231               __M);
   8232 }
   8233 
   8234 static __inline__ void __DEFAULT_FN_ATTRS
   8235 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
   8236 {
   8237   __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
   8238 }
   8239 
   8240 #define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \
   8241   (__m128)__builtin_shufflevector((__v8sf)(__m256)(A),           \
   8242                                   (__v8sf)_mm256_undefined_ps(), \
   8243                                   ((imm) & 1) ? 4 : 0,           \
   8244                                   ((imm) & 1) ? 5 : 1,           \
   8245                                   ((imm) & 1) ? 6 : 2,           \
   8246                                   ((imm) & 1) ? 7 : 3); })
   8247 
   8248 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \
   8249   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
   8250                                    (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
   8251                                    (__v4sf)(W)); })
   8252 
   8253 #define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \
   8254   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
   8255                                    (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
   8256                                    (__v4sf)_mm_setzero_ps()); })
   8257 
   8258 #define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \
   8259   (__m128i)__builtin_shufflevector((__v8si)(__m256)(A),              \
   8260                                    (__v8si)_mm256_undefined_si256(), \
   8261                                    ((imm) & 1) ? 4 : 0,              \
   8262                                    ((imm) & 1) ? 5 : 1,              \
   8263                                    ((imm) & 1) ? 6 : 2,              \
   8264                                    ((imm) & 1) ? 7 : 3); })
   8265 
   8266 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
   8267   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
   8268                                 (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
   8269                                 (__v4si)(W)); })
   8270 
   8271 #define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
   8272   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
   8273                                 (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
   8274                                 (__v4si)_mm_setzero_si128()); })
   8275 
   8276 #define _mm256_insertf32x4(A, B, imm) __extension__ ({ \
   8277   (__m256)__builtin_shufflevector((__v8sf)(A), \
   8278                                   (__v8sf)_mm256_castps128_ps256((__m128)(B)), \
   8279                                   ((imm) & 0x1) ?  0 :  8, \
   8280                                   ((imm) & 0x1) ?  1 :  9, \
   8281                                   ((imm) & 0x1) ?  2 : 10, \
   8282                                   ((imm) & 0x1) ?  3 : 11, \
   8283                                   ((imm) & 0x1) ?  8 :  4, \
   8284                                   ((imm) & 0x1) ?  9 :  5, \
   8285                                   ((imm) & 0x1) ? 10 :  6, \
   8286                                   ((imm) & 0x1) ? 11 :  7); })
   8287 
   8288 #define _mm256_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
   8289   (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
   8290                                   (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
   8291                                   (__v8sf)(W)); })
   8292 
   8293 #define _mm256_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
   8294   (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
   8295                                   (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
   8296                                   (__v8sf)_mm256_setzero_ps()); })
   8297 
   8298 #define _mm256_inserti32x4(A, B, imm) __extension__ ({ \
   8299   (__m256i)__builtin_shufflevector((__v8si)(A), \
   8300                                  (__v8si)_mm256_castsi128_si256((__m128i)(B)), \
   8301                                  ((imm) & 0x1) ?  0 :  8, \
   8302                                  ((imm) & 0x1) ?  1 :  9, \
   8303                                  ((imm) & 0x1) ?  2 : 10, \
   8304                                  ((imm) & 0x1) ?  3 : 11, \
   8305                                  ((imm) & 0x1) ?  8 :  4, \
   8306                                  ((imm) & 0x1) ?  9 :  5, \
   8307                                  ((imm) & 0x1) ? 10 :  6, \
   8308                                  ((imm) & 0x1) ? 11 :  7); })
   8309 
   8310 #define _mm256_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
   8311   (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
   8312                                   (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
   8313                                   (__v8si)(W)); })
   8314 
   8315 #define _mm256_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
   8316   (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
   8317                                   (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
   8318                                   (__v8si)_mm256_setzero_si256()); })
   8319 
   8320 #define _mm_getmant_pd(A, B, C) __extension__({\
   8321   (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
   8322                                             (int)(((C)<<2) | (B)), \
   8323                                             (__v2df)_mm_setzero_pd(), \
   8324                                             (__mmask8)-1); })
   8325 
   8326 #define _mm_mask_getmant_pd(W, U, A, B, C) __extension__({\
   8327   (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
   8328                                             (int)(((C)<<2) | (B)), \
   8329                                             (__v2df)(__m128d)(W), \
   8330                                             (__mmask8)(U)); })
   8331 
   8332 #define _mm_maskz_getmant_pd(U, A, B, C) __extension__({\
   8333   (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
   8334                                             (int)(((C)<<2) | (B)), \
   8335                                             (__v2df)_mm_setzero_pd(), \
   8336                                             (__mmask8)(U)); })
   8337 
   8338 #define _mm256_getmant_pd(A, B, C) __extension__ ({ \
   8339   (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
   8340                                             (int)(((C)<<2) | (B)), \
   8341                                             (__v4df)_mm256_setzero_pd(), \
   8342                                             (__mmask8)-1); })
   8343 
   8344 #define _mm256_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
   8345   (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
   8346                                             (int)(((C)<<2) | (B)), \
   8347                                             (__v4df)(__m256d)(W), \
   8348                                             (__mmask8)(U)); })
   8349 
   8350 #define _mm256_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
   8351   (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
   8352                                             (int)(((C)<<2) | (B)), \
   8353                                             (__v4df)_mm256_setzero_pd(), \
   8354                                             (__mmask8)(U)); })
   8355 
   8356 #define _mm_getmant_ps(A, B, C) __extension__ ({ \
   8357   (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
   8358                                            (int)(((C)<<2) | (B)), \
   8359                                            (__v4sf)_mm_setzero_ps(), \
   8360                                            (__mmask8)-1); })
   8361 
   8362 #define _mm_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
   8363   (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
   8364                                            (int)(((C)<<2) | (B)), \
   8365                                            (__v4sf)(__m128)(W), \
   8366                                            (__mmask8)(U)); })
   8367 
   8368 #define _mm_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
   8369   (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
   8370                                            (int)(((C)<<2) | (B)), \
   8371                                            (__v4sf)_mm_setzero_ps(), \
   8372                                            (__mmask8)(U)); })
   8373 
   8374 #define _mm256_getmant_ps(A, B, C) __extension__ ({ \
   8375   (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
   8376                                            (int)(((C)<<2) | (B)), \
   8377                                            (__v8sf)_mm256_setzero_ps(), \
   8378                                            (__mmask8)-1); })
   8379 
   8380 #define _mm256_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
   8381   (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
   8382                                            (int)(((C)<<2) | (B)), \
   8383                                            (__v8sf)(__m256)(W), \
   8384                                            (__mmask8)(U)); })
   8385 
   8386 #define _mm256_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
   8387   (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
   8388                                            (int)(((C)<<2) | (B)), \
   8389                                            (__v8sf)_mm256_setzero_ps(), \
   8390                                            (__mmask8)(U)); })
   8391 
   8392 #define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
   8393   (__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
   8394                                         (double const *)(addr), \
   8395                                         (__v2di)(__m128i)(index), \
   8396                                         (__mmask8)(mask), (int)(scale)); })
   8397 
   8398 #define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
   8399   (__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
   8400                                         (long long const *)(addr), \
   8401                                         (__v2di)(__m128i)(index), \
   8402                                         (__mmask8)(mask), (int)(scale)); })
   8403 
   8404 #define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
   8405   (__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
   8406                                         (double const *)(addr), \
   8407                                         (__v4di)(__m256i)(index), \
   8408                                         (__mmask8)(mask), (int)(scale)); })
   8409 
   8410 #define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
   8411   (__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
   8412                                         (long long const *)(addr), \
   8413                                         (__v4di)(__m256i)(index), \
   8414                                         (__mmask8)(mask), (int)(scale)); })
   8415 
   8416 #define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
   8417   (__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
   8418                                        (float const *)(addr), \
   8419                                        (__v2di)(__m128i)(index), \
   8420                                        (__mmask8)(mask), (int)(scale)); })
   8421 
   8422 #define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
   8423   (__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
   8424                                         (int const *)(addr), \
   8425                                         (__v2di)(__m128i)(index), \
   8426                                         (__mmask8)(mask), (int)(scale)); })
   8427 
   8428 #define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
   8429   (__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
   8430                                        (float const *)(addr), \
   8431                                        (__v4di)(__m256i)(index), \
   8432                                        (__mmask8)(mask), (int)(scale)); })
   8433 
   8434 #define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
   8435   (__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
   8436                                         (int const *)(addr), \
   8437                                         (__v4di)(__m256i)(index), \
   8438                                         (__mmask8)(mask), (int)(scale)); })
   8439 
   8440 #define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
   8441   (__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
   8442                                         (double const *)(addr), \
   8443                                         (__v4si)(__m128i)(index), \
   8444                                         (__mmask8)(mask), (int)(scale)); })
   8445 
   8446 #define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
   8447   (__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
   8448                                         (long long const *)(addr), \
   8449                                         (__v4si)(__m128i)(index), \
   8450                                         (__mmask8)(mask), (int)(scale)); })
   8451 
   8452 #define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
   8453   (__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
   8454                                         (double const *)(addr), \
   8455                                         (__v4si)(__m128i)(index), \
   8456                                         (__mmask8)(mask), (int)(scale)); })
   8457 
   8458 #define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
   8459   (__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
   8460                                         (long long const *)(addr), \
   8461                                         (__v4si)(__m128i)(index), \
   8462                                         (__mmask8)(mask), (int)(scale)); })
   8463 
   8464 #define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
   8465   (__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
   8466                                        (float const *)(addr), \
   8467                                        (__v4si)(__m128i)(index), \
   8468                                        (__mmask8)(mask), (int)(scale)); })
   8469 
   8470 #define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
   8471   (__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
   8472                                         (int const *)(addr), \
   8473                                         (__v4si)(__m128i)(index), \
   8474                                         (__mmask8)(mask), (int)(scale)); })
   8475 
   8476 #define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
   8477   (__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
   8478                                        (float const *)(addr), \
   8479                                        (__v8si)(__m256i)(index), \
   8480                                        (__mmask8)(mask), (int)(scale)); })
   8481 
   8482 #define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
   8483   (__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
   8484                                         (int const *)(addr), \
   8485                                         (__v8si)(__m256i)(index), \
   8486                                         (__mmask8)(mask), (int)(scale)); })
   8487 
   8488 #define _mm256_permutex_pd(X, C) __extension__ ({ \
   8489   (__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \
   8490                                    (__v4df)_mm256_undefined_pd(), \
   8491                                    ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
   8492                                    ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); })
   8493 
   8494 #define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \
   8495   (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
   8496                                        (__v4df)_mm256_permutex_pd((X), (C)), \
   8497                                        (__v4df)(__m256d)(W)); })
   8498 
   8499 #define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \
   8500   (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
   8501                                        (__v4df)_mm256_permutex_pd((X), (C)), \
   8502                                        (__v4df)_mm256_setzero_pd()); })
   8503 
   8504 #define _mm256_permutex_epi64(X, C) __extension__ ({ \
   8505   (__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \
   8506                                    (__v4di)_mm256_undefined_si256(), \
   8507                                    ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
   8508                                    ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); })
   8509 
   8510 #define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
   8511   (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
   8512                                       (__v4di)_mm256_permutex_epi64((X), (C)), \
   8513                                       (__v4di)(__m256i)(W)); })
   8514 
   8515 #define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \
   8516   (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
   8517                                       (__v4di)_mm256_permutex_epi64((X), (C)), \
   8518                                       (__v4di)_mm256_setzero_si256()); })
   8519 
   8520 static __inline__ __m256d __DEFAULT_FN_ATTRS
   8521 _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
   8522 {
   8523   return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
   8524                  (__v4di) __X,
   8525                  (__v4df) _mm256_undefined_si256 (),
   8526                  (__mmask8) -1);
   8527 }
   8528 
   8529 static __inline__ __m256d __DEFAULT_FN_ATTRS
   8530 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
   8531           __m256d __Y)
   8532 {
   8533   return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
   8534                  (__v4di) __X,
   8535                  (__v4df) __W,
   8536                  (__mmask8) __U);
   8537 }
   8538 
   8539 static __inline__ __m256d __DEFAULT_FN_ATTRS
   8540 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
   8541 {
   8542   return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
   8543                  (__v4di) __X,
   8544                  (__v4df) _mm256_setzero_pd (),
   8545                  (__mmask8) __U);
   8546 }
   8547 
   8548 static __inline__ __m256i __DEFAULT_FN_ATTRS
   8549 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
   8550 {
   8551   return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
   8552                  (__v4di) __X,
   8553                  (__v4di) _mm256_setzero_si256 (),
   8554                  (__mmask8) __M);
   8555 }
   8556 
   8557 static __inline__ __m256i __DEFAULT_FN_ATTRS
   8558 _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
   8559 {
   8560   return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
   8561                  (__v4di) __X,
   8562                  (__v4di) _mm256_undefined_si256 (),
   8563                  (__mmask8) -1);
   8564 }
   8565 
   8566 static __inline__ __m256i __DEFAULT_FN_ATTRS
   8567 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
   8568              __m256i __Y)
   8569 {
   8570   return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
   8571                  (__v4di) __X,
   8572                  (__v4di) __W,
   8573                  __M);
   8574 }
   8575 
   8576 static __inline__ __m256 __DEFAULT_FN_ATTRS
   8577 _mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
   8578           __m256 __Y)
   8579 {
   8580   return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
   8581                 (__v8si) __X,
   8582                 (__v8sf) __W,
   8583                 (__mmask8) __U);
   8584 }
   8585 
   8586 static __inline__ __m256 __DEFAULT_FN_ATTRS
   8587 _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
   8588 {
   8589   return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
   8590                 (__v8si) __X,
   8591                 (__v8sf) _mm256_setzero_ps (),
   8592                 (__mmask8) __U);
   8593 }
   8594 
   8595 static __inline__ __m256 __DEFAULT_FN_ATTRS
   8596 _mm256_permutexvar_ps (__m256i __X, __m256 __Y)
   8597 {
   8598   return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
   8599                 (__v8si) __X,
   8600                 (__v8sf) _mm256_undefined_si256 (),
   8601                 (__mmask8) -1);
   8602 }
   8603 
   8604 static __inline__ __m256i __DEFAULT_FN_ATTRS
   8605 _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
   8606 {
   8607   return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
   8608                  (__v8si) __X,
   8609                  (__v8si) _mm256_setzero_si256 (),
   8610                  __M);
   8611 }
   8612 
   8613 static __inline__ __m256i __DEFAULT_FN_ATTRS
   8614 _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
   8615              __m256i __Y)
   8616 {
   8617   return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
   8618                  (__v8si) __X,
   8619                  (__v8si) __W,
   8620                  (__mmask8) __M);
   8621 }
   8622 
   8623 static __inline__ __m256i __DEFAULT_FN_ATTRS
   8624 _mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
   8625 {
   8626   return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
   8627                  (__v8si) __X,
   8628                  (__v8si) _mm256_undefined_si256(),
   8629                  (__mmask8) -1);
   8630 }
   8631 
   8632 #define _mm_alignr_epi32(A, B, imm) __extension__ ({ \
   8633   (__m128i)__builtin_shufflevector((__v4si)(__m128i)(B), \
   8634                                    (__v4si)(__m128i)(A), \
   8635                                    ((int)(imm) & 0x3) + 0, \
   8636                                    ((int)(imm) & 0x3) + 1, \
   8637                                    ((int)(imm) & 0x3) + 2, \
   8638                                    ((int)(imm) & 0x3) + 3); })
   8639 
   8640 #define _mm_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
   8641   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
   8642                                     (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
   8643                                     (__v4si)(__m128i)(W)); })
   8644 
   8645 #define _mm_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
   8646   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
   8647                                     (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
   8648                                     (__v4si)_mm_setzero_si128()); })
   8649 
   8650 #define _mm256_alignr_epi32(A, B, imm) __extension__ ({ \
   8651   (__m256i)__builtin_shufflevector((__v8si)(__m256i)(B), \
   8652                                    (__v8si)(__m256i)(A), \
   8653                                    ((int)(imm) & 0x7) + 0, \
   8654                                    ((int)(imm) & 0x7) + 1, \
   8655                                    ((int)(imm) & 0x7) + 2, \
   8656                                    ((int)(imm) & 0x7) + 3, \
   8657                                    ((int)(imm) & 0x7) + 4, \
   8658                                    ((int)(imm) & 0x7) + 5, \
   8659                                    ((int)(imm) & 0x7) + 6, \
   8660                                    ((int)(imm) & 0x7) + 7); })
   8661 
   8662 #define _mm256_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
   8663   (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
   8664                                  (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
   8665                                  (__v8si)(__m256i)(W)); })
   8666 
   8667 #define _mm256_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
   8668   (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
   8669                                  (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
   8670                                  (__v8si)_mm256_setzero_si256()); })
   8671 
   8672 #define _mm_alignr_epi64(A, B, imm) __extension__ ({ \
   8673   (__m128i)__builtin_shufflevector((__v2di)(__m128i)(B), \
   8674                                    (__v2di)(__m128i)(A), \
   8675                                    ((int)(imm) & 0x1) + 0, \
   8676                                    ((int)(imm) & 0x1) + 1); })
   8677 
   8678 #define _mm_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
   8679   (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
   8680                                     (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
   8681                                     (__v2di)(__m128i)(W)); })
   8682 
   8683 #define _mm_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
   8684   (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
   8685                                     (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
   8686                                     (__v2di)_mm_setzero_di()); })
   8687 
   8688 #define _mm256_alignr_epi64(A, B, imm) __extension__ ({ \
   8689   (__m256i)__builtin_shufflevector((__v4di)(__m256i)(B), \
   8690                                    (__v4di)(__m256i)(A), \
   8691                                    ((int)(imm) & 0x3) + 0, \
   8692                                    ((int)(imm) & 0x3) + 1, \
   8693                                    ((int)(imm) & 0x3) + 2, \
   8694                                    ((int)(imm) & 0x3) + 3); })
   8695 
   8696 #define _mm256_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
   8697   (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
   8698                                  (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
   8699                                  (__v4di)(__m256i)(W)); })
   8700 
   8701 #define _mm256_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
   8702   (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
   8703                                  (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
   8704                                  (__v4di)_mm256_setzero_si256()); })
   8705 
   8706 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8707 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
   8708 {
   8709   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   8710                                              (__v4sf)_mm_movehdup_ps(__A),
   8711                                              (__v4sf)__W);
   8712 }
   8713 
   8714 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8715 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
   8716 {
   8717   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   8718                                              (__v4sf)_mm_movehdup_ps(__A),
   8719                                              (__v4sf)_mm_setzero_ps());
   8720 }
   8721 
   8722 static __inline__ __m256 __DEFAULT_FN_ATTRS
   8723 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
   8724 {
   8725   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   8726                                              (__v8sf)_mm256_movehdup_ps(__A),
   8727                                              (__v8sf)__W);
   8728 }
   8729 
   8730 static __inline__ __m256 __DEFAULT_FN_ATTRS
   8731 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
   8732 {
   8733   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   8734                                              (__v8sf)_mm256_movehdup_ps(__A),
   8735                                              (__v8sf)_mm256_setzero_ps());
   8736 }
   8737 
   8738 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8739 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
   8740 {
   8741   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   8742                                              (__v4sf)_mm_moveldup_ps(__A),
   8743                                              (__v4sf)__W);
   8744 }
   8745 
   8746 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8747 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
   8748 {
   8749   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   8750                                              (__v4sf)_mm_moveldup_ps(__A),
   8751                                              (__v4sf)_mm_setzero_ps());
   8752 }
   8753 
   8754 static __inline__ __m256 __DEFAULT_FN_ATTRS
   8755 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
   8756 {
   8757   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   8758                                              (__v8sf)_mm256_moveldup_ps(__A),
   8759                                              (__v8sf)__W);
   8760 }
   8761 
   8762 static __inline__ __m256 __DEFAULT_FN_ATTRS
   8763 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
   8764 {
   8765   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   8766                                              (__v8sf)_mm256_moveldup_ps(__A),
   8767                                              (__v8sf)_mm256_setzero_ps());
   8768 }
   8769 
   8770 #define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__({\
   8771   (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
   8772                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
   8773                                       (__v8si)(__m256i)(W)); })
   8774 
   8775 #define _mm256_maskz_shuffle_epi32(U, A, I) __extension__({\
   8776   (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
   8777                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
   8778                                       (__v8si)_mm256_setzero_si256()); })
   8779 
   8780 #define _mm_mask_shuffle_epi32(W, U, A, I) __extension__({\
   8781   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
   8782                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
   8783                                       (__v4si)(__m128i)(W)); })
   8784 
   8785 #define _mm_maskz_shuffle_epi32(U, A, I) __extension__({\
   8786   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
   8787                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
   8788                                       (__v4si)_mm_setzero_si128()); })
   8789 
   8790 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8791 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
   8792 {
   8793   return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
   8794               (__v2df) __A,
   8795               (__v2df) __W);
   8796 }
   8797 
   8798 static __inline__ __m128d __DEFAULT_FN_ATTRS
   8799 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
   8800 {
   8801   return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
   8802               (__v2df) __A,
   8803               (__v2df) _mm_setzero_pd ());
   8804 }
   8805 
   8806 static __inline__ __m256d __DEFAULT_FN_ATTRS
   8807 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
   8808 {
   8809   return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
   8810               (__v4df) __A,
   8811               (__v4df) __W);
   8812 }
   8813 
   8814 static __inline__ __m256d __DEFAULT_FN_ATTRS
   8815 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
   8816 {
   8817   return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
   8818               (__v4df) __A,
   8819               (__v4df) _mm256_setzero_pd ());
   8820 }
   8821 
   8822 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8823 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
   8824 {
   8825   return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
   8826              (__v4sf) __A,
   8827              (__v4sf) __W);
   8828 }
   8829 
   8830 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8831 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
   8832 {
   8833   return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
   8834              (__v4sf) __A,
   8835              (__v4sf) _mm_setzero_ps ());
   8836 }
   8837 
   8838 static __inline__ __m256 __DEFAULT_FN_ATTRS
   8839 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
   8840 {
   8841   return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
   8842              (__v8sf) __A,
   8843              (__v8sf) __W);
   8844 }
   8845 
   8846 static __inline__ __m256 __DEFAULT_FN_ATTRS
   8847 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
   8848 {
   8849   return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
   8850              (__v8sf) __A,
   8851              (__v8sf) _mm256_setzero_ps ());
   8852 }
   8853 
   8854 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8855 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
   8856 {
   8857   return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
   8858              (__v4sf) __W,
   8859              (__mmask8) __U);
   8860 }
   8861 
   8862 static __inline__ __m128 __DEFAULT_FN_ATTRS
   8863 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
   8864 {
   8865   return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
   8866              (__v4sf)
   8867              _mm_setzero_ps (),
   8868              (__mmask8) __U);
   8869 }
   8870 
   8871 static __inline__ __m256 __DEFAULT_FN_ATTRS
   8872 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
   8873 {
   8874   return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
   8875                 (__v8sf) __W,
   8876                 (__mmask8) __U);
   8877 }
   8878 
   8879 static __inline__ __m256 __DEFAULT_FN_ATTRS
   8880 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
   8881 {
   8882   return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
   8883                 (__v8sf)
   8884                 _mm256_setzero_ps (),
   8885                 (__mmask8) __U);
   8886 }
   8887 
   8888 static __inline __m128i __DEFAULT_FN_ATTRS
   8889 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A)
   8890 {
   8891   return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
   8892                                                   (__v8hi) __W,
   8893                                                   (__mmask8) __U);
   8894 }
   8895 
   8896 static __inline __m128i __DEFAULT_FN_ATTRS
   8897 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
   8898 {
   8899   return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
   8900                                                   (__v8hi) _mm_setzero_si128 (),
   8901                                                   (__mmask8) __U);
   8902 }
   8903 
   8904 #define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
   8905   (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
   8906                                          (__v8hi)(__m128i)(W), \
   8907                                          (__mmask8)(U)); })
   8908 
   8909 #define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
   8910   (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
   8911                                          (__v8hi)_mm_setzero_si128(), \
   8912                                          (__mmask8)(U)); })
   8913 
   8914 static __inline __m128i __DEFAULT_FN_ATTRS
   8915 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A)
   8916 {
   8917   return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
   8918                                                       (__v8hi) __W,
   8919                                                       (__mmask8) __U);
   8920 }
   8921 
   8922 static __inline __m128i __DEFAULT_FN_ATTRS
   8923 _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
   8924 {
   8925   return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
   8926                                                       (__v8hi) _mm_setzero_si128(),
   8927                                                       (__mmask8) __U);
   8928 }
   8929 #define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
   8930   (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
   8931                                             (__v8hi)(__m128i)(W), \
   8932                                             (__mmask8)(U)); })
   8933 
   8934 #define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
   8935   (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
   8936                                             (__v8hi)_mm_setzero_si128(), \
   8937                                             (__mmask8)(U)); })
   8938 
   8939 
   8940 #undef __DEFAULT_FN_ATTRS
   8941 
   8942 #endif /* __AVX512VLINTRIN_H */
   8943