1 /*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24 #ifndef __IMMINTRIN_H 25 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead." 
26 #endif 27 28 #ifndef __AVX512VLINTRIN_H 29 #define __AVX512VLINTRIN_H 30 31 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"))) 32 33 /* Doesn't require avx512vl, used in avx512dqintrin.h */ 34 static __inline __m128i __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) 35 _mm_setzero_di(void) { 36 return (__m128i)(__v2di){ 0LL, 0LL}; 37 } 38 39 /* Integer compare */ 40 41 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 42 _mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) { 43 return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b, 44 (__mmask8)-1); 45 } 46 47 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 48 _mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 49 return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b, 50 __u); 51 } 52 53 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 54 _mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) { 55 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0, 56 (__mmask8)-1); 57 } 58 59 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 60 _mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 61 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0, 62 __u); 63 } 64 65 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 66 _mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) { 67 return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b, 68 (__mmask8)-1); 69 } 70 71 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 72 _mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 73 return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b, 74 __u); 75 } 76 77 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 78 _mm256_cmpeq_epu32_mask(__m256i __a, __m256i __b) { 79 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0, 80 (__mmask8)-1); 81 } 82 83 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 84 
_mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 85 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0, 86 __u); 87 } 88 89 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 90 _mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) { 91 return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b, 92 (__mmask8)-1); 93 } 94 95 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 96 _mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 97 return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b, 98 __u); 99 } 100 101 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 102 _mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) { 103 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0, 104 (__mmask8)-1); 105 } 106 107 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 108 _mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 109 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0, 110 __u); 111 } 112 113 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 114 _mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) { 115 return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b, 116 (__mmask8)-1); 117 } 118 119 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 120 _mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 121 return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b, 122 __u); 123 } 124 125 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 126 _mm256_cmpeq_epu64_mask(__m256i __a, __m256i __b) { 127 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0, 128 (__mmask8)-1); 129 } 130 131 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 132 _mm256_mask_cmpeq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 133 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0, 134 __u); 135 } 136 137 138 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 139 _mm_cmpge_epi32_mask(__m128i __a, __m128i __b) { 
140 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5, 141 (__mmask8)-1); 142 } 143 144 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 145 _mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 146 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5, 147 __u); 148 } 149 150 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 151 _mm_cmpge_epu32_mask(__m128i __a, __m128i __b) { 152 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5, 153 (__mmask8)-1); 154 } 155 156 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 157 _mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 158 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5, 159 __u); 160 } 161 162 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 163 _mm256_cmpge_epi32_mask(__m256i __a, __m256i __b) { 164 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5, 165 (__mmask8)-1); 166 } 167 168 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 169 _mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 170 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5, 171 __u); 172 } 173 174 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 175 _mm256_cmpge_epu32_mask(__m256i __a, __m256i __b) { 176 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5, 177 (__mmask8)-1); 178 } 179 180 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 181 _mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 182 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5, 183 __u); 184 } 185 186 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 187 _mm_cmpge_epi64_mask(__m128i __a, __m128i __b) { 188 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5, 189 (__mmask8)-1); 190 } 191 192 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 193 _mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 194 return 
(__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5, 195 __u); 196 } 197 198 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 199 _mm_cmpge_epu64_mask(__m128i __a, __m128i __b) { 200 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5, 201 (__mmask8)-1); 202 } 203 204 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 205 _mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 206 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5, 207 __u); 208 } 209 210 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 211 _mm256_cmpge_epi64_mask(__m256i __a, __m256i __b) { 212 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5, 213 (__mmask8)-1); 214 } 215 216 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 217 _mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 218 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5, 219 __u); 220 } 221 222 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 223 _mm256_cmpge_epu64_mask(__m256i __a, __m256i __b) { 224 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5, 225 (__mmask8)-1); 226 } 227 228 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 229 _mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 230 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5, 231 __u); 232 } 233 234 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 235 _mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) { 236 return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b, 237 (__mmask8)-1); 238 } 239 240 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 241 _mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 242 return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b, 243 __u); 244 } 245 246 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 247 _mm_cmpgt_epu32_mask(__m128i __a, __m128i __b) { 248 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, 
(__v4si)__b, 6, 249 (__mmask8)-1); 250 } 251 252 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 253 _mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 254 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6, 255 __u); 256 } 257 258 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 259 _mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) { 260 return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b, 261 (__mmask8)-1); 262 } 263 264 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 265 _mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 266 return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b, 267 __u); 268 } 269 270 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 271 _mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b) { 272 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6, 273 (__mmask8)-1); 274 } 275 276 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 277 _mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 278 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6, 279 __u); 280 } 281 282 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 283 _mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) { 284 return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b, 285 (__mmask8)-1); 286 } 287 288 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 289 _mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 290 return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b, 291 __u); 292 } 293 294 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 295 _mm_cmpgt_epu64_mask(__m128i __a, __m128i __b) { 296 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6, 297 (__mmask8)-1); 298 } 299 300 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 301 _mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 302 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6, 303 __u); 304 } 
305 306 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 307 _mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) { 308 return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b, 309 (__mmask8)-1); 310 } 311 312 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 313 _mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 314 return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b, 315 __u); 316 } 317 318 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 319 _mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b) { 320 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6, 321 (__mmask8)-1); 322 } 323 324 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 325 _mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 326 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6, 327 __u); 328 } 329 330 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 331 _mm_cmple_epi32_mask(__m128i __a, __m128i __b) { 332 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2, 333 (__mmask8)-1); 334 } 335 336 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 337 _mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 338 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2, 339 __u); 340 } 341 342 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 343 _mm_cmple_epu32_mask(__m128i __a, __m128i __b) { 344 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2, 345 (__mmask8)-1); 346 } 347 348 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 349 _mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 350 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2, 351 __u); 352 } 353 354 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 355 _mm256_cmple_epi32_mask(__m256i __a, __m256i __b) { 356 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2, 357 (__mmask8)-1); 358 } 359 360 static __inline__ __mmask8 
__DEFAULT_FN_ATTRS 361 _mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 362 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2, 363 __u); 364 } 365 366 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 367 _mm256_cmple_epu32_mask(__m256i __a, __m256i __b) { 368 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2, 369 (__mmask8)-1); 370 } 371 372 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 373 _mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 374 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2, 375 __u); 376 } 377 378 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 379 _mm_cmple_epi64_mask(__m128i __a, __m128i __b) { 380 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2, 381 (__mmask8)-1); 382 } 383 384 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 385 _mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 386 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2, 387 __u); 388 } 389 390 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 391 _mm_cmple_epu64_mask(__m128i __a, __m128i __b) { 392 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2, 393 (__mmask8)-1); 394 } 395 396 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 397 _mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 398 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2, 399 __u); 400 } 401 402 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 403 _mm256_cmple_epi64_mask(__m256i __a, __m256i __b) { 404 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2, 405 (__mmask8)-1); 406 } 407 408 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 409 _mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 410 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2, 411 __u); 412 } 413 414 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 415 
_mm256_cmple_epu64_mask(__m256i __a, __m256i __b) { 416 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2, 417 (__mmask8)-1); 418 } 419 420 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 421 _mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 422 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2, 423 __u); 424 } 425 426 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 427 _mm_cmplt_epi32_mask(__m128i __a, __m128i __b) { 428 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1, 429 (__mmask8)-1); 430 } 431 432 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 433 _mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 434 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1, 435 __u); 436 } 437 438 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 439 _mm_cmplt_epu32_mask(__m128i __a, __m128i __b) { 440 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1, 441 (__mmask8)-1); 442 } 443 444 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 445 _mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 446 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1, 447 __u); 448 } 449 450 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 451 _mm256_cmplt_epi32_mask(__m256i __a, __m256i __b) { 452 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1, 453 (__mmask8)-1); 454 } 455 456 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 457 _mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 458 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1, 459 __u); 460 } 461 462 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 463 _mm256_cmplt_epu32_mask(__m256i __a, __m256i __b) { 464 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1, 465 (__mmask8)-1); 466 } 467 468 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 469 _mm256_mask_cmplt_epu32_mask(__mmask8 __u, 
__m256i __a, __m256i __b) { 470 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1, 471 __u); 472 } 473 474 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 475 _mm_cmplt_epi64_mask(__m128i __a, __m128i __b) { 476 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1, 477 (__mmask8)-1); 478 } 479 480 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 481 _mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 482 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1, 483 __u); 484 } 485 486 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 487 _mm_cmplt_epu64_mask(__m128i __a, __m128i __b) { 488 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1, 489 (__mmask8)-1); 490 } 491 492 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 493 _mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 494 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1, 495 __u); 496 } 497 498 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 499 _mm256_cmplt_epi64_mask(__m256i __a, __m256i __b) { 500 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1, 501 (__mmask8)-1); 502 } 503 504 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 505 _mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 506 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1, 507 __u); 508 } 509 510 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 511 _mm256_cmplt_epu64_mask(__m256i __a, __m256i __b) { 512 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1, 513 (__mmask8)-1); 514 } 515 516 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 517 _mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 518 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1, 519 __u); 520 } 521 522 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 523 _mm_cmpneq_epi32_mask(__m128i __a, __m128i __b) { 524 return 
(__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4, 525 (__mmask8)-1); 526 } 527 528 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 529 _mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 530 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4, 531 __u); 532 } 533 534 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 535 _mm_cmpneq_epu32_mask(__m128i __a, __m128i __b) { 536 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4, 537 (__mmask8)-1); 538 } 539 540 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 541 _mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 542 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4, 543 __u); 544 } 545 546 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 547 _mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b) { 548 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4, 549 (__mmask8)-1); 550 } 551 552 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 553 _mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 554 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4, 555 __u); 556 } 557 558 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 559 _mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b) { 560 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4, 561 (__mmask8)-1); 562 } 563 564 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 565 _mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 566 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4, 567 __u); 568 } 569 570 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 571 _mm_cmpneq_epi64_mask(__m128i __a, __m128i __b) { 572 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4, 573 (__mmask8)-1); 574 } 575 576 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 577 _mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 578 return 
(__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4, 579 __u); 580 } 581 582 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 583 _mm_cmpneq_epu64_mask(__m128i __a, __m128i __b) { 584 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4, 585 (__mmask8)-1); 586 } 587 588 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 589 _mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 590 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4, 591 __u); 592 } 593 594 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 595 _mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b) { 596 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4, 597 (__mmask8)-1); 598 } 599 600 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 601 _mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 602 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4, 603 __u); 604 } 605 606 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 607 _mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b) { 608 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4, 609 (__mmask8)-1); 610 } 611 612 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 613 _mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 614 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4, 615 __u); 616 } 617 618 static __inline__ __m256i __DEFAULT_FN_ATTRS 619 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 620 { 621 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 622 (__v8si)_mm256_add_epi32(__A, __B), 623 (__v8si)__W); 624 } 625 626 static __inline__ __m256i __DEFAULT_FN_ATTRS 627 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) 628 { 629 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 630 (__v8si)_mm256_add_epi32(__A, __B), 631 (__v8si)_mm256_setzero_si256()); 632 } 633 634 static __inline__ __m256i __DEFAULT_FN_ATTRS 635 
_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 636 { 637 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 638 (__v4di)_mm256_add_epi64(__A, __B), 639 (__v4di)__W); 640 } 641 642 static __inline__ __m256i __DEFAULT_FN_ATTRS 643 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) 644 { 645 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 646 (__v4di)_mm256_add_epi64(__A, __B), 647 (__v4di)_mm256_setzero_si256()); 648 } 649 650 static __inline__ __m256i __DEFAULT_FN_ATTRS 651 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 652 { 653 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 654 (__v8si)_mm256_sub_epi32(__A, __B), 655 (__v8si)__W); 656 } 657 658 static __inline__ __m256i __DEFAULT_FN_ATTRS 659 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) 660 { 661 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 662 (__v8si)_mm256_sub_epi32(__A, __B), 663 (__v8si)_mm256_setzero_si256()); 664 } 665 666 static __inline__ __m256i __DEFAULT_FN_ATTRS 667 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 668 { 669 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 670 (__v4di)_mm256_sub_epi64(__A, __B), 671 (__v4di)__W); 672 } 673 674 static __inline__ __m256i __DEFAULT_FN_ATTRS 675 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) 676 { 677 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 678 (__v4di)_mm256_sub_epi64(__A, __B), 679 (__v4di)_mm256_setzero_si256()); 680 } 681 682 static __inline__ __m128i __DEFAULT_FN_ATTRS 683 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 684 { 685 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 686 (__v4si)_mm_add_epi32(__A, __B), 687 (__v4si)__W); 688 } 689 690 static __inline__ __m128i __DEFAULT_FN_ATTRS 691 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) 692 { 693 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 694 
(__v4si)_mm_add_epi32(__A, __B), 695 (__v4si)_mm_setzero_si128()); 696 } 697 698 static __inline__ __m128i __DEFAULT_FN_ATTRS 699 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 700 { 701 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 702 (__v2di)_mm_add_epi64(__A, __B), 703 (__v2di)__W); 704 } 705 706 static __inline__ __m128i __DEFAULT_FN_ATTRS 707 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) 708 { 709 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 710 (__v2di)_mm_add_epi64(__A, __B), 711 (__v2di)_mm_setzero_si128()); 712 } 713 714 static __inline__ __m128i __DEFAULT_FN_ATTRS 715 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 716 { 717 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 718 (__v4si)_mm_sub_epi32(__A, __B), 719 (__v4si)__W); 720 } 721 722 static __inline__ __m128i __DEFAULT_FN_ATTRS 723 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) 724 { 725 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 726 (__v4si)_mm_sub_epi32(__A, __B), 727 (__v4si)_mm_setzero_si128()); 728 } 729 730 static __inline__ __m128i __DEFAULT_FN_ATTRS 731 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 732 { 733 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 734 (__v2di)_mm_sub_epi64(__A, __B), 735 (__v2di)__W); 736 } 737 738 static __inline__ __m128i __DEFAULT_FN_ATTRS 739 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) 740 { 741 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 742 (__v2di)_mm_sub_epi64(__A, __B), 743 (__v2di)_mm_setzero_si128()); 744 } 745 746 static __inline__ __m256i __DEFAULT_FN_ATTRS 747 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 748 { 749 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 750 (__v4di)_mm256_mul_epi32(__X, __Y), 751 (__v4di)__W); 752 } 753 754 static __inline__ __m256i __DEFAULT_FN_ATTRS 755 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i 
__X, __m256i __Y) 756 { 757 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 758 (__v4di)_mm256_mul_epi32(__X, __Y), 759 (__v4di)_mm256_setzero_si256()); 760 } 761 762 static __inline__ __m128i __DEFAULT_FN_ATTRS 763 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 764 { 765 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 766 (__v2di)_mm_mul_epi32(__X, __Y), 767 (__v2di)__W); 768 } 769 770 static __inline__ __m128i __DEFAULT_FN_ATTRS 771 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) 772 { 773 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 774 (__v2di)_mm_mul_epi32(__X, __Y), 775 (__v2di)_mm_setzero_si128()); 776 } 777 778 static __inline__ __m256i __DEFAULT_FN_ATTRS 779 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 780 { 781 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 782 (__v4di)_mm256_mul_epu32(__X, __Y), 783 (__v4di)__W); 784 } 785 786 static __inline__ __m256i __DEFAULT_FN_ATTRS 787 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) 788 { 789 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 790 (__v4di)_mm256_mul_epu32(__X, __Y), 791 (__v4di)_mm256_setzero_si256()); 792 } 793 794 static __inline__ __m128i __DEFAULT_FN_ATTRS 795 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 796 { 797 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 798 (__v2di)_mm_mul_epu32(__X, __Y), 799 (__v2di)__W); 800 } 801 802 static __inline__ __m128i __DEFAULT_FN_ATTRS 803 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) 804 { 805 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 806 (__v2di)_mm_mul_epu32(__X, __Y), 807 (__v2di)_mm_setzero_si128()); 808 } 809 810 static __inline__ __m256i __DEFAULT_FN_ATTRS 811 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) 812 { 813 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 814 (__v8si)_mm256_mullo_epi32(__A, __B), 815 
(__v8si)_mm256_setzero_si256()); 816 } 817 818 static __inline__ __m256i __DEFAULT_FN_ATTRS 819 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) 820 { 821 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 822 (__v8si)_mm256_mullo_epi32(__A, __B), 823 (__v8si)__W); 824 } 825 826 static __inline__ __m128i __DEFAULT_FN_ATTRS 827 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B) 828 { 829 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 830 (__v4si)_mm_mullo_epi32(__A, __B), 831 (__v4si)_mm_setzero_si128()); 832 } 833 834 static __inline__ __m128i __DEFAULT_FN_ATTRS 835 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 836 { 837 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 838 (__v4si)_mm_mullo_epi32(__A, __B), 839 (__v4si)__W); 840 } 841 842 static __inline__ __m256i __DEFAULT_FN_ATTRS 843 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 844 { 845 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 846 (__v8si)_mm256_and_si256(__A, __B), 847 (__v8si)__W); 848 } 849 850 static __inline__ __m256i __DEFAULT_FN_ATTRS 851 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B) 852 { 853 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B); 854 } 855 856 static __inline__ __m128i __DEFAULT_FN_ATTRS 857 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 858 { 859 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 860 (__v4si)_mm_and_si128(__A, __B), 861 (__v4si)__W); 862 } 863 864 static __inline__ __m128i __DEFAULT_FN_ATTRS 865 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B) 866 { 867 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B); 868 } 869 870 static __inline__ __m256i __DEFAULT_FN_ATTRS 871 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 872 { 873 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 874 
(__v8si)_mm256_andnot_si256(__A, __B), 875 (__v8si)__W); 876 } 877 878 static __inline__ __m256i __DEFAULT_FN_ATTRS 879 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B) 880 { 881 return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(), 882 __U, __A, __B); 883 } 884 885 static __inline__ __m128i __DEFAULT_FN_ATTRS 886 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 887 { 888 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 889 (__v4si)_mm_andnot_si128(__A, __B), 890 (__v4si)__W); 891 } 892 893 static __inline__ __m128i __DEFAULT_FN_ATTRS 894 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 895 { 896 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B); 897 } 898 899 static __inline__ __m256i __DEFAULT_FN_ATTRS 900 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 901 { 902 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 903 (__v8si)_mm256_or_si256(__A, __B), 904 (__v8si)__W); 905 } 906 907 static __inline__ __m256i __DEFAULT_FN_ATTRS 908 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B) 909 { 910 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B); 911 } 912 913 static __inline__ __m128i __DEFAULT_FN_ATTRS 914 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 915 { 916 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 917 (__v4si)_mm_or_si128(__A, __B), 918 (__v4si)__W); 919 } 920 921 static __inline__ __m128i __DEFAULT_FN_ATTRS 922 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B) 923 { 924 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B); 925 } 926 927 static __inline__ __m256i __DEFAULT_FN_ATTRS 928 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 929 { 930 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 931 (__v8si)_mm256_xor_si256(__A, __B), 932 (__v8si)__W); 933 } 934 935 static __inline__ __m256i 
__DEFAULT_FN_ATTRS 936 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B) 937 { 938 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B); 939 } 940 941 static __inline__ __m128i __DEFAULT_FN_ATTRS 942 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, 943 __m128i __B) 944 { 945 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 946 (__v4si)_mm_xor_si128(__A, __B), 947 (__v4si)__W); 948 } 949 950 static __inline__ __m128i __DEFAULT_FN_ATTRS 951 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B) 952 { 953 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B); 954 } 955 956 static __inline__ __m256i __DEFAULT_FN_ATTRS 957 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 958 { 959 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 960 (__v4di)_mm256_and_si256(__A, __B), 961 (__v4di)__W); 962 } 963 964 static __inline__ __m256i __DEFAULT_FN_ATTRS 965 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B) 966 { 967 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B); 968 } 969 970 static __inline__ __m128i __DEFAULT_FN_ATTRS 971 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 972 { 973 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 974 (__v2di)_mm_and_si128(__A, __B), 975 (__v2di)__W); 976 } 977 978 static __inline__ __m128i __DEFAULT_FN_ATTRS 979 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B) 980 { 981 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B); 982 } 983 984 static __inline__ __m256i __DEFAULT_FN_ATTRS 985 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 986 { 987 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 988 (__v4di)_mm256_andnot_si256(__A, __B), 989 (__v4di)__W); 990 } 991 992 static __inline__ __m256i __DEFAULT_FN_ATTRS 993 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B) 994 { 995 return 
(__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(), 996 __U, __A, __B); 997 } 998 999 static __inline__ __m128i __DEFAULT_FN_ATTRS 1000 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 1001 { 1002 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 1003 (__v2di)_mm_andnot_si128(__A, __B), 1004 (__v2di)__W); 1005 } 1006 1007 static __inline__ __m128i __DEFAULT_FN_ATTRS 1008 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B) 1009 { 1010 return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B); 1011 } 1012 1013 static __inline__ __m256i __DEFAULT_FN_ATTRS 1014 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 1015 { 1016 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 1017 (__v4di)_mm256_or_si256(__A, __B), 1018 (__v4di)__W); 1019 } 1020 1021 static __inline__ __m256i __DEFAULT_FN_ATTRS 1022 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B) 1023 { 1024 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B); 1025 } 1026 1027 static __inline__ __m128i __DEFAULT_FN_ATTRS 1028 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 1029 { 1030 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 1031 (__v2di)_mm_or_si128(__A, __B), 1032 (__v2di)__W); 1033 } 1034 1035 static __inline__ __m128i __DEFAULT_FN_ATTRS 1036 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B) 1037 { 1038 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B); 1039 } 1040 1041 static __inline__ __m256i __DEFAULT_FN_ATTRS 1042 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 1043 { 1044 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 1045 (__v4di)_mm256_xor_si256(__A, __B), 1046 (__v4di)__W); 1047 } 1048 1049 static __inline__ __m256i __DEFAULT_FN_ATTRS 1050 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B) 1051 { 1052 return 
(__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B); 1053 } 1054 1055 static __inline__ __m128i __DEFAULT_FN_ATTRS 1056 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, 1057 __m128i __B) 1058 { 1059 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 1060 (__v2di)_mm_xor_si128(__A, __B), 1061 (__v2di)__W); 1062 } 1063 1064 static __inline__ __m128i __DEFAULT_FN_ATTRS 1065 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) 1066 { 1067 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B); 1068 } 1069 1070 #define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \ 1071 (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 1072 (__v4si)(__m128i)(b), (int)(p), \ 1073 (__mmask8)-1); }) 1074 1075 #define _mm_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ 1076 (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 1077 (__v4si)(__m128i)(b), (int)(p), \ 1078 (__mmask8)(m)); }) 1079 1080 #define _mm_cmp_epu32_mask(a, b, p) __extension__ ({ \ 1081 (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 1082 (__v4si)(__m128i)(b), (int)(p), \ 1083 (__mmask8)-1); }) 1084 1085 #define _mm_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ 1086 (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 1087 (__v4si)(__m128i)(b), (int)(p), \ 1088 (__mmask8)(m)); }) 1089 1090 #define _mm256_cmp_epi32_mask(a, b, p) __extension__ ({ \ 1091 (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 1092 (__v8si)(__m256i)(b), (int)(p), \ 1093 (__mmask8)-1); }) 1094 1095 #define _mm256_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ 1096 (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 1097 (__v8si)(__m256i)(b), (int)(p), \ 1098 (__mmask8)(m)); }) 1099 1100 #define _mm256_cmp_epu32_mask(a, b, p) __extension__ ({ \ 1101 (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 1102 (__v8si)(__m256i)(b), (int)(p), \ 1103 (__mmask8)-1); }) 1104 1105 #define 
_mm256_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ 1106 (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 1107 (__v8si)(__m256i)(b), (int)(p), \ 1108 (__mmask8)(m)); }) 1109 1110 #define _mm_cmp_epi64_mask(a, b, p) __extension__ ({ \ 1111 (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 1112 (__v2di)(__m128i)(b), (int)(p), \ 1113 (__mmask8)-1); }) 1114 1115 #define _mm_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ 1116 (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 1117 (__v2di)(__m128i)(b), (int)(p), \ 1118 (__mmask8)(m)); }) 1119 1120 #define _mm_cmp_epu64_mask(a, b, p) __extension__ ({ \ 1121 (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 1122 (__v2di)(__m128i)(b), (int)(p), \ 1123 (__mmask8)-1); }) 1124 1125 #define _mm_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ 1126 (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 1127 (__v2di)(__m128i)(b), (int)(p), \ 1128 (__mmask8)(m)); }) 1129 1130 #define _mm256_cmp_epi64_mask(a, b, p) __extension__ ({ \ 1131 (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 1132 (__v4di)(__m256i)(b), (int)(p), \ 1133 (__mmask8)-1); }) 1134 1135 #define _mm256_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ 1136 (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 1137 (__v4di)(__m256i)(b), (int)(p), \ 1138 (__mmask8)(m)); }) 1139 1140 #define _mm256_cmp_epu64_mask(a, b, p) __extension__ ({ \ 1141 (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 1142 (__v4di)(__m256i)(b), (int)(p), \ 1143 (__mmask8)-1); }) 1144 1145 #define _mm256_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ 1146 (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 1147 (__v4di)(__m256i)(b), (int)(p), \ 1148 (__mmask8)(m)); }) 1149 1150 #define _mm256_cmp_ps_mask(a, b, p) __extension__ ({ \ 1151 (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 1152 (__v8sf)(__m256)(b), (int)(p), \ 1153 (__mmask8)-1); }) 1154 1155 
/* Floating-point compares that produce an __mmask8.
 *
 * Each macro forwards a and b (cast to the matching vector type) and (int)(p)
 * to the corresponding __builtin_ia32_cmp{ps,pd}{128,256}_mask builtin and
 * casts the result to __mmask8.  The _mask_ forms pass (__mmask8)(m) as the
 * builtin's last argument; the plain forms pass an all-ones mask,
 * (__mmask8)-1.
 *
 * NOTE(review): p is presumably a _CMP_* comparison predicate and m presumably
 * limits which lanes may set a result bit -- confirm against the builtin
 * definitions. */
#define _mm256_mask_cmp_ps_mask(m, a, b, p)  __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                         (__v8sf)(__m256)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm256_cmp_pd_mask(a, b, p)  __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                         (__v4df)(__m256d)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm256_mask_cmp_pd_mask(m, a, b, p)  __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                         (__v4df)(__m256d)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm_cmp_ps_mask(a, b, p)  __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                         (__v4sf)(__m128)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm_mask_cmp_ps_mask(m, a, b, p)  __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                         (__v4sf)(__m128)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm_cmp_pd_mask(a, b, p)  __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                         (__v2df)(__m128d)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm_mask_cmp_pd_mask(m, a, b, p)  __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                         (__v2df)(__m128d)(b), (int)(p), \
                                         (__mmask8)(m)); })

/* Masked fused multiply-add family (fmadd / fmsub / fnmadd / fnmsub).
 *
 * The intrinsic's mask_/mask3_/maskz_ prefix selects the builtin carrying the
 * same _mask/_mask3/_maskz suffix.  __U is always handed to the builtin as its
 * final argument, wherever it sits in the intrinsic's own parameter list.
 *
 * The variants in this group are all expressed through the vfmadd builtins;
 * the non-fmadd ones negate an operand before the call:
 *   fmsub  -> vfmadd( __A, __B, -__C)
 *   fnmadd -> vfmadd(-__A, __B,  __C)
 *   fnmsub -> vfmadd(-__A, __B, -__C)
 *
 * NOTE(review): presumably _mask keeps __A, _mask3 keeps __C and _maskz writes
 * zero in lanes whose mask bit is clear (the usual AVX-512 convention) --
 * confirm against the builtin definitions. */

/* 128-bit, double precision. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df) __C,
                                                    (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
{
  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
                                                     (__v2df) __B,
                                                     (__v2df) __C,
                                                     (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
                                                     (__v2df) __B,
                                                     (__v2df) __C,
                                                     (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
  /* fmsub via fmadd with the addend negated. */
  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
                                                    (__v2df) __B,
                                                    -(__v2df) __C,
                                                    (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
                                                     (__v2df) __B,
                                                     -(__v2df) __C,
                                                     (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
{
  /* fnmadd via fmadd with the first multiplicand negated. */
  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
                                                     (__v2df) __B,
                                                     (__v2df) __C,
                                                     (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
                                                     (__v2df) __B,
                                                     (__v2df) __C,
                                                     (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
  /* fnmsub via fmadd with both __A and __C negated. */
  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
                                                     (__v2df) __B,
                                                     -(__v2df) __C,
                                                     (__mmask8) __U);
}

/* 256-bit, double precision. */
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
{
  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
                                                    (__v4df) __B,
                                                    (__v4df) __C,
                                                    (__mmask8) __U);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
{
  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
                                                     (__v4df) __B,
                                                     (__v4df) __C,
                                                     (__mmask8) __U);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
                                                     (__v4df) __B,
                                                     (__v4df) __C,
                                                     (__mmask8) __U);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
{
  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
                                                    (__v4df) __B,
                                                    -(__v4df) __C,
                                                    (__mmask8) __U);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
                                                     (__v4df) __B,
                                                     -(__v4df) __C,
                                                     (__mmask8) __U);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
{
  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
                                                     (__v4df) __B,
                                                     (__v4df) __C,
                                                     (__mmask8) __U);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
                                                     (__v4df) __B,
                                                     (__v4df) __C,
                                                     (__mmask8) __U);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
                                                     (__v4df) __B,
                                                     -(__v4df) __C,
                                                     (__mmask8) __U);
}

/* 128-bit, single precision. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf) __C,
                                                   (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
  return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
                                                    (__v4sf) __B,
                                                    (__v4sf) __C,
                                                    (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
                                                    (__v4sf) __B,
                                                    (__v4sf) __C,
                                                    (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   -(__v4sf) __C,
                                                   (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
                                                    (__v4sf) __B,
                                                    -(__v4sf) __C,
                                                    (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
  return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
                                                    (__v4sf) __B,
                                                    (__v4sf) __C,
                                                    (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
                                                    (__v4sf) __B,
                                                    (__v4sf) __C,
                                                    (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
                                                    (__v4sf) __B,
                                                    -(__v4sf) __C,
                                                    (__mmask8) __U);
}

/* 256-bit, single precision. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
{
  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
                                                   (__v8sf) __B,
                                                   (__v8sf) __C,
                                                   (__mmask8) __U);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
{
  return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
                                                    (__v8sf) __B,
                                                    (__v8sf) __C,
                                                    (__mmask8) __U);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
{
  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
                                                    (__v8sf) __B,
                                                    (__v8sf) __C,
                                                    (__mmask8) __U);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
{
  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
                                                   (__v8sf) __B,
                                                   -(__v8sf) __C,
                                                   (__mmask8) __U);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
{
  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
                                                    (__v8sf) __B,
                                                    -(__v8sf) __C,
                                                    (__mmask8) __U);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
{
  return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
                                                    (__v8sf) __B,
                                                    (__v8sf) __C,
                                                    (__mmask8) __U);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
{
  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
                                                    (__v8sf) __B,
                                                    (__v8sf) __C,
                                                    (__mmask8) __U);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
{
  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
                                                    (__v8sf) __B,
                                                    -(__v8sf) __C,
                                                    (__mmask8) __U);
}

/* Masked fmaddsub / fmsubadd.
 *
 * fmaddsub forwards its operands unchanged to the vfmaddsub builtins; the
 * mask_/maskz_ fmsubadd forms reuse those same builtins with __C negated.
 * As above, __U is always the builtin's last argument. */

/* 128-bit, double precision. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
                                                       (__v2df) __B,
                                                       (__v2df) __C,
                                                       (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
{
  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
                                                        (__v2df) __B,
                                                        (__v2df) __C,
                                                        (__mmask8)
                                                        __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
                                                        (__v2df) __B,
                                                        (__v2df) __C,
                                                        (__mmask8)
                                                        __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
  /* fmsubadd via fmaddsub with the addend negated. */
  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
                                                       (__v2df) __B,
                                                       -(__v2df) __C,
                                                       (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
                                                        (__v2df) __B,
                                                        -(__v2df) __C,
                                                        (__mmask8)
                                                        __U);
}

/* 256-bit, double precision. */
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
{
  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
                                                       (__v4df) __B,
                                                       (__v4df) __C,
                                                       (__mmask8) __U);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
{
  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
                                                        (__v4df) __B,
                                                        (__v4df) __C,
                                                        (__mmask8)
                                                        __U);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
                                                        (__v4df) __B,
                                                        (__v4df) __C,
                                                        (__mmask8)
                                                        __U);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
{
  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
                                                       (__v4df) __B,
                                                       -(__v4df) __C,
                                                       (__mmask8) __U);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
                                                        (__v4df) __B,
                                                        -(__v4df) __C,
                                                        (__mmask8)
                                                        __U);
}

/* 128-bit, single precision. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
                                                      (__v4sf) __B,
                                                      (__v4sf) __C,
                                                      (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
  return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
                                                       (__v4sf) __B,
                                                       (__v4sf) __C,
                                                       (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
                                                       (__v4sf) __B,
                                                       (__v4sf) __C,
                                                       (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
                                                      (__v4sf) __B,
                                                      -(__v4sf) __C,
                                                      (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
                                                       (__v4sf) __B,
                                                       -(__v4sf) __C,
                                                       (__mmask8) __U);
}

/* 256-bit, single precision. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
                        __m256 __C)
{
  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
                                                      (__v8sf) __B,
                                                      (__v8sf) __C,
                                                      (__mmask8) __U);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
{
  return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
                                                       (__v8sf) __B,
                                                       (__v8sf) __C,
                                                       (__mmask8) __U);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
{
  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
                                                       (__v8sf) __B,
                                                       (__v8sf) __C,
                                                       (__mmask8) __U);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
{
  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
                                                      (__v8sf) __B,
                                                      -(__v8sf) __C,
                                                      (__mmask8) __U);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
{
  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
                                                       (__v8sf) __B,
                                                       -(__v8sf) __C,
                                                       (__mmask8) __U);
}

/* Variants backed by dedicated builtins.
 *
 * Unlike the wrappers above, the functions below pass __A, __B and __C
 * through unmodified and call vfmsub / vfmsubadd / vfnmadd / vfnmsub builtins
 * directly.
 *
 * NOTE(review): presumably a dedicated builtin is needed because the operand
 * that would otherwise be negated (__C for mask3_, __A for mask_) is also the
 * value kept in unselected lanes -- confirm against the builtin
 * definitions. */

/* mask3_fmsub */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
{
  return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
                                                     (__v2df) __B,
                                                     (__v2df) __C,
                                                     (__mmask8) __U);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
{
  return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
                                                     (__v4df) __B,
                                                     (__v4df) __C,
                                                     (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
  return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
                                                    (__v4sf) __B,
                                                    (__v4sf) __C,
                                                    (__mmask8) __U);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
{
  return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
                                                    (__v8sf) __B,
                                                    (__v8sf) __C,
                                                    (__mmask8) __U);
}

/* mask3_fmsubadd */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
{
  return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
                                                        (__v2df) __B,
                                                        (__v2df) __C,
                                                        (__mmask8)
                                                        __U);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
{
  return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
                                                        (__v4df) __B,
                                                        (__v4df) __C,
                                                        (__mmask8)
                                                        __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
  return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
                                                       (__v4sf) __B,
                                                       (__v4sf) __C,
                                                       (__mmask8) __U);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
{
  return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
                                                       (__v8sf) __B,
                                                       (__v8sf) __C,
                                                       (__mmask8) __U);
}

/* mask_fnmadd */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
  return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
                                                     (__v2df) __B,
                                                     (__v2df) __C,
                                                     (__mmask8) __U);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
{
  return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
                                                     (__v4df) __B,
                                                     (__v4df) __C,
                                                     (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
  return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
                                                    (__v4sf) __B,
                                                    (__v4sf) __C,
                                                    (__mmask8) __U);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
{
  return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
                                                    (__v8sf) __B,
                                                    (__v8sf) __C,
                                                    (__mmask8) __U);
}

/* mask_fnmsub / mask3_fnmsub */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
  return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
                                                     (__v2df) __B,
                                                     (__v2df) __C,
                                                     (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
{
  return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
                                                      (__v2df) __B,
                                                      (__v2df) __C,
                                                      (__mmask8) __U);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
{
  return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
                                                     (__v4df) __B,
                                                     (__v4df) __C,
                                                     (__mmask8) __U);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
{
  return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
                                                      (__v4df) __B,
                                                      (__v4df) __C,
                                                      (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
  return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
                                                    (__v4sf) __B,
                                                    (__v4sf) __C,
                                                    (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
  return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
                                                     (__v4sf) __B,
                                                     (__v4sf) __C,
                                                     (__mmask8) __U);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
{
  return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
                                                    (__v8sf) __B,
                                                    (__v8sf) __C,
                                                    (__mmask8) __U);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
{
  return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
                                                     (__v8sf) __B,
                                                     (__v8sf) __C,
                                                     (__mmask8) __U);
}

/* Masked addition.
 *
 * Each wrapper computes the full-width sum with the unmasked
 * _mm*_add_p{d,s} intrinsic and hands it to a __builtin_ia32_selectp{d,s}_*
 * builtin together with the mask __U and a second vector: __W for the mask_
 * forms, an all-zero vector for the maskz_ forms.
 *
 * NOTE(review): the select builtin presumably yields the sum in lanes whose
 * mask bit is set and the second vector elsewhere -- confirm against the
 * builtin definition. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                              (__v2df)_mm_add_pd(__A, __B),
                                              (__v2df)__W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                              (__v2df)_mm_add_pd(__A, __B),
                                              (__v2df)_mm_setzero_pd());
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                              (__v4df)_mm256_add_pd(__A, __B),
                                              (__v4df)__W);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                              (__v4df)_mm256_add_pd(__A, __B),
                                              (__v4df)_mm256_setzero_pd());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_add_ps(__A, __B),
                                             (__v4sf)__W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_add_ps(__A, __B),
                                             (__v4sf)_mm_setzero_ps());
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                             (__v8sf)_mm256_add_ps(__A, __B),
                                             (__v8sf)__W);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                             (__v8sf)_mm256_add_ps(__A, __B),
                                             (__v8sf)_mm256_setzero_ps());
}

/* Mask blends of 32-bit integer lanes.
 *
 * Note the operand order: __W is passed as the select builtin's first value
 * operand and __A as its second, i.e. the reverse of the parameter order. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
  return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
                                               (__v4si) __W,
                                               (__v4si) __A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
  return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
                                               (__v8si) __W,
                                               (__v8si) __A);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS 1918 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) { 1919 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 1920 (__v2df) __W, 1921 (__v2df) __A); 1922 } 1923 1924 static __inline__ __m256d __DEFAULT_FN_ATTRS 1925 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) { 1926 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 1927 (__v4df) __W, 1928 (__v4df) __A); 1929 } 1930 1931 static __inline__ __m128 __DEFAULT_FN_ATTRS 1932 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) { 1933 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 1934 (__v4sf) __W, 1935 (__v4sf) __A); 1936 } 1937 1938 static __inline__ __m256 __DEFAULT_FN_ATTRS 1939 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) { 1940 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 1941 (__v8sf) __W, 1942 (__v8sf) __A); 1943 } 1944 1945 static __inline__ __m128i __DEFAULT_FN_ATTRS 1946 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) { 1947 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 1948 (__v2di) __W, 1949 (__v2di) __A); 1950 } 1951 1952 static __inline__ __m256i __DEFAULT_FN_ATTRS 1953 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) { 1954 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 1955 (__v4di) __W, 1956 (__v4di) __A); 1957 } 1958 1959 static __inline__ __m128d __DEFAULT_FN_ATTRS 1960 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) { 1961 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 1962 (__v2df) __W, 1963 (__mmask8) __U); 1964 } 1965 1966 static __inline__ __m128d __DEFAULT_FN_ATTRS 1967 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) { 1968 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 1969 (__v2df) 1970 _mm_setzero_pd (), 1971 (__mmask8) __U); 1972 } 1973 1974 static __inline__ __m256d __DEFAULT_FN_ATTRS 1975 _mm256_mask_compress_pd (__m256d __W, __mmask8 
__U, __m256d __A) { 1976 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 1977 (__v4df) __W, 1978 (__mmask8) __U); 1979 } 1980 1981 static __inline__ __m256d __DEFAULT_FN_ATTRS 1982 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) { 1983 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 1984 (__v4df) 1985 _mm256_setzero_pd (), 1986 (__mmask8) __U); 1987 } 1988 1989 static __inline__ __m128i __DEFAULT_FN_ATTRS 1990 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 1991 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 1992 (__v2di) __W, 1993 (__mmask8) __U); 1994 } 1995 1996 static __inline__ __m128i __DEFAULT_FN_ATTRS 1997 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) { 1998 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 1999 (__v2di) 2000 _mm_setzero_si128 (), 2001 (__mmask8) __U); 2002 } 2003 2004 static __inline__ __m256i __DEFAULT_FN_ATTRS 2005 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 2006 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 2007 (__v4di) __W, 2008 (__mmask8) __U); 2009 } 2010 2011 static __inline__ __m256i __DEFAULT_FN_ATTRS 2012 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) { 2013 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 2014 (__v4di) 2015 _mm256_setzero_si256 (), 2016 (__mmask8) __U); 2017 } 2018 2019 static __inline__ __m128 __DEFAULT_FN_ATTRS 2020 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) { 2021 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 2022 (__v4sf) __W, 2023 (__mmask8) __U); 2024 } 2025 2026 static __inline__ __m128 __DEFAULT_FN_ATTRS 2027 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) { 2028 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 2029 (__v4sf) 2030 _mm_setzero_ps (), 2031 (__mmask8) __U); 2032 } 2033 2034 static __inline__ __m256 __DEFAULT_FN_ATTRS 2035 _mm256_mask_compress_ps (__m256 __W, __mmask8 
__U, __m256 __A) { 2036 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 2037 (__v8sf) __W, 2038 (__mmask8) __U); 2039 } 2040 2041 static __inline__ __m256 __DEFAULT_FN_ATTRS 2042 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) { 2043 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 2044 (__v8sf) 2045 _mm256_setzero_ps (), 2046 (__mmask8) __U); 2047 } 2048 2049 static __inline__ __m128i __DEFAULT_FN_ATTRS 2050 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 2051 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 2052 (__v4si) __W, 2053 (__mmask8) __U); 2054 } 2055 2056 static __inline__ __m128i __DEFAULT_FN_ATTRS 2057 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) { 2058 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 2059 (__v4si) 2060 _mm_setzero_si128 (), 2061 (__mmask8) __U); 2062 } 2063 2064 static __inline__ __m256i __DEFAULT_FN_ATTRS 2065 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 2066 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 2067 (__v8si) __W, 2068 (__mmask8) __U); 2069 } 2070 2071 static __inline__ __m256i __DEFAULT_FN_ATTRS 2072 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) { 2073 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 2074 (__v8si) 2075 _mm256_setzero_si256 (), 2076 (__mmask8) __U); 2077 } 2078 2079 static __inline__ void __DEFAULT_FN_ATTRS 2080 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) { 2081 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P, 2082 (__v2df) __A, 2083 (__mmask8) __U); 2084 } 2085 2086 static __inline__ void __DEFAULT_FN_ATTRS 2087 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) { 2088 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P, 2089 (__v4df) __A, 2090 (__mmask8) __U); 2091 } 2092 2093 static __inline__ void __DEFAULT_FN_ATTRS 2094 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) 
{ 2095 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P, 2096 (__v2di) __A, 2097 (__mmask8) __U); 2098 } 2099 2100 static __inline__ void __DEFAULT_FN_ATTRS 2101 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) { 2102 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P, 2103 (__v4di) __A, 2104 (__mmask8) __U); 2105 } 2106 2107 static __inline__ void __DEFAULT_FN_ATTRS 2108 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) { 2109 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P, 2110 (__v4sf) __A, 2111 (__mmask8) __U); 2112 } 2113 2114 static __inline__ void __DEFAULT_FN_ATTRS 2115 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) { 2116 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P, 2117 (__v8sf) __A, 2118 (__mmask8) __U); 2119 } 2120 2121 static __inline__ void __DEFAULT_FN_ATTRS 2122 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) { 2123 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P, 2124 (__v4si) __A, 2125 (__mmask8) __U); 2126 } 2127 2128 static __inline__ void __DEFAULT_FN_ATTRS 2129 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) { 2130 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P, 2131 (__v8si) __A, 2132 (__mmask8) __U); 2133 } 2134 2135 static __inline__ __m128d __DEFAULT_FN_ATTRS 2136 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 2137 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 2138 (__v2df)_mm_cvtepi32_pd(__A), 2139 (__v2df)__W); 2140 } 2141 2142 static __inline__ __m128d __DEFAULT_FN_ATTRS 2143 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 2144 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 2145 (__v2df)_mm_cvtepi32_pd(__A), 2146 (__v2df)_mm_setzero_pd()); 2147 } 2148 2149 static __inline__ __m256d __DEFAULT_FN_ATTRS 2150 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 2151 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 2152 
(__v4df)_mm256_cvtepi32_pd(__A), 2153 (__v4df)__W); 2154 } 2155 2156 static __inline__ __m256d __DEFAULT_FN_ATTRS 2157 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 2158 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 2159 (__v4df)_mm256_cvtepi32_pd(__A), 2160 (__v4df)_mm256_setzero_pd()); 2161 } 2162 2163 static __inline__ __m128 __DEFAULT_FN_ATTRS 2164 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 2165 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A, 2166 (__v4sf) __W, 2167 (__mmask8) __U); 2168 } 2169 2170 static __inline__ __m128 __DEFAULT_FN_ATTRS 2171 _mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A) { 2172 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A, 2173 (__v4sf) 2174 _mm_setzero_ps (), 2175 (__mmask8) __U); 2176 } 2177 2178 static __inline__ __m256 __DEFAULT_FN_ATTRS 2179 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 2180 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A, 2181 (__v8sf) __W, 2182 (__mmask8) __U); 2183 } 2184 2185 static __inline__ __m256 __DEFAULT_FN_ATTRS 2186 _mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A) { 2187 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A, 2188 (__v8sf) 2189 _mm256_setzero_ps (), 2190 (__mmask8) __U); 2191 } 2192 2193 static __inline__ __m128i __DEFAULT_FN_ATTRS 2194 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 2195 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 2196 (__v4si) __W, 2197 (__mmask8) __U); 2198 } 2199 2200 static __inline__ __m128i __DEFAULT_FN_ATTRS 2201 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) { 2202 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 2203 (__v4si) 2204 _mm_setzero_si128 (), 2205 (__mmask8) __U); 2206 } 2207 2208 static __inline__ __m128i __DEFAULT_FN_ATTRS 2209 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 2210 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A, 2211 (__v4si) 
__W, 2212 (__mmask8) __U); 2213 } 2214 2215 static __inline__ __m128i __DEFAULT_FN_ATTRS 2216 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) { 2217 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A, 2218 (__v4si) 2219 _mm_setzero_si128 (), 2220 (__mmask8) __U); 2221 } 2222 2223 static __inline__ __m128 __DEFAULT_FN_ATTRS 2224 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) { 2225 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 2226 (__v4sf) __W, 2227 (__mmask8) __U); 2228 } 2229 2230 static __inline__ __m128 __DEFAULT_FN_ATTRS 2231 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) { 2232 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 2233 (__v4sf) 2234 _mm_setzero_ps (), 2235 (__mmask8) __U); 2236 } 2237 2238 static __inline__ __m128 __DEFAULT_FN_ATTRS 2239 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) { 2240 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A, 2241 (__v4sf) __W, 2242 (__mmask8) __U); 2243 } 2244 2245 static __inline__ __m128 __DEFAULT_FN_ATTRS 2246 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) { 2247 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A, 2248 (__v4sf) 2249 _mm_setzero_ps (), 2250 (__mmask8) __U); 2251 } 2252 2253 static __inline__ __m128i __DEFAULT_FN_ATTRS 2254 _mm_cvtpd_epu32 (__m128d __A) { 2255 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2256 (__v4si) 2257 _mm_setzero_si128 (), 2258 (__mmask8) -1); 2259 } 2260 2261 static __inline__ __m128i __DEFAULT_FN_ATTRS 2262 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 2263 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2264 (__v4si) __W, 2265 (__mmask8) __U); 2266 } 2267 2268 static __inline__ __m128i __DEFAULT_FN_ATTRS 2269 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) { 2270 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2271 (__v4si) 2272 _mm_setzero_si128 (), 2273 (__mmask8) __U); 2274 } 2275 2276 static __inline__ 
__m128i __DEFAULT_FN_ATTRS 2277 _mm256_cvtpd_epu32 (__m256d __A) { 2278 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2279 (__v4si) 2280 _mm_setzero_si128 (), 2281 (__mmask8) -1); 2282 } 2283 2284 static __inline__ __m128i __DEFAULT_FN_ATTRS 2285 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 2286 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2287 (__v4si) __W, 2288 (__mmask8) __U); 2289 } 2290 2291 static __inline__ __m128i __DEFAULT_FN_ATTRS 2292 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) { 2293 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2294 (__v4si) 2295 _mm_setzero_si128 (), 2296 (__mmask8) __U); 2297 } 2298 2299 static __inline__ __m128i __DEFAULT_FN_ATTRS 2300 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { 2301 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A, 2302 (__v4si) __W, 2303 (__mmask8) __U); 2304 } 2305 2306 static __inline__ __m128i __DEFAULT_FN_ATTRS 2307 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) { 2308 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A, 2309 (__v4si) 2310 _mm_setzero_si128 (), 2311 (__mmask8) __U); 2312 } 2313 2314 static __inline__ __m256i __DEFAULT_FN_ATTRS 2315 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 2316 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A, 2317 (__v8si) __W, 2318 (__mmask8) __U); 2319 } 2320 2321 static __inline__ __m256i __DEFAULT_FN_ATTRS 2322 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) { 2323 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A, 2324 (__v8si) 2325 _mm256_setzero_si256 (), 2326 (__mmask8) __U); 2327 } 2328 2329 static __inline__ __m128d __DEFAULT_FN_ATTRS 2330 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) { 2331 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A, 2332 (__v2df) __W, 2333 (__mmask8) __U); 2334 } 2335 2336 static __inline__ __m128d __DEFAULT_FN_ATTRS 2337 
_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 2338 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A, 2339 (__v2df) 2340 _mm_setzero_pd (), 2341 (__mmask8) __U); 2342 } 2343 2344 static __inline__ __m256d __DEFAULT_FN_ATTRS 2345 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) { 2346 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A, 2347 (__v4df) __W, 2348 (__mmask8) __U); 2349 } 2350 2351 static __inline__ __m256d __DEFAULT_FN_ATTRS 2352 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 2353 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A, 2354 (__v4df) 2355 _mm256_setzero_pd (), 2356 (__mmask8) __U); 2357 } 2358 2359 static __inline__ __m128i __DEFAULT_FN_ATTRS 2360 _mm_cvtps_epu32 (__m128 __A) { 2361 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2362 (__v4si) 2363 _mm_setzero_si128 (), 2364 (__mmask8) -1); 2365 } 2366 2367 static __inline__ __m128i __DEFAULT_FN_ATTRS 2368 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 2369 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2370 (__v4si) __W, 2371 (__mmask8) __U); 2372 } 2373 2374 static __inline__ __m128i __DEFAULT_FN_ATTRS 2375 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) { 2376 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2377 (__v4si) 2378 _mm_setzero_si128 (), 2379 (__mmask8) __U); 2380 } 2381 2382 static __inline__ __m256i __DEFAULT_FN_ATTRS 2383 _mm256_cvtps_epu32 (__m256 __A) { 2384 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2385 (__v8si) 2386 _mm256_setzero_si256 (), 2387 (__mmask8) -1); 2388 } 2389 2390 static __inline__ __m256i __DEFAULT_FN_ATTRS 2391 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { 2392 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2393 (__v8si) __W, 2394 (__mmask8) __U); 2395 } 2396 2397 static __inline__ __m256i __DEFAULT_FN_ATTRS 2398 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) { 2399 
return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2400 (__v8si) 2401 _mm256_setzero_si256 (), 2402 (__mmask8) __U); 2403 } 2404 2405 static __inline__ __m128i __DEFAULT_FN_ATTRS 2406 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 2407 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 2408 (__v4si) __W, 2409 (__mmask8) __U); 2410 } 2411 2412 static __inline__ __m128i __DEFAULT_FN_ATTRS 2413 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) { 2414 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 2415 (__v4si) 2416 _mm_setzero_si128 (), 2417 (__mmask8) __U); 2418 } 2419 2420 static __inline__ __m128i __DEFAULT_FN_ATTRS 2421 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 2422 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A, 2423 (__v4si) __W, 2424 (__mmask8) __U); 2425 } 2426 2427 static __inline__ __m128i __DEFAULT_FN_ATTRS 2428 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) { 2429 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A, 2430 (__v4si) 2431 _mm_setzero_si128 (), 2432 (__mmask8) __U); 2433 } 2434 2435 static __inline__ __m128i __DEFAULT_FN_ATTRS 2436 _mm_cvttpd_epu32 (__m128d __A) { 2437 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2438 (__v4si) 2439 _mm_setzero_si128 (), 2440 (__mmask8) -1); 2441 } 2442 2443 static __inline__ __m128i __DEFAULT_FN_ATTRS 2444 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 2445 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2446 (__v4si) __W, 2447 (__mmask8) __U); 2448 } 2449 2450 static __inline__ __m128i __DEFAULT_FN_ATTRS 2451 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) { 2452 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2453 (__v4si) 2454 _mm_setzero_si128 (), 2455 (__mmask8) __U); 2456 } 2457 2458 static __inline__ __m128i __DEFAULT_FN_ATTRS 2459 _mm256_cvttpd_epu32 (__m256d __A) { 2460 return (__m128i) 
__builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2461 (__v4si) 2462 _mm_setzero_si128 (), 2463 (__mmask8) -1); 2464 } 2465 2466 static __inline__ __m128i __DEFAULT_FN_ATTRS 2467 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 2468 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2469 (__v4si) __W, 2470 (__mmask8) __U); 2471 } 2472 2473 static __inline__ __m128i __DEFAULT_FN_ATTRS 2474 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) { 2475 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2476 (__v4si) 2477 _mm_setzero_si128 (), 2478 (__mmask8) __U); 2479 } 2480 2481 static __inline__ __m128i __DEFAULT_FN_ATTRS 2482 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { 2483 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A, 2484 (__v4si) __W, 2485 (__mmask8) __U); 2486 } 2487 2488 static __inline__ __m128i __DEFAULT_FN_ATTRS 2489 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) { 2490 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A, 2491 (__v4si) 2492 _mm_setzero_si128 (), 2493 (__mmask8) __U); 2494 } 2495 2496 static __inline__ __m256i __DEFAULT_FN_ATTRS 2497 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 2498 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A, 2499 (__v8si) __W, 2500 (__mmask8) __U); 2501 } 2502 2503 static __inline__ __m256i __DEFAULT_FN_ATTRS 2504 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) { 2505 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A, 2506 (__v8si) 2507 _mm256_setzero_si256 (), 2508 (__mmask8) __U); 2509 } 2510 2511 static __inline__ __m128i __DEFAULT_FN_ATTRS 2512 _mm_cvttps_epu32 (__m128 __A) { 2513 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2514 (__v4si) 2515 _mm_setzero_si128 (), 2516 (__mmask8) -1); 2517 } 2518 2519 static __inline__ __m128i __DEFAULT_FN_ATTRS 2520 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 2521 return (__m128i) 
__builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2522 (__v4si) __W, 2523 (__mmask8) __U); 2524 } 2525 2526 static __inline__ __m128i __DEFAULT_FN_ATTRS 2527 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) { 2528 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2529 (__v4si) 2530 _mm_setzero_si128 (), 2531 (__mmask8) __U); 2532 } 2533 2534 static __inline__ __m256i __DEFAULT_FN_ATTRS 2535 _mm256_cvttps_epu32 (__m256 __A) { 2536 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2537 (__v8si) 2538 _mm256_setzero_si256 (), 2539 (__mmask8) -1); 2540 } 2541 2542 static __inline__ __m256i __DEFAULT_FN_ATTRS 2543 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { 2544 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2545 (__v8si) __W, 2546 (__mmask8) __U); 2547 } 2548 2549 static __inline__ __m256i __DEFAULT_FN_ATTRS 2550 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) { 2551 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2552 (__v8si) 2553 _mm256_setzero_si256 (), 2554 (__mmask8) __U); 2555 } 2556 2557 static __inline__ __m128d __DEFAULT_FN_ATTRS 2558 _mm_cvtepu32_pd (__m128i __A) { 2559 return (__m128d) __builtin_convertvector( 2560 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df); 2561 } 2562 2563 static __inline__ __m128d __DEFAULT_FN_ATTRS 2564 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 2565 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 2566 (__v2df)_mm_cvtepu32_pd(__A), 2567 (__v2df)__W); 2568 } 2569 2570 static __inline__ __m128d __DEFAULT_FN_ATTRS 2571 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 2572 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 2573 (__v2df)_mm_cvtepu32_pd(__A), 2574 (__v2df)_mm_setzero_pd()); 2575 } 2576 2577 static __inline__ __m256d __DEFAULT_FN_ATTRS 2578 _mm256_cvtepu32_pd (__m128i __A) { 2579 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df); 2580 } 2581 2582 static 
__inline__ __m256d __DEFAULT_FN_ATTRS 2583 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 2584 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 2585 (__v4df)_mm256_cvtepu32_pd(__A), 2586 (__v4df)__W); 2587 } 2588 2589 static __inline__ __m256d __DEFAULT_FN_ATTRS 2590 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 2591 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 2592 (__v4df)_mm256_cvtepu32_pd(__A), 2593 (__v4df)_mm256_setzero_pd()); 2594 } 2595 2596 static __inline__ __m128 __DEFAULT_FN_ATTRS 2597 _mm_cvtepu32_ps (__m128i __A) { 2598 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A, 2599 (__v4sf) 2600 _mm_setzero_ps (), 2601 (__mmask8) -1); 2602 } 2603 2604 static __inline__ __m128 __DEFAULT_FN_ATTRS 2605 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 2606 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A, 2607 (__v4sf) __W, 2608 (__mmask8) __U); 2609 } 2610 2611 static __inline__ __m128 __DEFAULT_FN_ATTRS 2612 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) { 2613 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A, 2614 (__v4sf) 2615 _mm_setzero_ps (), 2616 (__mmask8) __U); 2617 } 2618 2619 static __inline__ __m256 __DEFAULT_FN_ATTRS 2620 _mm256_cvtepu32_ps (__m256i __A) { 2621 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A, 2622 (__v8sf) 2623 _mm256_setzero_ps (), 2624 (__mmask8) -1); 2625 } 2626 2627 static __inline__ __m256 __DEFAULT_FN_ATTRS 2628 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 2629 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A, 2630 (__v8sf) __W, 2631 (__mmask8) __U); 2632 } 2633 2634 static __inline__ __m256 __DEFAULT_FN_ATTRS 2635 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) { 2636 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A, 2637 (__v8sf) 2638 _mm256_setzero_ps (), 2639 (__mmask8) __U); 2640 } 2641 2642 static __inline__ __m128d __DEFAULT_FN_ATTRS 2643 
_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2644 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2645 (__v2df)_mm_div_pd(__A, __B), 2646 (__v2df)__W); 2647 } 2648 2649 static __inline__ __m128d __DEFAULT_FN_ATTRS 2650 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2651 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2652 (__v2df)_mm_div_pd(__A, __B), 2653 (__v2df)_mm_setzero_pd()); 2654 } 2655 2656 static __inline__ __m256d __DEFAULT_FN_ATTRS 2657 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2658 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2659 (__v4df)_mm256_div_pd(__A, __B), 2660 (__v4df)__W); 2661 } 2662 2663 static __inline__ __m256d __DEFAULT_FN_ATTRS 2664 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2665 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2666 (__v4df)_mm256_div_pd(__A, __B), 2667 (__v4df)_mm256_setzero_pd()); 2668 } 2669 2670 static __inline__ __m128 __DEFAULT_FN_ATTRS 2671 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2672 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2673 (__v4sf)_mm_div_ps(__A, __B), 2674 (__v4sf)__W); 2675 } 2676 2677 static __inline__ __m128 __DEFAULT_FN_ATTRS 2678 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2679 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2680 (__v4sf)_mm_div_ps(__A, __B), 2681 (__v4sf)_mm_setzero_ps()); 2682 } 2683 2684 static __inline__ __m256 __DEFAULT_FN_ATTRS 2685 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2686 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2687 (__v8sf)_mm256_div_ps(__A, __B), 2688 (__v8sf)__W); 2689 } 2690 2691 static __inline__ __m256 __DEFAULT_FN_ATTRS 2692 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2693 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2694 (__v8sf)_mm256_div_ps(__A, __B), 2695 (__v8sf)_mm256_setzero_ps()); 
2696 } 2697 2698 static __inline__ __m128d __DEFAULT_FN_ATTRS 2699 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) { 2700 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 2701 (__v2df) __W, 2702 (__mmask8) __U); 2703 } 2704 2705 static __inline__ __m128d __DEFAULT_FN_ATTRS 2706 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) { 2707 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 2708 (__v2df) 2709 _mm_setzero_pd (), 2710 (__mmask8) __U); 2711 } 2712 2713 static __inline__ __m256d __DEFAULT_FN_ATTRS 2714 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) { 2715 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 2716 (__v4df) __W, 2717 (__mmask8) __U); 2718 } 2719 2720 static __inline__ __m256d __DEFAULT_FN_ATTRS 2721 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) { 2722 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 2723 (__v4df) 2724 _mm256_setzero_pd (), 2725 (__mmask8) __U); 2726 } 2727 2728 static __inline__ __m128i __DEFAULT_FN_ATTRS 2729 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 2730 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 2731 (__v2di) __W, 2732 (__mmask8) __U); 2733 } 2734 2735 static __inline__ __m128i __DEFAULT_FN_ATTRS 2736 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) { 2737 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 2738 (__v2di) 2739 _mm_setzero_si128 (), 2740 (__mmask8) __U); 2741 } 2742 2743 static __inline__ __m256i __DEFAULT_FN_ATTRS 2744 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 2745 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 2746 (__v4di) __W, 2747 (__mmask8) __U); 2748 } 2749 2750 static __inline__ __m256i __DEFAULT_FN_ATTRS 2751 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) { 2752 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 2753 (__v4di) 2754 _mm256_setzero_si256 (), 2755 (__mmask8) __U); 2756 } 2757 2758 static 
__inline__ __m128d __DEFAULT_FN_ATTRS 2759 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) { 2760 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P, 2761 (__v2df) __W, 2762 (__mmask8) 2763 __U); 2764 } 2765 2766 static __inline__ __m128d __DEFAULT_FN_ATTRS 2767 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2768 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P, 2769 (__v2df) 2770 _mm_setzero_pd (), 2771 (__mmask8) 2772 __U); 2773 } 2774 2775 static __inline__ __m256d __DEFAULT_FN_ATTRS 2776 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) { 2777 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P, 2778 (__v4df) __W, 2779 (__mmask8) 2780 __U); 2781 } 2782 2783 static __inline__ __m256d __DEFAULT_FN_ATTRS 2784 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2785 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P, 2786 (__v4df) 2787 _mm256_setzero_pd (), 2788 (__mmask8) 2789 __U); 2790 } 2791 2792 static __inline__ __m128i __DEFAULT_FN_ATTRS 2793 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { 2794 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P, 2795 (__v2di) __W, 2796 (__mmask8) 2797 __U); 2798 } 2799 2800 static __inline__ __m128i __DEFAULT_FN_ATTRS 2801 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2802 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P, 2803 (__v2di) 2804 _mm_setzero_si128 (), 2805 (__mmask8) 2806 __U); 2807 } 2808 2809 static __inline__ __m256i __DEFAULT_FN_ATTRS 2810 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U, 2811 void const *__P) { 2812 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P, 2813 (__v4di) __W, 2814 (__mmask8) 2815 __U); 2816 } 2817 2818 static __inline__ __m256i __DEFAULT_FN_ATTRS 2819 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2820 return (__m256i) 
__builtin_ia32_expandloaddi256_mask ((__v4di *) __P, 2821 (__v4di) 2822 _mm256_setzero_si256 (), 2823 (__mmask8) 2824 __U); 2825 } 2826 2827 static __inline__ __m128 __DEFAULT_FN_ATTRS 2828 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) { 2829 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P, 2830 (__v4sf) __W, 2831 (__mmask8) __U); 2832 } 2833 2834 static __inline__ __m128 __DEFAULT_FN_ATTRS 2835 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2836 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P, 2837 (__v4sf) 2838 _mm_setzero_ps (), 2839 (__mmask8) 2840 __U); 2841 } 2842 2843 static __inline__ __m256 __DEFAULT_FN_ATTRS 2844 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) { 2845 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P, 2846 (__v8sf) __W, 2847 (__mmask8) __U); 2848 } 2849 2850 static __inline__ __m256 __DEFAULT_FN_ATTRS 2851 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2852 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P, 2853 (__v8sf) 2854 _mm256_setzero_ps (), 2855 (__mmask8) 2856 __U); 2857 } 2858 2859 static __inline__ __m128i __DEFAULT_FN_ATTRS 2860 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { 2861 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P, 2862 (__v4si) __W, 2863 (__mmask8) 2864 __U); 2865 } 2866 2867 static __inline__ __m128i __DEFAULT_FN_ATTRS 2868 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2869 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P, 2870 (__v4si) 2871 _mm_setzero_si128 (), 2872 (__mmask8) __U); 2873 } 2874 2875 static __inline__ __m256i __DEFAULT_FN_ATTRS 2876 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U, 2877 void const *__P) { 2878 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P, 2879 (__v8si) __W, 2880 (__mmask8) 2881 __U); 2882 } 2883 2884 static __inline__ 
__m256i __DEFAULT_FN_ATTRS 2885 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2886 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P, 2887 (__v8si) 2888 _mm256_setzero_si256 (), 2889 (__mmask8) 2890 __U); 2891 } 2892 2893 static __inline__ __m128 __DEFAULT_FN_ATTRS 2894 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) { 2895 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 2896 (__v4sf) __W, 2897 (__mmask8) __U); 2898 } 2899 2900 static __inline__ __m128 __DEFAULT_FN_ATTRS 2901 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) { 2902 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 2903 (__v4sf) 2904 _mm_setzero_ps (), 2905 (__mmask8) __U); 2906 } 2907 2908 static __inline__ __m256 __DEFAULT_FN_ATTRS 2909 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) { 2910 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 2911 (__v8sf) __W, 2912 (__mmask8) __U); 2913 } 2914 2915 static __inline__ __m256 __DEFAULT_FN_ATTRS 2916 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) { 2917 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 2918 (__v8sf) 2919 _mm256_setzero_ps (), 2920 (__mmask8) __U); 2921 } 2922 2923 static __inline__ __m128i __DEFAULT_FN_ATTRS 2924 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 2925 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 2926 (__v4si) __W, 2927 (__mmask8) __U); 2928 } 2929 2930 static __inline__ __m128i __DEFAULT_FN_ATTRS 2931 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) { 2932 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 2933 (__v4si) 2934 _mm_setzero_si128 (), 2935 (__mmask8) __U); 2936 } 2937 2938 static __inline__ __m256i __DEFAULT_FN_ATTRS 2939 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 2940 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 2941 (__v8si) __W, 2942 (__mmask8) __U); 2943 } 2944 2945 static __inline__ __m256i 
__DEFAULT_FN_ATTRS 2946 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) { 2947 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 2948 (__v8si) 2949 _mm256_setzero_si256 (), 2950 (__mmask8) __U); 2951 } 2952 2953 static __inline__ __m128d __DEFAULT_FN_ATTRS 2954 _mm_getexp_pd (__m128d __A) { 2955 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2956 (__v2df) 2957 _mm_setzero_pd (), 2958 (__mmask8) -1); 2959 } 2960 2961 static __inline__ __m128d __DEFAULT_FN_ATTRS 2962 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) { 2963 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2964 (__v2df) __W, 2965 (__mmask8) __U); 2966 } 2967 2968 static __inline__ __m128d __DEFAULT_FN_ATTRS 2969 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) { 2970 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2971 (__v2df) 2972 _mm_setzero_pd (), 2973 (__mmask8) __U); 2974 } 2975 2976 static __inline__ __m256d __DEFAULT_FN_ATTRS 2977 _mm256_getexp_pd (__m256d __A) { 2978 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2979 (__v4df) 2980 _mm256_setzero_pd (), 2981 (__mmask8) -1); 2982 } 2983 2984 static __inline__ __m256d __DEFAULT_FN_ATTRS 2985 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) { 2986 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2987 (__v4df) __W, 2988 (__mmask8) __U); 2989 } 2990 2991 static __inline__ __m256d __DEFAULT_FN_ATTRS 2992 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) { 2993 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2994 (__v4df) 2995 _mm256_setzero_pd (), 2996 (__mmask8) __U); 2997 } 2998 2999 static __inline__ __m128 __DEFAULT_FN_ATTRS 3000 _mm_getexp_ps (__m128 __A) { 3001 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 3002 (__v4sf) 3003 _mm_setzero_ps (), 3004 (__mmask8) -1); 3005 } 3006 3007 static __inline__ __m128 __DEFAULT_FN_ATTRS 3008 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) { 
3009 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 3010 (__v4sf) __W, 3011 (__mmask8) __U); 3012 } 3013 3014 static __inline__ __m128 __DEFAULT_FN_ATTRS 3015 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) { 3016 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 3017 (__v4sf) 3018 _mm_setzero_ps (), 3019 (__mmask8) __U); 3020 } 3021 3022 static __inline__ __m256 __DEFAULT_FN_ATTRS 3023 _mm256_getexp_ps (__m256 __A) { 3024 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 3025 (__v8sf) 3026 _mm256_setzero_ps (), 3027 (__mmask8) -1); 3028 } 3029 3030 static __inline__ __m256 __DEFAULT_FN_ATTRS 3031 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) { 3032 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 3033 (__v8sf) __W, 3034 (__mmask8) __U); 3035 } 3036 3037 static __inline__ __m256 __DEFAULT_FN_ATTRS 3038 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) { 3039 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 3040 (__v8sf) 3041 _mm256_setzero_ps (), 3042 (__mmask8) __U); 3043 } 3044 3045 static __inline__ __m128d __DEFAULT_FN_ATTRS 3046 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 3047 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3048 (__v2df)_mm_max_pd(__A, __B), 3049 (__v2df)__W); 3050 } 3051 3052 static __inline__ __m128d __DEFAULT_FN_ATTRS 3053 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) { 3054 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3055 (__v2df)_mm_max_pd(__A, __B), 3056 (__v2df)_mm_setzero_pd()); 3057 } 3058 3059 static __inline__ __m256d __DEFAULT_FN_ATTRS 3060 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 3061 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3062 (__v4df)_mm256_max_pd(__A, __B), 3063 (__v4df)__W); 3064 } 3065 3066 static __inline__ __m256d __DEFAULT_FN_ATTRS 3067 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) { 3068 return 
(__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3069 (__v4df)_mm256_max_pd(__A, __B), 3070 (__v4df)_mm256_setzero_pd()); 3071 } 3072 3073 static __inline__ __m128 __DEFAULT_FN_ATTRS 3074 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3075 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3076 (__v4sf)_mm_max_ps(__A, __B), 3077 (__v4sf)__W); 3078 } 3079 3080 static __inline__ __m128 __DEFAULT_FN_ATTRS 3081 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) { 3082 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3083 (__v4sf)_mm_max_ps(__A, __B), 3084 (__v4sf)_mm_setzero_ps()); 3085 } 3086 3087 static __inline__ __m256 __DEFAULT_FN_ATTRS 3088 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 3089 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3090 (__v8sf)_mm256_max_ps(__A, __B), 3091 (__v8sf)__W); 3092 } 3093 3094 static __inline__ __m256 __DEFAULT_FN_ATTRS 3095 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) { 3096 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3097 (__v8sf)_mm256_max_ps(__A, __B), 3098 (__v8sf)_mm256_setzero_ps()); 3099 } 3100 3101 static __inline__ __m128d __DEFAULT_FN_ATTRS 3102 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 3103 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3104 (__v2df)_mm_min_pd(__A, __B), 3105 (__v2df)__W); 3106 } 3107 3108 static __inline__ __m128d __DEFAULT_FN_ATTRS 3109 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) { 3110 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3111 (__v2df)_mm_min_pd(__A, __B), 3112 (__v2df)_mm_setzero_pd()); 3113 } 3114 3115 static __inline__ __m256d __DEFAULT_FN_ATTRS 3116 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 3117 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3118 (__v4df)_mm256_min_pd(__A, __B), 3119 (__v4df)__W); 3120 } 3121 3122 static __inline__ __m256d __DEFAULT_FN_ATTRS 3123 
_mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) { 3124 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3125 (__v4df)_mm256_min_pd(__A, __B), 3126 (__v4df)_mm256_setzero_pd()); 3127 } 3128 3129 static __inline__ __m128 __DEFAULT_FN_ATTRS 3130 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3131 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3132 (__v4sf)_mm_min_ps(__A, __B), 3133 (__v4sf)__W); 3134 } 3135 3136 static __inline__ __m128 __DEFAULT_FN_ATTRS 3137 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) { 3138 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3139 (__v4sf)_mm_min_ps(__A, __B), 3140 (__v4sf)_mm_setzero_ps()); 3141 } 3142 3143 static __inline__ __m256 __DEFAULT_FN_ATTRS 3144 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 3145 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3146 (__v8sf)_mm256_min_ps(__A, __B), 3147 (__v8sf)__W); 3148 } 3149 3150 static __inline__ __m256 __DEFAULT_FN_ATTRS 3151 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) { 3152 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3153 (__v8sf)_mm256_min_ps(__A, __B), 3154 (__v8sf)_mm256_setzero_ps()); 3155 } 3156 3157 static __inline__ __m128d __DEFAULT_FN_ATTRS 3158 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 3159 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3160 (__v2df)_mm_mul_pd(__A, __B), 3161 (__v2df)__W); 3162 } 3163 3164 static __inline__ __m128d __DEFAULT_FN_ATTRS 3165 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) { 3166 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3167 (__v2df)_mm_mul_pd(__A, __B), 3168 (__v2df)_mm_setzero_pd()); 3169 } 3170 3171 static __inline__ __m256d __DEFAULT_FN_ATTRS 3172 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 3173 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3174 (__v4df)_mm256_mul_pd(__A, __B), 3175 (__v4df)__W); 
3176 } 3177 3178 static __inline__ __m256d __DEFAULT_FN_ATTRS 3179 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) { 3180 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3181 (__v4df)_mm256_mul_pd(__A, __B), 3182 (__v4df)_mm256_setzero_pd()); 3183 } 3184 3185 static __inline__ __m128 __DEFAULT_FN_ATTRS 3186 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3187 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3188 (__v4sf)_mm_mul_ps(__A, __B), 3189 (__v4sf)__W); 3190 } 3191 3192 static __inline__ __m128 __DEFAULT_FN_ATTRS 3193 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) { 3194 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3195 (__v4sf)_mm_mul_ps(__A, __B), 3196 (__v4sf)_mm_setzero_ps()); 3197 } 3198 3199 static __inline__ __m256 __DEFAULT_FN_ATTRS 3200 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 3201 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3202 (__v8sf)_mm256_mul_ps(__A, __B), 3203 (__v8sf)__W); 3204 } 3205 3206 static __inline__ __m256 __DEFAULT_FN_ATTRS 3207 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) { 3208 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3209 (__v8sf)_mm256_mul_ps(__A, __B), 3210 (__v8sf)_mm256_setzero_ps()); 3211 } 3212 3213 static __inline__ __m128i __DEFAULT_FN_ATTRS 3214 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) { 3215 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3216 (__v4si)_mm_abs_epi32(__A), 3217 (__v4si)__W); 3218 } 3219 3220 static __inline__ __m128i __DEFAULT_FN_ATTRS 3221 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) { 3222 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3223 (__v4si)_mm_abs_epi32(__A), 3224 (__v4si)_mm_setzero_si128()); 3225 } 3226 3227 static __inline__ __m256i __DEFAULT_FN_ATTRS 3228 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) { 3229 return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U, 3230 
(__v8si)_mm256_abs_epi32(__A), 3231 (__v8si)__W); 3232 } 3233 3234 static __inline__ __m256i __DEFAULT_FN_ATTRS 3235 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { 3236 return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U, 3237 (__v8si)_mm256_abs_epi32(__A), 3238 (__v8si)_mm256_setzero_si256()); 3239 } 3240 3241 static __inline__ __m128i __DEFAULT_FN_ATTRS 3242 _mm_abs_epi64 (__m128i __A) { 3243 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A, 3244 (__v2di) 3245 _mm_setzero_si128 (), 3246 (__mmask8) -1); 3247 } 3248 3249 static __inline__ __m128i __DEFAULT_FN_ATTRS 3250 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 3251 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A, 3252 (__v2di) __W, 3253 (__mmask8) __U); 3254 } 3255 3256 static __inline__ __m128i __DEFAULT_FN_ATTRS 3257 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { 3258 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A, 3259 (__v2di) 3260 _mm_setzero_si128 (), 3261 (__mmask8) __U); 3262 } 3263 3264 static __inline__ __m256i __DEFAULT_FN_ATTRS 3265 _mm256_abs_epi64 (__m256i __A) { 3266 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A, 3267 (__v4di) 3268 _mm256_setzero_si256 (), 3269 (__mmask8) -1); 3270 } 3271 3272 static __inline__ __m256i __DEFAULT_FN_ATTRS 3273 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 3274 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A, 3275 (__v4di) __W, 3276 (__mmask8) __U); 3277 } 3278 3279 static __inline__ __m256i __DEFAULT_FN_ATTRS 3280 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) { 3281 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A, 3282 (__v4di) 3283 _mm256_setzero_si256 (), 3284 (__mmask8) __U); 3285 } 3286 3287 static __inline__ __m128i __DEFAULT_FN_ATTRS 3288 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 3289 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3290 (__v4si)_mm_max_epi32(__A, __B), 3291 
(__v4si)_mm_setzero_si128()); 3292 } 3293 3294 static __inline__ __m128i __DEFAULT_FN_ATTRS 3295 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3296 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3297 (__v4si)_mm_max_epi32(__A, __B), 3298 (__v4si)__W); 3299 } 3300 3301 static __inline__ __m256i __DEFAULT_FN_ATTRS 3302 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) { 3303 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3304 (__v8si)_mm256_max_epi32(__A, __B), 3305 (__v8si)_mm256_setzero_si256()); 3306 } 3307 3308 static __inline__ __m256i __DEFAULT_FN_ATTRS 3309 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3310 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3311 (__v8si)_mm256_max_epi32(__A, __B), 3312 (__v8si)__W); 3313 } 3314 3315 static __inline__ __m128i __DEFAULT_FN_ATTRS 3316 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { 3317 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, 3318 (__v2di) __B, 3319 (__v2di) 3320 _mm_setzero_si128 (), 3321 __M); 3322 } 3323 3324 static __inline__ __m128i __DEFAULT_FN_ATTRS 3325 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, 3326 __m128i __B) { 3327 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, 3328 (__v2di) __B, 3329 (__v2di) __W, __M); 3330 } 3331 3332 static __inline__ __m128i __DEFAULT_FN_ATTRS 3333 _mm_max_epi64 (__m128i __A, __m128i __B) { 3334 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, 3335 (__v2di) __B, 3336 (__v2di) 3337 _mm_setzero_si128 (), 3338 (__mmask8) -1); 3339 } 3340 3341 static __inline__ __m256i __DEFAULT_FN_ATTRS 3342 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 3343 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A, 3344 (__v4di) __B, 3345 (__v4di) 3346 _mm256_setzero_si256 (), 3347 __M); 3348 } 3349 3350 static __inline__ __m256i __DEFAULT_FN_ATTRS 3351 _mm256_mask_max_epi64 (__m256i __W, 
__mmask8 __M, __m256i __A, 3352 __m256i __B) { 3353 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A, 3354 (__v4di) __B, 3355 (__v4di) __W, __M); 3356 } 3357 3358 static __inline__ __m256i __DEFAULT_FN_ATTRS 3359 _mm256_max_epi64 (__m256i __A, __m256i __B) { 3360 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A, 3361 (__v4di) __B, 3362 (__v4di) 3363 _mm256_setzero_si256 (), 3364 (__mmask8) -1); 3365 } 3366 3367 static __inline__ __m128i __DEFAULT_FN_ATTRS 3368 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { 3369 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3370 (__v4si)_mm_max_epu32(__A, __B), 3371 (__v4si)_mm_setzero_si128()); 3372 } 3373 3374 static __inline__ __m128i __DEFAULT_FN_ATTRS 3375 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3376 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3377 (__v4si)_mm_max_epu32(__A, __B), 3378 (__v4si)__W); 3379 } 3380 3381 static __inline__ __m256i __DEFAULT_FN_ATTRS 3382 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { 3383 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3384 (__v8si)_mm256_max_epu32(__A, __B), 3385 (__v8si)_mm256_setzero_si256()); 3386 } 3387 3388 static __inline__ __m256i __DEFAULT_FN_ATTRS 3389 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3390 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3391 (__v8si)_mm256_max_epu32(__A, __B), 3392 (__v8si)__W); 3393 } 3394 3395 static __inline__ __m128i __DEFAULT_FN_ATTRS 3396 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 3397 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, 3398 (__v2di) __B, 3399 (__v2di) 3400 _mm_setzero_si128 (), 3401 __M); 3402 } 3403 3404 static __inline__ __m128i __DEFAULT_FN_ATTRS 3405 _mm_max_epu64 (__m128i __A, __m128i __B) { 3406 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, 3407 (__v2di) __B, 3408 (__v2di) 3409 _mm_setzero_si128 (), 3410 
(__mmask8) -1); 3411 } 3412 3413 static __inline__ __m128i __DEFAULT_FN_ATTRS 3414 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, 3415 __m128i __B) { 3416 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, 3417 (__v2di) __B, 3418 (__v2di) __W, __M); 3419 } 3420 3421 static __inline__ __m256i __DEFAULT_FN_ATTRS 3422 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 3423 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A, 3424 (__v4di) __B, 3425 (__v4di) 3426 _mm256_setzero_si256 (), 3427 __M); 3428 } 3429 3430 static __inline__ __m256i __DEFAULT_FN_ATTRS 3431 _mm256_max_epu64 (__m256i __A, __m256i __B) { 3432 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A, 3433 (__v4di) __B, 3434 (__v4di) 3435 _mm256_setzero_si256 (), 3436 (__mmask8) -1); 3437 } 3438 3439 static __inline__ __m256i __DEFAULT_FN_ATTRS 3440 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, 3441 __m256i __B) { 3442 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A, 3443 (__v4di) __B, 3444 (__v4di) __W, __M); 3445 } 3446 3447 static __inline__ __m128i __DEFAULT_FN_ATTRS 3448 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 3449 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3450 (__v4si)_mm_min_epi32(__A, __B), 3451 (__v4si)_mm_setzero_si128()); 3452 } 3453 3454 static __inline__ __m128i __DEFAULT_FN_ATTRS 3455 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3456 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3457 (__v4si)_mm_min_epi32(__A, __B), 3458 (__v4si)__W); 3459 } 3460 3461 static __inline__ __m256i __DEFAULT_FN_ATTRS 3462 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { 3463 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3464 (__v8si)_mm256_min_epi32(__A, __B), 3465 (__v8si)_mm256_setzero_si256()); 3466 } 3467 3468 static __inline__ __m256i __DEFAULT_FN_ATTRS 3469 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i 
__A, __m256i __B) { 3470 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3471 (__v8si)_mm256_min_epi32(__A, __B), 3472 (__v8si)__W); 3473 } 3474 3475 static __inline__ __m128i __DEFAULT_FN_ATTRS 3476 _mm_min_epi64 (__m128i __A, __m128i __B) { 3477 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, 3478 (__v2di) __B, 3479 (__v2di) 3480 _mm_setzero_si128 (), 3481 (__mmask8) -1); 3482 } 3483 3484 static __inline__ __m128i __DEFAULT_FN_ATTRS 3485 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, 3486 __m128i __B) { 3487 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, 3488 (__v2di) __B, 3489 (__v2di) __W, __M); 3490 } 3491 3492 static __inline__ __m128i __DEFAULT_FN_ATTRS 3493 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { 3494 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, 3495 (__v2di) __B, 3496 (__v2di) 3497 _mm_setzero_si128 (), 3498 __M); 3499 } 3500 3501 static __inline__ __m256i __DEFAULT_FN_ATTRS 3502 _mm256_min_epi64 (__m256i __A, __m256i __B) { 3503 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A, 3504 (__v4di) __B, 3505 (__v4di) 3506 _mm256_setzero_si256 (), 3507 (__mmask8) -1); 3508 } 3509 3510 static __inline__ __m256i __DEFAULT_FN_ATTRS 3511 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, 3512 __m256i __B) { 3513 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A, 3514 (__v4di) __B, 3515 (__v4di) __W, __M); 3516 } 3517 3518 static __inline__ __m256i __DEFAULT_FN_ATTRS 3519 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 3520 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A, 3521 (__v4di) __B, 3522 (__v4di) 3523 _mm256_setzero_si256 (), 3524 __M); 3525 } 3526 3527 static __inline__ __m128i __DEFAULT_FN_ATTRS 3528 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { 3529 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3530 (__v4si)_mm_min_epu32(__A, __B), 3531 (__v4si)_mm_setzero_si128()); 3532 } 
3533 3534 static __inline__ __m128i __DEFAULT_FN_ATTRS 3535 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3536 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3537 (__v4si)_mm_min_epu32(__A, __B), 3538 (__v4si)__W); 3539 } 3540 3541 static __inline__ __m256i __DEFAULT_FN_ATTRS 3542 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) { 3543 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3544 (__v8si)_mm256_min_epu32(__A, __B), 3545 (__v8si)_mm256_setzero_si256()); 3546 } 3547 3548 static __inline__ __m256i __DEFAULT_FN_ATTRS 3549 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3550 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3551 (__v8si)_mm256_min_epu32(__A, __B), 3552 (__v8si)__W); 3553 } 3554 3555 static __inline__ __m128i __DEFAULT_FN_ATTRS 3556 _mm_min_epu64 (__m128i __A, __m128i __B) { 3557 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, 3558 (__v2di) __B, 3559 (__v2di) 3560 _mm_setzero_si128 (), 3561 (__mmask8) -1); 3562 } 3563 3564 static __inline__ __m128i __DEFAULT_FN_ATTRS 3565 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, 3566 __m128i __B) { 3567 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, 3568 (__v2di) __B, 3569 (__v2di) __W, __M); 3570 } 3571 3572 static __inline__ __m128i __DEFAULT_FN_ATTRS 3573 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 3574 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, 3575 (__v2di) __B, 3576 (__v2di) 3577 _mm_setzero_si128 (), 3578 __M); 3579 } 3580 3581 static __inline__ __m256i __DEFAULT_FN_ATTRS 3582 _mm256_min_epu64 (__m256i __A, __m256i __B) { 3583 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A, 3584 (__v4di) __B, 3585 (__v4di) 3586 _mm256_setzero_si256 (), 3587 (__mmask8) -1); 3588 } 3589 3590 static __inline__ __m256i __DEFAULT_FN_ATTRS 3591 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, 3592 __m256i __B) { 3593 
return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A, 3594 (__v4di) __B, 3595 (__v4di) __W, __M); 3596 } 3597 3598 static __inline__ __m256i __DEFAULT_FN_ATTRS 3599 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 3600 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A, 3601 (__v4di) __B, 3602 (__v4di) 3603 _mm256_setzero_si256 (), 3604 __M); 3605 } 3606 3607 #define _mm_roundscale_pd(A, imm) __extension__ ({ \ 3608 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3609 (int)(imm), \ 3610 (__v2df)_mm_setzero_pd(), \ 3611 (__mmask8)-1); }) 3612 3613 3614 #define _mm_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \ 3615 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3616 (int)(imm), \ 3617 (__v2df)(__m128d)(W), \ 3618 (__mmask8)(U)); }) 3619 3620 3621 #define _mm_maskz_roundscale_pd(U, A, imm) __extension__ ({ \ 3622 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3623 (int)(imm), \ 3624 (__v2df)_mm_setzero_pd(), \ 3625 (__mmask8)(U)); }) 3626 3627 3628 #define _mm256_roundscale_pd(A, imm) __extension__ ({ \ 3629 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3630 (int)(imm), \ 3631 (__v4df)_mm256_setzero_pd(), \ 3632 (__mmask8)-1); }) 3633 3634 3635 #define _mm256_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \ 3636 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3637 (int)(imm), \ 3638 (__v4df)(__m256d)(W), \ 3639 (__mmask8)(U)); }) 3640 3641 3642 #define _mm256_maskz_roundscale_pd(U, A, imm) __extension__ ({ \ 3643 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3644 (int)(imm), \ 3645 (__v4df)_mm256_setzero_pd(), \ 3646 (__mmask8)(U)); }) 3647 3648 #define _mm_roundscale_ps(A, imm) __extension__ ({ \ 3649 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3650 (__v4sf)_mm_setzero_ps(), \ 3651 (__mmask8)-1); }) 3652 3653 3654 #define _mm_mask_roundscale_ps(W, U, A, imm) 
__extension__ ({ \ 3655 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3656 (__v4sf)(__m128)(W), \ 3657 (__mmask8)(U)); }) 3658 3659 3660 #define _mm_maskz_roundscale_ps(U, A, imm) __extension__ ({ \ 3661 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3662 (__v4sf)_mm_setzero_ps(), \ 3663 (__mmask8)(U)); }) 3664 3665 #define _mm256_roundscale_ps(A, imm) __extension__ ({ \ 3666 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3667 (__v8sf)_mm256_setzero_ps(), \ 3668 (__mmask8)-1); }) 3669 3670 #define _mm256_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \ 3671 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3672 (__v8sf)(__m256)(W), \ 3673 (__mmask8)(U)); }) 3674 3675 3676 #define _mm256_maskz_roundscale_ps(U, A, imm) __extension__ ({ \ 3677 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3678 (__v8sf)_mm256_setzero_ps(), \ 3679 (__mmask8)(U)); }) 3680 3681 static __inline__ __m128d __DEFAULT_FN_ATTRS 3682 _mm_scalef_pd (__m128d __A, __m128d __B) { 3683 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3684 (__v2df) __B, 3685 (__v2df) 3686 _mm_setzero_pd (), 3687 (__mmask8) -1); 3688 } 3689 3690 static __inline__ __m128d __DEFAULT_FN_ATTRS 3691 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A, 3692 __m128d __B) { 3693 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3694 (__v2df) __B, 3695 (__v2df) __W, 3696 (__mmask8) __U); 3697 } 3698 3699 static __inline__ __m128d __DEFAULT_FN_ATTRS 3700 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) { 3701 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3702 (__v2df) __B, 3703 (__v2df) 3704 _mm_setzero_pd (), 3705 (__mmask8) __U); 3706 } 3707 3708 static __inline__ __m256d __DEFAULT_FN_ATTRS 3709 _mm256_scalef_pd (__m256d __A, __m256d __B) { 3710 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 
3711 (__v4df) __B, 3712 (__v4df) 3713 _mm256_setzero_pd (), 3714 (__mmask8) -1); 3715 } 3716 3717 static __inline__ __m256d __DEFAULT_FN_ATTRS 3718 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A, 3719 __m256d __B) { 3720 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3721 (__v4df) __B, 3722 (__v4df) __W, 3723 (__mmask8) __U); 3724 } 3725 3726 static __inline__ __m256d __DEFAULT_FN_ATTRS 3727 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) { 3728 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3729 (__v4df) __B, 3730 (__v4df) 3731 _mm256_setzero_pd (), 3732 (__mmask8) __U); 3733 } 3734 3735 static __inline__ __m128 __DEFAULT_FN_ATTRS 3736 _mm_scalef_ps (__m128 __A, __m128 __B) { 3737 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3738 (__v4sf) __B, 3739 (__v4sf) 3740 _mm_setzero_ps (), 3741 (__mmask8) -1); 3742 } 3743 3744 static __inline__ __m128 __DEFAULT_FN_ATTRS 3745 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3746 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3747 (__v4sf) __B, 3748 (__v4sf) __W, 3749 (__mmask8) __U); 3750 } 3751 3752 static __inline__ __m128 __DEFAULT_FN_ATTRS 3753 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) { 3754 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3755 (__v4sf) __B, 3756 (__v4sf) 3757 _mm_setzero_ps (), 3758 (__mmask8) __U); 3759 } 3760 3761 static __inline__ __m256 __DEFAULT_FN_ATTRS 3762 _mm256_scalef_ps (__m256 __A, __m256 __B) { 3763 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3764 (__v8sf) __B, 3765 (__v8sf) 3766 _mm256_setzero_ps (), 3767 (__mmask8) -1); 3768 } 3769 3770 static __inline__ __m256 __DEFAULT_FN_ATTRS 3771 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A, 3772 __m256 __B) { 3773 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3774 (__v8sf) __B, 3775 (__v8sf) __W, 3776 (__mmask8) __U); 3777 } 3778 3779 static 
__inline__ __m256 __DEFAULT_FN_ATTRS 3780 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { 3781 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3782 (__v8sf) __B, 3783 (__v8sf) 3784 _mm256_setzero_ps (), 3785 (__mmask8) __U); 3786 } 3787 3788 #define _mm_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \ 3789 __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)-1, \ 3790 (__v2di)(__m128i)(index), \ 3791 (__v2df)(__m128d)(v1), (int)(scale)); }) 3792 3793 #define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ 3794 __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)(mask), \ 3795 (__v2di)(__m128i)(index), \ 3796 (__v2df)(__m128d)(v1), (int)(scale)); }) 3797 3798 #define _mm_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \ 3799 __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)-1, \ 3800 (__v2di)(__m128i)(index), \ 3801 (__v2di)(__m128i)(v1), (int)(scale)); }) 3802 3803 #define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ 3804 __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)(mask), \ 3805 (__v2di)(__m128i)(index), \ 3806 (__v2di)(__m128i)(v1), (int)(scale)); }) 3807 3808 #define _mm256_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \ 3809 __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)-1, \ 3810 (__v4di)(__m256i)(index), \ 3811 (__v4df)(__m256d)(v1), (int)(scale)); }) 3812 3813 #define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ 3814 __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)(mask), \ 3815 (__v4di)(__m256i)(index), \ 3816 (__v4df)(__m256d)(v1), (int)(scale)); }) 3817 3818 #define _mm256_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \ 3819 __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)-1, \ 3820 (__v4di)(__m256i)(index), \ 3821 (__v4di)(__m256i)(v1), (int)(scale)); }) 3822 3823 #define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) 
__extension__ ({ \ 3824 __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)(mask), \ 3825 (__v4di)(__m256i)(index), \ 3826 (__v4di)(__m256i)(v1), (int)(scale)); }) 3827 3828 #define _mm_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \ 3829 __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \ 3830 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3831 (int)(scale)); }) 3832 3833 #define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ 3834 __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \ 3835 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3836 (int)(scale)); }) 3837 3838 #define _mm_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \ 3839 __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)-1, \ 3840 (__v2di)(__m128i)(index), \ 3841 (__v4si)(__m128i)(v1), (int)(scale)); }) 3842 3843 #define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ 3844 __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)(mask), \ 3845 (__v2di)(__m128i)(index), \ 3846 (__v4si)(__m128i)(v1), (int)(scale)); }) 3847 3848 #define _mm256_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \ 3849 __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \ 3850 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 3851 (int)(scale)); }) 3852 3853 #define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ 3854 __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \ 3855 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 3856 (int)(scale)); }) 3857 3858 #define _mm256_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \ 3859 __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)-1, \ 3860 (__v4di)(__m256i)(index), \ 3861 (__v4si)(__m128i)(v1), (int)(scale)); }) 3862 3863 #define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ 3864 __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)(mask), \ 3865 (__v4di)(__m256i)(index), \ 
3866 (__v4si)(__m128i)(v1), (int)(scale)); }) 3867 3868 #define _mm_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \ 3869 __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)-1, \ 3870 (__v4si)(__m128i)(index), \ 3871 (__v2df)(__m128d)(v1), (int)(scale)); }) 3872 3873 #define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ 3874 __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \ 3875 (__v4si)(__m128i)(index), \ 3876 (__v2df)(__m128d)(v1), (int)(scale)); }) 3877 3878 #define _mm_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \ 3879 __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \ 3880 (__v4si)(__m128i)(index), \ 3881 (__v2di)(__m128i)(v1), (int)(scale)); }) 3882 3883 #define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ 3884 __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \ 3885 (__v4si)(__m128i)(index), \ 3886 (__v2di)(__m128i)(v1), (int)(scale)); }) 3887 3888 #define _mm256_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \ 3889 __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \ 3890 (__v4si)(__m128i)(index), \ 3891 (__v4df)(__m256d)(v1), (int)(scale)); }) 3892 3893 #define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ 3894 __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \ 3895 (__v4si)(__m128i)(index), \ 3896 (__v4df)(__m256d)(v1), (int)(scale)); }) 3897 3898 #define _mm256_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \ 3899 __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \ 3900 (__v4si)(__m128i)(index), \ 3901 (__v4di)(__m256i)(v1), (int)(scale)); }) 3902 3903 #define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ 3904 __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \ 3905 (__v4si)(__m128i)(index), \ 3906 (__v4di)(__m256i)(v1), (int)(scale)); }) 3907 3908 #define _mm_i32scatter_ps(addr, index, 
v1, scale) __extension__ ({ \ 3909 __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \ 3910 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3911 (int)(scale)); }) 3912 3913 #define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ 3914 __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \ 3915 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3916 (int)(scale)); }) 3917 3918 #define _mm_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \ 3919 __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \ 3920 (__v4si)(__m128i)(index), \ 3921 (__v4si)(__m128i)(v1), (int)(scale)); }) 3922 3923 #define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ 3924 __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \ 3925 (__v4si)(__m128i)(index), \ 3926 (__v4si)(__m128i)(v1), (int)(scale)); }) 3927 3928 #define _mm256_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \ 3929 __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \ 3930 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 3931 (int)(scale)); }) 3932 3933 #define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ 3934 __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \ 3935 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 3936 (int)(scale)); }) 3937 3938 #define _mm256_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \ 3939 __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \ 3940 (__v8si)(__m256i)(index), \ 3941 (__v8si)(__m256i)(v1), (int)(scale)); }) 3942 3943 #define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ 3944 __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \ 3945 (__v8si)(__m256i)(index), \ 3946 (__v8si)(__m256i)(v1), (int)(scale)); }) 3947 3948 static __inline__ __m128d __DEFAULT_FN_ATTRS 3949 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { 3950 return 
(__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3951 (__v2df)_mm_sqrt_pd(__A), 3952 (__v2df)__W); 3953 } 3954 3955 static __inline__ __m128d __DEFAULT_FN_ATTRS 3956 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { 3957 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3958 (__v2df)_mm_sqrt_pd(__A), 3959 (__v2df)_mm_setzero_pd()); 3960 } 3961 3962 static __inline__ __m256d __DEFAULT_FN_ATTRS 3963 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { 3964 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3965 (__v4df)_mm256_sqrt_pd(__A), 3966 (__v4df)__W); 3967 } 3968 3969 static __inline__ __m256d __DEFAULT_FN_ATTRS 3970 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { 3971 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3972 (__v4df)_mm256_sqrt_pd(__A), 3973 (__v4df)_mm256_setzero_pd()); 3974 } 3975 3976 static __inline__ __m128 __DEFAULT_FN_ATTRS 3977 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { 3978 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3979 (__v4sf)_mm_sqrt_ps(__A), 3980 (__v4sf)__W); 3981 } 3982 3983 static __inline__ __m128 __DEFAULT_FN_ATTRS 3984 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { 3985 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3986 (__v4sf)_mm_sqrt_ps(__A), 3987 (__v4sf)_mm_setzero_pd()); 3988 } 3989 3990 static __inline__ __m256 __DEFAULT_FN_ATTRS 3991 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { 3992 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3993 (__v8sf)_mm256_sqrt_ps(__A), 3994 (__v8sf)__W); 3995 } 3996 3997 static __inline__ __m256 __DEFAULT_FN_ATTRS 3998 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { 3999 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 4000 (__v8sf)_mm256_sqrt_ps(__A), 4001 (__v8sf)_mm256_setzero_ps()); 4002 } 4003 4004 static __inline__ __m128d __DEFAULT_FN_ATTRS 4005 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 4006 return 
(__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 4007 (__v2df)_mm_sub_pd(__A, __B), 4008 (__v2df)__W); 4009 } 4010 4011 static __inline__ __m128d __DEFAULT_FN_ATTRS 4012 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) { 4013 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 4014 (__v2df)_mm_sub_pd(__A, __B), 4015 (__v2df)_mm_setzero_pd()); 4016 } 4017 4018 static __inline__ __m256d __DEFAULT_FN_ATTRS 4019 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 4020 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 4021 (__v4df)_mm256_sub_pd(__A, __B), 4022 (__v4df)__W); 4023 } 4024 4025 static __inline__ __m256d __DEFAULT_FN_ATTRS 4026 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) { 4027 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 4028 (__v4df)_mm256_sub_pd(__A, __B), 4029 (__v4df)_mm256_setzero_pd()); 4030 } 4031 4032 static __inline__ __m128 __DEFAULT_FN_ATTRS 4033 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 4034 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 4035 (__v4sf)_mm_sub_ps(__A, __B), 4036 (__v4sf)__W); 4037 } 4038 4039 static __inline__ __m128 __DEFAULT_FN_ATTRS 4040 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) { 4041 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 4042 (__v4sf)_mm_sub_ps(__A, __B), 4043 (__v4sf)_mm_setzero_ps()); 4044 } 4045 4046 static __inline__ __m256 __DEFAULT_FN_ATTRS 4047 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 4048 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 4049 (__v8sf)_mm256_sub_ps(__A, __B), 4050 (__v8sf)__W); 4051 } 4052 4053 static __inline__ __m256 __DEFAULT_FN_ATTRS 4054 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) { 4055 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 4056 (__v8sf)_mm256_sub_ps(__A, __B), 4057 (__v8sf)_mm256_setzero_ps()); 4058 } 4059 4060 static __inline__ __m128i __DEFAULT_FN_ATTRS 4061 
_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U, 4062 __m128i __B) { 4063 return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A, 4064 (__v4si) __I 4065 /* idx */ , 4066 (__v4si) __B, 4067 (__mmask8) __U); 4068 } 4069 4070 static __inline__ __m256i __DEFAULT_FN_ATTRS 4071 _mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I, 4072 __mmask8 __U, __m256i __B) { 4073 return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A, 4074 (__v8si) __I 4075 /* idx */ , 4076 (__v8si) __B, 4077 (__mmask8) __U); 4078 } 4079 4080 static __inline__ __m128d __DEFAULT_FN_ATTRS 4081 _mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U, 4082 __m128d __B) { 4083 return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A, 4084 (__v2di) __I 4085 /* idx */ , 4086 (__v2df) __B, 4087 (__mmask8) 4088 __U); 4089 } 4090 4091 static __inline__ __m256d __DEFAULT_FN_ATTRS 4092 _mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U, 4093 __m256d __B) { 4094 return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A, 4095 (__v4di) __I 4096 /* idx */ , 4097 (__v4df) __B, 4098 (__mmask8) 4099 __U); 4100 } 4101 4102 static __inline__ __m128 __DEFAULT_FN_ATTRS 4103 _mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U, 4104 __m128 __B) { 4105 return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A, 4106 (__v4si) __I 4107 /* idx */ , 4108 (__v4sf) __B, 4109 (__mmask8) __U); 4110 } 4111 4112 static __inline__ __m256 __DEFAULT_FN_ATTRS 4113 _mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U, 4114 __m256 __B) { 4115 return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A, 4116 (__v8si) __I 4117 /* idx */ , 4118 (__v8sf) __B, 4119 (__mmask8) __U); 4120 } 4121 4122 static __inline__ __m128i __DEFAULT_FN_ATTRS 4123 _mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U, 4124 __m128i __B) { 4125 return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A, 4126 
(__v2di) __I 4127 /* idx */ , 4128 (__v2di) __B, 4129 (__mmask8) __U); 4130 } 4131 4132 static __inline__ __m256i __DEFAULT_FN_ATTRS 4133 _mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I, 4134 __mmask8 __U, __m256i __B) { 4135 return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A, 4136 (__v4di) __I 4137 /* idx */ , 4138 (__v4di) __B, 4139 (__mmask8) __U); 4140 } 4141 4142 static __inline__ __m128i __DEFAULT_FN_ATTRS 4143 _mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B) { 4144 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I 4145 /* idx */ , 4146 (__v4si) __A, 4147 (__v4si) __B, 4148 (__mmask8) -1); 4149 } 4150 4151 static __inline__ __m128i __DEFAULT_FN_ATTRS 4152 _mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I, 4153 __m128i __B) { 4154 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I 4155 /* idx */ , 4156 (__v4si) __A, 4157 (__v4si) __B, 4158 (__mmask8) __U); 4159 } 4160 4161 static __inline__ __m128i __DEFAULT_FN_ATTRS 4162 _mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I, 4163 __m128i __B) { 4164 return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I 4165 /* idx */ , 4166 (__v4si) __A, 4167 (__v4si) __B, 4168 (__mmask8) 4169 __U); 4170 } 4171 4172 static __inline__ __m256i __DEFAULT_FN_ATTRS 4173 _mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B) { 4174 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I 4175 /* idx */ , 4176 (__v8si) __A, 4177 (__v8si) __B, 4178 (__mmask8) -1); 4179 } 4180 4181 static __inline__ __m256i __DEFAULT_FN_ATTRS 4182 _mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I, 4183 __m256i __B) { 4184 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I 4185 /* idx */ , 4186 (__v8si) __A, 4187 (__v8si) __B, 4188 (__mmask8) __U); 4189 } 4190 4191 static __inline__ __m256i __DEFAULT_FN_ATTRS 4192 _mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A, 4193 
__m256i __I, __m256i __B) { 4194 return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I 4195 /* idx */ , 4196 (__v8si) __A, 4197 (__v8si) __B, 4198 (__mmask8) 4199 __U); 4200 } 4201 4202 static __inline__ __m128d __DEFAULT_FN_ATTRS 4203 _mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B) { 4204 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I 4205 /* idx */ , 4206 (__v2df) __A, 4207 (__v2df) __B, 4208 (__mmask8) - 4209 1); 4210 } 4211 4212 static __inline__ __m128d __DEFAULT_FN_ATTRS 4213 _mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I, 4214 __m128d __B) { 4215 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I 4216 /* idx */ , 4217 (__v2df) __A, 4218 (__v2df) __B, 4219 (__mmask8) 4220 __U); 4221 } 4222 4223 static __inline__ __m128d __DEFAULT_FN_ATTRS 4224 _mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I, 4225 __m128d __B) { 4226 return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I 4227 /* idx */ , 4228 (__v2df) __A, 4229 (__v2df) __B, 4230 (__mmask8) 4231 __U); 4232 } 4233 4234 static __inline__ __m256d __DEFAULT_FN_ATTRS 4235 _mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B) { 4236 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I 4237 /* idx */ , 4238 (__v4df) __A, 4239 (__v4df) __B, 4240 (__mmask8) - 4241 1); 4242 } 4243 4244 static __inline__ __m256d __DEFAULT_FN_ATTRS 4245 _mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I, 4246 __m256d __B) { 4247 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I 4248 /* idx */ , 4249 (__v4df) __A, 4250 (__v4df) __B, 4251 (__mmask8) 4252 __U); 4253 } 4254 4255 static __inline__ __m256d __DEFAULT_FN_ATTRS 4256 _mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I, 4257 __m256d __B) { 4258 return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I 4259 /* idx */ , 4260 (__v4df) __A, 4261 (__v4df) __B, 4262 (__mmask8) 4263 __U); 4264 } 
4265 4266 static __inline__ __m128 __DEFAULT_FN_ATTRS 4267 _mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B) { 4268 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I 4269 /* idx */ , 4270 (__v4sf) __A, 4271 (__v4sf) __B, 4272 (__mmask8) -1); 4273 } 4274 4275 static __inline__ __m128 __DEFAULT_FN_ATTRS 4276 _mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I, 4277 __m128 __B) { 4278 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I 4279 /* idx */ , 4280 (__v4sf) __A, 4281 (__v4sf) __B, 4282 (__mmask8) __U); 4283 } 4284 4285 static __inline__ __m128 __DEFAULT_FN_ATTRS 4286 _mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I, 4287 __m128 __B) { 4288 return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I 4289 /* idx */ , 4290 (__v4sf) __A, 4291 (__v4sf) __B, 4292 (__mmask8) 4293 __U); 4294 } 4295 4296 static __inline__ __m256 __DEFAULT_FN_ATTRS 4297 _mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B) { 4298 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I 4299 /* idx */ , 4300 (__v8sf) __A, 4301 (__v8sf) __B, 4302 (__mmask8) -1); 4303 } 4304 4305 static __inline__ __m256 __DEFAULT_FN_ATTRS 4306 _mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I, 4307 __m256 __B) { 4308 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I 4309 /* idx */ , 4310 (__v8sf) __A, 4311 (__v8sf) __B, 4312 (__mmask8) __U); 4313 } 4314 4315 static __inline__ __m256 __DEFAULT_FN_ATTRS 4316 _mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I, 4317 __m256 __B) { 4318 return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I 4319 /* idx */ , 4320 (__v8sf) __A, 4321 (__v8sf) __B, 4322 (__mmask8) 4323 __U); 4324 } 4325 4326 static __inline__ __m128i __DEFAULT_FN_ATTRS 4327 _mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B) { 4328 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I 4329 /* idx */ , 4330 (__v2di) __A, 4331 
(__v2di) __B, 4332 (__mmask8) -1); 4333 } 4334 4335 static __inline__ __m128i __DEFAULT_FN_ATTRS 4336 _mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I, 4337 __m128i __B) { 4338 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I 4339 /* idx */ , 4340 (__v2di) __A, 4341 (__v2di) __B, 4342 (__mmask8) __U); 4343 } 4344 4345 static __inline__ __m128i __DEFAULT_FN_ATTRS 4346 _mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I, 4347 __m128i __B) { 4348 return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I 4349 /* idx */ , 4350 (__v2di) __A, 4351 (__v2di) __B, 4352 (__mmask8) 4353 __U); 4354 } 4355 4356 4357 static __inline__ __m256i __DEFAULT_FN_ATTRS 4358 _mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B) { 4359 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I 4360 /* idx */ , 4361 (__v4di) __A, 4362 (__v4di) __B, 4363 (__mmask8) -1); 4364 } 4365 4366 static __inline__ __m256i __DEFAULT_FN_ATTRS 4367 _mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I, 4368 __m256i __B) { 4369 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I 4370 /* idx */ , 4371 (__v4di) __A, 4372 (__v4di) __B, 4373 (__mmask8) __U); 4374 } 4375 4376 static __inline__ __m256i __DEFAULT_FN_ATTRS 4377 _mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A, 4378 __m256i __I, __m256i __B) { 4379 return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I 4380 /* idx */ , 4381 (__v4di) __A, 4382 (__v4di) __B, 4383 (__mmask8) 4384 __U); 4385 } 4386 4387 static __inline__ __m128i __DEFAULT_FN_ATTRS 4388 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4389 { 4390 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4391 (__v4si)_mm_cvtepi8_epi32(__A), 4392 (__v4si)__W); 4393 } 4394 4395 static __inline__ __m128i __DEFAULT_FN_ATTRS 4396 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A) 4397 { 4398 return 
(__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4399 (__v4si)_mm_cvtepi8_epi32(__A), 4400 (__v4si)_mm_setzero_si128()); 4401 } 4402 4403 static __inline__ __m256i __DEFAULT_FN_ATTRS 4404 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) 4405 { 4406 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4407 (__v8si)_mm256_cvtepi8_epi32(__A), 4408 (__v8si)__W); 4409 } 4410 4411 static __inline__ __m256i __DEFAULT_FN_ATTRS 4412 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) 4413 { 4414 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4415 (__v8si)_mm256_cvtepi8_epi32(__A), 4416 (__v8si)_mm256_setzero_si256()); 4417 } 4418 4419 static __inline__ __m128i __DEFAULT_FN_ATTRS 4420 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4421 { 4422 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4423 (__v2di)_mm_cvtepi8_epi64(__A), 4424 (__v2di)__W); 4425 } 4426 4427 static __inline__ __m128i __DEFAULT_FN_ATTRS 4428 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 4429 { 4430 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4431 (__v2di)_mm_cvtepi8_epi64(__A), 4432 (__v2di)_mm_setzero_si128()); 4433 } 4434 4435 static __inline__ __m256i __DEFAULT_FN_ATTRS 4436 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4437 { 4438 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4439 (__v4di)_mm256_cvtepi8_epi64(__A), 4440 (__v4di)__W); 4441 } 4442 4443 static __inline__ __m256i __DEFAULT_FN_ATTRS 4444 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 4445 { 4446 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4447 (__v4di)_mm256_cvtepi8_epi64(__A), 4448 (__v4di)_mm256_setzero_si256()); 4449 } 4450 4451 static __inline__ __m128i __DEFAULT_FN_ATTRS 4452 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X) 4453 { 4454 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4455 (__v2di)_mm_cvtepi32_epi64(__X), 4456 (__v2di)__W); 4457 } 4458 4459 static __inline__ 
__m128i __DEFAULT_FN_ATTRS 4460 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) 4461 { 4462 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4463 (__v2di)_mm_cvtepi32_epi64(__X), 4464 (__v2di)_mm_setzero_si128()); 4465 } 4466 4467 static __inline__ __m256i __DEFAULT_FN_ATTRS 4468 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X) 4469 { 4470 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4471 (__v4di)_mm256_cvtepi32_epi64(__X), 4472 (__v4di)__W); 4473 } 4474 4475 static __inline__ __m256i __DEFAULT_FN_ATTRS 4476 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) 4477 { 4478 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4479 (__v4di)_mm256_cvtepi32_epi64(__X), 4480 (__v4di)_mm256_setzero_si256()); 4481 } 4482 4483 static __inline__ __m128i __DEFAULT_FN_ATTRS 4484 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4485 { 4486 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4487 (__v4si)_mm_cvtepi16_epi32(__A), 4488 (__v4si)__W); 4489 } 4490 4491 static __inline__ __m128i __DEFAULT_FN_ATTRS 4492 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A) 4493 { 4494 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4495 (__v4si)_mm_cvtepi16_epi32(__A), 4496 (__v4si)_mm_setzero_si128()); 4497 } 4498 4499 static __inline__ __m256i __DEFAULT_FN_ATTRS 4500 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4501 { 4502 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4503 (__v8si)_mm256_cvtepi16_epi32(__A), 4504 (__v8si)__W); 4505 } 4506 4507 static __inline__ __m256i __DEFAULT_FN_ATTRS 4508 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) 4509 { 4510 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4511 (__v8si)_mm256_cvtepi16_epi32(__A), 4512 (__v8si)_mm256_setzero_si256()); 4513 } 4514 4515 static __inline__ __m128i __DEFAULT_FN_ATTRS 4516 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4517 { 4518 return 
(__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4519 (__v2di)_mm_cvtepi16_epi64(__A), 4520 (__v2di)__W); 4521 } 4522 4523 static __inline__ __m128i __DEFAULT_FN_ATTRS 4524 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 4525 { 4526 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4527 (__v2di)_mm_cvtepi16_epi64(__A), 4528 (__v2di)_mm_setzero_si128()); 4529 } 4530 4531 static __inline__ __m256i __DEFAULT_FN_ATTRS 4532 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4533 { 4534 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4535 (__v4di)_mm256_cvtepi16_epi64(__A), 4536 (__v4di)__W); 4537 } 4538 4539 static __inline__ __m256i __DEFAULT_FN_ATTRS 4540 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 4541 { 4542 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4543 (__v4di)_mm256_cvtepi16_epi64(__A), 4544 (__v4di)_mm256_setzero_si256()); 4545 } 4546 4547 4548 static __inline__ __m128i __DEFAULT_FN_ATTRS 4549 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4550 { 4551 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4552 (__v4si)_mm_cvtepu8_epi32(__A), 4553 (__v4si)__W); 4554 } 4555 4556 static __inline__ __m128i __DEFAULT_FN_ATTRS 4557 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) 4558 { 4559 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4560 (__v4si)_mm_cvtepu8_epi32(__A), 4561 (__v4si)_mm_setzero_si128()); 4562 } 4563 4564 static __inline__ __m256i __DEFAULT_FN_ATTRS 4565 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4566 { 4567 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4568 (__v8si)_mm256_cvtepu8_epi32(__A), 4569 (__v8si)__W); 4570 } 4571 4572 static __inline__ __m256i __DEFAULT_FN_ATTRS 4573 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) 4574 { 4575 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4576 (__v8si)_mm256_cvtepu8_epi32(__A), 4577 (__v8si)_mm256_setzero_si256()); 4578 } 4579 4580 static __inline__ __m128i 
__DEFAULT_FN_ATTRS 4581 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4582 { 4583 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4584 (__v2di)_mm_cvtepu8_epi64(__A), 4585 (__v2di)__W); 4586 } 4587 4588 static __inline__ __m128i __DEFAULT_FN_ATTRS 4589 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) 4590 { 4591 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4592 (__v2di)_mm_cvtepu8_epi64(__A), 4593 (__v2di)_mm_setzero_si128()); 4594 } 4595 4596 static __inline__ __m256i __DEFAULT_FN_ATTRS 4597 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4598 { 4599 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4600 (__v4di)_mm256_cvtepu8_epi64(__A), 4601 (__v4di)__W); 4602 } 4603 4604 static __inline__ __m256i __DEFAULT_FN_ATTRS 4605 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) 4606 { 4607 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4608 (__v4di)_mm256_cvtepu8_epi64(__A), 4609 (__v4di)_mm256_setzero_si256()); 4610 } 4611 4612 static __inline__ __m128i __DEFAULT_FN_ATTRS 4613 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X) 4614 { 4615 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4616 (__v2di)_mm_cvtepu32_epi64(__X), 4617 (__v2di)__W); 4618 } 4619 4620 static __inline__ __m128i __DEFAULT_FN_ATTRS 4621 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) 4622 { 4623 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4624 (__v2di)_mm_cvtepu32_epi64(__X), 4625 (__v2di)_mm_setzero_si128()); 4626 } 4627 4628 static __inline__ __m256i __DEFAULT_FN_ATTRS 4629 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X) 4630 { 4631 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4632 (__v4di)_mm256_cvtepu32_epi64(__X), 4633 (__v4di)__W); 4634 } 4635 4636 static __inline__ __m256i __DEFAULT_FN_ATTRS 4637 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) 4638 { 4639 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4640 
(__v4di)_mm256_cvtepu32_epi64(__X), 4641 (__v4di)_mm256_setzero_si256()); 4642 } 4643 4644 static __inline__ __m128i __DEFAULT_FN_ATTRS 4645 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4646 { 4647 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4648 (__v4si)_mm_cvtepu16_epi32(__A), 4649 (__v4si)__W); 4650 } 4651 4652 static __inline__ __m128i __DEFAULT_FN_ATTRS 4653 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) 4654 { 4655 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4656 (__v4si)_mm_cvtepu16_epi32(__A), 4657 (__v4si)_mm_setzero_si128()); 4658 } 4659 4660 static __inline__ __m256i __DEFAULT_FN_ATTRS 4661 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4662 { 4663 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4664 (__v8si)_mm256_cvtepu16_epi32(__A), 4665 (__v8si)__W); 4666 } 4667 4668 static __inline__ __m256i __DEFAULT_FN_ATTRS 4669 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) 4670 { 4671 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4672 (__v8si)_mm256_cvtepu16_epi32(__A), 4673 (__v8si)_mm256_setzero_si256()); 4674 } 4675 4676 static __inline__ __m128i __DEFAULT_FN_ATTRS 4677 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4678 { 4679 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4680 (__v2di)_mm_cvtepu16_epi64(__A), 4681 (__v2di)__W); 4682 } 4683 4684 static __inline__ __m128i __DEFAULT_FN_ATTRS 4685 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 4686 { 4687 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4688 (__v2di)_mm_cvtepu16_epi64(__A), 4689 (__v2di)_mm_setzero_si128()); 4690 } 4691 4692 static __inline__ __m256i __DEFAULT_FN_ATTRS 4693 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4694 { 4695 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4696 (__v4di)_mm256_cvtepu16_epi64(__A), 4697 (__v4di)__W); 4698 } 4699 4700 static __inline__ __m256i __DEFAULT_FN_ATTRS 4701 
_mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 4702 { 4703 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4704 (__v4di)_mm256_cvtepu16_epi64(__A), 4705 (__v4di)_mm256_setzero_si256()); 4706 } 4707 4708 4709 #define _mm_rol_epi32(a, b) __extension__ ({\ 4710 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ 4711 (__v4si)_mm_setzero_si128(), \ 4712 (__mmask8)-1); }) 4713 4714 #define _mm_mask_rol_epi32(w, u, a, b) __extension__ ({\ 4715 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ 4716 (__v4si)(__m128i)(w), (__mmask8)(u)); }) 4717 4718 #define _mm_maskz_rol_epi32(u, a, b) __extension__ ({\ 4719 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ 4720 (__v4si)_mm_setzero_si128(), \ 4721 (__mmask8)(u)); }) 4722 4723 #define _mm256_rol_epi32(a, b) __extension__ ({\ 4724 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ 4725 (__v8si)_mm256_setzero_si256(), \ 4726 (__mmask8)-1); }) 4727 4728 #define _mm256_mask_rol_epi32(w, u, a, b) __extension__ ({\ 4729 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ 4730 (__v8si)(__m256i)(w), (__mmask8)(u)); }) 4731 4732 #define _mm256_maskz_rol_epi32(u, a, b) __extension__ ({\ 4733 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ 4734 (__v8si)_mm256_setzero_si256(), \ 4735 (__mmask8)(u)); }) 4736 4737 #define _mm_rol_epi64(a, b) __extension__ ({\ 4738 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ 4739 (__v2di)_mm_setzero_di(), \ 4740 (__mmask8)-1); }) 4741 4742 #define _mm_mask_rol_epi64(w, u, a, b) __extension__ ({\ 4743 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ 4744 (__v2di)(__m128i)(w), (__mmask8)(u)); }) 4745 4746 #define _mm_maskz_rol_epi64(u, a, b) __extension__ ({\ 4747 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ 4748 (__v2di)_mm_setzero_di(), \ 4749 (__mmask8)(u)); }) 4750 4751 #define 
_mm256_rol_epi64(a, b) __extension__ ({\ 4752 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ 4753 (__v4di)_mm256_setzero_si256(), \ 4754 (__mmask8)-1); }) 4755 4756 #define _mm256_mask_rol_epi64(w, u, a, b) __extension__ ({\ 4757 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ 4758 (__v4di)(__m256i)(w), (__mmask8)(u)); }) 4759 4760 #define _mm256_maskz_rol_epi64(u, a, b) __extension__ ({\ 4761 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ 4762 (__v4di)_mm256_setzero_si256(), \ 4763 (__mmask8)(u)); }) 4764 4765 static __inline__ __m128i __DEFAULT_FN_ATTRS 4766 _mm_rolv_epi32 (__m128i __A, __m128i __B) 4767 { 4768 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A, 4769 (__v4si) __B, 4770 (__v4si) 4771 _mm_setzero_si128 (), 4772 (__mmask8) -1); 4773 } 4774 4775 static __inline__ __m128i __DEFAULT_FN_ATTRS 4776 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 4777 __m128i __B) 4778 { 4779 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A, 4780 (__v4si) __B, 4781 (__v4si) __W, 4782 (__mmask8) __U); 4783 } 4784 4785 static __inline__ __m128i __DEFAULT_FN_ATTRS 4786 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 4787 { 4788 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A, 4789 (__v4si) __B, 4790 (__v4si) 4791 _mm_setzero_si128 (), 4792 (__mmask8) __U); 4793 } 4794 4795 static __inline__ __m256i __DEFAULT_FN_ATTRS 4796 _mm256_rolv_epi32 (__m256i __A, __m256i __B) 4797 { 4798 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A, 4799 (__v8si) __B, 4800 (__v8si) 4801 _mm256_setzero_si256 (), 4802 (__mmask8) -1); 4803 } 4804 4805 static __inline__ __m256i __DEFAULT_FN_ATTRS 4806 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 4807 __m256i __B) 4808 { 4809 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A, 4810 (__v8si) __B, 4811 (__v8si) __W, 4812 (__mmask8) __U); 4813 } 4814 4815 static __inline__ __m256i 
__DEFAULT_FN_ATTRS 4816 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 4817 { 4818 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A, 4819 (__v8si) __B, 4820 (__v8si) 4821 _mm256_setzero_si256 (), 4822 (__mmask8) __U); 4823 } 4824 4825 static __inline__ __m128i __DEFAULT_FN_ATTRS 4826 _mm_rolv_epi64 (__m128i __A, __m128i __B) 4827 { 4828 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, 4829 (__v2di) __B, 4830 (__v2di) 4831 _mm_setzero_di (), 4832 (__mmask8) -1); 4833 } 4834 4835 static __inline__ __m128i __DEFAULT_FN_ATTRS 4836 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 4837 __m128i __B) 4838 { 4839 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, 4840 (__v2di) __B, 4841 (__v2di) __W, 4842 (__mmask8) __U); 4843 } 4844 4845 static __inline__ __m128i __DEFAULT_FN_ATTRS 4846 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 4847 { 4848 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, 4849 (__v2di) __B, 4850 (__v2di) 4851 _mm_setzero_di (), 4852 (__mmask8) __U); 4853 } 4854 4855 static __inline__ __m256i __DEFAULT_FN_ATTRS 4856 _mm256_rolv_epi64 (__m256i __A, __m256i __B) 4857 { 4858 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A, 4859 (__v4di) __B, 4860 (__v4di) 4861 _mm256_setzero_si256 (), 4862 (__mmask8) -1); 4863 } 4864 4865 static __inline__ __m256i __DEFAULT_FN_ATTRS 4866 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 4867 __m256i __B) 4868 { 4869 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A, 4870 (__v4di) __B, 4871 (__v4di) __W, 4872 (__mmask8) __U); 4873 } 4874 4875 static __inline__ __m256i __DEFAULT_FN_ATTRS 4876 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 4877 { 4878 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A, 4879 (__v4di) __B, 4880 (__v4di) 4881 _mm256_setzero_si256 (), 4882 (__mmask8) __U); 4883 } 4884 4885 #define _mm_ror_epi32(A, B) __extension__ ({ \ 4886 
(__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \ 4887 (__v4si)_mm_setzero_si128(), \ 4888 (__mmask8)-1); }) 4889 4890 #define _mm_mask_ror_epi32(W, U, A, B) __extension__ ({ \ 4891 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \ 4892 (__v4si)(__m128i)(W), (__mmask8)(U)); }) 4893 4894 #define _mm_maskz_ror_epi32(U, A, B) __extension__ ({ \ 4895 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \ 4896 (__v4si)_mm_setzero_si128(), \ 4897 (__mmask8)(U)); }) 4898 4899 #define _mm256_ror_epi32(A, B) __extension__ ({ \ 4900 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \ 4901 (__v8si)_mm256_setzero_si256(), \ 4902 (__mmask8)-1); }) 4903 4904 #define _mm256_mask_ror_epi32(W, U, A, B) __extension__ ({ \ 4905 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \ 4906 (__v8si)(__m256i)(W), (__mmask8)(U)); }) 4907 4908 #define _mm256_maskz_ror_epi32(U, A, B) __extension__ ({ \ 4909 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \ 4910 (__v8si)_mm256_setzero_si256(), \ 4911 (__mmask8)(U)); }) 4912 4913 #define _mm_ror_epi64(A, B) __extension__ ({ \ 4914 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \ 4915 (__v2di)_mm_setzero_di(), \ 4916 (__mmask8)-1); }) 4917 4918 #define _mm_mask_ror_epi64(W, U, A, B) __extension__ ({ \ 4919 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \ 4920 (__v2di)(__m128i)(W), (__mmask8)(U)); }) 4921 4922 #define _mm_maskz_ror_epi64(U, A, B) __extension__ ({ \ 4923 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \ 4924 (__v2di)_mm_setzero_di(), \ 4925 (__mmask8)(U)); }) 4926 4927 #define _mm256_ror_epi64(A, B) __extension__ ({ \ 4928 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \ 4929 (__v4di)_mm256_setzero_si256(), \ 4930 (__mmask8)-1); }) 4931 4932 #define _mm256_mask_ror_epi64(W, U, A, B) __extension__ ({ \ 4933 
(__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \ 4934 (__v4di)(__m256i)(W), (__mmask8)(U)); }) 4935 4936 #define _mm256_maskz_ror_epi64(U, A, B) __extension__ ({ \ 4937 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \ 4938 (__v4di)_mm256_setzero_si256(), \ 4939 (__mmask8)(U)); }) 4940 4941 static __inline__ __m128i __DEFAULT_FN_ATTRS 4942 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4943 { 4944 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4945 (__v4si)_mm_sll_epi32(__A, __B), 4946 (__v4si)__W); 4947 } 4948 4949 static __inline__ __m128i __DEFAULT_FN_ATTRS 4950 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) 4951 { 4952 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4953 (__v4si)_mm_sll_epi32(__A, __B), 4954 (__v4si)_mm_setzero_si128()); 4955 } 4956 4957 static __inline__ __m256i __DEFAULT_FN_ATTRS 4958 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4959 { 4960 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4961 (__v8si)_mm256_sll_epi32(__A, __B), 4962 (__v8si)__W); 4963 } 4964 4965 static __inline__ __m256i __DEFAULT_FN_ATTRS 4966 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) 4967 { 4968 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4969 (__v8si)_mm256_sll_epi32(__A, __B), 4970 (__v8si)_mm256_setzero_si256()); 4971 } 4972 4973 static __inline__ __m128i __DEFAULT_FN_ATTRS 4974 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) 4975 { 4976 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4977 (__v4si)_mm_slli_epi32(__A, __B), 4978 (__v4si)__W); 4979 } 4980 4981 static __inline__ __m128i __DEFAULT_FN_ATTRS 4982 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B) 4983 { 4984 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4985 (__v4si)_mm_slli_epi32(__A, __B), 4986 (__v4si)_mm_setzero_si128()); 4987 } 4988 4989 static __inline__ __m256i 
__DEFAULT_FN_ATTRS 4990 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) 4991 { 4992 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4993 (__v8si)_mm256_slli_epi32(__A, __B), 4994 (__v8si)__W); 4995 } 4996 4997 static __inline__ __m256i __DEFAULT_FN_ATTRS 4998 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B) 4999 { 5000 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5001 (__v8si)_mm256_slli_epi32(__A, __B), 5002 (__v8si)_mm256_setzero_si256()); 5003 } 5004 5005 static __inline__ __m128i __DEFAULT_FN_ATTRS 5006 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 5007 { 5008 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5009 (__v2di)_mm_sll_epi64(__A, __B), 5010 (__v2di)__W); 5011 } 5012 5013 static __inline__ __m128i __DEFAULT_FN_ATTRS 5014 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) 5015 { 5016 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5017 (__v2di)_mm_sll_epi64(__A, __B), 5018 (__v2di)_mm_setzero_di()); 5019 } 5020 5021 static __inline__ __m256i __DEFAULT_FN_ATTRS 5022 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 5023 { 5024 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5025 (__v4di)_mm256_sll_epi64(__A, __B), 5026 (__v4di)__W); 5027 } 5028 5029 static __inline__ __m256i __DEFAULT_FN_ATTRS 5030 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) 5031 { 5032 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5033 (__v4di)_mm256_sll_epi64(__A, __B), 5034 (__v4di)_mm256_setzero_si256()); 5035 } 5036 5037 static __inline__ __m128i __DEFAULT_FN_ATTRS 5038 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B) 5039 { 5040 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5041 (__v2di)_mm_slli_epi64(__A, __B), 5042 (__v2di)__W); 5043 } 5044 5045 static __inline__ __m128i __DEFAULT_FN_ATTRS 5046 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B) 5047 { 5048 return 
(__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5049 (__v2di)_mm_slli_epi64(__A, __B), 5050 (__v2di)_mm_setzero_di()); 5051 } 5052 5053 static __inline__ __m256i __DEFAULT_FN_ATTRS 5054 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B) 5055 { 5056 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5057 (__v4di)_mm256_slli_epi64(__A, __B), 5058 (__v4di)__W); 5059 } 5060 5061 static __inline__ __m256i __DEFAULT_FN_ATTRS 5062 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B) 5063 { 5064 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5065 (__v4di)_mm256_slli_epi64(__A, __B), 5066 (__v4di)_mm256_setzero_si256()); 5067 } 5068 5069 static __inline__ __m128i __DEFAULT_FN_ATTRS 5070 _mm_rorv_epi32 (__m128i __A, __m128i __B) 5071 { 5072 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A, 5073 (__v4si) __B, 5074 (__v4si) 5075 _mm_setzero_si128 (), 5076 (__mmask8) -1); 5077 } 5078 5079 static __inline__ __m128i __DEFAULT_FN_ATTRS 5080 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 5081 __m128i __B) 5082 { 5083 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A, 5084 (__v4si) __B, 5085 (__v4si) __W, 5086 (__mmask8) __U); 5087 } 5088 5089 static __inline__ __m128i __DEFAULT_FN_ATTRS 5090 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 5091 { 5092 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A, 5093 (__v4si) __B, 5094 (__v4si) 5095 _mm_setzero_si128 (), 5096 (__mmask8) __U); 5097 } 5098 5099 static __inline__ __m256i __DEFAULT_FN_ATTRS 5100 _mm256_rorv_epi32 (__m256i __A, __m256i __B) 5101 { 5102 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A, 5103 (__v8si) __B, 5104 (__v8si) 5105 _mm256_setzero_si256 (), 5106 (__mmask8) -1); 5107 } 5108 5109 static __inline__ __m256i __DEFAULT_FN_ATTRS 5110 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 5111 __m256i __B) 5112 { 5113 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A, 5114 
(__v8si) __B, 5115 (__v8si) __W, 5116 (__mmask8) __U); 5117 } 5118 5119 static __inline__ __m256i __DEFAULT_FN_ATTRS 5120 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 5121 { 5122 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A, 5123 (__v8si) __B, 5124 (__v8si) 5125 _mm256_setzero_si256 (), 5126 (__mmask8) __U); 5127 } 5128 5129 static __inline__ __m128i __DEFAULT_FN_ATTRS 5130 _mm_rorv_epi64 (__m128i __A, __m128i __B) 5131 { 5132 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, 5133 (__v2di) __B, 5134 (__v2di) 5135 _mm_setzero_di (), 5136 (__mmask8) -1); 5137 } 5138 5139 static __inline__ __m128i __DEFAULT_FN_ATTRS 5140 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 5141 __m128i __B) 5142 { 5143 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, 5144 (__v2di) __B, 5145 (__v2di) __W, 5146 (__mmask8) __U); 5147 } 5148 5149 static __inline__ __m128i __DEFAULT_FN_ATTRS 5150 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 5151 { 5152 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, 5153 (__v2di) __B, 5154 (__v2di) 5155 _mm_setzero_di (), 5156 (__mmask8) __U); 5157 } 5158 5159 static __inline__ __m256i __DEFAULT_FN_ATTRS 5160 _mm256_rorv_epi64 (__m256i __A, __m256i __B) 5161 { 5162 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A, 5163 (__v4di) __B, 5164 (__v4di) 5165 _mm256_setzero_si256 (), 5166 (__mmask8) -1); 5167 } 5168 5169 static __inline__ __m256i __DEFAULT_FN_ATTRS 5170 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 5171 __m256i __B) 5172 { 5173 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A, 5174 (__v4di) __B, 5175 (__v4di) __W, 5176 (__mmask8) __U); 5177 } 5178 5179 static __inline__ __m256i __DEFAULT_FN_ATTRS 5180 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 5181 { 5182 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A, 5183 (__v4di) __B, 5184 (__v4di) 5185 _mm256_setzero_si256 (), 5186 
(__mmask8) __U); 5187 } 5188 5189 static __inline__ __m128i __DEFAULT_FN_ATTRS 5190 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5191 { 5192 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5193 (__v2di)_mm_sllv_epi64(__X, __Y), 5194 (__v2di)__W); 5195 } 5196 5197 static __inline__ __m128i __DEFAULT_FN_ATTRS 5198 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 5199 { 5200 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5201 (__v2di)_mm_sllv_epi64(__X, __Y), 5202 (__v2di)_mm_setzero_di()); 5203 } 5204 5205 static __inline__ __m256i __DEFAULT_FN_ATTRS 5206 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5207 { 5208 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5209 (__v4di)_mm256_sllv_epi64(__X, __Y), 5210 (__v4di)__W); 5211 } 5212 5213 static __inline__ __m256i __DEFAULT_FN_ATTRS 5214 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) 5215 { 5216 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5217 (__v4di)_mm256_sllv_epi64(__X, __Y), 5218 (__v4di)_mm256_setzero_si256()); 5219 } 5220 5221 static __inline__ __m128i __DEFAULT_FN_ATTRS 5222 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5223 { 5224 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5225 (__v4si)_mm_sllv_epi32(__X, __Y), 5226 (__v4si)__W); 5227 } 5228 5229 static __inline__ __m128i __DEFAULT_FN_ATTRS 5230 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 5231 { 5232 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5233 (__v4si)_mm_sllv_epi32(__X, __Y), 5234 (__v4si)_mm_setzero_si128()); 5235 } 5236 5237 static __inline__ __m256i __DEFAULT_FN_ATTRS 5238 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5239 { 5240 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5241 (__v8si)_mm256_sllv_epi32(__X, __Y), 5242 (__v8si)__W); 5243 } 5244 5245 static __inline__ __m256i __DEFAULT_FN_ATTRS 5246 
_mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 5247 { 5248 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5249 (__v8si)_mm256_sllv_epi32(__X, __Y), 5250 (__v8si)_mm256_setzero_si256()); 5251 } 5252 5253 static __inline__ __m128i __DEFAULT_FN_ATTRS 5254 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5255 { 5256 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5257 (__v2di)_mm_srlv_epi64(__X, __Y), 5258 (__v2di)__W); 5259 } 5260 5261 static __inline__ __m128i __DEFAULT_FN_ATTRS 5262 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 5263 { 5264 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5265 (__v2di)_mm_srlv_epi64(__X, __Y), 5266 (__v2di)_mm_setzero_di()); 5267 } 5268 5269 static __inline__ __m256i __DEFAULT_FN_ATTRS 5270 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5271 { 5272 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5273 (__v4di)_mm256_srlv_epi64(__X, __Y), 5274 (__v4di)__W); 5275 } 5276 5277 static __inline__ __m256i __DEFAULT_FN_ATTRS 5278 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) 5279 { 5280 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5281 (__v4di)_mm256_srlv_epi64(__X, __Y), 5282 (__v4di)_mm256_setzero_si256()); 5283 } 5284 5285 static __inline__ __m128i __DEFAULT_FN_ATTRS 5286 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5287 { 5288 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5289 (__v4si)_mm_srlv_epi32(__X, __Y), 5290 (__v4si)__W); 5291 } 5292 5293 static __inline__ __m128i __DEFAULT_FN_ATTRS 5294 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 5295 { 5296 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5297 (__v4si)_mm_srlv_epi32(__X, __Y), 5298 (__v4si)_mm_setzero_si128()); 5299 } 5300 5301 static __inline__ __m256i __DEFAULT_FN_ATTRS 5302 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5303 { 
5304 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5305 (__v8si)_mm256_srlv_epi32(__X, __Y), 5306 (__v8si)__W); 5307 } 5308 5309 static __inline__ __m256i __DEFAULT_FN_ATTRS 5310 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 5311 { 5312 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5313 (__v8si)_mm256_srlv_epi32(__X, __Y), 5314 (__v8si)_mm256_setzero_si256()); 5315 } 5316 5317 static __inline__ __m128i __DEFAULT_FN_ATTRS 5318 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 5319 { 5320 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5321 (__v4si)_mm_srl_epi32(__A, __B), 5322 (__v4si)__W); 5323 } 5324 5325 static __inline__ __m128i __DEFAULT_FN_ATTRS 5326 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) 5327 { 5328 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5329 (__v4si)_mm_srl_epi32(__A, __B), 5330 (__v4si)_mm_setzero_si128()); 5331 } 5332 5333 static __inline__ __m256i __DEFAULT_FN_ATTRS 5334 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 5335 { 5336 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5337 (__v8si)_mm256_srl_epi32(__A, __B), 5338 (__v8si)__W); 5339 } 5340 5341 static __inline__ __m256i __DEFAULT_FN_ATTRS 5342 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) 5343 { 5344 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5345 (__v8si)_mm256_srl_epi32(__A, __B), 5346 (__v8si)_mm256_setzero_si256()); 5347 } 5348 5349 static __inline__ __m128i __DEFAULT_FN_ATTRS 5350 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) 5351 { 5352 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5353 (__v4si)_mm_srli_epi32(__A, __B), 5354 (__v4si)__W); 5355 } 5356 5357 static __inline__ __m128i __DEFAULT_FN_ATTRS 5358 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B) 5359 { 5360 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5361 (__v4si)_mm_srli_epi32(__A, __B), 5362 
(__v4si)_mm_setzero_si128()); 5363 } 5364 5365 static __inline__ __m256i __DEFAULT_FN_ATTRS 5366 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) 5367 { 5368 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5369 (__v8si)_mm256_srli_epi32(__A, __B), 5370 (__v8si)__W); 5371 } 5372 5373 static __inline__ __m256i __DEFAULT_FN_ATTRS 5374 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B) 5375 { 5376 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5377 (__v8si)_mm256_srli_epi32(__A, __B), 5378 (__v8si)_mm256_setzero_si256()); 5379 } 5380 5381 static __inline__ __m128i __DEFAULT_FN_ATTRS 5382 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 5383 { 5384 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5385 (__v2di)_mm_srl_epi64(__A, __B), 5386 (__v2di)__W); 5387 } 5388 5389 static __inline__ __m128i __DEFAULT_FN_ATTRS 5390 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) 5391 { 5392 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5393 (__v2di)_mm_srl_epi64(__A, __B), 5394 (__v2di)_mm_setzero_di()); 5395 } 5396 5397 static __inline__ __m256i __DEFAULT_FN_ATTRS 5398 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 5399 { 5400 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5401 (__v4di)_mm256_srl_epi64(__A, __B), 5402 (__v4di)__W); 5403 } 5404 5405 static __inline__ __m256i __DEFAULT_FN_ATTRS 5406 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) 5407 { 5408 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5409 (__v4di)_mm256_srl_epi64(__A, __B), 5410 (__v4di)_mm256_setzero_si256()); 5411 } 5412 5413 static __inline__ __m128i __DEFAULT_FN_ATTRS 5414 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B) 5415 { 5416 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5417 (__v2di)_mm_srli_epi64(__A, __B), 5418 (__v2di)__W); 5419 } 5420 5421 static __inline__ __m128i __DEFAULT_FN_ATTRS 5422 
_mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B) 5423 { 5424 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5425 (__v2di)_mm_srli_epi64(__A, __B), 5426 (__v2di)_mm_setzero_di()); 5427 } 5428 5429 static __inline__ __m256i __DEFAULT_FN_ATTRS 5430 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B) 5431 { 5432 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5433 (__v4di)_mm256_srli_epi64(__A, __B), 5434 (__v4di)__W); 5435 } 5436 5437 static __inline__ __m256i __DEFAULT_FN_ATTRS 5438 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B) 5439 { 5440 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5441 (__v4di)_mm256_srli_epi64(__A, __B), 5442 (__v4di)_mm256_setzero_si256()); 5443 } 5444 5445 static __inline__ __m128i __DEFAULT_FN_ATTRS 5446 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5447 { 5448 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5449 (__v4si)_mm_srav_epi32(__X, __Y), 5450 (__v4si)__W); 5451 } 5452 5453 static __inline__ __m128i __DEFAULT_FN_ATTRS 5454 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 5455 { 5456 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5457 (__v4si)_mm_srav_epi32(__X, __Y), 5458 (__v4si)_mm_setzero_si128()); 5459 } 5460 5461 static __inline__ __m256i __DEFAULT_FN_ATTRS 5462 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5463 { 5464 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5465 (__v8si)_mm256_srav_epi32(__X, __Y), 5466 (__v8si)__W); 5467 } 5468 5469 static __inline__ __m256i __DEFAULT_FN_ATTRS 5470 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 5471 { 5472 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5473 (__v8si)_mm256_srav_epi32(__X, __Y), 5474 (__v8si)_mm256_setzero_si256()); 5475 } 5476 5477 static __inline__ __m128i __DEFAULT_FN_ATTRS 5478 _mm_srav_epi64(__m128i __X, __m128i __Y) 5479 { 5480 return 
(__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y); 5481 } 5482 5483 static __inline__ __m128i __DEFAULT_FN_ATTRS 5484 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5485 { 5486 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5487 (__v2di)_mm_srav_epi64(__X, __Y), 5488 (__v2di)__W); 5489 } 5490 5491 static __inline__ __m128i __DEFAULT_FN_ATTRS 5492 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 5493 { 5494 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5495 (__v2di)_mm_srav_epi64(__X, __Y), 5496 (__v2di)_mm_setzero_di()); 5497 } 5498 5499 static __inline__ __m256i __DEFAULT_FN_ATTRS 5500 _mm256_srav_epi64(__m256i __X, __m256i __Y) 5501 { 5502 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y); 5503 } 5504 5505 static __inline__ __m256i __DEFAULT_FN_ATTRS 5506 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5507 { 5508 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5509 (__v4di)_mm256_srav_epi64(__X, __Y), 5510 (__v4di)__W); 5511 } 5512 5513 static __inline__ __m256i __DEFAULT_FN_ATTRS 5514 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) 5515 { 5516 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5517 (__v4di)_mm256_srav_epi64(__X, __Y), 5518 (__v4di)_mm256_setzero_si256()); 5519 } 5520 5521 static __inline__ __m128i __DEFAULT_FN_ATTRS 5522 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 5523 { 5524 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 5525 (__v4si) __A, 5526 (__v4si) __W); 5527 } 5528 5529 static __inline__ __m128i __DEFAULT_FN_ATTRS 5530 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A) 5531 { 5532 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 5533 (__v4si) __A, 5534 (__v4si) _mm_setzero_si128 ()); 5535 } 5536 5537 5538 static __inline__ __m256i __DEFAULT_FN_ATTRS 5539 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 5540 { 5541 return (__m256i) 
__builtin_ia32_selectd_256 ((__mmask8) __U, 5542 (__v8si) __A, 5543 (__v8si) __W); 5544 } 5545 5546 static __inline__ __m256i __DEFAULT_FN_ATTRS 5547 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A) 5548 { 5549 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 5550 (__v8si) __A, 5551 (__v8si) _mm256_setzero_si256 ()); 5552 } 5553 5554 static __inline__ __m128i __DEFAULT_FN_ATTRS 5555 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) 5556 { 5557 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, 5558 (__v4si) __W, 5559 (__mmask8) 5560 __U); 5561 } 5562 5563 static __inline__ __m128i __DEFAULT_FN_ATTRS 5564 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P) 5565 { 5566 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, 5567 (__v4si) 5568 _mm_setzero_si128 (), 5569 (__mmask8) 5570 __U); 5571 } 5572 5573 static __inline__ __m256i __DEFAULT_FN_ATTRS 5574 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P) 5575 { 5576 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, 5577 (__v8si) __W, 5578 (__mmask8) 5579 __U); 5580 } 5581 5582 static __inline__ __m256i __DEFAULT_FN_ATTRS 5583 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P) 5584 { 5585 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, 5586 (__v8si) 5587 _mm256_setzero_si256 (), 5588 (__mmask8) 5589 __U); 5590 } 5591 5592 static __inline__ void __DEFAULT_FN_ATTRS 5593 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A) 5594 { 5595 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P, 5596 (__v4si) __A, 5597 (__mmask8) __U); 5598 } 5599 5600 static __inline__ void __DEFAULT_FN_ATTRS 5601 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) 5602 { 5603 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P, 5604 (__v8si) __A, 5605 (__mmask8) __U); 5606 } 5607 5608 static __inline__ __m128i __DEFAULT_FN_ATTRS 5609 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 5610 
{ 5611 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 5612 (__v2di) __A, 5613 (__v2di) __W); 5614 } 5615 5616 static __inline__ __m128i __DEFAULT_FN_ATTRS 5617 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) 5618 { 5619 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 5620 (__v2di) __A, 5621 (__v2di) _mm_setzero_di ()); 5622 } 5623 5624 static __inline__ __m256i __DEFAULT_FN_ATTRS 5625 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 5626 { 5627 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 5628 (__v4di) __A, 5629 (__v4di) __W); 5630 } 5631 5632 static __inline__ __m256i __DEFAULT_FN_ATTRS 5633 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A) 5634 { 5635 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 5636 (__v4di) __A, 5637 (__v4di) _mm256_setzero_si256 ()); 5638 } 5639 5640 static __inline__ __m128i __DEFAULT_FN_ATTRS 5641 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) 5642 { 5643 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, 5644 (__v2di) __W, 5645 (__mmask8) 5646 __U); 5647 } 5648 5649 static __inline__ __m128i __DEFAULT_FN_ATTRS 5650 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P) 5651 { 5652 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, 5653 (__v2di) 5654 _mm_setzero_di (), 5655 (__mmask8) 5656 __U); 5657 } 5658 5659 static __inline__ __m256i __DEFAULT_FN_ATTRS 5660 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) 5661 { 5662 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, 5663 (__v4di) __W, 5664 (__mmask8) 5665 __U); 5666 } 5667 5668 static __inline__ __m256i __DEFAULT_FN_ATTRS 5669 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P) 5670 { 5671 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, 5672 (__v4di) 5673 _mm256_setzero_si256 (), 5674 (__mmask8) 5675 __U); 5676 } 5677 5678 static __inline__ void __DEFAULT_FN_ATTRS 5679 _mm_mask_store_epi64 (void 
*__P, __mmask8 __U, __m128i __A) 5680 { 5681 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P, 5682 (__v2di) __A, 5683 (__mmask8) __U); 5684 } 5685 5686 static __inline__ void __DEFAULT_FN_ATTRS 5687 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A) 5688 { 5689 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P, 5690 (__v4di) __A, 5691 (__mmask8) __U); 5692 } 5693 5694 static __inline__ __m128d __DEFAULT_FN_ATTRS 5695 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A) 5696 { 5697 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5698 (__v2df)_mm_movedup_pd(__A), 5699 (__v2df)__W); 5700 } 5701 5702 static __inline__ __m128d __DEFAULT_FN_ATTRS 5703 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A) 5704 { 5705 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5706 (__v2df)_mm_movedup_pd(__A), 5707 (__v2df)_mm_setzero_pd()); 5708 } 5709 5710 static __inline__ __m256d __DEFAULT_FN_ATTRS 5711 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A) 5712 { 5713 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5714 (__v4df)_mm256_movedup_pd(__A), 5715 (__v4df)__W); 5716 } 5717 5718 static __inline__ __m256d __DEFAULT_FN_ATTRS 5719 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) 5720 { 5721 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5722 (__v4df)_mm256_movedup_pd(__A), 5723 (__v4df)_mm256_setzero_pd()); 5724 } 5725 5726 static __inline__ __m128i __DEFAULT_FN_ATTRS 5727 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) 5728 { 5729 return (__m128i)__builtin_ia32_selectd_128(__M, 5730 (__v4si) _mm_set1_epi32(__A), 5731 (__v4si)__O); 5732 } 5733 5734 static __inline__ __m128i __DEFAULT_FN_ATTRS 5735 _mm_maskz_set1_epi32( __mmask8 __M, int __A) 5736 { 5737 return (__m128i)__builtin_ia32_selectd_128(__M, 5738 (__v4si) _mm_set1_epi32(__A), 5739 (__v4si)_mm_setzero_si128()); 5740 } 5741 5742 static __inline__ __m256i __DEFAULT_FN_ATTRS 5743 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int 
__A) 5744 { 5745 return (__m256i)__builtin_ia32_selectd_256(__M, 5746 (__v8si) _mm256_set1_epi32(__A), 5747 (__v8si)__O); 5748 } 5749 5750 static __inline__ __m256i __DEFAULT_FN_ATTRS 5751 _mm256_maskz_set1_epi32( __mmask8 __M, int __A) 5752 { 5753 return (__m256i)__builtin_ia32_selectd_256(__M, 5754 (__v8si) _mm256_set1_epi32(__A), 5755 (__v8si)_mm256_setzero_si256()); 5756 } 5757 5758 5759 #ifdef __x86_64__ 5760 static __inline__ __m128i __DEFAULT_FN_ATTRS 5761 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) 5762 { 5763 return (__m128i) __builtin_ia32_selectq_128(__M, 5764 (__v2di) _mm_set1_epi64x(__A), 5765 (__v2di) __O); 5766 } 5767 5768 static __inline__ __m128i __DEFAULT_FN_ATTRS 5769 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A) 5770 { 5771 return (__m128i) __builtin_ia32_selectq_128(__M, 5772 (__v2di) _mm_set1_epi64x(__A), 5773 (__v2di) _mm_setzero_si128()); 5774 } 5775 5776 static __inline__ __m256i __DEFAULT_FN_ATTRS 5777 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) 5778 { 5779 return (__m256i) __builtin_ia32_selectq_256(__M, 5780 (__v4di) _mm256_set1_epi64x(__A), 5781 (__v4di) __O) ; 5782 } 5783 5784 static __inline__ __m256i __DEFAULT_FN_ATTRS 5785 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) 5786 { 5787 return (__m256i) __builtin_ia32_selectq_256(__M, 5788 (__v4di) _mm256_set1_epi64x(__A), 5789 (__v4di) _mm256_setzero_si256()); 5790 } 5791 5792 #endif 5793 5794 #define _mm_fixupimm_pd(A, B, C, imm) __extension__ ({ \ 5795 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 5796 (__v2df)(__m128d)(B), \ 5797 (__v2di)(__m128i)(C), (int)(imm), \ 5798 (__mmask8)-1); }) 5799 5800 #define _mm_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \ 5801 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 5802 (__v2df)(__m128d)(B), \ 5803 (__v2di)(__m128i)(C), (int)(imm), \ 5804 (__mmask8)(U)); }) 5805 5806 #define _mm_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \ 5807 
(__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \ 5808 (__v2df)(__m128d)(B), \ 5809 (__v2di)(__m128i)(C), \ 5810 (int)(imm), (__mmask8)(U)); }) 5811 5812 #define _mm256_fixupimm_pd(A, B, C, imm) __extension__ ({ \ 5813 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 5814 (__v4df)(__m256d)(B), \ 5815 (__v4di)(__m256i)(C), (int)(imm), \ 5816 (__mmask8)-1); }) 5817 5818 #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \ 5819 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 5820 (__v4df)(__m256d)(B), \ 5821 (__v4di)(__m256i)(C), (int)(imm), \ 5822 (__mmask8)(U)); }) 5823 5824 #define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \ 5825 (__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \ 5826 (__v4df)(__m256d)(B), \ 5827 (__v4di)(__m256i)(C), \ 5828 (int)(imm), (__mmask8)(U)); }) 5829 5830 #define _mm_fixupimm_ps(A, B, C, imm) __extension__ ({ \ 5831 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 5832 (__v4sf)(__m128)(B), \ 5833 (__v4si)(__m128i)(C), (int)(imm), \ 5834 (__mmask8)-1); }) 5835 5836 #define _mm_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \ 5837 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 5838 (__v4sf)(__m128)(B), \ 5839 (__v4si)(__m128i)(C), (int)(imm), \ 5840 (__mmask8)(U)); }) 5841 5842 #define _mm_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \ 5843 (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \ 5844 (__v4sf)(__m128)(B), \ 5845 (__v4si)(__m128i)(C), (int)(imm), \ 5846 (__mmask8)(U)); }) 5847 5848 #define _mm256_fixupimm_ps(A, B, C, imm) __extension__ ({ \ 5849 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 5850 (__v8sf)(__m256)(B), \ 5851 (__v8si)(__m256i)(C), (int)(imm), \ 5852 (__mmask8)-1); }) 5853 5854 #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \ 5855 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 5856 (__v8sf)(__m256)(B), 
\ 5857 (__v8si)(__m256i)(C), (int)(imm), \ 5858 (__mmask8)(U)); }) 5859 5860 #define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \ 5861 (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \ 5862 (__v8sf)(__m256)(B), \ 5863 (__v8si)(__m256i)(C), (int)(imm), \ 5864 (__mmask8)(U)); }) 5865 5866 static __inline__ __m128d __DEFAULT_FN_ATTRS 5867 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) 5868 { 5869 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, 5870 (__v2df) __W, 5871 (__mmask8) __U); 5872 } 5873 5874 static __inline__ __m128d __DEFAULT_FN_ATTRS 5875 _mm_maskz_load_pd (__mmask8 __U, void const *__P) 5876 { 5877 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, 5878 (__v2df) 5879 _mm_setzero_pd (), 5880 (__mmask8) __U); 5881 } 5882 5883 static __inline__ __m256d __DEFAULT_FN_ATTRS 5884 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P) 5885 { 5886 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, 5887 (__v4df) __W, 5888 (__mmask8) __U); 5889 } 5890 5891 static __inline__ __m256d __DEFAULT_FN_ATTRS 5892 _mm256_maskz_load_pd (__mmask8 __U, void const *__P) 5893 { 5894 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, 5895 (__v4df) 5896 _mm256_setzero_pd (), 5897 (__mmask8) __U); 5898 } 5899 5900 static __inline__ __m128 __DEFAULT_FN_ATTRS 5901 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P) 5902 { 5903 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, 5904 (__v4sf) __W, 5905 (__mmask8) __U); 5906 } 5907 5908 static __inline__ __m128 __DEFAULT_FN_ATTRS 5909 _mm_maskz_load_ps (__mmask8 __U, void const *__P) 5910 { 5911 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, 5912 (__v4sf) 5913 _mm_setzero_ps (), 5914 (__mmask8) __U); 5915 } 5916 5917 static __inline__ __m256 __DEFAULT_FN_ATTRS 5918 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P) 5919 { 5920 return (__m256) __builtin_ia32_loadaps256_mask 
((__v8sf *) __P, 5921 (__v8sf) __W, 5922 (__mmask8) __U); 5923 } 5924 5925 static __inline__ __m256 __DEFAULT_FN_ATTRS 5926 _mm256_maskz_load_ps (__mmask8 __U, void const *__P) 5927 { 5928 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, 5929 (__v8sf) 5930 _mm256_setzero_ps (), 5931 (__mmask8) __U); 5932 } 5933 5934 static __inline__ __m128i __DEFAULT_FN_ATTRS 5935 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) 5936 { 5937 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, 5938 (__v2di) __W, 5939 (__mmask8) __U); 5940 } 5941 5942 static __inline__ __m128i __DEFAULT_FN_ATTRS 5943 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 5944 { 5945 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, 5946 (__v2di) 5947 _mm_setzero_si128 (), 5948 (__mmask8) __U); 5949 } 5950 5951 static __inline__ __m256i __DEFAULT_FN_ATTRS 5952 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) 5953 { 5954 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, 5955 (__v4di) __W, 5956 (__mmask8) __U); 5957 } 5958 5959 static __inline__ __m256i __DEFAULT_FN_ATTRS 5960 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 5961 { 5962 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, 5963 (__v4di) 5964 _mm256_setzero_si256 (), 5965 (__mmask8) __U); 5966 } 5967 5968 static __inline__ __m128i __DEFAULT_FN_ATTRS 5969 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) 5970 { 5971 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, 5972 (__v4si) __W, 5973 (__mmask8) __U); 5974 } 5975 5976 static __inline__ __m128i __DEFAULT_FN_ATTRS 5977 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 5978 { 5979 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, 5980 (__v4si) 5981 _mm_setzero_si128 (), 5982 (__mmask8) __U); 5983 } 5984 5985 static __inline__ __m256i __DEFAULT_FN_ATTRS 5986 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void 
const *__P) 5987 { 5988 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, 5989 (__v8si) __W, 5990 (__mmask8) __U); 5991 } 5992 5993 static __inline__ __m256i __DEFAULT_FN_ATTRS 5994 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 5995 { 5996 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, 5997 (__v8si) 5998 _mm256_setzero_si256 (), 5999 (__mmask8) __U); 6000 } 6001 6002 static __inline__ __m128d __DEFAULT_FN_ATTRS 6003 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) 6004 { 6005 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, 6006 (__v2df) __W, 6007 (__mmask8) __U); 6008 } 6009 6010 static __inline__ __m128d __DEFAULT_FN_ATTRS 6011 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P) 6012 { 6013 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, 6014 (__v2df) 6015 _mm_setzero_pd (), 6016 (__mmask8) __U); 6017 } 6018 6019 static __inline__ __m256d __DEFAULT_FN_ATTRS 6020 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) 6021 { 6022 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, 6023 (__v4df) __W, 6024 (__mmask8) __U); 6025 } 6026 6027 static __inline__ __m256d __DEFAULT_FN_ATTRS 6028 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P) 6029 { 6030 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, 6031 (__v4df) 6032 _mm256_setzero_pd (), 6033 (__mmask8) __U); 6034 } 6035 6036 static __inline__ __m128 __DEFAULT_FN_ATTRS 6037 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) 6038 { 6039 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, 6040 (__v4sf) __W, 6041 (__mmask8) __U); 6042 } 6043 6044 static __inline__ __m128 __DEFAULT_FN_ATTRS 6045 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P) 6046 { 6047 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, 6048 (__v4sf) 6049 _mm_setzero_ps (), 6050 (__mmask8) __U); 6051 } 6052 6053 static __inline__ __m256 __DEFAULT_FN_ATTRS 6054 
_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) 6055 { 6056 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, 6057 (__v8sf) __W, 6058 (__mmask8) __U); 6059 } 6060 6061 static __inline__ __m256 __DEFAULT_FN_ATTRS 6062 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P) 6063 { 6064 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, 6065 (__v8sf) 6066 _mm256_setzero_ps (), 6067 (__mmask8) __U); 6068 } 6069 6070 static __inline__ void __DEFAULT_FN_ATTRS 6071 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A) 6072 { 6073 __builtin_ia32_storeapd128_mask ((__v2df *) __P, 6074 (__v2df) __A, 6075 (__mmask8) __U); 6076 } 6077 6078 static __inline__ void __DEFAULT_FN_ATTRS 6079 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A) 6080 { 6081 __builtin_ia32_storeapd256_mask ((__v4df *) __P, 6082 (__v4df) __A, 6083 (__mmask8) __U); 6084 } 6085 6086 static __inline__ void __DEFAULT_FN_ATTRS 6087 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A) 6088 { 6089 __builtin_ia32_storeaps128_mask ((__v4sf *) __P, 6090 (__v4sf) __A, 6091 (__mmask8) __U); 6092 } 6093 6094 static __inline__ void __DEFAULT_FN_ATTRS 6095 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A) 6096 { 6097 __builtin_ia32_storeaps256_mask ((__v8sf *) __P, 6098 (__v8sf) __A, 6099 (__mmask8) __U); 6100 } 6101 6102 static __inline__ void __DEFAULT_FN_ATTRS 6103 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) 6104 { 6105 __builtin_ia32_storedqudi128_mask ((__v2di *) __P, 6106 (__v2di) __A, 6107 (__mmask8) __U); 6108 } 6109 6110 static __inline__ void __DEFAULT_FN_ATTRS 6111 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) 6112 { 6113 __builtin_ia32_storedqudi256_mask ((__v4di *) __P, 6114 (__v4di) __A, 6115 (__mmask8) __U); 6116 } 6117 6118 static __inline__ void __DEFAULT_FN_ATTRS 6119 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) 6120 { 6121 __builtin_ia32_storedqusi128_mask ((__v4si *) __P, 6122 (__v4si) 
__A, 6123 (__mmask8) __U); 6124 } 6125 6126 static __inline__ void __DEFAULT_FN_ATTRS 6127 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) 6128 { 6129 __builtin_ia32_storedqusi256_mask ((__v8si *) __P, 6130 (__v8si) __A, 6131 (__mmask8) __U); 6132 } 6133 6134 static __inline__ void __DEFAULT_FN_ATTRS 6135 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A) 6136 { 6137 __builtin_ia32_storeupd128_mask ((__v2df *) __P, 6138 (__v2df) __A, 6139 (__mmask8) __U); 6140 } 6141 6142 static __inline__ void __DEFAULT_FN_ATTRS 6143 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A) 6144 { 6145 __builtin_ia32_storeupd256_mask ((__v4df *) __P, 6146 (__v4df) __A, 6147 (__mmask8) __U); 6148 } 6149 6150 static __inline__ void __DEFAULT_FN_ATTRS 6151 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A) 6152 { 6153 __builtin_ia32_storeups128_mask ((__v4sf *) __P, 6154 (__v4sf) __A, 6155 (__mmask8) __U); 6156 } 6157 6158 static __inline__ void __DEFAULT_FN_ATTRS 6159 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) 6160 { 6161 __builtin_ia32_storeups256_mask ((__v8sf *) __P, 6162 (__v8sf) __A, 6163 (__mmask8) __U); 6164 } 6165 6166 6167 static __inline__ __m128d __DEFAULT_FN_ATTRS 6168 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 6169 { 6170 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6171 (__v2df)_mm_unpackhi_pd(__A, __B), 6172 (__v2df)__W); 6173 } 6174 6175 static __inline__ __m128d __DEFAULT_FN_ATTRS 6176 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) 6177 { 6178 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6179 (__v2df)_mm_unpackhi_pd(__A, __B), 6180 (__v2df)_mm_setzero_pd()); 6181 } 6182 6183 static __inline__ __m256d __DEFAULT_FN_ATTRS 6184 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 6185 { 6186 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6187 (__v4df)_mm256_unpackhi_pd(__A, __B), 6188 (__v4df)__W); 6189 } 6190 
6191 static __inline__ __m256d __DEFAULT_FN_ATTRS 6192 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) 6193 { 6194 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6195 (__v4df)_mm256_unpackhi_pd(__A, __B), 6196 (__v4df)_mm256_setzero_pd()); 6197 } 6198 6199 static __inline__ __m128 __DEFAULT_FN_ATTRS 6200 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 6201 { 6202 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6203 (__v4sf)_mm_unpackhi_ps(__A, __B), 6204 (__v4sf)__W); 6205 } 6206 6207 static __inline__ __m128 __DEFAULT_FN_ATTRS 6208 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) 6209 { 6210 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6211 (__v4sf)_mm_unpackhi_ps(__A, __B), 6212 (__v4sf)_mm_setzero_ps()); 6213 } 6214 6215 static __inline__ __m256 __DEFAULT_FN_ATTRS 6216 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 6217 { 6218 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6219 (__v8sf)_mm256_unpackhi_ps(__A, __B), 6220 (__v8sf)__W); 6221 } 6222 6223 static __inline__ __m256 __DEFAULT_FN_ATTRS 6224 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) 6225 { 6226 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6227 (__v8sf)_mm256_unpackhi_ps(__A, __B), 6228 (__v8sf)_mm256_setzero_ps()); 6229 } 6230 6231 static __inline__ __m128d __DEFAULT_FN_ATTRS 6232 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 6233 { 6234 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6235 (__v2df)_mm_unpacklo_pd(__A, __B), 6236 (__v2df)__W); 6237 } 6238 6239 static __inline__ __m128d __DEFAULT_FN_ATTRS 6240 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) 6241 { 6242 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6243 (__v2df)_mm_unpacklo_pd(__A, __B), 6244 (__v2df)_mm_setzero_pd()); 6245 } 6246 6247 static __inline__ __m256d __DEFAULT_FN_ATTRS 6248 _mm256_mask_unpacklo_pd(__m256d __W, 
__mmask8 __U, __m256d __A, __m256d __B) 6249 { 6250 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6251 (__v4df)_mm256_unpacklo_pd(__A, __B), 6252 (__v4df)__W); 6253 } 6254 6255 static __inline__ __m256d __DEFAULT_FN_ATTRS 6256 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) 6257 { 6258 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6259 (__v4df)_mm256_unpacklo_pd(__A, __B), 6260 (__v4df)_mm256_setzero_pd()); 6261 } 6262 6263 static __inline__ __m128 __DEFAULT_FN_ATTRS 6264 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 6265 { 6266 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6267 (__v4sf)_mm_unpacklo_ps(__A, __B), 6268 (__v4sf)__W); 6269 } 6270 6271 static __inline__ __m128 __DEFAULT_FN_ATTRS 6272 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) 6273 { 6274 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6275 (__v4sf)_mm_unpacklo_ps(__A, __B), 6276 (__v4sf)_mm_setzero_ps()); 6277 } 6278 6279 static __inline__ __m256 __DEFAULT_FN_ATTRS 6280 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 6281 { 6282 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6283 (__v8sf)_mm256_unpacklo_ps(__A, __B), 6284 (__v8sf)__W); 6285 } 6286 6287 static __inline__ __m256 __DEFAULT_FN_ATTRS 6288 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) 6289 { 6290 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6291 (__v8sf)_mm256_unpacklo_ps(__A, __B), 6292 (__v8sf)_mm256_setzero_ps()); 6293 } 6294 6295 static __inline__ __m128d __DEFAULT_FN_ATTRS 6296 _mm_rcp14_pd (__m128d __A) 6297 { 6298 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 6299 (__v2df) 6300 _mm_setzero_pd (), 6301 (__mmask8) -1); 6302 } 6303 6304 static __inline__ __m128d __DEFAULT_FN_ATTRS 6305 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A) 6306 { 6307 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 6308 (__v2df) __W, 6309 
(__mmask8) __U); 6310 } 6311 6312 static __inline__ __m128d __DEFAULT_FN_ATTRS 6313 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A) 6314 { 6315 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 6316 (__v2df) 6317 _mm_setzero_pd (), 6318 (__mmask8) __U); 6319 } 6320 6321 static __inline__ __m256d __DEFAULT_FN_ATTRS 6322 _mm256_rcp14_pd (__m256d __A) 6323 { 6324 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 6325 (__v4df) 6326 _mm256_setzero_pd (), 6327 (__mmask8) -1); 6328 } 6329 6330 static __inline__ __m256d __DEFAULT_FN_ATTRS 6331 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A) 6332 { 6333 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 6334 (__v4df) __W, 6335 (__mmask8) __U); 6336 } 6337 6338 static __inline__ __m256d __DEFAULT_FN_ATTRS 6339 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A) 6340 { 6341 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 6342 (__v4df) 6343 _mm256_setzero_pd (), 6344 (__mmask8) __U); 6345 } 6346 6347 static __inline__ __m128 __DEFAULT_FN_ATTRS 6348 _mm_rcp14_ps (__m128 __A) 6349 { 6350 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 6351 (__v4sf) 6352 _mm_setzero_ps (), 6353 (__mmask8) -1); 6354 } 6355 6356 static __inline__ __m128 __DEFAULT_FN_ATTRS 6357 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A) 6358 { 6359 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 6360 (__v4sf) __W, 6361 (__mmask8) __U); 6362 } 6363 6364 static __inline__ __m128 __DEFAULT_FN_ATTRS 6365 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A) 6366 { 6367 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 6368 (__v4sf) 6369 _mm_setzero_ps (), 6370 (__mmask8) __U); 6371 } 6372 6373 static __inline__ __m256 __DEFAULT_FN_ATTRS 6374 _mm256_rcp14_ps (__m256 __A) 6375 { 6376 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6377 (__v8sf) 6378 _mm256_setzero_ps (), 6379 (__mmask8) -1); 6380 } 6381 6382 static __inline__ __m256 
__DEFAULT_FN_ATTRS 6383 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A) 6384 { 6385 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6386 (__v8sf) __W, 6387 (__mmask8) __U); 6388 } 6389 6390 static __inline__ __m256 __DEFAULT_FN_ATTRS 6391 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) 6392 { 6393 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6394 (__v8sf) 6395 _mm256_setzero_ps (), 6396 (__mmask8) __U); 6397 } 6398 6399 #define _mm_mask_permute_pd(W, U, X, C) __extension__ ({ \ 6400 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6401 (__v2df)_mm_permute_pd((X), (C)), \ 6402 (__v2df)(__m128d)(W)); }) 6403 6404 #define _mm_maskz_permute_pd(U, X, C) __extension__ ({ \ 6405 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6406 (__v2df)_mm_permute_pd((X), (C)), \ 6407 (__v2df)_mm_setzero_pd()); }) 6408 6409 #define _mm256_mask_permute_pd(W, U, X, C) __extension__ ({ \ 6410 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6411 (__v4df)_mm256_permute_pd((X), (C)), \ 6412 (__v4df)(__m256d)(W)); }) 6413 6414 #define _mm256_maskz_permute_pd(U, X, C) __extension__ ({ \ 6415 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6416 (__v4df)_mm256_permute_pd((X), (C)), \ 6417 (__v4df)_mm256_setzero_pd()); }) 6418 6419 #define _mm_mask_permute_ps(W, U, X, C) __extension__ ({ \ 6420 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6421 (__v4sf)_mm_permute_ps((X), (C)), \ 6422 (__v4sf)(__m128)(W)); }) 6423 6424 #define _mm_maskz_permute_ps(U, X, C) __extension__ ({ \ 6425 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6426 (__v4sf)_mm_permute_ps((X), (C)), \ 6427 (__v4sf)_mm_setzero_ps()); }) 6428 6429 #define _mm256_mask_permute_ps(W, U, X, C) __extension__ ({ \ 6430 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6431 (__v8sf)_mm256_permute_ps((X), (C)), \ 6432 (__v8sf)(__m256)(W)); }) 6433 6434 #define _mm256_maskz_permute_ps(U, X, C) __extension__ ({ \ 6435 
(__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6436 (__v8sf)_mm256_permute_ps((X), (C)), \ 6437 (__v8sf)_mm256_setzero_ps()); }) 6438 6439 static __inline__ __m128d __DEFAULT_FN_ATTRS 6440 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) 6441 { 6442 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6443 (__v2df)_mm_permutevar_pd(__A, __C), 6444 (__v2df)__W); 6445 } 6446 6447 static __inline__ __m128d __DEFAULT_FN_ATTRS 6448 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) 6449 { 6450 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6451 (__v2df)_mm_permutevar_pd(__A, __C), 6452 (__v2df)_mm_setzero_pd()); 6453 } 6454 6455 static __inline__ __m256d __DEFAULT_FN_ATTRS 6456 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) 6457 { 6458 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6459 (__v4df)_mm256_permutevar_pd(__A, __C), 6460 (__v4df)__W); 6461 } 6462 6463 static __inline__ __m256d __DEFAULT_FN_ATTRS 6464 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) 6465 { 6466 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6467 (__v4df)_mm256_permutevar_pd(__A, __C), 6468 (__v4df)_mm256_setzero_pd()); 6469 } 6470 6471 static __inline__ __m128 __DEFAULT_FN_ATTRS 6472 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) 6473 { 6474 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6475 (__v4sf)_mm_permutevar_ps(__A, __C), 6476 (__v4sf)__W); 6477 } 6478 6479 static __inline__ __m128 __DEFAULT_FN_ATTRS 6480 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) 6481 { 6482 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6483 (__v4sf)_mm_permutevar_ps(__A, __C), 6484 (__v4sf)_mm_setzero_ps()); 6485 } 6486 6487 static __inline__ __m256 __DEFAULT_FN_ATTRS 6488 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) 6489 { 6490 return 
(__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6491 (__v8sf)_mm256_permutevar_ps(__A, __C), 6492 (__v8sf)__W); 6493 } 6494 6495 static __inline__ __m256 __DEFAULT_FN_ATTRS 6496 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) 6497 { 6498 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6499 (__v8sf)_mm256_permutevar_ps(__A, __C), 6500 (__v8sf)_mm256_setzero_ps()); 6501 } 6502 6503 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6504 _mm_test_epi32_mask (__m128i __A, __m128i __B) 6505 { 6506 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A, 6507 (__v4si) __B, 6508 (__mmask8) -1); 6509 } 6510 6511 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6512 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 6513 { 6514 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A, 6515 (__v4si) __B, __U); 6516 } 6517 6518 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6519 _mm256_test_epi32_mask (__m256i __A, __m256i __B) 6520 { 6521 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A, 6522 (__v8si) __B, 6523 (__mmask8) -1); 6524 } 6525 6526 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6527 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 6528 { 6529 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A, 6530 (__v8si) __B, __U); 6531 } 6532 6533 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6534 _mm_test_epi64_mask (__m128i __A, __m128i __B) 6535 { 6536 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A, 6537 (__v2di) __B, 6538 (__mmask8) -1); 6539 } 6540 6541 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6542 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 6543 { 6544 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A, 6545 (__v2di) __B, __U); 6546 } 6547 6548 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6549 _mm256_test_epi64_mask (__m256i __A, __m256i __B) 6550 { 6551 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A, 6552 (__v4di) __B, 6553 (__mmask8) -1); 
6554 } 6555 6556 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6557 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 6558 { 6559 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A, 6560 (__v4di) __B, __U); 6561 } 6562 6563 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6564 _mm_testn_epi32_mask (__m128i __A, __m128i __B) 6565 { 6566 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A, 6567 (__v4si) __B, 6568 (__mmask8) -1); 6569 } 6570 6571 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6572 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 6573 { 6574 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A, 6575 (__v4si) __B, __U); 6576 } 6577 6578 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6579 _mm256_testn_epi32_mask (__m256i __A, __m256i __B) 6580 { 6581 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A, 6582 (__v8si) __B, 6583 (__mmask8) -1); 6584 } 6585 6586 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6587 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 6588 { 6589 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A, 6590 (__v8si) __B, __U); 6591 } 6592 6593 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6594 _mm_testn_epi64_mask (__m128i __A, __m128i __B) 6595 { 6596 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A, 6597 (__v2di) __B, 6598 (__mmask8) -1); 6599 } 6600 6601 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6602 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 6603 { 6604 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A, 6605 (__v2di) __B, __U); 6606 } 6607 6608 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6609 _mm256_testn_epi64_mask (__m256i __A, __m256i __B) 6610 { 6611 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A, 6612 (__v4di) __B, 6613 (__mmask8) -1); 6614 } 6615 6616 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6617 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 6618 { 6619 
return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A, 6620 (__v4di) __B, __U); 6621 } 6622 6623 6624 6625 static __inline__ __m128i __DEFAULT_FN_ATTRS 6626 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6627 { 6628 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6629 (__v4si)_mm_unpackhi_epi32(__A, __B), 6630 (__v4si)__W); 6631 } 6632 6633 static __inline__ __m128i __DEFAULT_FN_ATTRS 6634 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6635 { 6636 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6637 (__v4si)_mm_unpackhi_epi32(__A, __B), 6638 (__v4si)_mm_setzero_si128()); 6639 } 6640 6641 static __inline__ __m256i __DEFAULT_FN_ATTRS 6642 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6643 { 6644 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6645 (__v8si)_mm256_unpackhi_epi32(__A, __B), 6646 (__v8si)__W); 6647 } 6648 6649 static __inline__ __m256i __DEFAULT_FN_ATTRS 6650 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) 6651 { 6652 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6653 (__v8si)_mm256_unpackhi_epi32(__A, __B), 6654 (__v8si)_mm256_setzero_si256()); 6655 } 6656 6657 static __inline__ __m128i __DEFAULT_FN_ATTRS 6658 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6659 { 6660 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6661 (__v2di)_mm_unpackhi_epi64(__A, __B), 6662 (__v2di)__W); 6663 } 6664 6665 static __inline__ __m128i __DEFAULT_FN_ATTRS 6666 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6667 { 6668 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6669 (__v2di)_mm_unpackhi_epi64(__A, __B), 6670 (__v2di)_mm_setzero_di()); 6671 } 6672 6673 static __inline__ __m256i __DEFAULT_FN_ATTRS 6674 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6675 { 6676 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6677 
(__v4di)_mm256_unpackhi_epi64(__A, __B), 6678 (__v4di)__W); 6679 } 6680 6681 static __inline__ __m256i __DEFAULT_FN_ATTRS 6682 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6683 { 6684 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6685 (__v4di)_mm256_unpackhi_epi64(__A, __B), 6686 (__v4di)_mm256_setzero_si256()); 6687 } 6688 6689 static __inline__ __m128i __DEFAULT_FN_ATTRS 6690 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6691 { 6692 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6693 (__v4si)_mm_unpacklo_epi32(__A, __B), 6694 (__v4si)__W); 6695 } 6696 6697 static __inline__ __m128i __DEFAULT_FN_ATTRS 6698 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6699 { 6700 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6701 (__v4si)_mm_unpacklo_epi32(__A, __B), 6702 (__v4si)_mm_setzero_si128()); 6703 } 6704 6705 static __inline__ __m256i __DEFAULT_FN_ATTRS 6706 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6707 { 6708 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6709 (__v8si)_mm256_unpacklo_epi32(__A, __B), 6710 (__v8si)__W); 6711 } 6712 6713 static __inline__ __m256i __DEFAULT_FN_ATTRS 6714 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) 6715 { 6716 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6717 (__v8si)_mm256_unpacklo_epi32(__A, __B), 6718 (__v8si)_mm256_setzero_si256()); 6719 } 6720 6721 static __inline__ __m128i __DEFAULT_FN_ATTRS 6722 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6723 { 6724 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6725 (__v2di)_mm_unpacklo_epi64(__A, __B), 6726 (__v2di)__W); 6727 } 6728 6729 static __inline__ __m128i __DEFAULT_FN_ATTRS 6730 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6731 { 6732 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6733 (__v2di)_mm_unpacklo_epi64(__A, __B), 
6734 (__v2di)_mm_setzero_di()); 6735 } 6736 6737 static __inline__ __m256i __DEFAULT_FN_ATTRS 6738 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6739 { 6740 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6741 (__v4di)_mm256_unpacklo_epi64(__A, __B), 6742 (__v4di)__W); 6743 } 6744 6745 static __inline__ __m256i __DEFAULT_FN_ATTRS 6746 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6747 { 6748 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6749 (__v4di)_mm256_unpacklo_epi64(__A, __B), 6750 (__v4di)_mm256_setzero_si256()); 6751 } 6752 6753 static __inline__ __m128i __DEFAULT_FN_ATTRS 6754 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6755 { 6756 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6757 (__v4si)_mm_sra_epi32(__A, __B), 6758 (__v4si)__W); 6759 } 6760 6761 static __inline__ __m128i __DEFAULT_FN_ATTRS 6762 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6763 { 6764 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6765 (__v4si)_mm_sra_epi32(__A, __B), 6766 (__v4si)_mm_setzero_si128()); 6767 } 6768 6769 static __inline__ __m256i __DEFAULT_FN_ATTRS 6770 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 6771 { 6772 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6773 (__v8si)_mm256_sra_epi32(__A, __B), 6774 (__v8si)__W); 6775 } 6776 6777 static __inline__ __m256i __DEFAULT_FN_ATTRS 6778 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) 6779 { 6780 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6781 (__v8si)_mm256_sra_epi32(__A, __B), 6782 (__v8si)_mm256_setzero_si256()); 6783 } 6784 6785 static __inline__ __m128i __DEFAULT_FN_ATTRS 6786 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) 6787 { 6788 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6789 (__v4si)_mm_srai_epi32(__A, __B), 6790 (__v4si)__W); 6791 } 6792 6793 static __inline__ __m128i 
__DEFAULT_FN_ATTRS 6794 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B) 6795 { 6796 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6797 (__v4si)_mm_srai_epi32(__A, __B), 6798 (__v4si)_mm_setzero_si128()); 6799 } 6800 6801 static __inline__ __m256i __DEFAULT_FN_ATTRS 6802 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) 6803 { 6804 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6805 (__v8si)_mm256_srai_epi32(__A, __B), 6806 (__v8si)__W); 6807 } 6808 6809 static __inline__ __m256i __DEFAULT_FN_ATTRS 6810 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B) 6811 { 6812 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6813 (__v8si)_mm256_srai_epi32(__A, __B), 6814 (__v8si)_mm256_setzero_si256()); 6815 } 6816 6817 static __inline__ __m128i __DEFAULT_FN_ATTRS 6818 _mm_sra_epi64(__m128i __A, __m128i __B) 6819 { 6820 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B); 6821 } 6822 6823 static __inline__ __m128i __DEFAULT_FN_ATTRS 6824 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6825 { 6826 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6827 (__v2di)_mm_sra_epi64(__A, __B), \ 6828 (__v2di)__W); 6829 } 6830 6831 static __inline__ __m128i __DEFAULT_FN_ATTRS 6832 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6833 { 6834 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6835 (__v2di)_mm_sra_epi64(__A, __B), \ 6836 (__v2di)_mm_setzero_di()); 6837 } 6838 6839 static __inline__ __m256i __DEFAULT_FN_ATTRS 6840 _mm256_sra_epi64(__m256i __A, __m128i __B) 6841 { 6842 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B); 6843 } 6844 6845 static __inline__ __m256i __DEFAULT_FN_ATTRS 6846 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 6847 { 6848 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6849 (__v4di)_mm256_sra_epi64(__A, __B), \ 6850 (__v4di)__W); 6851 } 6852 6853 static 
__inline__ __m256i __DEFAULT_FN_ATTRS 6854 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) 6855 { 6856 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6857 (__v4di)_mm256_sra_epi64(__A, __B), \ 6858 (__v4di)_mm256_setzero_si256()); 6859 } 6860 6861 static __inline__ __m128i __DEFAULT_FN_ATTRS 6862 _mm_srai_epi64(__m128i __A, int __imm) 6863 { 6864 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm); 6865 } 6866 6867 static __inline__ __m128i __DEFAULT_FN_ATTRS 6868 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm) 6869 { 6870 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6871 (__v2di)_mm_srai_epi64(__A, __imm), \ 6872 (__v2di)__W); 6873 } 6874 6875 static __inline__ __m128i __DEFAULT_FN_ATTRS 6876 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm) 6877 { 6878 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6879 (__v2di)_mm_srai_epi64(__A, __imm), \ 6880 (__v2di)_mm_setzero_di()); 6881 } 6882 6883 static __inline__ __m256i __DEFAULT_FN_ATTRS 6884 _mm256_srai_epi64(__m256i __A, int __imm) 6885 { 6886 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm); 6887 } 6888 6889 static __inline__ __m256i __DEFAULT_FN_ATTRS 6890 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm) 6891 { 6892 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6893 (__v4di)_mm256_srai_epi64(__A, __imm), \ 6894 (__v4di)__W); 6895 } 6896 6897 static __inline__ __m256i __DEFAULT_FN_ATTRS 6898 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm) 6899 { 6900 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6901 (__v4di)_mm256_srai_epi64(__A, __imm), \ 6902 (__v4di)_mm256_setzero_si256()); 6903 } 6904 6905 #define _mm_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ 6906 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ 6907 (__v4si)(__m128i)(B), \ 6908 (__v4si)(__m128i)(C), (int)(imm), \ 6909 (__mmask8)-1); }) 6910 6911 #define 
_mm_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \ 6912 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ 6913 (__v4si)(__m128i)(B), \ 6914 (__v4si)(__m128i)(C), (int)(imm), \ 6915 (__mmask8)(U)); }) 6916 6917 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \ 6918 (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \ 6919 (__v4si)(__m128i)(B), \ 6920 (__v4si)(__m128i)(C), (int)(imm), \ 6921 (__mmask8)(U)); }) 6922 6923 #define _mm256_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ 6924 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ 6925 (__v8si)(__m256i)(B), \ 6926 (__v8si)(__m256i)(C), (int)(imm), \ 6927 (__mmask8)-1); }) 6928 6929 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \ 6930 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ 6931 (__v8si)(__m256i)(B), \ 6932 (__v8si)(__m256i)(C), (int)(imm), \ 6933 (__mmask8)(U)); }) 6934 6935 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \ 6936 (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \ 6937 (__v8si)(__m256i)(B), \ 6938 (__v8si)(__m256i)(C), (int)(imm), \ 6939 (__mmask8)(U)); }) 6940 6941 #define _mm_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \ 6942 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ 6943 (__v2di)(__m128i)(B), \ 6944 (__v2di)(__m128i)(C), (int)(imm), \ 6945 (__mmask8)-1); }) 6946 6947 #define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \ 6948 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ 6949 (__v2di)(__m128i)(B), \ 6950 (__v2di)(__m128i)(C), (int)(imm), \ 6951 (__mmask8)(U)); }) 6952 6953 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \ 6954 (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \ 6955 (__v2di)(__m128i)(B), \ 6956 (__v2di)(__m128i)(C), (int)(imm), \ 6957 (__mmask8)(U)); }) 6958 6959 #define 
_mm256_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \ 6960 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ 6961 (__v4di)(__m256i)(B), \ 6962 (__v4di)(__m256i)(C), (int)(imm), \ 6963 (__mmask8)-1); }) 6964 6965 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \ 6966 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ 6967 (__v4di)(__m256i)(B), \ 6968 (__v4di)(__m256i)(C), (int)(imm), \ 6969 (__mmask8)(U)); }) 6970 6971 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \ 6972 (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \ 6973 (__v4di)(__m256i)(B), \ 6974 (__v4di)(__m256i)(C), (int)(imm), \ 6975 (__mmask8)(U)); }) 6976 6977 6978 6979 #define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \ 6980 (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \ 6981 (__v8sf)(__m256)(B), (int)(imm), \ 6982 (__v8sf)_mm256_setzero_ps(), \ 6983 (__mmask8)-1); }) 6984 6985 #define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \ 6986 (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \ 6987 (__v8sf)(__m256)(B), (int)(imm), \ 6988 (__v8sf)(__m256)(W), \ 6989 (__mmask8)(U)); }) 6990 6991 #define _mm256_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \ 6992 (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \ 6993 (__v8sf)(__m256)(B), (int)(imm), \ 6994 (__v8sf)_mm256_setzero_ps(), \ 6995 (__mmask8)(U)); }) 6996 6997 #define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \ 6998 (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \ 6999 (__v4df)(__m256d)(B), \ 7000 (int)(imm), \ 7001 (__v4df)_mm256_setzero_pd(), \ 7002 (__mmask8)-1); }) 7003 7004 #define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \ 7005 (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \ 7006 (__v4df)(__m256d)(B), \ 7007 (int)(imm), \ 7008 (__v4df)(__m256d)(W), \ 7009 (__mmask8)(U)); }) 7010 7011 #define 
_mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \ 7012 (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \ 7013 (__v4df)(__m256d)(B), \ 7014 (int)(imm), \ 7015 (__v4df)_mm256_setzero_pd(), \ 7016 (__mmask8)(U)); }) 7017 7018 #define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \ 7019 (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \ 7020 (__v8si)(__m256i)(B), \ 7021 (int)(imm), \ 7022 (__v8si)_mm256_setzero_si256(), \ 7023 (__mmask8)-1); }) 7024 7025 #define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \ 7026 (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \ 7027 (__v8si)(__m256i)(B), \ 7028 (int)(imm), \ 7029 (__v8si)(__m256i)(W), \ 7030 (__mmask8)(U)); }) 7031 7032 #define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \ 7033 (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \ 7034 (__v8si)(__m256i)(B), \ 7035 (int)(imm), \ 7036 (__v8si)_mm256_setzero_si256(), \ 7037 (__mmask8)(U)); }) 7038 7039 #define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \ 7040 (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \ 7041 (__v4di)(__m256i)(B), \ 7042 (int)(imm), \ 7043 (__v4di)_mm256_setzero_si256(), \ 7044 (__mmask8)-1); }) 7045 7046 #define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \ 7047 (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \ 7048 (__v4di)(__m256i)(B), \ 7049 (int)(imm), \ 7050 (__v4di)(__m256i)(W), \ 7051 (__mmask8)(U)); }) 7052 7053 #define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \ 7054 (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \ 7055 (__v4di)(__m256i)(B), \ 7056 (int)(imm), \ 7057 (__v4di)_mm256_setzero_si256(), \ 7058 (__mmask8)(U)); }) 7059 7060 #define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ 7061 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 7062 (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 7063 (__v2df)(__m128d)(W)); }) 7064 7065 
#define _mm_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ 7066 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 7067 (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 7068 (__v2df)_mm_setzero_pd()); }) 7069 7070 #define _mm256_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ 7071 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 7072 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 7073 (__v4df)(__m256d)(W)); }) 7074 7075 #define _mm256_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ 7076 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 7077 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 7078 (__v4df)_mm256_setzero_pd()); }) 7079 7080 #define _mm_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ 7081 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 7082 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 7083 (__v4sf)(__m128)(W)); }) 7084 7085 #define _mm_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ 7086 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 7087 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 7088 (__v4sf)_mm_setzero_ps()); }) 7089 7090 #define _mm256_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ 7091 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 7092 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 7093 (__v8sf)(__m256)(W)); }) 7094 7095 #define _mm256_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ 7096 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 7097 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 7098 (__v8sf)_mm256_setzero_ps()); }) 7099 7100 static __inline__ __m128d __DEFAULT_FN_ATTRS 7101 _mm_rsqrt14_pd (__m128d __A) 7102 { 7103 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 7104 (__v2df) 7105 _mm_setzero_pd (), 7106 (__mmask8) -1); 7107 } 7108 7109 static __inline__ __m128d __DEFAULT_FN_ATTRS 7110 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A) 7111 { 7112 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 7113 (__v2df) __W, 7114 (__mmask8) __U); 7115 } 7116 7117 static __inline__ __m128d 
__DEFAULT_FN_ATTRS 7118 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A) 7119 { 7120 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 7121 (__v2df) 7122 _mm_setzero_pd (), 7123 (__mmask8) __U); 7124 } 7125 7126 static __inline__ __m256d __DEFAULT_FN_ATTRS 7127 _mm256_rsqrt14_pd (__m256d __A) 7128 { 7129 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 7130 (__v4df) 7131 _mm256_setzero_pd (), 7132 (__mmask8) -1); 7133 } 7134 7135 static __inline__ __m256d __DEFAULT_FN_ATTRS 7136 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A) 7137 { 7138 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 7139 (__v4df) __W, 7140 (__mmask8) __U); 7141 } 7142 7143 static __inline__ __m256d __DEFAULT_FN_ATTRS 7144 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A) 7145 { 7146 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 7147 (__v4df) 7148 _mm256_setzero_pd (), 7149 (__mmask8) __U); 7150 } 7151 7152 static __inline__ __m128 __DEFAULT_FN_ATTRS 7153 _mm_rsqrt14_ps (__m128 __A) 7154 { 7155 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 7156 (__v4sf) 7157 _mm_setzero_ps (), 7158 (__mmask8) -1); 7159 } 7160 7161 static __inline__ __m128 __DEFAULT_FN_ATTRS 7162 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A) 7163 { 7164 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 7165 (__v4sf) __W, 7166 (__mmask8) __U); 7167 } 7168 7169 static __inline__ __m128 __DEFAULT_FN_ATTRS 7170 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A) 7171 { 7172 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 7173 (__v4sf) 7174 _mm_setzero_ps (), 7175 (__mmask8) __U); 7176 } 7177 7178 static __inline__ __m256 __DEFAULT_FN_ATTRS 7179 _mm256_rsqrt14_ps (__m256 __A) 7180 { 7181 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 7182 (__v8sf) 7183 _mm256_setzero_ps (), 7184 (__mmask8) -1); 7185 } 7186 7187 static __inline__ __m256 __DEFAULT_FN_ATTRS 7188 
_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A) 7189 { 7190 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 7191 (__v8sf) __W, 7192 (__mmask8) __U); 7193 } 7194 7195 static __inline__ __m256 __DEFAULT_FN_ATTRS 7196 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) 7197 { 7198 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 7199 (__v8sf) 7200 _mm256_setzero_ps (), 7201 (__mmask8) __U); 7202 } 7203 7204 static __inline__ __m256 __DEFAULT_FN_ATTRS 7205 _mm256_broadcast_f32x4(__m128 __A) 7206 { 7207 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 7208 0, 1, 2, 3, 0, 1, 2, 3); 7209 } 7210 7211 static __inline__ __m256 __DEFAULT_FN_ATTRS 7212 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) 7213 { 7214 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 7215 (__v8sf)_mm256_broadcast_f32x4(__A), 7216 (__v8sf)__O); 7217 } 7218 7219 static __inline__ __m256 __DEFAULT_FN_ATTRS 7220 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) 7221 { 7222 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 7223 (__v8sf)_mm256_broadcast_f32x4(__A), 7224 (__v8sf)_mm256_setzero_ps()); 7225 } 7226 7227 static __inline__ __m256i __DEFAULT_FN_ATTRS 7228 _mm256_broadcast_i32x4(__m128i __A) 7229 { 7230 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 7231 0, 1, 2, 3, 0, 1, 2, 3); 7232 } 7233 7234 static __inline__ __m256i __DEFAULT_FN_ATTRS 7235 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) 7236 { 7237 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 7238 (__v8si)_mm256_broadcast_i32x4(__A), 7239 (__v8si)__O); 7240 } 7241 7242 static __inline__ __m256i __DEFAULT_FN_ATTRS 7243 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) 7244 { 7245 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 7246 (__v8si)_mm256_broadcast_i32x4(__A), 7247 (__v8si)_mm256_setzero_si256()); 7248 } 7249 7250 static __inline__ __m256d __DEFAULT_FN_ATTRS 7251 
_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) 7252 { 7253 return (__m256d)__builtin_ia32_selectpd_256(__M, 7254 (__v4df) _mm256_broadcastsd_pd(__A), 7255 (__v4df) __O); 7256 } 7257 7258 static __inline__ __m256d __DEFAULT_FN_ATTRS 7259 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 7260 { 7261 return (__m256d)__builtin_ia32_selectpd_256(__M, 7262 (__v4df) _mm256_broadcastsd_pd(__A), 7263 (__v4df) _mm256_setzero_pd()); 7264 } 7265 7266 static __inline__ __m128 __DEFAULT_FN_ATTRS 7267 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) 7268 { 7269 return (__m128)__builtin_ia32_selectps_128(__M, 7270 (__v4sf) _mm_broadcastss_ps(__A), 7271 (__v4sf) __O); 7272 } 7273 7274 static __inline__ __m128 __DEFAULT_FN_ATTRS 7275 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 7276 { 7277 return (__m128)__builtin_ia32_selectps_128(__M, 7278 (__v4sf) _mm_broadcastss_ps(__A), 7279 (__v4sf) _mm_setzero_ps()); 7280 } 7281 7282 static __inline__ __m256 __DEFAULT_FN_ATTRS 7283 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) 7284 { 7285 return (__m256)__builtin_ia32_selectps_256(__M, 7286 (__v8sf) _mm256_broadcastss_ps(__A), 7287 (__v8sf) __O); 7288 } 7289 7290 static __inline__ __m256 __DEFAULT_FN_ATTRS 7291 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 7292 { 7293 return (__m256)__builtin_ia32_selectps_256(__M, 7294 (__v8sf) _mm256_broadcastss_ps(__A), 7295 (__v8sf) _mm256_setzero_ps()); 7296 } 7297 7298 static __inline__ __m128i __DEFAULT_FN_ATTRS 7299 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7300 { 7301 return (__m128i)__builtin_ia32_selectd_128(__M, 7302 (__v4si) _mm_broadcastd_epi32(__A), 7303 (__v4si) __O); 7304 } 7305 7306 static __inline__ __m128i __DEFAULT_FN_ATTRS 7307 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 7308 { 7309 return (__m128i)__builtin_ia32_selectd_128(__M, 7310 (__v4si) _mm_broadcastd_epi32(__A), 7311 (__v4si) _mm_setzero_si128()); 7312 } 7313 7314 
static __inline__ __m256i __DEFAULT_FN_ATTRS 7315 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) 7316 { 7317 return (__m256i)__builtin_ia32_selectd_256(__M, 7318 (__v8si) _mm256_broadcastd_epi32(__A), 7319 (__v8si) __O); 7320 } 7321 7322 static __inline__ __m256i __DEFAULT_FN_ATTRS 7323 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 7324 { 7325 return (__m256i)__builtin_ia32_selectd_256(__M, 7326 (__v8si) _mm256_broadcastd_epi32(__A), 7327 (__v8si) _mm256_setzero_si256()); 7328 } 7329 7330 static __inline__ __m128i __DEFAULT_FN_ATTRS 7331 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) 7332 { 7333 return (__m128i)__builtin_ia32_selectq_128(__M, 7334 (__v2di) _mm_broadcastq_epi64(__A), 7335 (__v2di) __O); 7336 } 7337 7338 static __inline__ __m128i __DEFAULT_FN_ATTRS 7339 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 7340 { 7341 return (__m128i)__builtin_ia32_selectq_128(__M, 7342 (__v2di) _mm_broadcastq_epi64(__A), 7343 (__v2di) _mm_setzero_si128()); 7344 } 7345 7346 static __inline__ __m256i __DEFAULT_FN_ATTRS 7347 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) 7348 { 7349 return (__m256i)__builtin_ia32_selectq_256(__M, 7350 (__v4di) _mm256_broadcastq_epi64(__A), 7351 (__v4di) __O); 7352 } 7353 7354 static __inline__ __m256i __DEFAULT_FN_ATTRS 7355 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 7356 { 7357 return (__m256i)__builtin_ia32_selectq_256(__M, 7358 (__v4di) _mm256_broadcastq_epi64(__A), 7359 (__v4di) _mm256_setzero_si256()); 7360 } 7361 7362 static __inline__ __m128i __DEFAULT_FN_ATTRS 7363 _mm_cvtsepi32_epi8 (__m128i __A) 7364 { 7365 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 7366 (__v16qi)_mm_undefined_si128(), 7367 (__mmask8) -1); 7368 } 7369 7370 static __inline__ __m128i __DEFAULT_FN_ATTRS 7371 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7372 { 7373 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 7374 
(__v16qi) __O, __M); 7375 } 7376 7377 static __inline__ __m128i __DEFAULT_FN_ATTRS 7378 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A) 7379 { 7380 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 7381 (__v16qi) _mm_setzero_si128 (), 7382 __M); 7383 } 7384 7385 static __inline__ void __DEFAULT_FN_ATTRS 7386 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7387 { 7388 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7389 } 7390 7391 static __inline__ __m128i __DEFAULT_FN_ATTRS 7392 _mm256_cvtsepi32_epi8 (__m256i __A) 7393 { 7394 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 7395 (__v16qi)_mm_undefined_si128(), 7396 (__mmask8) -1); 7397 } 7398 7399 static __inline__ __m128i __DEFAULT_FN_ATTRS 7400 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7401 { 7402 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 7403 (__v16qi) __O, __M); 7404 } 7405 7406 static __inline__ __m128i __DEFAULT_FN_ATTRS 7407 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A) 7408 { 7409 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 7410 (__v16qi) _mm_setzero_si128 (), 7411 __M); 7412 } 7413 7414 static __inline__ void __DEFAULT_FN_ATTRS 7415 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7416 { 7417 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 7418 } 7419 7420 static __inline__ __m128i __DEFAULT_FN_ATTRS 7421 _mm_cvtsepi32_epi16 (__m128i __A) 7422 { 7423 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7424 (__v8hi)_mm_setzero_si128 (), 7425 (__mmask8) -1); 7426 } 7427 7428 static __inline__ __m128i __DEFAULT_FN_ATTRS 7429 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7430 { 7431 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7432 (__v8hi)__O, 7433 __M); 7434 } 7435 7436 static __inline__ __m128i __DEFAULT_FN_ATTRS 7437 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, 
__m128i __A) 7438 { 7439 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7440 (__v8hi) _mm_setzero_si128 (), 7441 __M); 7442 } 7443 7444 static __inline__ void __DEFAULT_FN_ATTRS 7445 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7446 { 7447 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7448 } 7449 7450 static __inline__ __m128i __DEFAULT_FN_ATTRS 7451 _mm256_cvtsepi32_epi16 (__m256i __A) 7452 { 7453 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7454 (__v8hi)_mm_undefined_si128(), 7455 (__mmask8) -1); 7456 } 7457 7458 static __inline__ __m128i __DEFAULT_FN_ATTRS 7459 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7460 { 7461 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7462 (__v8hi) __O, __M); 7463 } 7464 7465 static __inline__ __m128i __DEFAULT_FN_ATTRS 7466 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A) 7467 { 7468 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7469 (__v8hi) _mm_setzero_si128 (), 7470 __M); 7471 } 7472 7473 static __inline__ void __DEFAULT_FN_ATTRS 7474 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7475 { 7476 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7477 } 7478 7479 static __inline__ __m128i __DEFAULT_FN_ATTRS 7480 _mm_cvtsepi64_epi8 (__m128i __A) 7481 { 7482 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7483 (__v16qi)_mm_undefined_si128(), 7484 (__mmask8) -1); 7485 } 7486 7487 static __inline__ __m128i __DEFAULT_FN_ATTRS 7488 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7489 { 7490 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7491 (__v16qi) __O, __M); 7492 } 7493 7494 static __inline__ __m128i __DEFAULT_FN_ATTRS 7495 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A) 7496 { 7497 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7498 (__v16qi) _mm_setzero_si128 (), 7499 
__M); 7500 } 7501 7502 static __inline__ void __DEFAULT_FN_ATTRS 7503 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7504 { 7505 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7506 } 7507 7508 static __inline__ __m128i __DEFAULT_FN_ATTRS 7509 _mm256_cvtsepi64_epi8 (__m256i __A) 7510 { 7511 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7512 (__v16qi)_mm_undefined_si128(), 7513 (__mmask8) -1); 7514 } 7515 7516 static __inline__ __m128i __DEFAULT_FN_ATTRS 7517 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7518 { 7519 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7520 (__v16qi) __O, __M); 7521 } 7522 7523 static __inline__ __m128i __DEFAULT_FN_ATTRS 7524 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A) 7525 { 7526 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7527 (__v16qi) _mm_setzero_si128 (), 7528 __M); 7529 } 7530 7531 static __inline__ void __DEFAULT_FN_ATTRS 7532 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7533 { 7534 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7535 } 7536 7537 static __inline__ __m128i __DEFAULT_FN_ATTRS 7538 _mm_cvtsepi64_epi32 (__m128i __A) 7539 { 7540 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7541 (__v4si)_mm_undefined_si128(), 7542 (__mmask8) -1); 7543 } 7544 7545 static __inline__ __m128i __DEFAULT_FN_ATTRS 7546 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7547 { 7548 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7549 (__v4si) __O, __M); 7550 } 7551 7552 static __inline__ __m128i __DEFAULT_FN_ATTRS 7553 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A) 7554 { 7555 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7556 (__v4si) _mm_setzero_si128 (), 7557 __M); 7558 } 7559 7560 static __inline__ void __DEFAULT_FN_ATTRS 7561 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, 
__m128i __A) 7562 { 7563 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7564 } 7565 7566 static __inline__ __m128i __DEFAULT_FN_ATTRS 7567 _mm256_cvtsepi64_epi32 (__m256i __A) 7568 { 7569 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7570 (__v4si)_mm_undefined_si128(), 7571 (__mmask8) -1); 7572 } 7573 7574 static __inline__ __m128i __DEFAULT_FN_ATTRS 7575 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7576 { 7577 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7578 (__v4si)__O, 7579 __M); 7580 } 7581 7582 static __inline__ __m128i __DEFAULT_FN_ATTRS 7583 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A) 7584 { 7585 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7586 (__v4si) _mm_setzero_si128 (), 7587 __M); 7588 } 7589 7590 static __inline__ void __DEFAULT_FN_ATTRS 7591 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7592 { 7593 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7594 } 7595 7596 static __inline__ __m128i __DEFAULT_FN_ATTRS 7597 _mm_cvtsepi64_epi16 (__m128i __A) 7598 { 7599 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7600 (__v8hi)_mm_undefined_si128(), 7601 (__mmask8) -1); 7602 } 7603 7604 static __inline__ __m128i __DEFAULT_FN_ATTRS 7605 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7606 { 7607 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7608 (__v8hi) __O, __M); 7609 } 7610 7611 static __inline__ __m128i __DEFAULT_FN_ATTRS 7612 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A) 7613 { 7614 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7615 (__v8hi) _mm_setzero_si128 (), 7616 __M); 7617 } 7618 7619 static __inline__ void __DEFAULT_FN_ATTRS 7620 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7621 { 7622 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7623 } 7624 7625 static 
__inline__ __m128i __DEFAULT_FN_ATTRS 7626 _mm256_cvtsepi64_epi16 (__m256i __A) 7627 { 7628 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7629 (__v8hi)_mm_undefined_si128(), 7630 (__mmask8) -1); 7631 } 7632 7633 static __inline__ __m128i __DEFAULT_FN_ATTRS 7634 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7635 { 7636 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7637 (__v8hi) __O, __M); 7638 } 7639 7640 static __inline__ __m128i __DEFAULT_FN_ATTRS 7641 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A) 7642 { 7643 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7644 (__v8hi) _mm_setzero_si128 (), 7645 __M); 7646 } 7647 7648 static __inline__ void __DEFAULT_FN_ATTRS 7649 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7650 { 7651 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7652 } 7653 7654 static __inline__ __m128i __DEFAULT_FN_ATTRS 7655 _mm_cvtusepi32_epi8 (__m128i __A) 7656 { 7657 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7658 (__v16qi)_mm_undefined_si128(), 7659 (__mmask8) -1); 7660 } 7661 7662 static __inline__ __m128i __DEFAULT_FN_ATTRS 7663 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7664 { 7665 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7666 (__v16qi) __O, 7667 __M); 7668 } 7669 7670 static __inline__ __m128i __DEFAULT_FN_ATTRS 7671 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A) 7672 { 7673 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7674 (__v16qi) _mm_setzero_si128 (), 7675 __M); 7676 } 7677 7678 static __inline__ void __DEFAULT_FN_ATTRS 7679 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7680 { 7681 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7682 } 7683 7684 static __inline__ __m128i __DEFAULT_FN_ATTRS 7685 _mm256_cvtusepi32_epi8 (__m256i __A) 7686 { 7687 return (__m128i) 
__builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7688 (__v16qi)_mm_undefined_si128(), 7689 (__mmask8) -1); 7690 } 7691 7692 static __inline__ __m128i __DEFAULT_FN_ATTRS 7693 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7694 { 7695 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7696 (__v16qi) __O, 7697 __M); 7698 } 7699 7700 static __inline__ __m128i __DEFAULT_FN_ATTRS 7701 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A) 7702 { 7703 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7704 (__v16qi) _mm_setzero_si128 (), 7705 __M); 7706 } 7707 7708 static __inline__ void __DEFAULT_FN_ATTRS 7709 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7710 { 7711 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M); 7712 } 7713 7714 static __inline__ __m128i __DEFAULT_FN_ATTRS 7715 _mm_cvtusepi32_epi16 (__m128i __A) 7716 { 7717 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7718 (__v8hi)_mm_undefined_si128(), 7719 (__mmask8) -1); 7720 } 7721 7722 static __inline__ __m128i __DEFAULT_FN_ATTRS 7723 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7724 { 7725 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7726 (__v8hi) __O, __M); 7727 } 7728 7729 static __inline__ __m128i __DEFAULT_FN_ATTRS 7730 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A) 7731 { 7732 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7733 (__v8hi) _mm_setzero_si128 (), 7734 __M); 7735 } 7736 7737 static __inline__ void __DEFAULT_FN_ATTRS 7738 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7739 { 7740 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7741 } 7742 7743 static __inline__ __m128i __DEFAULT_FN_ATTRS 7744 _mm256_cvtusepi32_epi16 (__m256i __A) 7745 { 7746 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7747 (__v8hi) _mm_undefined_si128(), 7748 (__mmask8) -1); 7749 
} 7750 7751 static __inline__ __m128i __DEFAULT_FN_ATTRS 7752 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7753 { 7754 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7755 (__v8hi) __O, __M); 7756 } 7757 7758 static __inline__ __m128i __DEFAULT_FN_ATTRS 7759 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A) 7760 { 7761 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7762 (__v8hi) _mm_setzero_si128 (), 7763 __M); 7764 } 7765 7766 static __inline__ void __DEFAULT_FN_ATTRS 7767 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7768 { 7769 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7770 } 7771 7772 static __inline__ __m128i __DEFAULT_FN_ATTRS 7773 _mm_cvtusepi64_epi8 (__m128i __A) 7774 { 7775 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7776 (__v16qi)_mm_undefined_si128(), 7777 (__mmask8) -1); 7778 } 7779 7780 static __inline__ __m128i __DEFAULT_FN_ATTRS 7781 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7782 { 7783 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7784 (__v16qi) __O, 7785 __M); 7786 } 7787 7788 static __inline__ __m128i __DEFAULT_FN_ATTRS 7789 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A) 7790 { 7791 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7792 (__v16qi) _mm_setzero_si128 (), 7793 __M); 7794 } 7795 7796 static __inline__ void __DEFAULT_FN_ATTRS 7797 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7798 { 7799 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7800 } 7801 7802 static __inline__ __m128i __DEFAULT_FN_ATTRS 7803 _mm256_cvtusepi64_epi8 (__m256i __A) 7804 { 7805 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7806 (__v16qi)_mm_undefined_si128(), 7807 (__mmask8) -1); 7808 } 7809 7810 static __inline__ __m128i __DEFAULT_FN_ATTRS 7811 _mm256_mask_cvtusepi64_epi8 (__m128i __O, 
__mmask8 __M, __m256i __A) 7812 { 7813 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7814 (__v16qi) __O, 7815 __M); 7816 } 7817 7818 static __inline__ __m128i __DEFAULT_FN_ATTRS 7819 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A) 7820 { 7821 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7822 (__v16qi) _mm_setzero_si128 (), 7823 __M); 7824 } 7825 7826 static __inline__ void __DEFAULT_FN_ATTRS 7827 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7828 { 7829 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7830 } 7831 7832 static __inline__ __m128i __DEFAULT_FN_ATTRS 7833 _mm_cvtusepi64_epi32 (__m128i __A) 7834 { 7835 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7836 (__v4si)_mm_undefined_si128(), 7837 (__mmask8) -1); 7838 } 7839 7840 static __inline__ __m128i __DEFAULT_FN_ATTRS 7841 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7842 { 7843 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7844 (__v4si) __O, __M); 7845 } 7846 7847 static __inline__ __m128i __DEFAULT_FN_ATTRS 7848 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A) 7849 { 7850 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7851 (__v4si) _mm_setzero_si128 (), 7852 __M); 7853 } 7854 7855 static __inline__ void __DEFAULT_FN_ATTRS 7856 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7857 { 7858 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7859 } 7860 7861 static __inline__ __m128i __DEFAULT_FN_ATTRS 7862 _mm256_cvtusepi64_epi32 (__m256i __A) 7863 { 7864 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7865 (__v4si)_mm_undefined_si128(), 7866 (__mmask8) -1); 7867 } 7868 7869 static __inline__ __m128i __DEFAULT_FN_ATTRS 7870 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7871 { 7872 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7873 
(__v4si) __O, __M); 7874 } 7875 7876 static __inline__ __m128i __DEFAULT_FN_ATTRS 7877 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A) 7878 { 7879 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7880 (__v4si) _mm_setzero_si128 (), 7881 __M); 7882 } 7883 7884 static __inline__ void __DEFAULT_FN_ATTRS 7885 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7886 { 7887 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7888 } 7889 7890 static __inline__ __m128i __DEFAULT_FN_ATTRS 7891 _mm_cvtusepi64_epi16 (__m128i __A) 7892 { 7893 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7894 (__v8hi)_mm_undefined_si128(), 7895 (__mmask8) -1); 7896 } 7897 7898 static __inline__ __m128i __DEFAULT_FN_ATTRS 7899 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7900 { 7901 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7902 (__v8hi) __O, __M); 7903 } 7904 7905 static __inline__ __m128i __DEFAULT_FN_ATTRS 7906 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A) 7907 { 7908 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7909 (__v8hi) _mm_setzero_si128 (), 7910 __M); 7911 } 7912 7913 static __inline__ void __DEFAULT_FN_ATTRS 7914 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7915 { 7916 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7917 } 7918 7919 static __inline__ __m128i __DEFAULT_FN_ATTRS 7920 _mm256_cvtusepi64_epi16 (__m256i __A) 7921 { 7922 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7923 (__v8hi)_mm_undefined_si128(), 7924 (__mmask8) -1); 7925 } 7926 7927 static __inline__ __m128i __DEFAULT_FN_ATTRS 7928 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7929 { 7930 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7931 (__v8hi) __O, __M); 7932 } 7933 7934 static __inline__ __m128i __DEFAULT_FN_ATTRS 7935 
_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A) 7936 { 7937 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7938 (__v8hi) _mm_setzero_si128 (), 7939 __M); 7940 } 7941 7942 static __inline__ void __DEFAULT_FN_ATTRS 7943 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7944 { 7945 return __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7946 } 7947 7948 static __inline__ __m128i __DEFAULT_FN_ATTRS 7949 _mm_cvtepi32_epi8 (__m128i __A) 7950 { 7951 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 7952 (__v16qi)_mm_undefined_si128(), 7953 (__mmask8) -1); 7954 } 7955 7956 static __inline__ __m128i __DEFAULT_FN_ATTRS 7957 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7958 { 7959 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 7960 (__v16qi) __O, __M); 7961 } 7962 7963 static __inline__ __m128i __DEFAULT_FN_ATTRS 7964 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A) 7965 { 7966 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 7967 (__v16qi) 7968 _mm_setzero_si128 (), 7969 __M); 7970 } 7971 7972 static __inline__ void __DEFAULT_FN_ATTRS 7973 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7974 { 7975 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7976 } 7977 7978 static __inline__ __m128i __DEFAULT_FN_ATTRS 7979 _mm256_cvtepi32_epi8 (__m256i __A) 7980 { 7981 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 7982 (__v16qi)_mm_undefined_si128(), 7983 (__mmask8) -1); 7984 } 7985 7986 static __inline__ __m128i __DEFAULT_FN_ATTRS 7987 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7988 { 7989 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 7990 (__v16qi) __O, __M); 7991 } 7992 7993 static __inline__ __m128i __DEFAULT_FN_ATTRS 7994 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A) 7995 { 7996 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) 
__A, 7997 (__v16qi) _mm_setzero_si128 (), 7998 __M); 7999 } 8000 8001 static __inline__ void __DEFAULT_FN_ATTRS 8002 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 8003 { 8004 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 8005 } 8006 8007 static __inline__ __m128i __DEFAULT_FN_ATTRS 8008 _mm_cvtepi32_epi16 (__m128i __A) 8009 { 8010 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 8011 (__v8hi) _mm_setzero_si128 (), 8012 (__mmask8) -1); 8013 } 8014 8015 static __inline__ __m128i __DEFAULT_FN_ATTRS 8016 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 8017 { 8018 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 8019 (__v8hi) __O, __M); 8020 } 8021 8022 static __inline__ __m128i __DEFAULT_FN_ATTRS 8023 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A) 8024 { 8025 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 8026 (__v8hi) _mm_setzero_si128 (), 8027 __M); 8028 } 8029 8030 static __inline__ void __DEFAULT_FN_ATTRS 8031 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 8032 { 8033 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 8034 } 8035 8036 static __inline__ __m128i __DEFAULT_FN_ATTRS 8037 _mm256_cvtepi32_epi16 (__m256i __A) 8038 { 8039 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 8040 (__v8hi)_mm_setzero_si128 (), 8041 (__mmask8) -1); 8042 } 8043 8044 static __inline__ __m128i __DEFAULT_FN_ATTRS 8045 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 8046 { 8047 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 8048 (__v8hi) __O, __M); 8049 } 8050 8051 static __inline__ __m128i __DEFAULT_FN_ATTRS 8052 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A) 8053 { 8054 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 8055 (__v8hi) _mm_setzero_si128 (), 8056 __M); 8057 } 8058 8059 static __inline__ void __DEFAULT_FN_ATTRS 8060 
_mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 8061 { 8062 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 8063 } 8064 8065 static __inline__ __m128i __DEFAULT_FN_ATTRS 8066 _mm_cvtepi64_epi8 (__m128i __A) 8067 { 8068 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 8069 (__v16qi) _mm_undefined_si128(), 8070 (__mmask8) -1); 8071 } 8072 8073 static __inline__ __m128i __DEFAULT_FN_ATTRS 8074 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 8075 { 8076 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 8077 (__v16qi) __O, __M); 8078 } 8079 8080 static __inline__ __m128i __DEFAULT_FN_ATTRS 8081 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A) 8082 { 8083 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 8084 (__v16qi) _mm_setzero_si128 (), 8085 __M); 8086 } 8087 8088 static __inline__ void __DEFAULT_FN_ATTRS 8089 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 8090 { 8091 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 8092 } 8093 8094 static __inline__ __m128i __DEFAULT_FN_ATTRS 8095 _mm256_cvtepi64_epi8 (__m256i __A) 8096 { 8097 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 8098 (__v16qi) _mm_undefined_si128(), 8099 (__mmask8) -1); 8100 } 8101 8102 static __inline__ __m128i __DEFAULT_FN_ATTRS 8103 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 8104 { 8105 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 8106 (__v16qi) __O, __M); 8107 } 8108 8109 static __inline__ __m128i __DEFAULT_FN_ATTRS 8110 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A) 8111 { 8112 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 8113 (__v16qi) _mm_setzero_si128 (), 8114 __M); 8115 } 8116 8117 static __inline__ void __DEFAULT_FN_ATTRS 8118 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 8119 { 8120 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) 
__A, __M); 8121 } 8122 8123 static __inline__ __m128i __DEFAULT_FN_ATTRS 8124 _mm_cvtepi64_epi32 (__m128i __A) 8125 { 8126 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 8127 (__v4si)_mm_undefined_si128(), 8128 (__mmask8) -1); 8129 } 8130 8131 static __inline__ __m128i __DEFAULT_FN_ATTRS 8132 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 8133 { 8134 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 8135 (__v4si) __O, __M); 8136 } 8137 8138 static __inline__ __m128i __DEFAULT_FN_ATTRS 8139 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A) 8140 { 8141 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 8142 (__v4si) _mm_setzero_si128 (), 8143 __M); 8144 } 8145 8146 static __inline__ void __DEFAULT_FN_ATTRS 8147 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 8148 { 8149 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 8150 } 8151 8152 static __inline__ __m128i __DEFAULT_FN_ATTRS 8153 _mm256_cvtepi64_epi32 (__m256i __A) 8154 { 8155 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, 8156 (__v4si) _mm_undefined_si128(), 8157 (__mmask8) -1); 8158 } 8159 8160 static __inline__ __m128i __DEFAULT_FN_ATTRS 8161 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 8162 { 8163 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, 8164 (__v4si) __O, __M); 8165 } 8166 8167 static __inline__ __m128i __DEFAULT_FN_ATTRS 8168 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A) 8169 { 8170 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, 8171 (__v4si) _mm_setzero_si128 (), 8172 __M); 8173 } 8174 8175 static __inline__ void __DEFAULT_FN_ATTRS 8176 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 8177 { 8178 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 8179 } 8180 8181 static __inline__ __m128i __DEFAULT_FN_ATTRS 8182 _mm_cvtepi64_epi16 (__m128i __A) 8183 { 8184 return (__m128i) 
__builtin_ia32_pmovqw128_mask ((__v2di) __A, 8185 (__v8hi) _mm_undefined_si128(), 8186 (__mmask8) -1); 8187 } 8188 8189 static __inline__ __m128i __DEFAULT_FN_ATTRS 8190 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 8191 { 8192 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 8193 (__v8hi)__O, 8194 __M); 8195 } 8196 8197 static __inline__ __m128i __DEFAULT_FN_ATTRS 8198 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A) 8199 { 8200 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 8201 (__v8hi) _mm_setzero_si128 (), 8202 __M); 8203 } 8204 8205 static __inline__ void __DEFAULT_FN_ATTRS 8206 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 8207 { 8208 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 8209 } 8210 8211 static __inline__ __m128i __DEFAULT_FN_ATTRS 8212 _mm256_cvtepi64_epi16 (__m256i __A) 8213 { 8214 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 8215 (__v8hi)_mm_undefined_si128(), 8216 (__mmask8) -1); 8217 } 8218 8219 static __inline__ __m128i __DEFAULT_FN_ATTRS 8220 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 8221 { 8222 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 8223 (__v8hi) __O, __M); 8224 } 8225 8226 static __inline__ __m128i __DEFAULT_FN_ATTRS 8227 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A) 8228 { 8229 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 8230 (__v8hi) _mm_setzero_si128 (), 8231 __M); 8232 } 8233 8234 static __inline__ void __DEFAULT_FN_ATTRS 8235 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 8236 { 8237 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 8238 } 8239 8240 #define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \ 8241 (__m128)__builtin_shufflevector((__v8sf)(__m256)(A), \ 8242 (__v8sf)_mm256_undefined_ps(), \ 8243 ((imm) & 1) ? 4 : 0, \ 8244 ((imm) & 1) ? 5 : 1, \ 8245 ((imm) & 1) ? 
6 : 2, \ 8246 ((imm) & 1) ? 7 : 3); }) 8247 8248 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \ 8249 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 8250 (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \ 8251 (__v4sf)(W)); }) 8252 8253 #define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \ 8254 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 8255 (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \ 8256 (__v4sf)_mm_setzero_ps()); }) 8257 8258 #define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \ 8259 (__m128i)__builtin_shufflevector((__v8si)(__m256)(A), \ 8260 (__v8si)_mm256_undefined_si256(), \ 8261 ((imm) & 1) ? 4 : 0, \ 8262 ((imm) & 1) ? 5 : 1, \ 8263 ((imm) & 1) ? 6 : 2, \ 8264 ((imm) & 1) ? 7 : 3); }) 8265 8266 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \ 8267 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8268 (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \ 8269 (__v4si)(W)); }) 8270 8271 #define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \ 8272 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8273 (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \ 8274 (__v4si)_mm_setzero_si128()); }) 8275 8276 #define _mm256_insertf32x4(A, B, imm) __extension__ ({ \ 8277 (__m256)__builtin_shufflevector((__v8sf)(A), \ 8278 (__v8sf)_mm256_castps128_ps256((__m128)(B)), \ 8279 ((imm) & 0x1) ? 0 : 8, \ 8280 ((imm) & 0x1) ? 1 : 9, \ 8281 ((imm) & 0x1) ? 2 : 10, \ 8282 ((imm) & 0x1) ? 3 : 11, \ 8283 ((imm) & 0x1) ? 8 : 4, \ 8284 ((imm) & 0x1) ? 9 : 5, \ 8285 ((imm) & 0x1) ? 10 : 6, \ 8286 ((imm) & 0x1) ? 
11 : 7); }) 8287 8288 #define _mm256_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \ 8289 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 8290 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ 8291 (__v8sf)(W)); }) 8292 8293 #define _mm256_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \ 8294 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 8295 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ 8296 (__v8sf)_mm256_setzero_ps()); }) 8297 8298 #define _mm256_inserti32x4(A, B, imm) __extension__ ({ \ 8299 (__m256i)__builtin_shufflevector((__v8si)(A), \ 8300 (__v8si)_mm256_castsi128_si256((__m128i)(B)), \ 8301 ((imm) & 0x1) ? 0 : 8, \ 8302 ((imm) & 0x1) ? 1 : 9, \ 8303 ((imm) & 0x1) ? 2 : 10, \ 8304 ((imm) & 0x1) ? 3 : 11, \ 8305 ((imm) & 0x1) ? 8 : 4, \ 8306 ((imm) & 0x1) ? 9 : 5, \ 8307 ((imm) & 0x1) ? 10 : 6, \ 8308 ((imm) & 0x1) ? 11 : 7); }) 8309 8310 #define _mm256_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \ 8311 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8312 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ 8313 (__v8si)(W)); }) 8314 8315 #define _mm256_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \ 8316 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8317 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ 8318 (__v8si)_mm256_setzero_si256()); }) 8319 8320 #define _mm_getmant_pd(A, B, C) __extension__({\ 8321 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 8322 (int)(((C)<<2) | (B)), \ 8323 (__v2df)_mm_setzero_pd(), \ 8324 (__mmask8)-1); }) 8325 8326 #define _mm_mask_getmant_pd(W, U, A, B, C) __extension__({\ 8327 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 8328 (int)(((C)<<2) | (B)), \ 8329 (__v2df)(__m128d)(W), \ 8330 (__mmask8)(U)); }) 8331 8332 #define _mm_maskz_getmant_pd(U, A, B, C) __extension__({\ 8333 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 8334 (int)(((C)<<2) | (B)), \ 8335 (__v2df)_mm_setzero_pd(), \ 8336 (__mmask8)(U)); }) 8337 8338 #define 
_mm256_getmant_pd(A, B, C) __extension__ ({ \ 8339 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 8340 (int)(((C)<<2) | (B)), \ 8341 (__v4df)_mm256_setzero_pd(), \ 8342 (__mmask8)-1); }) 8343 8344 #define _mm256_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \ 8345 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 8346 (int)(((C)<<2) | (B)), \ 8347 (__v4df)(__m256d)(W), \ 8348 (__mmask8)(U)); }) 8349 8350 #define _mm256_maskz_getmant_pd(U, A, B, C) __extension__ ({ \ 8351 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 8352 (int)(((C)<<2) | (B)), \ 8353 (__v4df)_mm256_setzero_pd(), \ 8354 (__mmask8)(U)); }) 8355 8356 #define _mm_getmant_ps(A, B, C) __extension__ ({ \ 8357 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 8358 (int)(((C)<<2) | (B)), \ 8359 (__v4sf)_mm_setzero_ps(), \ 8360 (__mmask8)-1); }) 8361 8362 #define _mm_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \ 8363 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 8364 (int)(((C)<<2) | (B)), \ 8365 (__v4sf)(__m128)(W), \ 8366 (__mmask8)(U)); }) 8367 8368 #define _mm_maskz_getmant_ps(U, A, B, C) __extension__ ({ \ 8369 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 8370 (int)(((C)<<2) | (B)), \ 8371 (__v4sf)_mm_setzero_ps(), \ 8372 (__mmask8)(U)); }) 8373 8374 #define _mm256_getmant_ps(A, B, C) __extension__ ({ \ 8375 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 8376 (int)(((C)<<2) | (B)), \ 8377 (__v8sf)_mm256_setzero_ps(), \ 8378 (__mmask8)-1); }) 8379 8380 #define _mm256_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \ 8381 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 8382 (int)(((C)<<2) | (B)), \ 8383 (__v8sf)(__m256)(W), \ 8384 (__mmask8)(U)); }) 8385 8386 #define _mm256_maskz_getmant_ps(U, A, B, C) __extension__ ({ \ 8387 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 8388 (int)(((C)<<2) | (B)), \ 8389 (__v8sf)_mm256_setzero_ps(), \ 8390 
(__mmask8)(U)); }) 8391 8392 #define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8393 (__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \ 8394 (double const *)(addr), \ 8395 (__v2di)(__m128i)(index), \ 8396 (__mmask8)(mask), (int)(scale)); }) 8397 8398 #define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8399 (__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \ 8400 (long long const *)(addr), \ 8401 (__v2di)(__m128i)(index), \ 8402 (__mmask8)(mask), (int)(scale)); }) 8403 8404 #define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8405 (__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \ 8406 (double const *)(addr), \ 8407 (__v4di)(__m256i)(index), \ 8408 (__mmask8)(mask), (int)(scale)); }) 8409 8410 #define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8411 (__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \ 8412 (long long const *)(addr), \ 8413 (__v4di)(__m256i)(index), \ 8414 (__mmask8)(mask), (int)(scale)); }) 8415 8416 #define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8417 (__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \ 8418 (float const *)(addr), \ 8419 (__v2di)(__m128i)(index), \ 8420 (__mmask8)(mask), (int)(scale)); }) 8421 8422 #define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8423 (__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \ 8424 (int const *)(addr), \ 8425 (__v2di)(__m128i)(index), \ 8426 (__mmask8)(mask), (int)(scale)); }) 8427 8428 #define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8429 (__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \ 8430 (float const *)(addr), \ 8431 (__v4di)(__m256i)(index), \ 8432 (__mmask8)(mask), (int)(scale)); }) 8433 8434 #define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, 
scale) __extension__ ({\ 8435 (__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \ 8436 (int const *)(addr), \ 8437 (__v4di)(__m256i)(index), \ 8438 (__mmask8)(mask), (int)(scale)); }) 8439 8440 #define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8441 (__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \ 8442 (double const *)(addr), \ 8443 (__v4si)(__m128i)(index), \ 8444 (__mmask8)(mask), (int)(scale)); }) 8445 8446 #define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8447 (__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \ 8448 (long long const *)(addr), \ 8449 (__v4si)(__m128i)(index), \ 8450 (__mmask8)(mask), (int)(scale)); }) 8451 8452 #define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8453 (__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \ 8454 (double const *)(addr), \ 8455 (__v4si)(__m128i)(index), \ 8456 (__mmask8)(mask), (int)(scale)); }) 8457 8458 #define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8459 (__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \ 8460 (long long const *)(addr), \ 8461 (__v4si)(__m128i)(index), \ 8462 (__mmask8)(mask), (int)(scale)); }) 8463 8464 #define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8465 (__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \ 8466 (float const *)(addr), \ 8467 (__v4si)(__m128i)(index), \ 8468 (__mmask8)(mask), (int)(scale)); }) 8469 8470 #define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8471 (__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \ 8472 (int const *)(addr), \ 8473 (__v4si)(__m128i)(index), \ 8474 (__mmask8)(mask), (int)(scale)); }) 8475 8476 #define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8477 (__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \ 
8478 (float const *)(addr), \ 8479 (__v8si)(__m256i)(index), \ 8480 (__mmask8)(mask), (int)(scale)); }) 8481 8482 #define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8483 (__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \ 8484 (int const *)(addr), \ 8485 (__v8si)(__m256i)(index), \ 8486 (__mmask8)(mask), (int)(scale)); }) 8487 8488 #define _mm256_permutex_pd(X, C) __extension__ ({ \ 8489 (__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \ 8490 (__v4df)_mm256_undefined_pd(), \ 8491 ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ 8492 ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) 8493 8494 #define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \ 8495 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 8496 (__v4df)_mm256_permutex_pd((X), (C)), \ 8497 (__v4df)(__m256d)(W)); }) 8498 8499 #define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \ 8500 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 8501 (__v4df)_mm256_permutex_pd((X), (C)), \ 8502 (__v4df)_mm256_setzero_pd()); }) 8503 8504 #define _mm256_permutex_epi64(X, C) __extension__ ({ \ 8505 (__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \ 8506 (__v4di)_mm256_undefined_si256(), \ 8507 ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ 8508 ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) 8509 8510 #define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \ 8511 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8512 (__v4di)_mm256_permutex_epi64((X), (C)), \ 8513 (__v4di)(__m256i)(W)); }) 8514 8515 #define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \ 8516 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8517 (__v4di)_mm256_permutex_epi64((X), (C)), \ 8518 (__v4di)_mm256_setzero_si256()); }) 8519 8520 static __inline__ __m256d __DEFAULT_FN_ATTRS 8521 _mm256_permutexvar_pd (__m256i __X, __m256d __Y) 8522 { 8523 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, 8524 (__v4di) __X, 8525 (__v4df) _mm256_undefined_si256 (), 8526 (__mmask8) 
-1); 8527 } 8528 8529 static __inline__ __m256d __DEFAULT_FN_ATTRS 8530 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, 8531 __m256d __Y) 8532 { 8533 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, 8534 (__v4di) __X, 8535 (__v4df) __W, 8536 (__mmask8) __U); 8537 } 8538 8539 static __inline__ __m256d __DEFAULT_FN_ATTRS 8540 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) 8541 { 8542 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, 8543 (__v4di) __X, 8544 (__v4df) _mm256_setzero_pd (), 8545 (__mmask8) __U); 8546 } 8547 8548 static __inline__ __m256i __DEFAULT_FN_ATTRS 8549 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) 8550 { 8551 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, 8552 (__v4di) __X, 8553 (__v4di) _mm256_setzero_si256 (), 8554 (__mmask8) __M); 8555 } 8556 8557 static __inline__ __m256i __DEFAULT_FN_ATTRS 8558 _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y) 8559 { 8560 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, 8561 (__v4di) __X, 8562 (__v4di) _mm256_undefined_si256 (), 8563 (__mmask8) -1); 8564 } 8565 8566 static __inline__ __m256i __DEFAULT_FN_ATTRS 8567 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, 8568 __m256i __Y) 8569 { 8570 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, 8571 (__v4di) __X, 8572 (__v4di) __W, 8573 __M); 8574 } 8575 8576 static __inline__ __m256 __DEFAULT_FN_ATTRS 8577 _mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X, 8578 __m256 __Y) 8579 { 8580 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, 8581 (__v8si) __X, 8582 (__v8sf) __W, 8583 (__mmask8) __U); 8584 } 8585 8586 static __inline__ __m256 __DEFAULT_FN_ATTRS 8587 _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y) 8588 { 8589 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, 8590 (__v8si) __X, 8591 (__v8sf) _mm256_setzero_ps (), 8592 
(__mmask8) __U); 8593 } 8594 8595 static __inline__ __m256 __DEFAULT_FN_ATTRS 8596 _mm256_permutexvar_ps (__m256i __X, __m256 __Y) 8597 { 8598 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, 8599 (__v8si) __X, 8600 (__v8sf) _mm256_undefined_si256 (), 8601 (__mmask8) -1); 8602 } 8603 8604 static __inline__ __m256i __DEFAULT_FN_ATTRS 8605 _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) 8606 { 8607 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, 8608 (__v8si) __X, 8609 (__v8si) _mm256_setzero_si256 (), 8610 __M); 8611 } 8612 8613 static __inline__ __m256i __DEFAULT_FN_ATTRS 8614 _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X, 8615 __m256i __Y) 8616 { 8617 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, 8618 (__v8si) __X, 8619 (__v8si) __W, 8620 (__mmask8) __M); 8621 } 8622 8623 static __inline__ __m256i __DEFAULT_FN_ATTRS 8624 _mm256_permutexvar_epi32 (__m256i __X, __m256i __Y) 8625 { 8626 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, 8627 (__v8si) __X, 8628 (__v8si) _mm256_undefined_si256(), 8629 (__mmask8) -1); 8630 } 8631 8632 #define _mm_alignr_epi32(A, B, imm) __extension__ ({ \ 8633 (__m128i)__builtin_shufflevector((__v4si)(__m128i)(B), \ 8634 (__v4si)(__m128i)(A), \ 8635 ((int)(imm) & 0x3) + 0, \ 8636 ((int)(imm) & 0x3) + 1, \ 8637 ((int)(imm) & 0x3) + 2, \ 8638 ((int)(imm) & 0x3) + 3); }) 8639 8640 #define _mm_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \ 8641 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8642 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ 8643 (__v4si)(__m128i)(W)); }) 8644 8645 #define _mm_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \ 8646 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8647 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ 8648 (__v4si)_mm_setzero_si128()); }) 8649 8650 #define _mm256_alignr_epi32(A, B, imm) __extension__ ({ \ 8651 (__m256i)__builtin_shufflevector((__v8si)(__m256i)(B), \ 
8652 (__v8si)(__m256i)(A), \ 8653 ((int)(imm) & 0x7) + 0, \ 8654 ((int)(imm) & 0x7) + 1, \ 8655 ((int)(imm) & 0x7) + 2, \ 8656 ((int)(imm) & 0x7) + 3, \ 8657 ((int)(imm) & 0x7) + 4, \ 8658 ((int)(imm) & 0x7) + 5, \ 8659 ((int)(imm) & 0x7) + 6, \ 8660 ((int)(imm) & 0x7) + 7); }) 8661 8662 #define _mm256_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \ 8663 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8664 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ 8665 (__v8si)(__m256i)(W)); }) 8666 8667 #define _mm256_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \ 8668 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8669 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ 8670 (__v8si)_mm256_setzero_si256()); }) 8671 8672 #define _mm_alignr_epi64(A, B, imm) __extension__ ({ \ 8673 (__m128i)__builtin_shufflevector((__v2di)(__m128i)(B), \ 8674 (__v2di)(__m128i)(A), \ 8675 ((int)(imm) & 0x1) + 0, \ 8676 ((int)(imm) & 0x1) + 1); }) 8677 8678 #define _mm_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \ 8679 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 8680 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ 8681 (__v2di)(__m128i)(W)); }) 8682 8683 #define _mm_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \ 8684 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 8685 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ 8686 (__v2di)_mm_setzero_di()); }) 8687 8688 #define _mm256_alignr_epi64(A, B, imm) __extension__ ({ \ 8689 (__m256i)__builtin_shufflevector((__v4di)(__m256i)(B), \ 8690 (__v4di)(__m256i)(A), \ 8691 ((int)(imm) & 0x3) + 0, \ 8692 ((int)(imm) & 0x3) + 1, \ 8693 ((int)(imm) & 0x3) + 2, \ 8694 ((int)(imm) & 0x3) + 3); }) 8695 8696 #define _mm256_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \ 8697 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8698 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ 8699 (__v4di)(__m256i)(W)); }) 8700 8701 #define _mm256_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \ 8702 
(__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8703 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ 8704 (__v4di)_mm256_setzero_si256()); }) 8705 8706 static __inline__ __m128 __DEFAULT_FN_ATTRS 8707 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) 8708 { 8709 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8710 (__v4sf)_mm_movehdup_ps(__A), 8711 (__v4sf)__W); 8712 } 8713 8714 static __inline__ __m128 __DEFAULT_FN_ATTRS 8715 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) 8716 { 8717 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8718 (__v4sf)_mm_movehdup_ps(__A), 8719 (__v4sf)_mm_setzero_ps()); 8720 } 8721 8722 static __inline__ __m256 __DEFAULT_FN_ATTRS 8723 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) 8724 { 8725 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8726 (__v8sf)_mm256_movehdup_ps(__A), 8727 (__v8sf)__W); 8728 } 8729 8730 static __inline__ __m256 __DEFAULT_FN_ATTRS 8731 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) 8732 { 8733 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8734 (__v8sf)_mm256_movehdup_ps(__A), 8735 (__v8sf)_mm256_setzero_ps()); 8736 } 8737 8738 static __inline__ __m128 __DEFAULT_FN_ATTRS 8739 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) 8740 { 8741 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8742 (__v4sf)_mm_moveldup_ps(__A), 8743 (__v4sf)__W); 8744 } 8745 8746 static __inline__ __m128 __DEFAULT_FN_ATTRS 8747 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) 8748 { 8749 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8750 (__v4sf)_mm_moveldup_ps(__A), 8751 (__v4sf)_mm_setzero_ps()); 8752 } 8753 8754 static __inline__ __m256 __DEFAULT_FN_ATTRS 8755 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) 8756 { 8757 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8758 (__v8sf)_mm256_moveldup_ps(__A), 8759 (__v8sf)__W); 8760 } 8761 8762 static __inline__ __m256 __DEFAULT_FN_ATTRS 8763 
_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) 8764 { 8765 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8766 (__v8sf)_mm256_moveldup_ps(__A), 8767 (__v8sf)_mm256_setzero_ps()); 8768 } 8769 8770 #define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__({\ 8771 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8772 (__v8si)_mm256_shuffle_epi32((A), (I)), \ 8773 (__v8si)(__m256i)(W)); }) 8774 8775 #define _mm256_maskz_shuffle_epi32(U, A, I) __extension__({\ 8776 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8777 (__v8si)_mm256_shuffle_epi32((A), (I)), \ 8778 (__v8si)_mm256_setzero_si256()); }) 8779 8780 #define _mm_mask_shuffle_epi32(W, U, A, I) __extension__({\ 8781 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8782 (__v4si)_mm_shuffle_epi32((A), (I)), \ 8783 (__v4si)(__m128i)(W)); }) 8784 8785 #define _mm_maskz_shuffle_epi32(U, A, I) __extension__({\ 8786 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8787 (__v4si)_mm_shuffle_epi32((A), (I)), \ 8788 (__v4si)_mm_setzero_si128()); }) 8789 8790 static __inline__ __m128d __DEFAULT_FN_ATTRS 8791 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) 8792 { 8793 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 8794 (__v2df) __A, 8795 (__v2df) __W); 8796 } 8797 8798 static __inline__ __m128d __DEFAULT_FN_ATTRS 8799 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A) 8800 { 8801 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 8802 (__v2df) __A, 8803 (__v2df) _mm_setzero_pd ()); 8804 } 8805 8806 static __inline__ __m256d __DEFAULT_FN_ATTRS 8807 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) 8808 { 8809 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 8810 (__v4df) __A, 8811 (__v4df) __W); 8812 } 8813 8814 static __inline__ __m256d __DEFAULT_FN_ATTRS 8815 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) 8816 { 8817 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 8818 (__v4df) __A, 8819 (__v4df) _mm256_setzero_pd ()); 8820 } 
8821 8822 static __inline__ __m128 __DEFAULT_FN_ATTRS 8823 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) 8824 { 8825 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 8826 (__v4sf) __A, 8827 (__v4sf) __W); 8828 } 8829 8830 static __inline__ __m128 __DEFAULT_FN_ATTRS 8831 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A) 8832 { 8833 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 8834 (__v4sf) __A, 8835 (__v4sf) _mm_setzero_ps ()); 8836 } 8837 8838 static __inline__ __m256 __DEFAULT_FN_ATTRS 8839 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) 8840 { 8841 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 8842 (__v8sf) __A, 8843 (__v8sf) __W); 8844 } 8845 8846 static __inline__ __m256 __DEFAULT_FN_ATTRS 8847 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) 8848 { 8849 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 8850 (__v8sf) __A, 8851 (__v8sf) _mm256_setzero_ps ()); 8852 } 8853 8854 static __inline__ __m128 __DEFAULT_FN_ATTRS 8855 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A) 8856 { 8857 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 8858 (__v4sf) __W, 8859 (__mmask8) __U); 8860 } 8861 8862 static __inline__ __m128 __DEFAULT_FN_ATTRS 8863 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 8864 { 8865 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 8866 (__v4sf) 8867 _mm_setzero_ps (), 8868 (__mmask8) __U); 8869 } 8870 8871 static __inline__ __m256 __DEFAULT_FN_ATTRS 8872 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A) 8873 { 8874 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 8875 (__v8sf) __W, 8876 (__mmask8) __U); 8877 } 8878 8879 static __inline__ __m256 __DEFAULT_FN_ATTRS 8880 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 8881 { 8882 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 8883 (__v8sf) 8884 _mm256_setzero_ps (), 8885 (__mmask8) __U); 8886 } 8887 8888 static __inline __m128i __DEFAULT_FN_ATTRS 8889 
_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A) 8890 { 8891 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, 8892 (__v8hi) __W, 8893 (__mmask8) __U); 8894 } 8895 8896 static __inline __m128i __DEFAULT_FN_ATTRS 8897 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A) 8898 { 8899 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, 8900 (__v8hi) _mm_setzero_si128 (), 8901 (__mmask8) __U); 8902 } 8903 8904 #define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \ 8905 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 8906 (__v8hi)(__m128i)(W), \ 8907 (__mmask8)(U)); }) 8908 8909 #define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \ 8910 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 8911 (__v8hi)_mm_setzero_si128(), \ 8912 (__mmask8)(U)); }) 8913 8914 static __inline __m128i __DEFAULT_FN_ATTRS 8915 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A) 8916 { 8917 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, 8918 (__v8hi) __W, 8919 (__mmask8) __U); 8920 } 8921 8922 static __inline __m128i __DEFAULT_FN_ATTRS 8923 _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A) 8924 { 8925 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, 8926 (__v8hi) _mm_setzero_si128(), 8927 (__mmask8) __U); 8928 } 8929 #define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \ 8930 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 8931 (__v8hi)(__m128i)(W), \ 8932 (__mmask8)(U)); }) 8933 8934 #define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \ 8935 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 8936 (__v8hi)_mm_setzero_si128(), \ 8937 (__mmask8)(U)); }) 8938 8939 8940 #undef __DEFAULT_FN_ATTRS 8941 8942 #endif /* __AVX512VLINTRIN_H */ 8943