1 /*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24 #ifndef __IMMINTRIN_H 25 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead." 26 #endif 27 28 #ifndef __AVX512VLINTRIN_H 29 #define __AVX512VLINTRIN_H 30 31 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"))) 32 33 /* Doesn't require avx512vl, used in avx512dqintrin.h */ 34 static __inline __m128i __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) 35 _mm_setzero_di(void) { 36 return (__m128i)(__v2di){ 0LL, 0LL}; 37 } 38 39 /* Integer compare */ 40 41 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 42 _mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) { 43 return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b, 44 (__mmask8)-1); 45 } 46 47 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 48 _mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 49 return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b, 50 __u); 51 } 52 53 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 54 _mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) { 55 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0, 56 (__mmask8)-1); 57 } 58 59 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 60 _mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 61 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0, 62 __u); 63 } 64 65 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 66 _mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) { 67 return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b, 68 (__mmask8)-1); 69 } 70 71 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 72 _mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 73 return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b, 74 __u); 75 } 76 77 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 78 _mm256_cmpeq_epu32_mask(__m256i __a, __m256i __b) { 79 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0, 80 (__mmask8)-1); 81 } 82 83 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 84 _mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 85 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0, 86 __u); 87 } 88 89 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 90 _mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) { 91 return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b, 92 (__mmask8)-1); 93 } 94 95 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 96 _mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 97 return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b, 98 __u); 99 } 100 101 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 102 _mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) { 103 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0, 104 (__mmask8)-1); 105 } 106 107 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 108 _mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 109 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0, 110 __u); 111 } 112 113 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 114 _mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) { 115 return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b, 116 (__mmask8)-1); 117 } 118 119 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 120 _mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 121 return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b, 122 __u); 123 } 124 125 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 126 _mm256_cmpeq_epu64_mask(__m256i __a, __m256i __b) { 127 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0, 128 (__mmask8)-1); 129 } 130 131 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 132 _mm256_mask_cmpeq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 133 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0, 134 __u); 135 } 136 137 138 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 139 _mm_cmpge_epi32_mask(__m128i __a, __m128i __b) { 140 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5, 141 (__mmask8)-1); 142 } 143 144 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 145 _mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 146 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5, 147 __u); 148 } 149 150 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 151 _mm_cmpge_epu32_mask(__m128i __a, __m128i __b) { 152 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5, 153 (__mmask8)-1); 154 } 155 156 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 157 _mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 158 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5, 159 __u); 160 } 161 162 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 163 _mm256_cmpge_epi32_mask(__m256i __a, __m256i __b) { 164 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5, 165 (__mmask8)-1); 166 } 167 168 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 169 _mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 170 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5, 171 __u); 172 } 173 174 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 175 _mm256_cmpge_epu32_mask(__m256i __a, __m256i __b) { 176 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5, 177 (__mmask8)-1); 178 } 179 180 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 181 _mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 182 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5, 183 __u); 184 } 185 186 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 187 _mm_cmpge_epi64_mask(__m128i __a, __m128i __b) { 188 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5, 189 (__mmask8)-1); 190 } 191 192 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 193 _mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 194 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5, 195 __u); 196 } 197 198 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 199 _mm_cmpge_epu64_mask(__m128i __a, __m128i __b) { 200 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5, 201 (__mmask8)-1); 202 } 203 204 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 205 _mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 206 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5, 207 __u); 208 } 209 210 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 211 _mm256_cmpge_epi64_mask(__m256i __a, __m256i __b) { 212 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5, 213 (__mmask8)-1); 214 } 215 216 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 217 _mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 218 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5, 219 __u); 220 } 221 222 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 223 _mm256_cmpge_epu64_mask(__m256i __a, __m256i __b) { 224 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5, 225 (__mmask8)-1); 226 } 227 228 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 229 _mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 230 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5, 231 __u); 232 } 233 234 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 235 _mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) { 236 return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b, 237 (__mmask8)-1); 238 } 239 240 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 241 _mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 242 return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b, 243 __u); 244 } 245 246 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 247 _mm_cmpgt_epu32_mask(__m128i __a, __m128i __b) { 248 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6, 249 (__mmask8)-1); 250 } 251 252 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 253 _mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 254 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6, 255 __u); 256 } 257 258 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 259 _mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) { 260 return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b, 261 (__mmask8)-1); 262 } 263 264 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 265 _mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 266 return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b, 267 __u); 268 } 269 270 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 271 _mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b) { 272 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6, 273 (__mmask8)-1); 274 } 275 276 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 277 _mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 278 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6, 279 __u); 280 } 281 282 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 283 _mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) { 284 return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b, 285 (__mmask8)-1); 286 } 287 288 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 289 _mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 290 return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b, 291 __u); 292 } 293 294 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 295 _mm_cmpgt_epu64_mask(__m128i __a, __m128i __b) { 296 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6, 297 (__mmask8)-1); 298 } 299 300 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 301 _mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 302 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6, 303 __u); 304 } 305 306 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 307 _mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) { 308 return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b, 309 (__mmask8)-1); 310 } 311 312 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 313 _mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 314 return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b, 315 __u); 316 } 317 318 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 319 _mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b) { 320 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6, 321 (__mmask8)-1); 322 } 323 324 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 325 _mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 326 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6, 327 __u); 328 } 329 330 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 331 _mm_cmple_epi32_mask(__m128i __a, __m128i __b) { 332 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2, 333 (__mmask8)-1); 334 } 335 336 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 337 _mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 338 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2, 339 __u); 340 } 341 342 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 343 _mm_cmple_epu32_mask(__m128i __a, __m128i __b) { 344 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2, 345 (__mmask8)-1); 346 } 347 348 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 349 _mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 350 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2, 351 __u); 352 } 353 354 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 355 _mm256_cmple_epi32_mask(__m256i __a, __m256i __b) { 356 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2, 357 (__mmask8)-1); 358 } 359 360 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 361 _mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 362 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2, 363 __u); 364 } 365 366 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 367 _mm256_cmple_epu32_mask(__m256i __a, __m256i __b) { 368 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2, 369 (__mmask8)-1); 370 } 371 372 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 373 _mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 374 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2, 375 __u); 376 } 377 378 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 379 _mm_cmple_epi64_mask(__m128i __a, __m128i __b) { 380 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2, 381 (__mmask8)-1); 382 } 383 384 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 385 _mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 386 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2, 387 __u); 388 } 389 390 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 391 _mm_cmple_epu64_mask(__m128i __a, __m128i __b) { 392 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2, 393 (__mmask8)-1); 394 } 395 396 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 397 _mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 398 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2, 399 __u); 400 } 401 402 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 403 _mm256_cmple_epi64_mask(__m256i __a, __m256i __b) { 404 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2, 405 (__mmask8)-1); 406 } 407 408 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 409 _mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 410 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2, 411 __u); 412 } 413 414 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 415 _mm256_cmple_epu64_mask(__m256i __a, __m256i __b) { 416 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2, 417 (__mmask8)-1); 418 } 419 420 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 421 _mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 422 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2, 423 __u); 424 } 425 426 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 427 _mm_cmplt_epi32_mask(__m128i __a, __m128i __b) { 428 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1, 429 (__mmask8)-1); 430 } 431 432 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 433 _mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 434 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1, 435 __u); 436 } 437 438 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 439 _mm_cmplt_epu32_mask(__m128i __a, __m128i __b) { 440 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1, 441 (__mmask8)-1); 442 } 443 444 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 445 _mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 446 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1, 447 __u); 448 } 449 450 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 451 _mm256_cmplt_epi32_mask(__m256i __a, __m256i __b) { 452 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1, 453 (__mmask8)-1); 454 } 455 456 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 457 _mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 458 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1, 459 __u); 460 } 461 462 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 463 _mm256_cmplt_epu32_mask(__m256i __a, __m256i __b) { 464 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1, 465 (__mmask8)-1); 466 } 467 468 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 469 _mm256_mask_cmplt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 470 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1, 471 __u); 472 } 473 474 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 475 _mm_cmplt_epi64_mask(__m128i __a, __m128i __b) { 476 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1, 477 (__mmask8)-1); 478 } 479 480 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 481 _mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 482 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1, 483 __u); 484 } 485 486 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 487 _mm_cmplt_epu64_mask(__m128i __a, __m128i __b) { 488 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1, 489 (__mmask8)-1); 490 } 491 492 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 493 _mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 494 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1, 495 __u); 496 } 497 498 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 499 _mm256_cmplt_epi64_mask(__m256i __a, __m256i __b) { 500 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1, 501 (__mmask8)-1); 502 } 503 504 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 505 _mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 506 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1, 507 __u); 508 } 509 510 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 511 _mm256_cmplt_epu64_mask(__m256i __a, __m256i __b) { 512 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1, 513 (__mmask8)-1); 514 } 515 516 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 517 _mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 518 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1, 519 __u); 520 } 521 522 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 523 _mm_cmpneq_epi32_mask(__m128i __a, __m128i __b) { 524 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4, 525 (__mmask8)-1); 526 } 527 528 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 529 _mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 530 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4, 531 __u); 532 } 533 534 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 535 _mm_cmpneq_epu32_mask(__m128i __a, __m128i __b) { 536 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4, 537 (__mmask8)-1); 538 } 539 540 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 541 _mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 542 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4, 543 __u); 544 } 545 546 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 547 _mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b) { 548 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4, 549 (__mmask8)-1); 550 } 551 552 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 553 _mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 554 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4, 555 __u); 556 } 557 558 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 559 _mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b) { 560 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4, 561 (__mmask8)-1); 562 } 563 564 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 565 _mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 566 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4, 567 __u); 568 } 569 570 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 571 _mm_cmpneq_epi64_mask(__m128i __a, __m128i __b) { 572 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4, 573 (__mmask8)-1); 574 } 575 576 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 577 _mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 578 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4, 579 __u); 580 } 581 582 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 583 _mm_cmpneq_epu64_mask(__m128i __a, __m128i __b) { 584 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4, 585 (__mmask8)-1); 586 } 587 588 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 589 _mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 590 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4, 591 __u); 592 } 593 594 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 595 _mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b) { 596 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4, 597 (__mmask8)-1); 598 } 599 600 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 601 _mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 602 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4, 603 __u); 604 } 605 606 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 607 _mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b) { 608 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4, 609 (__mmask8)-1); 610 } 611 612 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 613 _mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 614 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4, 615 __u); 616 } 617 618 static __inline__ __m256i __DEFAULT_FN_ATTRS 619 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 620 { 621 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 622 (__v8si)_mm256_add_epi32(__A, __B), 623 (__v8si)__W); 624 } 625 626 static __inline__ __m256i __DEFAULT_FN_ATTRS 627 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) 628 { 629 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 630 (__v8si)_mm256_add_epi32(__A, __B), 631 (__v8si)_mm256_setzero_si256()); 632 } 633 634 static __inline__ __m256i __DEFAULT_FN_ATTRS 635 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 636 { 637 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 638 (__v4di)_mm256_add_epi64(__A, __B), 639 (__v4di)__W); 640 } 641 642 static __inline__ __m256i __DEFAULT_FN_ATTRS 643 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) 644 { 645 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 646 (__v4di)_mm256_add_epi64(__A, __B), 647 (__v4di)_mm256_setzero_si256()); 648 } 649 650 static __inline__ __m256i __DEFAULT_FN_ATTRS 651 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 652 { 653 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 654 (__v8si)_mm256_sub_epi32(__A, __B), 655 (__v8si)__W); 656 } 657 658 static __inline__ __m256i __DEFAULT_FN_ATTRS 659 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) 660 { 661 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 662 (__v8si)_mm256_sub_epi32(__A, __B), 663 (__v8si)_mm256_setzero_si256()); 664 } 665 666 static __inline__ __m256i __DEFAULT_FN_ATTRS 667 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 668 { 669 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 670 (__v4di)_mm256_sub_epi64(__A, __B), 671 (__v4di)__W); 672 } 673 674 static __inline__ __m256i __DEFAULT_FN_ATTRS 675 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) 676 { 677 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 678 (__v4di)_mm256_sub_epi64(__A, __B), 679 (__v4di)_mm256_setzero_si256()); 680 } 681 682 static __inline__ __m128i __DEFAULT_FN_ATTRS 683 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 684 { 685 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 686 (__v4si)_mm_add_epi32(__A, __B), 687 (__v4si)__W); 688 } 689 690 static __inline__ __m128i __DEFAULT_FN_ATTRS 691 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) 692 { 693 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 694 (__v4si)_mm_add_epi32(__A, __B), 695 (__v4si)_mm_setzero_si128()); 696 } 697 698 static __inline__ __m128i __DEFAULT_FN_ATTRS 699 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 700 { 701 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 702 (__v2di)_mm_add_epi64(__A, __B), 703 (__v2di)__W); 704 } 705 706 static __inline__ __m128i __DEFAULT_FN_ATTRS 707 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) 708 { 709 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 710 (__v2di)_mm_add_epi64(__A, __B), 711 (__v2di)_mm_setzero_si128()); 712 } 713 714 static __inline__ __m128i __DEFAULT_FN_ATTRS 715 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 716 { 717 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 718 (__v4si)_mm_sub_epi32(__A, __B), 719 (__v4si)__W); 720 } 721 722 static __inline__ __m128i __DEFAULT_FN_ATTRS 723 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) 724 { 725 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 726 (__v4si)_mm_sub_epi32(__A, __B), 727 (__v4si)_mm_setzero_si128()); 728 } 729 730 static __inline__ __m128i __DEFAULT_FN_ATTRS 731 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 732 { 733 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 734 (__v2di)_mm_sub_epi64(__A, __B), 735 (__v2di)__W); 736 } 737 738 static __inline__ __m128i __DEFAULT_FN_ATTRS 739 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) 740 { 741 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 742 (__v2di)_mm_sub_epi64(__A, __B), 743 (__v2di)_mm_setzero_si128()); 744 } 745 746 static __inline__ __m256i __DEFAULT_FN_ATTRS 747 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 748 { 749 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 750 (__v4di)_mm256_mul_epi32(__X, __Y), 751 (__v4di)__W); 752 } 753 754 static __inline__ __m256i __DEFAULT_FN_ATTRS 755 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) 756 { 757 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 758 (__v4di)_mm256_mul_epi32(__X, __Y), 759 (__v4di)_mm256_setzero_si256()); 760 } 761 762 static __inline__ __m128i __DEFAULT_FN_ATTRS 763 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 764 { 765 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 766 (__v2di)_mm_mul_epi32(__X, __Y), 767 (__v2di)__W); 768 } 769 770 static __inline__ __m128i __DEFAULT_FN_ATTRS 771 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) 772 { 773 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 774 (__v2di)_mm_mul_epi32(__X, __Y), 775 (__v2di)_mm_setzero_si128()); 776 } 777 778 static __inline__ __m256i __DEFAULT_FN_ATTRS 779 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 780 { 781 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 782 (__v4di)_mm256_mul_epu32(__X, __Y), 783 (__v4di)__W); 784 } 785 786 static __inline__ __m256i __DEFAULT_FN_ATTRS 787 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) 788 { 789 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 790 (__v4di)_mm256_mul_epu32(__X, __Y), 791 (__v4di)_mm256_setzero_si256()); 792 } 793 794 static __inline__ __m128i __DEFAULT_FN_ATTRS 795 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 796 { 797 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 798 (__v2di)_mm_mul_epu32(__X, __Y), 799 (__v2di)__W); 800 } 801 802 static __inline__ __m128i __DEFAULT_FN_ATTRS 803 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) 804 { 805 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 806 (__v2di)_mm_mul_epu32(__X, __Y), 807 (__v2di)_mm_setzero_si128()); 808 } 809 810 static __inline__ __m256i __DEFAULT_FN_ATTRS 811 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) 812 { 813 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 814 (__v8si)_mm256_mullo_epi32(__A, __B), 815 (__v8si)_mm256_setzero_si256()); 816 } 817 818 static __inline__ __m256i __DEFAULT_FN_ATTRS 819 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) 820 { 821 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 822 (__v8si)_mm256_mullo_epi32(__A, __B), 823 (__v8si)__W); 824 } 825 826 static __inline__ __m128i __DEFAULT_FN_ATTRS 827 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B) 828 { 829 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 830 (__v4si)_mm_mullo_epi32(__A, __B), 831 (__v4si)_mm_setzero_si128()); 832 } 833 834 static __inline__ __m128i __DEFAULT_FN_ATTRS 835 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 836 { 837 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 838 (__v4si)_mm_mullo_epi32(__A, __B), 839 (__v4si)__W); 840 } 841 842 static __inline__ __m256i __DEFAULT_FN_ATTRS 843 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 844 { 845 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 846 (__v8si)_mm256_and_si256(__A, __B), 847 (__v8si)__W); 848 } 849 850 static __inline__ __m256i __DEFAULT_FN_ATTRS 851 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B) 852 { 853 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B); 854 } 855 856 static __inline__ __m128i __DEFAULT_FN_ATTRS 857 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 858 { 859 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 860 (__v4si)_mm_and_si128(__A, __B), 861 (__v4si)__W); 862 } 863 864 static __inline__ __m128i __DEFAULT_FN_ATTRS 865 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B) 866 { 867 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B); 868 } 869 870 static __inline__ __m256i __DEFAULT_FN_ATTRS 871 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 872 { 873 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 874 (__v8si)_mm256_andnot_si256(__A, __B), 875 (__v8si)__W); 876 } 877 878 static __inline__ __m256i __DEFAULT_FN_ATTRS 879 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B) 880 { 881 return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(), 882 __U, __A, __B); 883 } 884 885 static __inline__ __m128i __DEFAULT_FN_ATTRS 886 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 887 { 888 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 889 (__v4si)_mm_andnot_si128(__A, __B), 890 (__v4si)__W); 891 } 892 893 static __inline__ __m128i __DEFAULT_FN_ATTRS 894 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 895 { 896 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B); 897 } 898 899 static __inline__ __m256i __DEFAULT_FN_ATTRS 900 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 901 { 902 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 903 (__v8si)_mm256_or_si256(__A, __B), 904 (__v8si)__W); 905 } 906 907 static __inline__ __m256i __DEFAULT_FN_ATTRS 908 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B) 909 { 910 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B); 911 } 912 913 static __inline__ __m128i __DEFAULT_FN_ATTRS 914 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 915 { 916 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 917 (__v4si)_mm_or_si128(__A, __B), 918 (__v4si)__W); 919 } 920 921 static __inline__ __m128i __DEFAULT_FN_ATTRS 922 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B) 923 { 924 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B); 925 } 926 927 static __inline__ __m256i __DEFAULT_FN_ATTRS 928 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 929 { 930 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 931 (__v8si)_mm256_xor_si256(__A, __B), 932 (__v8si)__W); 933 } 934 935 static __inline__ __m256i __DEFAULT_FN_ATTRS 936 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B) 937 { 938 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B); 939 } 940 941 static __inline__ __m128i __DEFAULT_FN_ATTRS 942 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, 943 __m128i __B) 944 { 945 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 946 (__v4si)_mm_xor_si128(__A, __B), 947 (__v4si)__W); 948 } 949 950 static __inline__ __m128i __DEFAULT_FN_ATTRS 951 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B) 952 { 953 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B); 954 } 955 956 static __inline__ __m256i __DEFAULT_FN_ATTRS 957 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 958 { 959 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 960 (__v4di)_mm256_and_si256(__A, __B), 961 (__v4di)__W); 962 } 963 964 static __inline__ __m256i __DEFAULT_FN_ATTRS 965 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B) 966 { 967 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B); 968 } 969 970 static __inline__ __m128i __DEFAULT_FN_ATTRS 971 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 972 { 973 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 974 (__v2di)_mm_and_si128(__A, __B), 975 (__v2di)__W); 976 } 977 978 static __inline__ __m128i __DEFAULT_FN_ATTRS 979 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B) 980 { 981 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B); 982 } 983 984 static __inline__ __m256i __DEFAULT_FN_ATTRS 985 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 986 { 987 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 988 (__v4di)_mm256_andnot_si256(__A, __B), 989 (__v4di)__W); 990 } 991 992 static __inline__ __m256i __DEFAULT_FN_ATTRS 993 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B) 994 { 995 return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(), 996 __U, __A, __B); 997 } 998 999 static __inline__ __m128i __DEFAULT_FN_ATTRS 1000 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 1001 { 1002 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 1003 (__v2di)_mm_andnot_si128(__A, __B), 1004 (__v2di)__W); 1005 } 1006 1007 static __inline__ __m128i __DEFAULT_FN_ATTRS 1008 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B) 1009 { 1010 return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B); 1011 } 1012 1013 static __inline__ __m256i __DEFAULT_FN_ATTRS 1014 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 1015 { 1016 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 1017 (__v4di)_mm256_or_si256(__A, __B), 1018 (__v4di)__W); 1019 } 1020 1021 static __inline__ __m256i __DEFAULT_FN_ATTRS 1022 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B) 1023 { 1024 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B); 1025 } 1026 1027 static __inline__ __m128i __DEFAULT_FN_ATTRS 1028 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 1029 { 1030 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 1031 (__v2di)_mm_or_si128(__A, __B), 1032 (__v2di)__W); 1033 } 1034 1035 static __inline__ __m128i __DEFAULT_FN_ATTRS 1036 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B) 1037 { 1038 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B); 1039 } 1040 1041 static __inline__ __m256i __DEFAULT_FN_ATTRS 1042 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 1043 { 1044 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 1045 (__v4di)_mm256_xor_si256(__A, __B), 1046 (__v4di)__W); 1047 } 1048 1049 static __inline__ __m256i __DEFAULT_FN_ATTRS 1050 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B) 1051 { 1052 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B); 1053 } 1054 1055 static __inline__ __m128i __DEFAULT_FN_ATTRS 1056 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, 1057 __m128i __B) 1058 { 1059 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 1060 (__v2di)_mm_xor_si128(__A, __B), 1061 (__v2di)__W); 1062 } 1063 1064 static __inline__ __m128i __DEFAULT_FN_ATTRS 1065 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) 1066 { 1067 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B); 1068 } 1069 1070 #define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \ 1071 (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 1072 (__v4si)(__m128i)(b), (int)(p), \ 1073 (__mmask8)-1); }) 1074 1075 #define _mm_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ 1076 (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 1077 (__v4si)(__m128i)(b), (int)(p), \ 1078 (__mmask8)(m)); }) 1079 1080 #define _mm_cmp_epu32_mask(a, b, p) __extension__ ({ \ 1081 (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 1082 (__v4si)(__m128i)(b), (int)(p), \ 1083 (__mmask8)-1); }) 1084 1085 #define _mm_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ 1086 (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 1087 (__v4si)(__m128i)(b), (int)(p), \ 1088 (__mmask8)(m)); }) 1089 1090 #define _mm256_cmp_epi32_mask(a, b, p) __extension__ ({ \ 1091 (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 1092 (__v8si)(__m256i)(b), (int)(p), \ 1093 (__mmask8)-1); }) 1094 1095 #define _mm256_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ 1096 (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 1097 (__v8si)(__m256i)(b), (int)(p), \ 1098 (__mmask8)(m)); }) 1099 1100 #define _mm256_cmp_epu32_mask(a, b, p) __extension__ ({ \ 1101 (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 1102 (__v8si)(__m256i)(b), (int)(p), \ 1103 (__mmask8)-1); }) 1104 1105 #define _mm256_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ 1106 (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 1107 (__v8si)(__m256i)(b), (int)(p), \ 1108 (__mmask8)(m)); }) 1109 1110 #define _mm_cmp_epi64_mask(a, b, p) __extension__ ({ \ 1111 (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 1112 (__v2di)(__m128i)(b), (int)(p), \ 1113 (__mmask8)-1); }) 1114 1115 #define _mm_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ 1116 (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 1117 (__v2di)(__m128i)(b), (int)(p), \ 1118 (__mmask8)(m)); }) 1119 1120 #define _mm_cmp_epu64_mask(a, b, p) __extension__ ({ \ 1121 (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 1122 (__v2di)(__m128i)(b), (int)(p), \ 1123 (__mmask8)-1); }) 1124 1125 #define _mm_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ 1126 (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 1127 (__v2di)(__m128i)(b), (int)(p), \ 1128 (__mmask8)(m)); }) 1129 1130 #define _mm256_cmp_epi64_mask(a, b, p) __extension__ ({ \ 1131 (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 1132 (__v4di)(__m256i)(b), (int)(p), \ 1133 (__mmask8)-1); }) 1134 1135 #define _mm256_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ 1136 (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 1137 (__v4di)(__m256i)(b), (int)(p), \ 1138 (__mmask8)(m)); }) 1139 1140 #define _mm256_cmp_epu64_mask(a, b, p) __extension__ ({ \ 1141 (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 1142 (__v4di)(__m256i)(b), (int)(p), \ 1143 (__mmask8)-1); }) 1144 1145 #define _mm256_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ 1146 (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 1147 (__v4di)(__m256i)(b), (int)(p), \ 1148 (__mmask8)(m)); }) 1149 1150 #define _mm256_cmp_ps_mask(a, b, p) __extension__ ({ \ 1151 (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 1152 (__v8sf)(__m256)(b), (int)(p), \ 1153 (__mmask8)-1); }) 1154 1155 #define _mm256_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \ 1156 (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 1157 (__v8sf)(__m256)(b), (int)(p), \ 1158 (__mmask8)(m)); }) 1159 1160 #define _mm256_cmp_pd_mask(a, b, p) __extension__ ({ \ 1161 (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ 1162 (__v4df)(__m256d)(b), (int)(p), \ 1163 (__mmask8)-1); }) 1164 1165 #define _mm256_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \ 1166 (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ 1167 (__v4df)(__m256d)(b), (int)(p), \ 1168 (__mmask8)(m)); }) 1169 1170 #define _mm_cmp_ps_mask(a, b, p) __extension__ ({ \ 1171 (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ 1172 (__v4sf)(__m128)(b), (int)(p), \ 1173 (__mmask8)-1); }) 1174 1175 #define _mm_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \ 1176 (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ 1177 (__v4sf)(__m128)(b), (int)(p), \ 1178 (__mmask8)(m)); }) 1179 1180 #define _mm_cmp_pd_mask(a, b, p) __extension__ ({ \ 1181 (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ 1182 (__v2df)(__m128d)(b), (int)(p), \ 1183 (__mmask8)-1); }) 1184 1185 #define _mm_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \ 1186 (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ 1187 (__v2df)(__m128d)(b), (int)(p), \ 1188 (__mmask8)(m)); }) 1189 1190 static __inline__ __m128d __DEFAULT_FN_ATTRS 1191 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1192 { 1193 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A, 1194 (__v2df) __B, 1195 (__v2df) __C, 1196 (__mmask8) __U); 1197 } 1198 1199 static __inline__ __m128d __DEFAULT_FN_ATTRS 1200 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1201 { 1202 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A, 1203 (__v2df) __B, 1204 (__v2df) __C, 1205 (__mmask8) __U); 1206 } 1207 1208 static __inline__ __m128d __DEFAULT_FN_ATTRS 1209 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1210 { 1211 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A, 1212 (__v2df) __B, 1213 (__v2df) __C, 1214 (__mmask8) __U); 1215 } 1216 1217 static __inline__ __m128d __DEFAULT_FN_ATTRS 1218 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1219 { 1220 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A, 1221 (__v2df) __B, 1222 -(__v2df) __C, 1223 (__mmask8) __U); 1224 } 1225 1226 static __inline__ __m128d __DEFAULT_FN_ATTRS 1227 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1228 { 1229 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A, 1230 (__v2df) __B, 1231 -(__v2df) __C, 1232 (__mmask8) __U); 1233 } 1234 1235 static __inline__ __m128d __DEFAULT_FN_ATTRS 1236 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1237 { 1238 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A, 1239 (__v2df) __B, 1240 (__v2df) __C, 1241 (__mmask8) __U); 1242 } 1243 1244 static __inline__ __m128d __DEFAULT_FN_ATTRS 1245 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1246 { 1247 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A, 1248 (__v2df) __B, 1249 (__v2df) __C, 1250 (__mmask8) __U); 1251 } 1252 1253 static __inline__ __m128d __DEFAULT_FN_ATTRS 1254 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1255 { 1256 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A, 1257 (__v2df) __B, 1258 -(__v2df) __C, 1259 (__mmask8) __U); 1260 } 1261 1262 static __inline__ __m256d __DEFAULT_FN_ATTRS 1263 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1264 { 1265 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A, 1266 (__v4df) __B, 1267 (__v4df) __C, 1268 (__mmask8) __U); 1269 } 1270 1271 static __inline__ __m256d __DEFAULT_FN_ATTRS 1272 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1273 { 1274 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A, 1275 (__v4df) __B, 1276 (__v4df) __C, 1277 (__mmask8) __U); 1278 } 1279 1280 static __inline__ __m256d __DEFAULT_FN_ATTRS 1281 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1282 { 1283 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A, 1284 (__v4df) __B, 1285 (__v4df) __C, 1286 (__mmask8) __U); 1287 } 1288 1289 static __inline__ __m256d __DEFAULT_FN_ATTRS 1290 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1291 { 1292 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A, 1293 (__v4df) __B, 1294 -(__v4df) __C, 1295 (__mmask8) __U); 1296 } 1297 1298 static __inline__ __m256d __DEFAULT_FN_ATTRS 1299 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1300 { 1301 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A, 1302 (__v4df) __B, 1303 -(__v4df) __C, 1304 (__mmask8) __U); 1305 } 1306 1307 static __inline__ __m256d __DEFAULT_FN_ATTRS 1308 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1309 { 1310 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A, 1311 (__v4df) __B, 1312 (__v4df) __C, 1313 (__mmask8) __U); 1314 } 1315 1316 static __inline__ __m256d __DEFAULT_FN_ATTRS 1317 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1318 { 1319 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A, 1320 (__v4df) __B, 1321 (__v4df) __C, 1322 (__mmask8) __U); 1323 } 1324 1325 static __inline__ __m256d __DEFAULT_FN_ATTRS 1326 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1327 { 1328 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A, 1329 (__v4df) __B, 1330 -(__v4df) __C, 1331 (__mmask8) __U); 1332 } 1333 1334 static __inline__ __m128 __DEFAULT_FN_ATTRS 1335 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1336 { 1337 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A, 1338 (__v4sf) __B, 1339 (__v4sf) __C, 1340 (__mmask8) __U); 1341 } 1342 1343 static __inline__ __m128 __DEFAULT_FN_ATTRS 1344 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1345 { 1346 return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A, 1347 (__v4sf) __B, 1348 (__v4sf) __C, 1349 (__mmask8) __U); 1350 } 1351 1352 static __inline__ __m128 __DEFAULT_FN_ATTRS 1353 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1354 { 1355 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A, 1356 (__v4sf) __B, 1357 (__v4sf) __C, 1358 (__mmask8) __U); 1359 } 1360 1361 static __inline__ __m128 __DEFAULT_FN_ATTRS 1362 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1363 { 1364 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A, 1365 (__v4sf) __B, 1366 -(__v4sf) __C, 1367 (__mmask8) __U); 1368 } 1369 1370 static __inline__ __m128 __DEFAULT_FN_ATTRS 1371 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1372 { 1373 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A, 1374 (__v4sf) __B, 1375 -(__v4sf) __C, 1376 (__mmask8) __U); 1377 } 1378 1379 static __inline__ __m128 __DEFAULT_FN_ATTRS 1380 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1381 { 1382 return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A, 1383 (__v4sf) __B, 1384 (__v4sf) __C, 1385 (__mmask8) __U); 1386 } 1387 1388 static __inline__ __m128 __DEFAULT_FN_ATTRS 1389 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1390 { 1391 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A, 1392 (__v4sf) __B, 1393 (__v4sf) __C, 1394 (__mmask8) __U); 1395 } 1396 1397 static __inline__ __m128 __DEFAULT_FN_ATTRS 1398 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1399 { 1400 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A, 1401 (__v4sf) __B, 1402 -(__v4sf) __C, 1403 (__mmask8) __U); 1404 } 1405 1406 static __inline__ __m256 __DEFAULT_FN_ATTRS 1407 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1408 { 1409 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A, 1410 (__v8sf) __B, 1411 (__v8sf) __C, 1412 (__mmask8) __U); 1413 } 1414 1415 static __inline__ __m256 __DEFAULT_FN_ATTRS 1416 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1417 { 1418 return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A, 1419 (__v8sf) __B, 1420 (__v8sf) __C, 1421 (__mmask8) __U); 1422 } 1423 1424 static __inline__ __m256 __DEFAULT_FN_ATTRS 1425 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1426 { 1427 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A, 1428 (__v8sf) __B, 1429 (__v8sf) __C, 1430 (__mmask8) __U); 1431 } 1432 1433 static __inline__ __m256 __DEFAULT_FN_ATTRS 1434 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1435 { 1436 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A, 1437 (__v8sf) __B, 1438 -(__v8sf) __C, 1439 (__mmask8) __U); 1440 } 1441 1442 static __inline__ __m256 __DEFAULT_FN_ATTRS 1443 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1444 { 1445 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A, 1446 (__v8sf) __B, 1447 -(__v8sf) __C, 1448 (__mmask8) __U); 1449 } 1450 1451 static __inline__ __m256 __DEFAULT_FN_ATTRS 1452 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1453 { 1454 return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A, 1455 (__v8sf) __B, 1456 (__v8sf) __C, 1457 (__mmask8) __U); 1458 } 1459 1460 static __inline__ __m256 __DEFAULT_FN_ATTRS 1461 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1462 { 1463 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A, 1464 (__v8sf) __B, 1465 (__v8sf) __C, 1466 (__mmask8) __U); 1467 } 1468 1469 static __inline__ __m256 __DEFAULT_FN_ATTRS 1470 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1471 { 1472 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A, 1473 (__v8sf) __B, 1474 -(__v8sf) __C, 1475 (__mmask8) __U); 1476 } 1477 1478 static __inline__ __m128d __DEFAULT_FN_ATTRS 1479 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1480 { 1481 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A, 1482 (__v2df) __B, 1483 (__v2df) __C, 1484 (__mmask8) __U); 1485 } 1486 1487 static __inline__ __m128d __DEFAULT_FN_ATTRS 1488 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1489 { 1490 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A, 1491 (__v2df) __B, 1492 (__v2df) __C, 1493 (__mmask8) 1494 __U); 1495 } 1496 1497 static __inline__ __m128d __DEFAULT_FN_ATTRS 1498 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1499 { 1500 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A, 1501 (__v2df) __B, 1502 (__v2df) __C, 1503 (__mmask8) 1504 __U); 1505 } 1506 1507 static __inline__ __m128d __DEFAULT_FN_ATTRS 1508 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1509 { 1510 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A, 1511 (__v2df) __B, 1512 -(__v2df) __C, 1513 (__mmask8) __U); 1514 } 1515 1516 static __inline__ __m128d __DEFAULT_FN_ATTRS 1517 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1518 { 1519 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A, 1520 (__v2df) __B, 1521 -(__v2df) __C, 1522 (__mmask8) 1523 __U); 1524 } 1525 1526 static __inline__ __m256d __DEFAULT_FN_ATTRS 1527 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1528 { 1529 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A, 1530 (__v4df) __B, 1531 (__v4df) __C, 1532 (__mmask8) __U); 1533 } 1534 1535 static __inline__ __m256d __DEFAULT_FN_ATTRS 1536 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1537 { 1538 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A, 1539 (__v4df) __B, 1540 (__v4df) __C, 1541 (__mmask8) 1542 __U); 1543 } 1544 1545 static __inline__ __m256d __DEFAULT_FN_ATTRS 1546 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1547 { 1548 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A, 1549 (__v4df) __B, 1550 (__v4df) __C, 1551 (__mmask8) 1552 __U); 1553 } 1554 1555 static __inline__ __m256d __DEFAULT_FN_ATTRS 1556 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1557 { 1558 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A, 1559 (__v4df) __B, 1560 -(__v4df) __C, 1561 (__mmask8) __U); 1562 } 1563 1564 static __inline__ __m256d __DEFAULT_FN_ATTRS 1565 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1566 { 1567 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A, 1568 (__v4df) __B, 1569 -(__v4df) __C, 1570 (__mmask8) 1571 __U); 1572 } 1573 1574 static __inline__ __m128 __DEFAULT_FN_ATTRS 1575 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1576 { 1577 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A, 1578 (__v4sf) __B, 1579 (__v4sf) __C, 1580 (__mmask8) __U); 1581 } 1582 1583 static __inline__ __m128 __DEFAULT_FN_ATTRS 1584 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1585 { 1586 return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A, 1587 (__v4sf) __B, 1588 (__v4sf) __C, 1589 (__mmask8) __U); 1590 } 1591 1592 static __inline__ __m128 __DEFAULT_FN_ATTRS 1593 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1594 { 1595 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A, 1596 (__v4sf) __B, 1597 (__v4sf) __C, 1598 (__mmask8) __U); 1599 } 1600 1601 static __inline__ __m128 __DEFAULT_FN_ATTRS 1602 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1603 { 1604 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A, 1605 (__v4sf) __B, 1606 -(__v4sf) __C, 1607 (__mmask8) __U); 1608 } 1609 1610 static __inline__ __m128 __DEFAULT_FN_ATTRS 1611 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1612 { 1613 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A, 1614 (__v4sf) __B, 1615 -(__v4sf) __C, 1616 (__mmask8) __U); 1617 } 1618 1619 static __inline__ __m256 __DEFAULT_FN_ATTRS 1620 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, 1621 __m256 __C) 1622 { 1623 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A, 1624 (__v8sf) __B, 1625 (__v8sf) __C, 1626 (__mmask8) __U); 1627 } 1628 1629 static __inline__ __m256 __DEFAULT_FN_ATTRS 1630 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1631 { 1632 return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A, 1633 (__v8sf) __B, 1634 (__v8sf) __C, 1635 (__mmask8) __U); 1636 } 1637 1638 static __inline__ __m256 __DEFAULT_FN_ATTRS 1639 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1640 { 1641 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A, 1642 (__v8sf) __B, 1643 (__v8sf) __C, 1644 (__mmask8) __U); 1645 } 1646 1647 static __inline__ __m256 __DEFAULT_FN_ATTRS 1648 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1649 { 1650 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A, 1651 (__v8sf) __B, 1652 -(__v8sf) __C, 1653 (__mmask8) __U); 1654 } 1655 1656 static __inline__ __m256 __DEFAULT_FN_ATTRS 1657 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1658 { 1659 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A, 1660 (__v8sf) __B, 1661 -(__v8sf) __C, 1662 (__mmask8) __U); 1663 } 1664 1665 static __inline__ __m128d __DEFAULT_FN_ATTRS 1666 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1667 { 1668 return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A, 1669 (__v2df) __B, 1670 (__v2df) __C, 1671 (__mmask8) __U); 1672 } 1673 1674 static __inline__ __m256d __DEFAULT_FN_ATTRS 1675 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1676 { 1677 return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A, 1678 (__v4df) __B, 1679 (__v4df) __C, 1680 (__mmask8) __U); 1681 } 1682 1683 static __inline__ __m128 __DEFAULT_FN_ATTRS 1684 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1685 { 1686 return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A, 1687 (__v4sf) __B, 1688 (__v4sf) __C, 1689 (__mmask8) __U); 1690 } 1691 1692 static __inline__ __m256 __DEFAULT_FN_ATTRS 1693 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1694 { 1695 return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A, 1696 (__v8sf) __B, 1697 (__v8sf) __C, 1698 (__mmask8) __U); 1699 } 1700 1701 static __inline__ __m128d __DEFAULT_FN_ATTRS 1702 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1703 { 1704 return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A, 1705 (__v2df) __B, 1706 (__v2df) __C, 1707 (__mmask8) 1708 __U); 1709 } 1710 1711 static __inline__ __m256d __DEFAULT_FN_ATTRS 1712 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1713 { 1714 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A, 1715 (__v4df) __B, 1716 (__v4df) __C, 1717 (__mmask8) 1718 __U); 1719 } 1720 1721 static __inline__ __m128 __DEFAULT_FN_ATTRS 1722 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1723 { 1724 return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A, 1725 (__v4sf) __B, 1726 (__v4sf) __C, 1727 (__mmask8) __U); 1728 } 1729 1730 static __inline__ __m256 __DEFAULT_FN_ATTRS 1731 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1732 { 1733 return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A, 1734 (__v8sf) __B, 1735 (__v8sf) __C, 1736 (__mmask8) __U); 1737 } 1738 1739 static __inline__ __m128d __DEFAULT_FN_ATTRS 1740 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1741 { 1742 return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A, 1743 (__v2df) __B, 1744 (__v2df) __C, 1745 (__mmask8) __U); 1746 } 1747 1748 static __inline__ __m256d __DEFAULT_FN_ATTRS 1749 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1750 { 1751 return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A, 1752 (__v4df) __B, 1753 (__v4df) __C, 1754 (__mmask8) __U); 1755 } 1756 1757 static __inline__ __m128 __DEFAULT_FN_ATTRS 1758 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1759 { 1760 return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A, 1761 (__v4sf) __B, 1762 (__v4sf) __C, 1763 (__mmask8) __U); 1764 } 1765 1766 static __inline__ __m256 __DEFAULT_FN_ATTRS 1767 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1768 { 1769 return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A, 1770 (__v8sf) __B, 1771 (__v8sf) __C, 1772 (__mmask8) __U); 1773 } 1774 1775 static __inline__ __m128d __DEFAULT_FN_ATTRS 1776 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1777 { 1778 return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A, 1779 (__v2df) __B, 1780 (__v2df) __C, 1781 (__mmask8) __U); 1782 } 1783 1784 static __inline__ __m128d __DEFAULT_FN_ATTRS 1785 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1786 { 1787 return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A, 1788 (__v2df) __B, 1789 (__v2df) __C, 1790 (__mmask8) __U); 1791 } 1792 1793 static __inline__ __m256d __DEFAULT_FN_ATTRS 1794 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1795 { 1796 return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A, 1797 (__v4df) __B, 1798 (__v4df) __C, 1799 (__mmask8) __U); 1800 } 1801 1802 static __inline__ __m256d __DEFAULT_FN_ATTRS 1803 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1804 { 1805 return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A, 1806 (__v4df) __B, 1807 (__v4df) __C, 1808 (__mmask8) __U); 1809 } 1810 1811 static __inline__ __m128 __DEFAULT_FN_ATTRS 1812 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1813 { 1814 return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A, 1815 (__v4sf) __B, 1816 (__v4sf) __C, 1817 (__mmask8) __U); 1818 } 1819 1820 static __inline__ __m128 __DEFAULT_FN_ATTRS 1821 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1822 { 1823 return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A, 1824 (__v4sf) __B, 1825 (__v4sf) __C, 1826 (__mmask8) __U); 1827 } 1828 1829 static __inline__ __m256 __DEFAULT_FN_ATTRS 1830 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1831 { 1832 return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A, 1833 (__v8sf) __B, 1834 (__v8sf) __C, 1835 (__mmask8) __U); 1836 } 1837 1838 static __inline__ __m256 __DEFAULT_FN_ATTRS 1839 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1840 { 1841 return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A, 1842 (__v8sf) __B, 1843 (__v8sf) __C, 1844 (__mmask8) __U); 1845 } 1846 1847 static __inline__ __m128d __DEFAULT_FN_ATTRS 1848 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 1849 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 1850 (__v2df)_mm_add_pd(__A, __B), 1851 (__v2df)__W); 1852 } 1853 1854 static __inline__ __m128d __DEFAULT_FN_ATTRS 1855 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) { 1856 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 1857 (__v2df)_mm_add_pd(__A, __B), 1858 (__v2df)_mm_setzero_pd()); 1859 } 1860 1861 static __inline__ __m256d __DEFAULT_FN_ATTRS 1862 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 1863 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 1864 (__v4df)_mm256_add_pd(__A, __B), 1865 (__v4df)__W); 1866 } 1867 1868 static __inline__ __m256d __DEFAULT_FN_ATTRS 1869 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) { 1870 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 1871 (__v4df)_mm256_add_pd(__A, __B), 1872 (__v4df)_mm256_setzero_pd()); 1873 } 1874 1875 static __inline__ __m128 __DEFAULT_FN_ATTRS 1876 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 1877 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1878 (__v4sf)_mm_add_ps(__A, __B), 1879 (__v4sf)__W); 1880 } 1881 1882 static __inline__ __m128 __DEFAULT_FN_ATTRS 1883 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) { 1884 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1885 (__v4sf)_mm_add_ps(__A, __B), 1886 (__v4sf)_mm_setzero_ps()); 1887 } 1888 1889 static __inline__ __m256 __DEFAULT_FN_ATTRS 1890 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 1891 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1892 (__v8sf)_mm256_add_ps(__A, __B), 1893 (__v8sf)__W); 1894 } 1895 1896 static __inline__ __m256 __DEFAULT_FN_ATTRS 1897 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) { 1898 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1899 (__v8sf)_mm256_add_ps(__A, __B), 1900 (__v8sf)_mm256_setzero_ps()); 1901 } 1902 1903 static __inline__ __m128i __DEFAULT_FN_ATTRS 1904 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) { 1905 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 1906 (__v4si) __W, 1907 (__v4si) __A); 1908 } 1909 1910 static __inline__ __m256i __DEFAULT_FN_ATTRS 1911 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) { 1912 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 1913 (__v8si) __W, 1914 (__v8si) __A); 1915 } 1916 1917 static __inline__ __m128d __DEFAULT_FN_ATTRS 1918 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) { 1919 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 1920 (__v2df) __W, 1921 (__v2df) __A); 1922 } 1923 1924 static __inline__ __m256d __DEFAULT_FN_ATTRS 1925 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) { 1926 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 1927 (__v4df) __W, 1928 (__v4df) __A); 1929 } 1930 1931 static __inline__ __m128 __DEFAULT_FN_ATTRS 1932 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) { 1933 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 1934 (__v4sf) __W, 1935 (__v4sf) __A); 1936 } 1937 1938 static __inline__ __m256 __DEFAULT_FN_ATTRS 1939 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) { 1940 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 1941 (__v8sf) __W, 1942 (__v8sf) __A); 1943 } 1944 1945 static __inline__ __m128i __DEFAULT_FN_ATTRS 1946 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) { 1947 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 1948 (__v2di) __W, 1949 (__v2di) __A); 1950 } 1951 1952 static __inline__ __m256i __DEFAULT_FN_ATTRS 1953 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) { 1954 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 1955 (__v4di) __W, 1956 (__v4di) __A); 1957 } 1958 1959 static __inline__ __m128d __DEFAULT_FN_ATTRS 1960 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) { 1961 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 1962 (__v2df) __W, 1963 (__mmask8) __U); 1964 } 1965 1966 static __inline__ __m128d __DEFAULT_FN_ATTRS 1967 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) { 1968 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 1969 (__v2df) 1970 _mm_setzero_pd (), 1971 (__mmask8) __U); 1972 } 1973 1974 static __inline__ __m256d __DEFAULT_FN_ATTRS 1975 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) { 1976 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 1977 (__v4df) __W, 1978 (__mmask8) __U); 1979 } 1980 1981 static __inline__ __m256d __DEFAULT_FN_ATTRS 1982 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) { 1983 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 1984 (__v4df) 1985 _mm256_setzero_pd (), 1986 (__mmask8) __U); 1987 } 1988 1989 static __inline__ __m128i __DEFAULT_FN_ATTRS 1990 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 1991 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 1992 (__v2di) __W, 1993 (__mmask8) __U); 1994 } 1995 1996 static __inline__ __m128i __DEFAULT_FN_ATTRS 1997 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) { 1998 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 1999 (__v2di) 2000 _mm_setzero_si128 (), 2001 (__mmask8) __U); 2002 } 2003 2004 static __inline__ __m256i __DEFAULT_FN_ATTRS 2005 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 2006 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 2007 (__v4di) __W, 2008 (__mmask8) __U); 2009 } 2010 2011 static __inline__ __m256i __DEFAULT_FN_ATTRS 2012 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) { 2013 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 2014 (__v4di) 2015 _mm256_setzero_si256 (), 2016 (__mmask8) __U); 2017 } 2018 2019 static __inline__ __m128 __DEFAULT_FN_ATTRS 2020 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) { 2021 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 2022 (__v4sf) __W, 2023 (__mmask8) __U); 2024 } 2025 2026 static __inline__ __m128 __DEFAULT_FN_ATTRS 2027 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) { 2028 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 2029 (__v4sf) 2030 _mm_setzero_ps (), 2031 (__mmask8) __U); 2032 } 2033 2034 static __inline__ __m256 __DEFAULT_FN_ATTRS 2035 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) { 2036 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 2037 (__v8sf) __W, 2038 (__mmask8) __U); 2039 } 2040 2041 static __inline__ __m256 __DEFAULT_FN_ATTRS 2042 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) { 2043 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 2044 (__v8sf) 2045 _mm256_setzero_ps (), 2046 (__mmask8) __U); 2047 } 2048 2049 static __inline__ __m128i __DEFAULT_FN_ATTRS 2050 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 2051 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 2052 (__v4si) __W, 2053 (__mmask8) __U); 2054 } 2055 2056 static __inline__ __m128i __DEFAULT_FN_ATTRS 2057 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) { 2058 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 2059 (__v4si) 2060 _mm_setzero_si128 (), 2061 (__mmask8) __U); 2062 } 2063 2064 static __inline__ __m256i __DEFAULT_FN_ATTRS 2065 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 2066 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 2067 (__v8si) __W, 2068 (__mmask8) __U); 2069 } 2070 2071 static __inline__ __m256i __DEFAULT_FN_ATTRS 2072 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) { 2073 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 2074 (__v8si) 2075 _mm256_setzero_si256 (), 2076 (__mmask8) __U); 2077 } 2078 2079 static __inline__ void __DEFAULT_FN_ATTRS 2080 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) { 2081 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P, 2082 (__v2df) __A, 2083 (__mmask8) __U); 2084 } 2085 2086 static __inline__ void __DEFAULT_FN_ATTRS 2087 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) { 2088 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P, 2089 (__v4df) __A, 2090 (__mmask8) __U); 2091 } 2092 2093 static __inline__ void __DEFAULT_FN_ATTRS 2094 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) { 2095 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P, 2096 (__v2di) __A, 2097 (__mmask8) __U); 2098 } 2099 2100 static __inline__ void __DEFAULT_FN_ATTRS 2101 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) { 2102 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P, 2103 (__v4di) __A, 2104 (__mmask8) __U); 2105 } 2106 2107 static __inline__ void __DEFAULT_FN_ATTRS 2108 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) { 2109 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P, 2110 (__v4sf) __A, 2111 (__mmask8) __U); 2112 } 2113 2114 static __inline__ void __DEFAULT_FN_ATTRS 2115 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) { 2116 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P, 2117 (__v8sf) __A, 2118 (__mmask8) __U); 2119 } 2120 2121 static __inline__ void __DEFAULT_FN_ATTRS 2122 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) { 2123 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P, 2124 (__v4si) __A, 2125 (__mmask8) __U); 2126 } 2127 2128 static __inline__ void __DEFAULT_FN_ATTRS 2129 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) { 2130 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P, 2131 (__v8si) __A, 2132 (__mmask8) __U); 2133 } 2134 2135 static __inline__ __m128d __DEFAULT_FN_ATTRS 2136 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 2137 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 2138 (__v2df)_mm_cvtepi32_pd(__A), 2139 (__v2df)__W); 2140 } 2141 2142 static __inline__ __m128d __DEFAULT_FN_ATTRS 2143 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 2144 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 2145 (__v2df)_mm_cvtepi32_pd(__A), 2146 (__v2df)_mm_setzero_pd()); 2147 } 2148 2149 static __inline__ __m256d __DEFAULT_FN_ATTRS 2150 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 2151 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 2152 (__v4df)_mm256_cvtepi32_pd(__A), 2153 (__v4df)__W); 2154 } 2155 2156 static __inline__ __m256d __DEFAULT_FN_ATTRS 2157 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 2158 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 2159 (__v4df)_mm256_cvtepi32_pd(__A), 2160 (__v4df)_mm256_setzero_pd()); 2161 } 2162 2163 static __inline__ __m128 __DEFAULT_FN_ATTRS 2164 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 2165 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A, 2166 (__v4sf) __W, 2167 (__mmask8) __U); 2168 } 2169 2170 static __inline__ __m128 __DEFAULT_FN_ATTRS 2171 _mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A) { 2172 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A, 2173 (__v4sf) 2174 _mm_setzero_ps (), 2175 (__mmask8) __U); 2176 } 2177 2178 static __inline__ __m256 __DEFAULT_FN_ATTRS 2179 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 2180 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A, 2181 (__v8sf) __W, 2182 (__mmask8) __U); 2183 } 2184 2185 static __inline__ __m256 __DEFAULT_FN_ATTRS 2186 _mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A) { 2187 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A, 2188 (__v8sf) 2189 _mm256_setzero_ps (), 2190 (__mmask8) __U); 2191 } 2192 2193 static __inline__ __m128i __DEFAULT_FN_ATTRS 2194 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 2195 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 2196 (__v4si) __W, 2197 (__mmask8) __U); 2198 } 2199 2200 static __inline__ __m128i __DEFAULT_FN_ATTRS 2201 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) { 2202 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 2203 (__v4si) 2204 _mm_setzero_si128 (), 2205 (__mmask8) __U); 2206 } 2207 2208 static __inline__ __m128i __DEFAULT_FN_ATTRS 2209 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 2210 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A, 2211 (__v4si) __W, 2212 (__mmask8) __U); 2213 } 2214 2215 static __inline__ __m128i __DEFAULT_FN_ATTRS 2216 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) { 2217 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A, 2218 (__v4si) 2219 _mm_setzero_si128 (), 2220 (__mmask8) __U); 2221 } 2222 2223 static __inline__ __m128 __DEFAULT_FN_ATTRS 2224 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) { 2225 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 2226 (__v4sf) __W, 2227 (__mmask8) __U); 2228 } 2229 2230 static __inline__ __m128 __DEFAULT_FN_ATTRS 2231 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) { 2232 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 2233 (__v4sf) 2234 _mm_setzero_ps (), 2235 (__mmask8) __U); 2236 } 2237 2238 static __inline__ __m128 __DEFAULT_FN_ATTRS 2239 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) { 2240 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A, 2241 (__v4sf) __W, 2242 (__mmask8) __U); 2243 } 2244 2245 static __inline__ __m128 __DEFAULT_FN_ATTRS 2246 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) { 2247 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A, 2248 (__v4sf) 2249 _mm_setzero_ps (), 2250 (__mmask8) __U); 2251 } 2252 2253 static __inline__ __m128i __DEFAULT_FN_ATTRS 2254 _mm_cvtpd_epu32 (__m128d __A) { 2255 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2256 (__v4si) 2257 _mm_setzero_si128 (), 2258 (__mmask8) -1); 2259 } 2260 2261 static __inline__ __m128i __DEFAULT_FN_ATTRS 2262 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 2263 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2264 (__v4si) __W, 2265 (__mmask8) __U); 2266 } 2267 2268 static __inline__ __m128i __DEFAULT_FN_ATTRS 2269 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) { 2270 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2271 (__v4si) 2272 _mm_setzero_si128 (), 2273 (__mmask8) __U); 2274 } 2275 2276 static __inline__ __m128i __DEFAULT_FN_ATTRS 2277 _mm256_cvtpd_epu32 (__m256d __A) { 2278 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2279 (__v4si) 2280 _mm_setzero_si128 (), 2281 (__mmask8) -1); 2282 } 2283 2284 static __inline__ __m128i __DEFAULT_FN_ATTRS 2285 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 2286 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2287 (__v4si) __W, 2288 (__mmask8) __U); 2289 } 2290 2291 static __inline__ __m128i __DEFAULT_FN_ATTRS 2292 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) { 2293 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2294 (__v4si) 2295 _mm_setzero_si128 (), 2296 (__mmask8) __U); 2297 } 2298 2299 static __inline__ __m128i __DEFAULT_FN_ATTRS 2300 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { 2301 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A, 2302 (__v4si) __W, 2303 (__mmask8) __U); 2304 } 2305 2306 static __inline__ __m128i __DEFAULT_FN_ATTRS 2307 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) { 2308 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A, 2309 (__v4si) 2310 _mm_setzero_si128 (), 2311 (__mmask8) __U); 2312 } 2313 2314 static __inline__ __m256i __DEFAULT_FN_ATTRS 2315 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 2316 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A, 2317 (__v8si) __W, 2318 (__mmask8) __U); 2319 } 2320 2321 static __inline__ __m256i __DEFAULT_FN_ATTRS 2322 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) { 2323 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A, 2324 (__v8si) 2325 _mm256_setzero_si256 (), 2326 (__mmask8) __U); 2327 } 2328 2329 static __inline__ __m128d __DEFAULT_FN_ATTRS 2330 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) { 2331 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A, 2332 (__v2df) __W, 2333 (__mmask8) __U); 2334 } 2335 2336 static __inline__ __m128d __DEFAULT_FN_ATTRS 2337 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 2338 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A, 2339 (__v2df) 2340 _mm_setzero_pd (), 2341 (__mmask8) __U); 2342 } 2343 2344 static __inline__ __m256d __DEFAULT_FN_ATTRS 2345 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) { 2346 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A, 2347 (__v4df) __W, 2348 (__mmask8) __U); 2349 } 2350 2351 static __inline__ __m256d __DEFAULT_FN_ATTRS 2352 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 2353 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A, 2354 (__v4df) 2355 _mm256_setzero_pd (), 2356 (__mmask8) __U); 2357 } 2358 2359 static __inline__ __m128i __DEFAULT_FN_ATTRS 2360 _mm_cvtps_epu32 (__m128 __A) { 2361 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2362 (__v4si) 2363 _mm_setzero_si128 (), 2364 (__mmask8) -1); 2365 } 2366 2367 static __inline__ __m128i __DEFAULT_FN_ATTRS 2368 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 2369 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2370 (__v4si) __W, 2371 (__mmask8) __U); 2372 } 2373 2374 static __inline__ __m128i __DEFAULT_FN_ATTRS 2375 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) { 2376 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2377 (__v4si) 2378 _mm_setzero_si128 (), 2379 (__mmask8) __U); 2380 } 2381 2382 static __inline__ __m256i __DEFAULT_FN_ATTRS 2383 _mm256_cvtps_epu32 (__m256 __A) { 2384 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2385 (__v8si) 2386 _mm256_setzero_si256 (), 2387 (__mmask8) -1); 2388 } 2389 2390 static __inline__ __m256i __DEFAULT_FN_ATTRS 2391 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { 2392 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2393 (__v8si) __W, 2394 (__mmask8) __U); 2395 } 2396 2397 static __inline__ __m256i __DEFAULT_FN_ATTRS 2398 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) { 2399 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2400 (__v8si) 2401 _mm256_setzero_si256 (), 2402 (__mmask8) __U); 2403 } 2404 2405 static __inline__ __m128i __DEFAULT_FN_ATTRS 2406 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 2407 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 2408 (__v4si) __W, 2409 (__mmask8) __U); 2410 } 2411 2412 static __inline__ __m128i __DEFAULT_FN_ATTRS 2413 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) { 2414 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 2415 (__v4si) 2416 _mm_setzero_si128 (), 2417 (__mmask8) __U); 2418 } 2419 2420 static __inline__ __m128i __DEFAULT_FN_ATTRS 2421 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 2422 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A, 2423 (__v4si) __W, 2424 (__mmask8) __U); 2425 } 2426 2427 static __inline__ __m128i __DEFAULT_FN_ATTRS 2428 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) { 2429 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A, 2430 (__v4si) 2431 _mm_setzero_si128 (), 2432 (__mmask8) __U); 2433 } 2434 2435 static __inline__ __m128i __DEFAULT_FN_ATTRS 2436 _mm_cvttpd_epu32 (__m128d __A) { 2437 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2438 (__v4si) 2439 _mm_setzero_si128 (), 2440 (__mmask8) -1); 2441 } 2442 2443 static __inline__ __m128i __DEFAULT_FN_ATTRS 2444 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 2445 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2446 (__v4si) __W, 2447 (__mmask8) __U); 2448 } 2449 2450 static __inline__ __m128i __DEFAULT_FN_ATTRS 2451 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) { 2452 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2453 (__v4si) 2454 _mm_setzero_si128 (), 2455 (__mmask8) __U); 2456 } 2457 2458 static __inline__ __m128i __DEFAULT_FN_ATTRS 2459 _mm256_cvttpd_epu32 (__m256d __A) { 2460 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2461 (__v4si) 2462 _mm_setzero_si128 (), 2463 (__mmask8) -1); 2464 } 2465 2466 static __inline__ __m128i __DEFAULT_FN_ATTRS 2467 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 2468 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2469 (__v4si) __W, 2470 (__mmask8) __U); 2471 } 2472 2473 static __inline__ __m128i __DEFAULT_FN_ATTRS 2474 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) { 2475 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2476 (__v4si) 2477 _mm_setzero_si128 (), 2478 (__mmask8) __U); 2479 } 2480 2481 static __inline__ __m128i __DEFAULT_FN_ATTRS 2482 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { 2483 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A, 2484 (__v4si) __W, 2485 (__mmask8) __U); 2486 } 2487 2488 static __inline__ __m128i __DEFAULT_FN_ATTRS 2489 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) { 2490 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A, 2491 (__v4si) 2492 _mm_setzero_si128 (), 2493 (__mmask8) __U); 2494 } 2495 2496 static __inline__ __m256i __DEFAULT_FN_ATTRS 2497 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 2498 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A, 2499 (__v8si) __W, 2500 (__mmask8) __U); 2501 } 2502 2503 static __inline__ __m256i __DEFAULT_FN_ATTRS 2504 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) { 2505 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A, 2506 (__v8si) 2507 _mm256_setzero_si256 (), 2508 (__mmask8) __U); 2509 } 2510 2511 static __inline__ __m128i __DEFAULT_FN_ATTRS 2512 _mm_cvttps_epu32 (__m128 __A) { 2513 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2514 (__v4si) 2515 _mm_setzero_si128 (), 2516 (__mmask8) -1); 2517 } 2518 2519 static __inline__ __m128i __DEFAULT_FN_ATTRS 2520 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 2521 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2522 (__v4si) __W, 2523 (__mmask8) __U); 2524 } 2525 2526 static __inline__ __m128i __DEFAULT_FN_ATTRS 2527 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) { 2528 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2529 (__v4si) 2530 _mm_setzero_si128 (), 2531 (__mmask8) __U); 2532 } 2533 2534 static __inline__ __m256i __DEFAULT_FN_ATTRS 2535 _mm256_cvttps_epu32 (__m256 __A) { 2536 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2537 (__v8si) 2538 _mm256_setzero_si256 (), 2539 (__mmask8) -1); 2540 } 2541 2542 static __inline__ __m256i __DEFAULT_FN_ATTRS 2543 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { 2544 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2545 (__v8si) __W, 2546 (__mmask8) __U); 2547 } 2548 2549 static __inline__ __m256i __DEFAULT_FN_ATTRS 2550 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) { 2551 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2552 (__v8si) 2553 _mm256_setzero_si256 (), 2554 (__mmask8) __U); 2555 } 2556 2557 static __inline__ __m128d __DEFAULT_FN_ATTRS 2558 _mm_cvtepu32_pd (__m128i __A) { 2559 return (__m128d) __builtin_convertvector( 2560 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df); 2561 } 2562 2563 static __inline__ __m128d __DEFAULT_FN_ATTRS 2564 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 2565 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 2566 (__v2df)_mm_cvtepu32_pd(__A), 2567 (__v2df)__W); 2568 } 2569 2570 static __inline__ __m128d __DEFAULT_FN_ATTRS 2571 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 2572 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 2573 (__v2df)_mm_cvtepu32_pd(__A), 2574 (__v2df)_mm_setzero_pd()); 2575 } 2576 2577 static __inline__ __m256d __DEFAULT_FN_ATTRS 2578 _mm256_cvtepu32_pd (__m128i __A) { 2579 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df); 2580 } 2581 2582 static __inline__ __m256d __DEFAULT_FN_ATTRS 2583 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 2584 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 2585 (__v4df)_mm256_cvtepu32_pd(__A), 2586 (__v4df)__W); 2587 } 2588 2589 static __inline__ __m256d __DEFAULT_FN_ATTRS 2590 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 2591 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 2592 (__v4df)_mm256_cvtepu32_pd(__A), 2593 (__v4df)_mm256_setzero_pd()); 2594 } 2595 2596 static __inline__ __m128 __DEFAULT_FN_ATTRS 2597 _mm_cvtepu32_ps (__m128i __A) { 2598 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A, 2599 (__v4sf) 2600 _mm_setzero_ps (), 2601 (__mmask8) -1); 2602 } 2603 2604 static __inline__ __m128 __DEFAULT_FN_ATTRS 2605 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 2606 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A, 2607 (__v4sf) __W, 2608 (__mmask8) __U); 2609 } 2610 2611 static __inline__ __m128 __DEFAULT_FN_ATTRS 2612 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) { 2613 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A, 2614 (__v4sf) 2615 _mm_setzero_ps (), 2616 (__mmask8) __U); 2617 } 2618 2619 static __inline__ __m256 __DEFAULT_FN_ATTRS 2620 _mm256_cvtepu32_ps (__m256i __A) { 2621 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A, 2622 (__v8sf) 2623 _mm256_setzero_ps (), 2624 (__mmask8) -1); 2625 } 2626 2627 static __inline__ __m256 __DEFAULT_FN_ATTRS 2628 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 2629 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A, 2630 (__v8sf) __W, 2631 (__mmask8) __U); 2632 } 2633 2634 static __inline__ __m256 __DEFAULT_FN_ATTRS 2635 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) { 2636 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A, 2637 (__v8sf) 2638 _mm256_setzero_ps (), 2639 (__mmask8) __U); 2640 } 2641 2642 static __inline__ __m128d __DEFAULT_FN_ATTRS 2643 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2644 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2645 (__v2df)_mm_div_pd(__A, __B), 2646 (__v2df)__W); 2647 } 2648 2649 static __inline__ __m128d __DEFAULT_FN_ATTRS 2650 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2651 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2652 (__v2df)_mm_div_pd(__A, __B), 2653 (__v2df)_mm_setzero_pd()); 2654 } 2655 2656 static __inline__ __m256d __DEFAULT_FN_ATTRS 2657 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2658 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2659 (__v4df)_mm256_div_pd(__A, __B), 2660 (__v4df)__W); 2661 } 2662 2663 static __inline__ __m256d __DEFAULT_FN_ATTRS 2664 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2665 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2666 (__v4df)_mm256_div_pd(__A, __B), 2667 (__v4df)_mm256_setzero_pd()); 2668 } 2669 2670 static __inline__ __m128 __DEFAULT_FN_ATTRS 2671 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2672 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2673 (__v4sf)_mm_div_ps(__A, __B), 2674 (__v4sf)__W); 2675 } 2676 2677 static __inline__ __m128 __DEFAULT_FN_ATTRS 2678 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2679 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2680 (__v4sf)_mm_div_ps(__A, __B), 2681 (__v4sf)_mm_setzero_ps()); 2682 } 2683 2684 static __inline__ __m256 __DEFAULT_FN_ATTRS 2685 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2686 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2687 (__v8sf)_mm256_div_ps(__A, __B), 2688 (__v8sf)__W); 2689 } 2690 2691 static __inline__ __m256 __DEFAULT_FN_ATTRS 2692 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2693 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2694 (__v8sf)_mm256_div_ps(__A, __B), 2695 (__v8sf)_mm256_setzero_ps()); 2696 } 2697 2698 static __inline__ __m128d __DEFAULT_FN_ATTRS 2699 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) { 2700 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 2701 (__v2df) __W, 2702 (__mmask8) __U); 2703 } 2704 2705 static __inline__ __m128d __DEFAULT_FN_ATTRS 2706 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) { 2707 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 2708 (__v2df) 2709 _mm_setzero_pd (), 2710 (__mmask8) __U); 2711 } 2712 2713 static __inline__ __m256d __DEFAULT_FN_ATTRS 2714 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) { 2715 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 2716 (__v4df) __W, 2717 (__mmask8) __U); 2718 } 2719 2720 static __inline__ __m256d __DEFAULT_FN_ATTRS 2721 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) { 2722 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 2723 (__v4df) 2724 _mm256_setzero_pd (), 2725 (__mmask8) __U); 2726 } 2727 2728 static __inline__ __m128i __DEFAULT_FN_ATTRS 2729 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 2730 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 2731 (__v2di) __W, 2732 (__mmask8) __U); 2733 } 2734 2735 static __inline__ __m128i __DEFAULT_FN_ATTRS 2736 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) { 2737 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 2738 (__v2di) 2739 _mm_setzero_si128 (), 2740 (__mmask8) __U); 2741 } 2742 2743 static __inline__ __m256i __DEFAULT_FN_ATTRS 2744 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 2745 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 2746 (__v4di) __W, 2747 (__mmask8) __U); 2748 } 2749 2750 static __inline__ __m256i __DEFAULT_FN_ATTRS 2751 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) { 2752 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 2753 (__v4di) 2754 _mm256_setzero_si256 (), 2755 (__mmask8) __U); 2756 } 2757 2758 static __inline__ __m128d __DEFAULT_FN_ATTRS 2759 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) { 2760 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P, 2761 (__v2df) __W, 2762 (__mmask8) 2763 __U); 2764 } 2765 2766 static __inline__ __m128d __DEFAULT_FN_ATTRS 2767 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2768 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P, 2769 (__v2df) 2770 _mm_setzero_pd (), 2771 (__mmask8) 2772 __U); 2773 } 2774 2775 static __inline__ __m256d __DEFAULT_FN_ATTRS 2776 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) { 2777 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P, 2778 (__v4df) __W, 2779 (__mmask8) 2780 __U); 2781 } 2782 2783 static __inline__ __m256d __DEFAULT_FN_ATTRS 2784 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2785 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P, 2786 (__v4df) 2787 _mm256_setzero_pd (), 2788 (__mmask8) 2789 __U); 2790 } 2791 2792 static __inline__ __m128i __DEFAULT_FN_ATTRS 2793 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { 2794 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P, 2795 (__v2di) __W, 2796 (__mmask8) 2797 __U); 2798 } 2799 2800 static __inline__ __m128i __DEFAULT_FN_ATTRS 2801 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2802 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P, 2803 (__v2di) 2804 _mm_setzero_si128 (), 2805 (__mmask8) 2806 __U); 2807 } 2808 2809 static __inline__ __m256i __DEFAULT_FN_ATTRS 2810 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U, 2811 void const *__P) { 2812 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P, 2813 (__v4di) __W, 2814 (__mmask8) 2815 __U); 2816 } 2817 2818 static __inline__ __m256i __DEFAULT_FN_ATTRS 2819 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2820 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P, 2821 (__v4di) 2822 _mm256_setzero_si256 (), 2823 (__mmask8) 2824 __U); 2825 } 2826 2827 static __inline__ __m128 __DEFAULT_FN_ATTRS 2828 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) { 2829 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P, 2830 (__v4sf) __W, 2831 (__mmask8) __U); 2832 } 2833 2834 static __inline__ __m128 __DEFAULT_FN_ATTRS 2835 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2836 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P, 2837 (__v4sf) 2838 _mm_setzero_ps (), 2839 (__mmask8) 2840 __U); 2841 } 2842 2843 static __inline__ __m256 __DEFAULT_FN_ATTRS 2844 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) { 2845 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P, 2846 (__v8sf) __W, 2847 (__mmask8) __U); 2848 } 2849 2850 static __inline__ __m256 __DEFAULT_FN_ATTRS 2851 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2852 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P, 2853 (__v8sf) 2854 _mm256_setzero_ps (), 2855 (__mmask8) 2856 __U); 2857 } 2858 2859 static __inline__ __m128i __DEFAULT_FN_ATTRS 2860 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { 2861 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P, 2862 (__v4si) __W, 2863 (__mmask8) 2864 __U); 2865 } 2866 2867 static __inline__ __m128i __DEFAULT_FN_ATTRS 2868 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2869 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P, 2870 (__v4si) 2871 _mm_setzero_si128 (), 2872 (__mmask8) __U); 2873 } 2874 2875 static __inline__ __m256i __DEFAULT_FN_ATTRS 2876 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U, 2877 void const *__P) { 2878 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P, 2879 (__v8si) __W, 2880 (__mmask8) 2881 __U); 2882 } 2883 2884 static __inline__ __m256i __DEFAULT_FN_ATTRS 2885 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2886 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P, 2887 (__v8si) 2888 _mm256_setzero_si256 (), 2889 (__mmask8) 2890 __U); 2891 } 2892 2893 static __inline__ __m128 __DEFAULT_FN_ATTRS 2894 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) { 2895 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 2896 (__v4sf) __W, 2897 (__mmask8) __U); 2898 } 2899 2900 static __inline__ __m128 __DEFAULT_FN_ATTRS 2901 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) { 2902 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 2903 (__v4sf) 2904 _mm_setzero_ps (), 2905 (__mmask8) __U); 2906 } 2907 2908 static __inline__ __m256 __DEFAULT_FN_ATTRS 2909 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) { 2910 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 2911 (__v8sf) __W, 2912 (__mmask8) __U); 2913 } 2914 2915 static __inline__ __m256 __DEFAULT_FN_ATTRS 2916 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) { 2917 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 2918 (__v8sf) 2919 _mm256_setzero_ps (), 2920 (__mmask8) __U); 2921 } 2922 2923 static __inline__ __m128i __DEFAULT_FN_ATTRS 2924 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 2925 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 2926 (__v4si) __W, 2927 (__mmask8) __U); 2928 } 2929 2930 static __inline__ __m128i __DEFAULT_FN_ATTRS 2931 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) { 2932 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 2933 (__v4si) 2934 _mm_setzero_si128 (), 2935 (__mmask8) __U); 2936 } 2937 2938 static __inline__ __m256i __DEFAULT_FN_ATTRS 2939 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 2940 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 2941 (__v8si) __W, 2942 (__mmask8) __U); 2943 } 2944 2945 static __inline__ __m256i __DEFAULT_FN_ATTRS 2946 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) { 2947 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 2948 (__v8si) 2949 _mm256_setzero_si256 (), 2950 (__mmask8) __U); 2951 } 2952 2953 static __inline__ __m128d __DEFAULT_FN_ATTRS 2954 _mm_getexp_pd (__m128d __A) { 2955 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2956 (__v2df) 2957 _mm_setzero_pd (), 2958 (__mmask8) -1); 2959 } 2960 2961 static __inline__ __m128d __DEFAULT_FN_ATTRS 2962 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) { 2963 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2964 (__v2df) __W, 2965 (__mmask8) __U); 2966 } 2967 2968 static __inline__ __m128d __DEFAULT_FN_ATTRS 2969 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) { 2970 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2971 (__v2df) 2972 _mm_setzero_pd (), 2973 (__mmask8) __U); 2974 } 2975 2976 static __inline__ __m256d __DEFAULT_FN_ATTRS 2977 _mm256_getexp_pd (__m256d __A) { 2978 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2979 (__v4df) 2980 _mm256_setzero_pd (), 2981 (__mmask8) -1); 2982 } 2983 2984 static __inline__ __m256d __DEFAULT_FN_ATTRS 2985 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) { 2986 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2987 (__v4df) __W, 2988 (__mmask8) __U); 2989 } 2990 2991 static __inline__ __m256d __DEFAULT_FN_ATTRS 2992 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) { 2993 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2994 (__v4df) 2995 _mm256_setzero_pd (), 2996 (__mmask8) __U); 2997 } 2998 2999 static __inline__ __m128 __DEFAULT_FN_ATTRS 3000 _mm_getexp_ps (__m128 __A) { 3001 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 3002 (__v4sf) 3003 _mm_setzero_ps (), 3004 (__mmask8) -1); 3005 } 3006 3007 static __inline__ __m128 __DEFAULT_FN_ATTRS 3008 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) { 3009 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 3010 (__v4sf) __W, 3011 (__mmask8) __U); 3012 } 3013 3014 static __inline__ __m128 __DEFAULT_FN_ATTRS 3015 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) { 3016 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 3017 (__v4sf) 3018 _mm_setzero_ps (), 3019 (__mmask8) __U); 3020 } 3021 3022 static __inline__ __m256 __DEFAULT_FN_ATTRS 3023 _mm256_getexp_ps (__m256 __A) { 3024 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 3025 (__v8sf) 3026 _mm256_setzero_ps (), 3027 (__mmask8) -1); 3028 } 3029 3030 static __inline__ __m256 __DEFAULT_FN_ATTRS 3031 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) { 3032 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 3033 (__v8sf) __W, 3034 (__mmask8) __U); 3035 } 3036 3037 static __inline__ __m256 __DEFAULT_FN_ATTRS 3038 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) { 3039 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 3040 (__v8sf) 3041 _mm256_setzero_ps (), 3042 (__mmask8) __U); 3043 } 3044 3045 static __inline__ __m128d __DEFAULT_FN_ATTRS 3046 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 3047 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3048 (__v2df)_mm_max_pd(__A, __B), 3049 (__v2df)__W); 3050 } 3051 3052 static __inline__ __m128d __DEFAULT_FN_ATTRS 3053 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) { 3054 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3055 (__v2df)_mm_max_pd(__A, __B), 3056 (__v2df)_mm_setzero_pd()); 3057 } 3058 3059 static __inline__ __m256d __DEFAULT_FN_ATTRS 3060 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 3061 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3062 (__v4df)_mm256_max_pd(__A, __B), 3063 (__v4df)__W); 3064 } 3065 3066 static __inline__ __m256d __DEFAULT_FN_ATTRS 3067 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) { 3068 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3069 (__v4df)_mm256_max_pd(__A, __B), 3070 (__v4df)_mm256_setzero_pd()); 3071 } 3072 3073 static __inline__ __m128 __DEFAULT_FN_ATTRS 3074 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3075 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3076 (__v4sf)_mm_max_ps(__A, __B), 3077 (__v4sf)__W); 3078 } 3079 3080 static __inline__ __m128 __DEFAULT_FN_ATTRS 3081 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) { 3082 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3083 (__v4sf)_mm_max_ps(__A, __B), 3084 (__v4sf)_mm_setzero_ps()); 3085 } 3086 3087 static __inline__ __m256 __DEFAULT_FN_ATTRS 3088 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 3089 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3090 (__v8sf)_mm256_max_ps(__A, __B), 3091 (__v8sf)__W); 3092 } 3093 3094 static __inline__ __m256 __DEFAULT_FN_ATTRS 3095 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) { 3096 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3097 (__v8sf)_mm256_max_ps(__A, __B), 3098 (__v8sf)_mm256_setzero_ps()); 3099 } 3100 3101 static __inline__ __m128d __DEFAULT_FN_ATTRS 3102 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 3103 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3104 (__v2df)_mm_min_pd(__A, __B), 3105 (__v2df)__W); 3106 } 3107 3108 static __inline__ __m128d __DEFAULT_FN_ATTRS 3109 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) { 3110 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3111 (__v2df)_mm_min_pd(__A, __B), 3112 (__v2df)_mm_setzero_pd()); 3113 } 3114 3115 static __inline__ __m256d __DEFAULT_FN_ATTRS 3116 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 3117 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3118 (__v4df)_mm256_min_pd(__A, __B), 3119 (__v4df)__W); 3120 } 3121 3122 static __inline__ __m256d __DEFAULT_FN_ATTRS 3123 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) { 3124 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3125 (__v4df)_mm256_min_pd(__A, __B), 3126 (__v4df)_mm256_setzero_pd()); 3127 } 3128 3129 static __inline__ __m128 __DEFAULT_FN_ATTRS 3130 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3131 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3132 (__v4sf)_mm_min_ps(__A, __B), 3133 (__v4sf)__W); 3134 } 3135 3136 static __inline__ __m128 __DEFAULT_FN_ATTRS 3137 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) { 3138 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3139 (__v4sf)_mm_min_ps(__A, __B), 3140 (__v4sf)_mm_setzero_ps()); 3141 } 3142 3143 static __inline__ __m256 __DEFAULT_FN_ATTRS 3144 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 3145 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3146 (__v8sf)_mm256_min_ps(__A, __B), 3147 (__v8sf)__W); 3148 } 3149 3150 static __inline__ __m256 __DEFAULT_FN_ATTRS 3151 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) { 3152 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3153 (__v8sf)_mm256_min_ps(__A, __B), 3154 (__v8sf)_mm256_setzero_ps()); 3155 } 3156 3157 static __inline__ __m128d __DEFAULT_FN_ATTRS 3158 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 3159 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3160 (__v2df)_mm_mul_pd(__A, __B), 3161 (__v2df)__W); 3162 } 3163 3164 static __inline__ __m128d __DEFAULT_FN_ATTRS 3165 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) { 3166 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3167 (__v2df)_mm_mul_pd(__A, __B), 3168 (__v2df)_mm_setzero_pd()); 3169 } 3170 3171 static __inline__ __m256d __DEFAULT_FN_ATTRS 3172 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 3173 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3174 (__v4df)_mm256_mul_pd(__A, __B), 3175 (__v4df)__W); 3176 } 3177 3178 static __inline__ __m256d __DEFAULT_FN_ATTRS 3179 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) { 3180 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3181 (__v4df)_mm256_mul_pd(__A, __B), 3182 (__v4df)_mm256_setzero_pd()); 3183 } 3184 3185 static __inline__ __m128 __DEFAULT_FN_ATTRS 3186 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3187 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3188 (__v4sf)_mm_mul_ps(__A, __B), 3189 (__v4sf)__W); 3190 } 3191 3192 static __inline__ __m128 __DEFAULT_FN_ATTRS 3193 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) { 3194 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3195 (__v4sf)_mm_mul_ps(__A, __B), 3196 (__v4sf)_mm_setzero_ps()); 3197 } 3198 3199 static __inline__ __m256 __DEFAULT_FN_ATTRS 3200 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 3201 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3202 (__v8sf)_mm256_mul_ps(__A, __B), 3203 (__v8sf)__W); 3204 } 3205 3206 static __inline__ __m256 __DEFAULT_FN_ATTRS 3207 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) { 3208 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3209 (__v8sf)_mm256_mul_ps(__A, __B), 3210 (__v8sf)_mm256_setzero_ps()); 3211 } 3212 3213 static __inline__ __m128i __DEFAULT_FN_ATTRS 3214 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) { 3215 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3216 (__v4si)_mm_abs_epi32(__A), 3217 (__v4si)__W); 3218 } 3219 3220 static __inline__ __m128i __DEFAULT_FN_ATTRS 3221 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) { 3222 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3223 (__v4si)_mm_abs_epi32(__A), 3224 (__v4si)_mm_setzero_si128()); 3225 } 3226 3227 static __inline__ __m256i __DEFAULT_FN_ATTRS 3228 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) { 3229 return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U, 3230 (__v8si)_mm256_abs_epi32(__A), 3231 (__v8si)__W); 3232 } 3233 3234 static __inline__ __m256i __DEFAULT_FN_ATTRS 3235 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { 3236 return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U, 3237 (__v8si)_mm256_abs_epi32(__A), 3238 (__v8si)_mm256_setzero_si256()); 3239 } 3240 3241 static __inline__ __m128i __DEFAULT_FN_ATTRS 3242 _mm_abs_epi64 (__m128i __A) { 3243 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A, 3244 (__v2di) 3245 _mm_setzero_si128 (), 3246 (__mmask8) -1); 3247 } 3248 3249 static __inline__ __m128i __DEFAULT_FN_ATTRS 3250 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 3251 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A, 3252 (__v2di) __W, 3253 (__mmask8) __U); 3254 } 3255 3256 static __inline__ __m128i __DEFAULT_FN_ATTRS 3257 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { 3258 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A, 3259 (__v2di) 3260 _mm_setzero_si128 (), 3261 (__mmask8) __U); 3262 } 3263 3264 static __inline__ __m256i __DEFAULT_FN_ATTRS 3265 _mm256_abs_epi64 (__m256i __A) { 3266 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A, 3267 (__v4di) 3268 _mm256_setzero_si256 (), 3269 (__mmask8) -1); 3270 } 3271 3272 static __inline__ __m256i __DEFAULT_FN_ATTRS 3273 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 3274 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A, 3275 (__v4di) __W, 3276 (__mmask8) __U); 3277 } 3278 3279 static __inline__ __m256i __DEFAULT_FN_ATTRS 3280 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) { 3281 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A, 3282 (__v4di) 3283 _mm256_setzero_si256 (), 3284 (__mmask8) __U); 3285 } 3286 3287 static __inline__ __m128i __DEFAULT_FN_ATTRS 3288 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 3289 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3290 (__v4si)_mm_max_epi32(__A, __B), 3291 (__v4si)_mm_setzero_si128()); 3292 } 3293 3294 static __inline__ __m128i __DEFAULT_FN_ATTRS 3295 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3296 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3297 (__v4si)_mm_max_epi32(__A, __B), 3298 (__v4si)__W); 3299 } 3300 3301 static __inline__ __m256i __DEFAULT_FN_ATTRS 3302 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) { 3303 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3304 (__v8si)_mm256_max_epi32(__A, __B), 3305 (__v8si)_mm256_setzero_si256()); 3306 } 3307 3308 static __inline__ __m256i __DEFAULT_FN_ATTRS 3309 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3310 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3311 (__v8si)_mm256_max_epi32(__A, __B), 3312 (__v8si)__W); 3313 } 3314 3315 static __inline__ __m128i __DEFAULT_FN_ATTRS 3316 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { 3317 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, 3318 (__v2di) __B, 3319 (__v2di) 3320 _mm_setzero_si128 (), 3321 __M); 3322 } 3323 3324 static __inline__ __m128i __DEFAULT_FN_ATTRS 3325 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, 3326 __m128i __B) { 3327 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, 3328 (__v2di) __B, 3329 (__v2di) __W, __M); 3330 } 3331 3332 static __inline__ __m128i __DEFAULT_FN_ATTRS 3333 _mm_max_epi64 (__m128i __A, __m128i __B) { 3334 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, 3335 (__v2di) __B, 3336 (__v2di) 3337 _mm_setzero_si128 (), 3338 (__mmask8) -1); 3339 } 3340 3341 static __inline__ __m256i __DEFAULT_FN_ATTRS 3342 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 3343 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A, 3344 (__v4di) __B, 3345 (__v4di) 3346 _mm256_setzero_si256 (), 3347 __M); 3348 } 3349 3350 static __inline__ __m256i __DEFAULT_FN_ATTRS 3351 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, 3352 __m256i __B) { 3353 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A, 3354 (__v4di) __B, 3355 (__v4di) __W, __M); 3356 } 3357 3358 static __inline__ __m256i __DEFAULT_FN_ATTRS 3359 _mm256_max_epi64 (__m256i __A, __m256i __B) { 3360 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A, 3361 (__v4di) __B, 3362 (__v4di) 3363 _mm256_setzero_si256 (), 3364 (__mmask8) -1); 3365 } 3366 3367 static __inline__ __m128i __DEFAULT_FN_ATTRS 3368 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { 3369 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3370 (__v4si)_mm_max_epu32(__A, __B), 3371 (__v4si)_mm_setzero_si128()); 3372 } 3373 3374 static __inline__ __m128i __DEFAULT_FN_ATTRS 3375 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3376 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3377 (__v4si)_mm_max_epu32(__A, __B), 3378 (__v4si)__W); 3379 } 3380 3381 static __inline__ __m256i __DEFAULT_FN_ATTRS 3382 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { 3383 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3384 (__v8si)_mm256_max_epu32(__A, __B), 3385 (__v8si)_mm256_setzero_si256()); 3386 } 3387 3388 static __inline__ __m256i __DEFAULT_FN_ATTRS 3389 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3390 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3391 (__v8si)_mm256_max_epu32(__A, __B), 3392 (__v8si)__W); 3393 } 3394 3395 static __inline__ __m128i __DEFAULT_FN_ATTRS 3396 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 3397 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, 3398 (__v2di) __B, 3399 (__v2di) 3400 _mm_setzero_si128 (), 3401 __M); 3402 } 3403 3404 static __inline__ __m128i __DEFAULT_FN_ATTRS 3405 _mm_max_epu64 (__m128i __A, __m128i __B) { 3406 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, 3407 (__v2di) __B, 3408 (__v2di) 3409 _mm_setzero_si128 (), 3410 (__mmask8) -1); 3411 } 3412 3413 static __inline__ __m128i __DEFAULT_FN_ATTRS 3414 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, 3415 __m128i __B) { 3416 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, 3417 (__v2di) __B, 3418 (__v2di) __W, __M); 3419 } 3420 3421 static __inline__ __m256i __DEFAULT_FN_ATTRS 3422 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 3423 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A, 3424 (__v4di) __B, 3425 (__v4di) 3426 _mm256_setzero_si256 (), 3427 __M); 3428 } 3429 3430 static __inline__ __m256i __DEFAULT_FN_ATTRS 3431 _mm256_max_epu64 (__m256i __A, __m256i __B) { 3432 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A, 3433 (__v4di) __B, 3434 (__v4di) 3435 _mm256_setzero_si256 (), 3436 (__mmask8) -1); 3437 } 3438 3439 static __inline__ __m256i __DEFAULT_FN_ATTRS 3440 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, 3441 __m256i __B) { 3442 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A, 3443 (__v4di) __B, 3444 (__v4di) __W, __M); 3445 } 3446 3447 static __inline__ __m128i __DEFAULT_FN_ATTRS 3448 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 3449 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3450 (__v4si)_mm_min_epi32(__A, __B), 3451 (__v4si)_mm_setzero_si128()); 3452 } 3453 3454 static __inline__ __m128i __DEFAULT_FN_ATTRS 3455 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3456 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3457 (__v4si)_mm_min_epi32(__A, __B), 3458 (__v4si)__W); 3459 } 3460 3461 static __inline__ __m256i __DEFAULT_FN_ATTRS 3462 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { 3463 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3464 (__v8si)_mm256_min_epi32(__A, __B), 3465 (__v8si)_mm256_setzero_si256()); 3466 } 3467 3468 static __inline__ __m256i __DEFAULT_FN_ATTRS 3469 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3470 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3471 (__v8si)_mm256_min_epi32(__A, __B), 3472 (__v8si)__W); 3473 } 3474 3475 static __inline__ __m128i __DEFAULT_FN_ATTRS 3476 _mm_min_epi64 (__m128i __A, __m128i __B) { 3477 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, 3478 (__v2di) __B, 3479 (__v2di) 3480 _mm_setzero_si128 (), 3481 (__mmask8) -1); 3482 } 3483 3484 static __inline__ __m128i __DEFAULT_FN_ATTRS 3485 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, 3486 __m128i __B) { 3487 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, 3488 (__v2di) __B, 3489 (__v2di) __W, __M); 3490 } 3491 3492 static __inline__ __m128i __DEFAULT_FN_ATTRS 3493 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { 3494 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, 3495 (__v2di) __B, 3496 (__v2di) 3497 _mm_setzero_si128 (), 3498 __M); 3499 } 3500 3501 static __inline__ __m256i __DEFAULT_FN_ATTRS 3502 _mm256_min_epi64 (__m256i __A, __m256i __B) { 3503 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A, 3504 (__v4di) __B, 3505 (__v4di) 3506 _mm256_setzero_si256 (), 3507 (__mmask8) -1); 3508 } 3509 3510 static __inline__ __m256i __DEFAULT_FN_ATTRS 3511 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, 3512 __m256i __B) { 3513 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A, 3514 (__v4di) __B, 3515 (__v4di) __W, __M); 3516 } 3517 3518 static __inline__ __m256i __DEFAULT_FN_ATTRS 3519 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 3520 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A, 3521 (__v4di) __B, 3522 (__v4di) 3523 _mm256_setzero_si256 (), 3524 __M); 3525 } 3526 3527 static __inline__ __m128i __DEFAULT_FN_ATTRS 3528 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { 3529 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3530 (__v4si)_mm_min_epu32(__A, __B), 3531 (__v4si)_mm_setzero_si128()); 3532 } 3533 3534 static __inline__ __m128i __DEFAULT_FN_ATTRS 3535 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3536 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3537 (__v4si)_mm_min_epu32(__A, __B), 3538 (__v4si)__W); 3539 } 3540 3541 static __inline__ __m256i __DEFAULT_FN_ATTRS 3542 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) { 3543 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3544 (__v8si)_mm256_min_epu32(__A, __B), 3545 (__v8si)_mm256_setzero_si256()); 3546 } 3547 3548 static __inline__ __m256i __DEFAULT_FN_ATTRS 3549 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3550 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3551 (__v8si)_mm256_min_epu32(__A, __B), 3552 (__v8si)__W); 3553 } 3554 3555 static __inline__ __m128i __DEFAULT_FN_ATTRS 3556 _mm_min_epu64 (__m128i __A, __m128i __B) { 3557 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, 3558 (__v2di) __B, 3559 (__v2di) 3560 _mm_setzero_si128 (), 3561 (__mmask8) -1); 3562 } 3563 3564 static __inline__ __m128i __DEFAULT_FN_ATTRS 3565 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, 3566 __m128i __B) { 3567 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, 3568 (__v2di) __B, 3569 (__v2di) __W, __M); 3570 } 3571 3572 static __inline__ __m128i __DEFAULT_FN_ATTRS 3573 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 3574 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, 3575 (__v2di) __B, 3576 (__v2di) 3577 _mm_setzero_si128 (), 3578 __M); 3579 } 3580 3581 static __inline__ __m256i __DEFAULT_FN_ATTRS 3582 _mm256_min_epu64 (__m256i __A, __m256i __B) { 3583 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A, 3584 (__v4di) __B, 3585 (__v4di) 3586 _mm256_setzero_si256 (), 3587 (__mmask8) -1); 3588 } 3589 3590 static __inline__ __m256i __DEFAULT_FN_ATTRS 3591 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, 3592 __m256i __B) { 3593 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A, 3594 (__v4di) __B, 3595 (__v4di) __W, __M); 3596 } 3597 3598 static __inline__ __m256i __DEFAULT_FN_ATTRS 3599 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 3600 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A, 3601 (__v4di) __B, 3602 (__v4di) 3603 _mm256_setzero_si256 (), 3604 __M); 3605 } 3606 3607 #define _mm_roundscale_pd(A, imm) __extension__ ({ \ 3608 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3609 (int)(imm), \ 3610 (__v2df)_mm_setzero_pd(), \ 3611 (__mmask8)-1); }) 3612 3613 3614 #define _mm_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \ 3615 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3616 (int)(imm), \ 3617 (__v2df)(__m128d)(W), \ 3618 (__mmask8)(U)); }) 3619 3620 3621 #define _mm_maskz_roundscale_pd(U, A, imm) __extension__ ({ \ 3622 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3623 (int)(imm), \ 3624 (__v2df)_mm_setzero_pd(), \ 3625 (__mmask8)(U)); }) 3626 3627 3628 #define _mm256_roundscale_pd(A, imm) __extension__ ({ \ 3629 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3630 (int)(imm), \ 3631 (__v4df)_mm256_setzero_pd(), \ 3632 (__mmask8)-1); }) 3633 3634 3635 #define _mm256_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \ 3636 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3637 (int)(imm), \ 3638 (__v4df)(__m256d)(W), \ 3639 (__mmask8)(U)); }) 3640 3641 3642 #define _mm256_maskz_roundscale_pd(U, A, imm) __extension__ ({ \ 3643 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3644 (int)(imm), \ 3645 (__v4df)_mm256_setzero_pd(), \ 3646 (__mmask8)(U)); }) 3647 3648 #define _mm_roundscale_ps(A, imm) __extension__ ({ \ 3649 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3650 (__v4sf)_mm_setzero_ps(), \ 3651 (__mmask8)-1); }) 3652 3653 3654 #define _mm_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \ 3655 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3656 (__v4sf)(__m128)(W), \ 3657 (__mmask8)(U)); }) 3658 3659 3660 #define _mm_maskz_roundscale_ps(U, A, imm) __extension__ ({ \ 3661 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3662 (__v4sf)_mm_setzero_ps(), \ 3663 (__mmask8)(U)); }) 3664 3665 #define _mm256_roundscale_ps(A, imm) __extension__ ({ \ 3666 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3667 (__v8sf)_mm256_setzero_ps(), \ 3668 (__mmask8)-1); }) 3669 3670 #define _mm256_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \ 3671 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3672 (__v8sf)(__m256)(W), \ 3673 (__mmask8)(U)); }) 3674 3675 3676 #define _mm256_maskz_roundscale_ps(U, A, imm) __extension__ ({ \ 3677 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3678 (__v8sf)_mm256_setzero_ps(), \ 3679 (__mmask8)(U)); }) 3680 3681 static __inline__ __m128d __DEFAULT_FN_ATTRS 3682 _mm_scalef_pd (__m128d __A, __m128d __B) { 3683 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3684 (__v2df) __B, 3685 (__v2df) 3686 _mm_setzero_pd (), 3687 (__mmask8) -1); 3688 } 3689 3690 static __inline__ __m128d __DEFAULT_FN_ATTRS 3691 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A, 3692 __m128d __B) { 3693 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3694 (__v2df) __B, 3695 (__v2df) __W, 3696 (__mmask8) __U); 3697 } 3698 3699 static __inline__ __m128d __DEFAULT_FN_ATTRS 3700 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) { 3701 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3702 (__v2df) __B, 3703 (__v2df) 3704 _mm_setzero_pd (), 3705 (__mmask8) __U); 3706 } 3707 3708 static __inline__ __m256d __DEFAULT_FN_ATTRS 3709 _mm256_scalef_pd (__m256d __A, __m256d __B) { 3710 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3711 (__v4df) __B, 3712 (__v4df) 3713 _mm256_setzero_pd (), 3714 (__mmask8) -1); 3715 } 3716 3717 static __inline__ __m256d __DEFAULT_FN_ATTRS 3718 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A, 3719 __m256d __B) { 3720 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3721 (__v4df) __B, 3722 (__v4df) __W, 3723 (__mmask8) __U); 3724 } 3725 3726 static __inline__ __m256d __DEFAULT_FN_ATTRS 3727 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) { 3728 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3729 (__v4df) __B, 3730 (__v4df) 3731 _mm256_setzero_pd (), 3732 (__mmask8) __U); 3733 } 3734 3735 static __inline__ __m128 __DEFAULT_FN_ATTRS 3736 _mm_scalef_ps (__m128 __A, __m128 __B) { 3737 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3738 (__v4sf) __B, 3739 (__v4sf) 3740 _mm_setzero_ps (), 3741 (__mmask8) -1); 3742 } 3743 3744 static __inline__ __m128 __DEFAULT_FN_ATTRS 3745 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3746 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3747 (__v4sf) __B, 3748 (__v4sf) __W, 3749 (__mmask8) __U); 3750 } 3751 3752 static __inline__ __m128 __DEFAULT_FN_ATTRS 3753 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) { 3754 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3755 (__v4sf) __B, 3756 (__v4sf) 3757 _mm_setzero_ps (), 3758 (__mmask8) __U); 3759 } 3760 3761 static __inline__ __m256 __DEFAULT_FN_ATTRS 3762 _mm256_scalef_ps (__m256 __A, __m256 __B) { 3763 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3764 (__v8sf) __B, 3765 (__v8sf) 3766 _mm256_setzero_ps (), 3767 (__mmask8) -1); 3768 } 3769 3770 static __inline__ __m256 __DEFAULT_FN_ATTRS 3771 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A, 3772 __m256 __B) { 3773 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3774 (__v8sf) __B, 3775 (__v8sf) __W, 3776 (__mmask8) __U); 3777 } 3778 3779 static __inline__ __m256 __DEFAULT_FN_ATTRS 3780 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { 3781 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3782 (__v8sf) __B, 3783 (__v8sf) 3784 _mm256_setzero_ps (), 3785 (__mmask8) __U); 3786 } 3787 3788 #define _mm_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \ 3789 __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)-1, \ 3790 (__v2di)(__m128i)(index), \ 3791 (__v2df)(__m128d)(v1), (int)(scale)); }) 3792 3793 #define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ 3794 __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)(mask), \ 3795 (__v2di)(__m128i)(index), \ 3796 (__v2df)(__m128d)(v1), (int)(scale)); }) 3797 3798 #define _mm_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \ 3799 __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)-1, \ 3800 (__v2di)(__m128i)(index), \ 3801 (__v2di)(__m128i)(v1), (int)(scale)); }) 3802 3803 #define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ 3804 __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)(mask), \ 3805 (__v2di)(__m128i)(index), \ 3806 (__v2di)(__m128i)(v1), (int)(scale)); }) 3807 3808 #define _mm256_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \ 3809 __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)-1, \ 3810 (__v4di)(__m256i)(index), \ 3811 (__v4df)(__m256d)(v1), (int)(scale)); }) 3812 3813 #define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ 3814 __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)(mask), \ 3815 (__v4di)(__m256i)(index), \ 3816 (__v4df)(__m256d)(v1), (int)(scale)); }) 3817 3818 #define _mm256_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \ 3819 __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)-1, \ 3820 (__v4di)(__m256i)(index), \ 3821 (__v4di)(__m256i)(v1), (int)(scale)); }) 3822 3823 #define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ 3824 __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)(mask), \ 3825 (__v4di)(__m256i)(index), \ 3826 (__v4di)(__m256i)(v1), (int)(scale)); }) 3827 3828 #define _mm_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \ 3829 __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \ 3830 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3831 (int)(scale)); }) 3832 3833 #define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ 3834 __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \ 3835 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3836 (int)(scale)); }) 3837 3838 #define _mm_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \ 3839 __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)-1, \ 3840 (__v2di)(__m128i)(index), \ 3841 (__v4si)(__m128i)(v1), (int)(scale)); }) 3842 3843 #define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ 3844 __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)(mask), \ 3845 (__v2di)(__m128i)(index), \ 3846 (__v4si)(__m128i)(v1), (int)(scale)); }) 3847 3848 #define _mm256_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \ 3849 __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \ 3850 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 3851 (int)(scale)); }) 3852 3853 #define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ 3854 __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \ 3855 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 3856 (int)(scale)); }) 3857 3858 #define _mm256_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \ 3859 __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)-1, \ 3860 (__v4di)(__m256i)(index), \ 3861 (__v4si)(__m128i)(v1), (int)(scale)); }) 3862 3863 #define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ 3864 __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)(mask), \ 3865 (__v4di)(__m256i)(index), \ 3866 (__v4si)(__m128i)(v1), (int)(scale)); }) 3867 3868 #define _mm_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \ 3869 __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)-1, \ 3870 (__v4si)(__m128i)(index), \ 3871 (__v2df)(__m128d)(v1), (int)(scale)); }) 3872 3873 #define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ 3874 __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \ 3875 (__v4si)(__m128i)(index), \ 3876 (__v2df)(__m128d)(v1), (int)(scale)); }) 3877 3878 #define _mm_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \ 3879 __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \ 3880 (__v4si)(__m128i)(index), \ 3881 (__v2di)(__m128i)(v1), (int)(scale)); }) 3882 3883 #define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ 3884 __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \ 3885 (__v4si)(__m128i)(index), \ 3886 (__v2di)(__m128i)(v1), (int)(scale)); }) 3887 3888 #define _mm256_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \ 3889 __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \ 3890 (__v4si)(__m128i)(index), \ 3891 (__v4df)(__m256d)(v1), (int)(scale)); }) 3892 3893 #define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ 3894 __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \ 3895 (__v4si)(__m128i)(index), \ 3896 (__v4df)(__m256d)(v1), (int)(scale)); }) 3897 3898 #define _mm256_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \ 3899 __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \ 3900 (__v4si)(__m128i)(index), \ 3901 (__v4di)(__m256i)(v1), (int)(scale)); }) 3902 3903 #define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ 3904 __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \ 3905 (__v4si)(__m128i)(index), \ 3906 (__v4di)(__m256i)(v1), (int)(scale)); }) 3907 3908 #define _mm_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \ 3909 __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \ 3910 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3911 (int)(scale)); }) 3912 3913 #define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ 3914 __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \ 3915 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3916 (int)(scale)); }) 3917 3918 #define _mm_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \ 3919 __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \ 3920 (__v4si)(__m128i)(index), \ 3921 (__v4si)(__m128i)(v1), (int)(scale)); }) 3922 3923 #define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ 3924 __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \ 3925 (__v4si)(__m128i)(index), \ 3926 (__v4si)(__m128i)(v1), (int)(scale)); }) 3927 3928 #define _mm256_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \ 3929 __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \ 3930 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 3931 (int)(scale)); }) 3932 3933 #define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ 3934 __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \ 3935 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 3936 (int)(scale)); }) 3937 3938 #define _mm256_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \ 3939 __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \ 3940 (__v8si)(__m256i)(index), \ 3941 (__v8si)(__m256i)(v1), (int)(scale)); }) 3942 3943 #define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ 3944 __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \ 3945 (__v8si)(__m256i)(index), \ 3946 (__v8si)(__m256i)(v1), (int)(scale)); }) 3947 3948 static __inline__ __m128d __DEFAULT_FN_ATTRS 3949 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { 3950 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3951 (__v2df)_mm_sqrt_pd(__A), 3952 (__v2df)__W); 3953 } 3954 3955 static __inline__ __m128d __DEFAULT_FN_ATTRS 3956 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { 3957 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3958 (__v2df)_mm_sqrt_pd(__A), 3959 (__v2df)_mm_setzero_pd()); 3960 } 3961 3962 static __inline__ __m256d __DEFAULT_FN_ATTRS 3963 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { 3964 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3965 (__v4df)_mm256_sqrt_pd(__A), 3966 (__v4df)__W); 3967 } 3968 3969 static __inline__ __m256d __DEFAULT_FN_ATTRS 3970 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { 3971 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3972 (__v4df)_mm256_sqrt_pd(__A), 3973 (__v4df)_mm256_setzero_pd()); 3974 } 3975 3976 static __inline__ __m128 __DEFAULT_FN_ATTRS 3977 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { 3978 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3979 (__v4sf)_mm_sqrt_ps(__A), 3980 (__v4sf)__W); 3981 } 3982 3983 static __inline__ __m128 __DEFAULT_FN_ATTRS 3984 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { 3985 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3986 (__v4sf)_mm_sqrt_ps(__A), 3987 (__v4sf)_mm_setzero_pd()); 3988 } 3989 3990 static __inline__ __m256 __DEFAULT_FN_ATTRS 3991 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { 3992 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3993 (__v8sf)_mm256_sqrt_ps(__A), 3994 (__v8sf)__W); 3995 } 3996 3997 static __inline__ __m256 __DEFAULT_FN_ATTRS 3998 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { 3999 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 4000 (__v8sf)_mm256_sqrt_ps(__A), 4001 (__v8sf)_mm256_setzero_ps()); 4002 } 4003 4004 static __inline__ __m128d __DEFAULT_FN_ATTRS 4005 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 4006 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 4007 (__v2df)_mm_sub_pd(__A, __B), 4008 (__v2df)__W); 4009 } 4010 4011 static __inline__ __m128d __DEFAULT_FN_ATTRS 4012 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) { 4013 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 4014 (__v2df)_mm_sub_pd(__A, __B), 4015 (__v2df)_mm_setzero_pd()); 4016 } 4017 4018 static __inline__ __m256d __DEFAULT_FN_ATTRS 4019 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 4020 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 4021 (__v4df)_mm256_sub_pd(__A, __B), 4022 (__v4df)__W); 4023 } 4024 4025 static __inline__ __m256d __DEFAULT_FN_ATTRS 4026 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) { 4027 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 4028 (__v4df)_mm256_sub_pd(__A, __B), 4029 (__v4df)_mm256_setzero_pd()); 4030 } 4031 4032 static __inline__ __m128 __DEFAULT_FN_ATTRS 4033 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 4034 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 4035 (__v4sf)_mm_sub_ps(__A, __B), 4036 (__v4sf)__W); 4037 } 4038 4039 static __inline__ __m128 __DEFAULT_FN_ATTRS 4040 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) { 4041 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 4042 (__v4sf)_mm_sub_ps(__A, __B), 4043 (__v4sf)_mm_setzero_ps()); 4044 } 4045 4046 static __inline__ __m256 __DEFAULT_FN_ATTRS 4047 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 4048 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 4049 (__v8sf)_mm256_sub_ps(__A, __B), 4050 (__v8sf)__W); 4051 } 4052 4053 static __inline__ __m256 __DEFAULT_FN_ATTRS 4054 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) { 4055 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 4056 (__v8sf)_mm256_sub_ps(__A, __B), 4057 (__v8sf)_mm256_setzero_ps()); 4058 } 4059 4060 static __inline__ __m128i __DEFAULT_FN_ATTRS 4061 _mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U, 4062 __m128i __B) { 4063 return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A, 4064 (__v4si) __I 4065 /* idx */ , 4066 (__v4si) __B, 4067 (__mmask8) __U); 4068 } 4069 4070 static __inline__ __m256i __DEFAULT_FN_ATTRS 4071 _mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I, 4072 __mmask8 __U, __m256i __B) { 4073 return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A, 4074 (__v8si) __I 4075 /* idx */ , 4076 (__v8si) __B, 4077 (__mmask8) __U); 4078 } 4079 4080 static __inline__ __m128d __DEFAULT_FN_ATTRS 4081 _mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U, 4082 __m128d __B) { 4083 return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A, 4084 (__v2di) __I 4085 /* idx */ , 4086 (__v2df) __B, 4087 (__mmask8) 4088 __U); 4089 } 4090 4091 static __inline__ __m256d __DEFAULT_FN_ATTRS 4092 _mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U, 4093 __m256d __B) { 4094 return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A, 4095 (__v4di) __I 4096 /* idx */ , 4097 (__v4df) __B, 4098 (__mmask8) 4099 __U); 4100 } 4101 4102 static __inline__ __m128 __DEFAULT_FN_ATTRS 4103 _mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U, 4104 __m128 __B) { 4105 return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A, 4106 (__v4si) __I 4107 /* idx */ , 4108 (__v4sf) __B, 4109 (__mmask8) __U); 4110 } 4111 4112 static __inline__ __m256 __DEFAULT_FN_ATTRS 4113 _mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U, 4114 __m256 __B) { 4115 return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A, 4116 (__v8si) __I 4117 /* idx */ , 4118 (__v8sf) __B, 4119 (__mmask8) __U); 4120 } 4121 4122 static __inline__ __m128i __DEFAULT_FN_ATTRS 4123 _mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U, 4124 __m128i __B) { 4125 return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A, 4126 (__v2di) __I 4127 /* idx */ , 4128 (__v2di) __B, 4129 (__mmask8) __U); 4130 } 4131 4132 static __inline__ __m256i __DEFAULT_FN_ATTRS 4133 _mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I, 4134 __mmask8 __U, __m256i __B) { 4135 return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A, 4136 (__v4di) __I 4137 /* idx */ , 4138 (__v4di) __B, 4139 (__mmask8) __U); 4140 } 4141 4142 static __inline__ __m128i __DEFAULT_FN_ATTRS 4143 _mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B) { 4144 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I 4145 /* idx */ , 4146 (__v4si) __A, 4147 (__v4si) __B, 4148 (__mmask8) -1); 4149 } 4150 4151 static __inline__ __m128i __DEFAULT_FN_ATTRS 4152 _mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I, 4153 __m128i __B) { 4154 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I 4155 /* idx */ , 4156 (__v4si) __A, 4157 (__v4si) __B, 4158 (__mmask8) __U); 4159 } 4160 4161 static __inline__ __m128i __DEFAULT_FN_ATTRS 4162 _mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I, 4163 __m128i __B) { 4164 return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I 4165 /* idx */ , 4166 (__v4si) __A, 4167 (__v4si) __B, 4168 (__mmask8) 4169 __U); 4170 } 4171 4172 static __inline__ __m256i __DEFAULT_FN_ATTRS 4173 _mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B) { 4174 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I 4175 /* idx */ , 4176 (__v8si) __A, 4177 (__v8si) __B, 4178 (__mmask8) -1); 4179 } 4180 4181 static __inline__ __m256i __DEFAULT_FN_ATTRS 4182 _mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I, 4183 __m256i __B) { 4184 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I 4185 /* idx */ , 4186 (__v8si) __A, 4187 (__v8si) __B, 4188 (__mmask8) __U); 4189 } 4190 4191 static __inline__ __m256i __DEFAULT_FN_ATTRS 4192 _mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A, 4193 __m256i __I, __m256i __B) { 4194 return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I 4195 /* idx */ , 4196 (__v8si) __A, 4197 (__v8si) __B, 4198 (__mmask8) 4199 __U); 4200 } 4201 4202 static __inline__ __m128d __DEFAULT_FN_ATTRS 4203 _mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B) { 4204 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I 4205 /* idx */ , 4206 (__v2df) __A, 4207 (__v2df) __B, 4208 (__mmask8) - 4209 1); 4210 } 4211 4212 static __inline__ __m128d __DEFAULT_FN_ATTRS 4213 _mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I, 4214 __m128d __B) { 4215 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I 4216 /* idx */ , 4217 (__v2df) __A, 4218 (__v2df) __B, 4219 (__mmask8) 4220 __U); 4221 } 4222 4223 static __inline__ __m128d __DEFAULT_FN_ATTRS 4224 _mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I, 4225 __m128d __B) { 4226 return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I 4227 /* idx */ , 4228 (__v2df) __A, 4229 (__v2df) __B, 4230 (__mmask8) 4231 __U); 4232 } 4233 4234 static __inline__ __m256d __DEFAULT_FN_ATTRS 4235 _mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B) { 4236 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I 4237 /* idx */ , 4238 (__v4df) __A, 4239 (__v4df) __B, 4240 (__mmask8) - 4241 1); 4242 } 4243 4244 static __inline__ __m256d __DEFAULT_FN_ATTRS 4245 _mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I, 4246 __m256d __B) { 4247 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I 4248 /* idx */ , 4249 (__v4df) __A, 4250 (__v4df) __B, 4251 (__mmask8) 4252 __U); 4253 } 4254 4255 static __inline__ __m256d __DEFAULT_FN_ATTRS 4256 _mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I, 4257 __m256d __B) { 4258 return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I 4259 /* idx */ , 4260 (__v4df) __A, 4261 (__v4df) __B, 4262 (__mmask8) 4263 __U); 4264 } 4265 4266 static __inline__ __m128 __DEFAULT_FN_ATTRS 4267 _mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B) { 4268 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I 4269 /* idx */ , 4270 (__v4sf) __A, 4271 (__v4sf) __B, 4272 (__mmask8) -1); 4273 } 4274 4275 static __inline__ __m128 __DEFAULT_FN_ATTRS 4276 _mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I, 4277 __m128 __B) { 4278 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I 4279 /* idx */ , 4280 (__v4sf) __A, 4281 (__v4sf) __B, 4282 (__mmask8) __U); 4283 } 4284 4285 static __inline__ __m128 __DEFAULT_FN_ATTRS 4286 _mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I, 4287 __m128 __B) { 4288 return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I 4289 /* idx */ , 4290 (__v4sf) __A, 4291 (__v4sf) __B, 4292 (__mmask8) 4293 __U); 4294 } 4295 4296 static __inline__ __m256 __DEFAULT_FN_ATTRS 4297 _mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B) { 4298 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I 4299 /* idx */ , 4300 (__v8sf) __A, 4301 (__v8sf) __B, 4302 (__mmask8) -1); 4303 } 4304 4305 static __inline__ __m256 __DEFAULT_FN_ATTRS 4306 _mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I, 4307 __m256 __B) { 4308 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I 4309 /* idx */ , 4310 (__v8sf) __A, 4311 (__v8sf) __B, 4312 (__mmask8) __U); 4313 } 4314 4315 static __inline__ __m256 __DEFAULT_FN_ATTRS 4316 _mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I, 4317 __m256 __B) { 4318 return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I 4319 /* idx */ , 4320 (__v8sf) __A, 4321 (__v8sf) __B, 4322 (__mmask8) 4323 __U); 4324 } 4325 4326 static __inline__ __m128i __DEFAULT_FN_ATTRS 4327 _mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B) { 4328 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I 4329 /* idx */ , 4330 (__v2di) __A, 4331 (__v2di) __B, 4332 (__mmask8) -1); 4333 } 4334 4335 static __inline__ __m128i __DEFAULT_FN_ATTRS 4336 _mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I, 4337 __m128i __B) { 4338 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I 4339 /* idx */ , 4340 (__v2di) __A, 4341 (__v2di) __B, 4342 (__mmask8) __U); 4343 } 4344 4345 static __inline__ __m128i __DEFAULT_FN_ATTRS 4346 _mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I, 4347 __m128i __B) { 4348 return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I 4349 /* idx */ , 4350 (__v2di) __A, 4351 (__v2di) __B, 4352 (__mmask8) 4353 __U); 4354 } 4355 4356 4357 static __inline__ __m256i __DEFAULT_FN_ATTRS 4358 _mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B) { 4359 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I 4360 /* idx */ , 4361 (__v4di) __A, 4362 (__v4di) __B, 4363 (__mmask8) -1); 4364 } 4365 4366 static __inline__ __m256i __DEFAULT_FN_ATTRS 4367 _mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I, 4368 __m256i __B) { 4369 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I 4370 /* idx */ , 4371 (__v4di) __A, 4372 (__v4di) __B, 4373 (__mmask8) __U); 4374 } 4375 4376 static __inline__ __m256i __DEFAULT_FN_ATTRS 4377 _mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A, 4378 __m256i __I, __m256i __B) { 4379 return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I 4380 /* idx */ , 4381 (__v4di) __A, 4382 (__v4di) __B, 4383 (__mmask8) 4384 __U); 4385 } 4386 4387 static __inline__ __m128i __DEFAULT_FN_ATTRS 4388 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4389 { 4390 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4391 (__v4si)_mm_cvtepi8_epi32(__A), 4392 (__v4si)__W); 4393 } 4394 4395 static __inline__ __m128i __DEFAULT_FN_ATTRS 4396 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A) 4397 { 4398 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4399 (__v4si)_mm_cvtepi8_epi32(__A), 4400 (__v4si)_mm_setzero_si128()); 4401 } 4402 4403 static __inline__ __m256i __DEFAULT_FN_ATTRS 4404 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) 4405 { 4406 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4407 (__v8si)_mm256_cvtepi8_epi32(__A), 4408 (__v8si)__W); 4409 } 4410 4411 static __inline__ __m256i __DEFAULT_FN_ATTRS 4412 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) 4413 { 4414 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4415 (__v8si)_mm256_cvtepi8_epi32(__A), 4416 (__v8si)_mm256_setzero_si256()); 4417 } 4418 4419 static __inline__ __m128i __DEFAULT_FN_ATTRS 4420 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4421 { 4422 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4423 (__v2di)_mm_cvtepi8_epi64(__A), 4424 (__v2di)__W); 4425 } 4426 4427 static __inline__ __m128i __DEFAULT_FN_ATTRS 4428 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 4429 { 4430 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4431 (__v2di)_mm_cvtepi8_epi64(__A), 4432 (__v2di)_mm_setzero_si128()); 4433 } 4434 4435 static __inline__ __m256i __DEFAULT_FN_ATTRS 4436 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4437 { 4438 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4439 (__v4di)_mm256_cvtepi8_epi64(__A), 4440 (__v4di)__W); 4441 } 4442 4443 static __inline__ __m256i __DEFAULT_FN_ATTRS 4444 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 4445 { 4446 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4447 (__v4di)_mm256_cvtepi8_epi64(__A), 4448 (__v4di)_mm256_setzero_si256()); 4449 } 4450 4451 static __inline__ __m128i __DEFAULT_FN_ATTRS 4452 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X) 4453 { 4454 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4455 (__v2di)_mm_cvtepi32_epi64(__X), 4456 (__v2di)__W); 4457 } 4458 4459 static __inline__ __m128i __DEFAULT_FN_ATTRS 4460 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) 4461 { 4462 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4463 (__v2di)_mm_cvtepi32_epi64(__X), 4464 (__v2di)_mm_setzero_si128()); 4465 } 4466 4467 static __inline__ __m256i __DEFAULT_FN_ATTRS 4468 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X) 4469 { 4470 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4471 (__v4di)_mm256_cvtepi32_epi64(__X), 4472 (__v4di)__W); 4473 } 4474 4475 static __inline__ __m256i __DEFAULT_FN_ATTRS 4476 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) 4477 { 4478 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4479 (__v4di)_mm256_cvtepi32_epi64(__X), 4480 (__v4di)_mm256_setzero_si256()); 4481 } 4482 4483 static __inline__ __m128i __DEFAULT_FN_ATTRS 4484 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4485 { 4486 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4487 (__v4si)_mm_cvtepi16_epi32(__A), 4488 (__v4si)__W); 4489 } 4490 4491 static __inline__ __m128i __DEFAULT_FN_ATTRS 4492 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A) 4493 { 4494 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4495 (__v4si)_mm_cvtepi16_epi32(__A), 4496 (__v4si)_mm_setzero_si128()); 4497 } 4498 4499 static __inline__ __m256i __DEFAULT_FN_ATTRS 4500 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4501 { 4502 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4503 (__v8si)_mm256_cvtepi16_epi32(__A), 4504 (__v8si)__W); 4505 } 4506 4507 static __inline__ __m256i __DEFAULT_FN_ATTRS 4508 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) 4509 { 4510 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4511 (__v8si)_mm256_cvtepi16_epi32(__A), 4512 (__v8si)_mm256_setzero_si256()); 4513 } 4514 4515 static __inline__ __m128i __DEFAULT_FN_ATTRS 4516 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4517 { 4518 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4519 (__v2di)_mm_cvtepi16_epi64(__A), 4520 (__v2di)__W); 4521 } 4522 4523 static __inline__ __m128i __DEFAULT_FN_ATTRS 4524 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 4525 { 4526 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4527 (__v2di)_mm_cvtepi16_epi64(__A), 4528 (__v2di)_mm_setzero_si128()); 4529 } 4530 4531 static __inline__ __m256i __DEFAULT_FN_ATTRS 4532 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4533 { 4534 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4535 (__v4di)_mm256_cvtepi16_epi64(__A), 4536 (__v4di)__W); 4537 } 4538 4539 static __inline__ __m256i __DEFAULT_FN_ATTRS 4540 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 4541 { 4542 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4543 (__v4di)_mm256_cvtepi16_epi64(__A), 4544 (__v4di)_mm256_setzero_si256()); 4545 } 4546 4547 4548 static __inline__ __m128i __DEFAULT_FN_ATTRS 4549 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4550 { 4551 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4552 (__v4si)_mm_cvtepu8_epi32(__A), 4553 (__v4si)__W); 4554 } 4555 4556 static __inline__ __m128i __DEFAULT_FN_ATTRS 4557 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) 4558 { 4559 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4560 (__v4si)_mm_cvtepu8_epi32(__A), 4561 (__v4si)_mm_setzero_si128()); 4562 } 4563 4564 static __inline__ __m256i __DEFAULT_FN_ATTRS 4565 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4566 { 4567 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4568 (__v8si)_mm256_cvtepu8_epi32(__A), 4569 (__v8si)__W); 4570 } 4571 4572 static __inline__ __m256i __DEFAULT_FN_ATTRS 4573 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) 4574 { 4575 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4576 (__v8si)_mm256_cvtepu8_epi32(__A), 4577 (__v8si)_mm256_setzero_si256()); 4578 } 4579 4580 static __inline__ __m128i __DEFAULT_FN_ATTRS 4581 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4582 { 4583 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4584 (__v2di)_mm_cvtepu8_epi64(__A), 4585 (__v2di)__W); 4586 } 4587 4588 static __inline__ __m128i __DEFAULT_FN_ATTRS 4589 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) 4590 { 4591 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4592 (__v2di)_mm_cvtepu8_epi64(__A), 4593 (__v2di)_mm_setzero_si128()); 4594 } 4595 4596 static __inline__ __m256i __DEFAULT_FN_ATTRS 4597 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4598 { 4599 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4600 (__v4di)_mm256_cvtepu8_epi64(__A), 4601 (__v4di)__W); 4602 } 4603 4604 static __inline__ __m256i __DEFAULT_FN_ATTRS 4605 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) 4606 { 4607 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4608 (__v4di)_mm256_cvtepu8_epi64(__A), 4609 (__v4di)_mm256_setzero_si256()); 4610 } 4611 4612 static __inline__ __m128i __DEFAULT_FN_ATTRS 4613 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X) 4614 { 4615 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4616 (__v2di)_mm_cvtepu32_epi64(__X), 4617 (__v2di)__W); 4618 } 4619 4620 static __inline__ __m128i __DEFAULT_FN_ATTRS 4621 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) 4622 { 4623 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4624 (__v2di)_mm_cvtepu32_epi64(__X), 4625 (__v2di)_mm_setzero_si128()); 4626 } 4627 4628 static __inline__ __m256i __DEFAULT_FN_ATTRS 4629 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X) 4630 { 4631 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4632 (__v4di)_mm256_cvtepu32_epi64(__X), 4633 (__v4di)__W); 4634 } 4635 4636 static __inline__ __m256i __DEFAULT_FN_ATTRS 4637 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) 4638 { 4639 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4640 (__v4di)_mm256_cvtepu32_epi64(__X), 4641 (__v4di)_mm256_setzero_si256()); 4642 } 4643 4644 static __inline__ __m128i __DEFAULT_FN_ATTRS 4645 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4646 { 4647 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4648 (__v4si)_mm_cvtepu16_epi32(__A), 4649 (__v4si)__W); 4650 } 4651 4652 static __inline__ __m128i __DEFAULT_FN_ATTRS 4653 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) 4654 { 4655 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4656 (__v4si)_mm_cvtepu16_epi32(__A), 4657 (__v4si)_mm_setzero_si128()); 4658 } 4659 4660 static __inline__ __m256i __DEFAULT_FN_ATTRS 4661 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4662 { 4663 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4664 (__v8si)_mm256_cvtepu16_epi32(__A), 4665 (__v8si)__W); 4666 } 4667 4668 static __inline__ __m256i __DEFAULT_FN_ATTRS 4669 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) 4670 { 4671 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4672 (__v8si)_mm256_cvtepu16_epi32(__A), 4673 (__v8si)_mm256_setzero_si256()); 4674 } 4675 4676 static __inline__ __m128i __DEFAULT_FN_ATTRS 4677 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4678 { 4679 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4680 (__v2di)_mm_cvtepu16_epi64(__A), 4681 (__v2di)__W); 4682 } 4683 4684 static __inline__ __m128i __DEFAULT_FN_ATTRS 4685 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 4686 { 4687 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4688 (__v2di)_mm_cvtepu16_epi64(__A), 4689 (__v2di)_mm_setzero_si128()); 4690 } 4691 4692 static __inline__ __m256i __DEFAULT_FN_ATTRS 4693 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4694 { 4695 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4696 (__v4di)_mm256_cvtepu16_epi64(__A), 4697 (__v4di)__W); 4698 } 4699 4700 static __inline__ __m256i __DEFAULT_FN_ATTRS 4701 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 4702 { 4703 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4704 (__v4di)_mm256_cvtepu16_epi64(__A), 4705 (__v4di)_mm256_setzero_si256()); 4706 } 4707 4708 4709 #define _mm_rol_epi32(a, b) __extension__ ({\ 4710 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ 4711 (__v4si)_mm_setzero_si128(), \ 4712 (__mmask8)-1); }) 4713 4714 #define _mm_mask_rol_epi32(w, u, a, b) __extension__ ({\ 4715 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ 4716 (__v4si)(__m128i)(w), (__mmask8)(u)); }) 4717 4718 #define _mm_maskz_rol_epi32(u, a, b) __extension__ ({\ 4719 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ 4720 (__v4si)_mm_setzero_si128(), \ 4721 (__mmask8)(u)); }) 4722 4723 #define _mm256_rol_epi32(a, b) __extension__ ({\ 4724 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ 4725 (__v8si)_mm256_setzero_si256(), \ 4726 (__mmask8)-1); }) 4727 4728 #define _mm256_mask_rol_epi32(w, u, a, b) __extension__ ({\ 4729 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ 4730 (__v8si)(__m256i)(w), (__mmask8)(u)); }) 4731 4732 #define _mm256_maskz_rol_epi32(u, a, b) __extension__ ({\ 4733 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ 4734 (__v8si)_mm256_setzero_si256(), \ 4735 (__mmask8)(u)); }) 4736 4737 #define _mm_rol_epi64(a, b) __extension__ ({\ 4738 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ 4739 (__v2di)_mm_setzero_di(), \ 4740 (__mmask8)-1); }) 4741 4742 #define _mm_mask_rol_epi64(w, u, a, b) __extension__ ({\ 4743 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ 4744 (__v2di)(__m128i)(w), (__mmask8)(u)); }) 4745 4746 #define _mm_maskz_rol_epi64(u, a, b) __extension__ ({\ 4747 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ 4748 (__v2di)_mm_setzero_di(), \ 4749 (__mmask8)(u)); }) 4750 4751 #define _mm256_rol_epi64(a, b) __extension__ ({\ 4752 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ 4753 (__v4di)_mm256_setzero_si256(), \ 4754 (__mmask8)-1); }) 4755 4756 #define _mm256_mask_rol_epi64(w, u, a, b) __extension__ ({\ 4757 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ 4758 (__v4di)(__m256i)(w), (__mmask8)(u)); }) 4759 4760 #define _mm256_maskz_rol_epi64(u, a, b) __extension__ ({\ 4761 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ 4762 (__v4di)_mm256_setzero_si256(), \ 4763 (__mmask8)(u)); }) 4764 4765 static __inline__ __m128i __DEFAULT_FN_ATTRS 4766 _mm_rolv_epi32 (__m128i __A, __m128i __B) 4767 { 4768 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A, 4769 (__v4si) __B, 4770 (__v4si) 4771 _mm_setzero_si128 (), 4772 (__mmask8) -1); 4773 } 4774 4775 static __inline__ __m128i __DEFAULT_FN_ATTRS 4776 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 4777 __m128i __B) 4778 { 4779 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A, 4780 (__v4si) __B, 4781 (__v4si) __W, 4782 (__mmask8) __U); 4783 } 4784 4785 static __inline__ __m128i __DEFAULT_FN_ATTRS 4786 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 4787 { 4788 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A, 4789 (__v4si) __B, 4790 (__v4si) 4791 _mm_setzero_si128 (), 4792 (__mmask8) __U); 4793 } 4794 4795 static __inline__ __m256i __DEFAULT_FN_ATTRS 4796 _mm256_rolv_epi32 (__m256i __A, __m256i __B) 4797 { 4798 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A, 4799 (__v8si) __B, 4800 (__v8si) 4801 _mm256_setzero_si256 (), 4802 (__mmask8) -1); 4803 } 4804 4805 static __inline__ __m256i __DEFAULT_FN_ATTRS 4806 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 4807 __m256i __B) 4808 { 4809 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A, 4810 (__v8si) __B, 4811 (__v8si) __W, 4812 (__mmask8) __U); 4813 } 4814 4815 static __inline__ __m256i __DEFAULT_FN_ATTRS 4816 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 4817 { 4818 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A, 4819 (__v8si) __B, 4820 (__v8si) 4821 _mm256_setzero_si256 (), 4822 (__mmask8) __U); 4823 } 4824 4825 static __inline__ __m128i __DEFAULT_FN_ATTRS 4826 _mm_rolv_epi64 (__m128i __A, __m128i __B) 4827 { 4828 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, 4829 (__v2di) __B, 4830 (__v2di) 4831 _mm_setzero_di (), 4832 (__mmask8) -1); 4833 } 4834 4835 static __inline__ __m128i __DEFAULT_FN_ATTRS 4836 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 4837 __m128i __B) 4838 { 4839 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, 4840 (__v2di) __B, 4841 (__v2di) __W, 4842 (__mmask8) __U); 4843 } 4844 4845 static __inline__ __m128i __DEFAULT_FN_ATTRS 4846 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 4847 { 4848 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, 4849 (__v2di) __B, 4850 (__v2di) 4851 _mm_setzero_di (), 4852 (__mmask8) __U); 4853 } 4854 4855 static __inline__ __m256i __DEFAULT_FN_ATTRS 4856 _mm256_rolv_epi64 (__m256i __A, __m256i __B) 4857 { 4858 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A, 4859 (__v4di) __B, 4860 (__v4di) 4861 _mm256_setzero_si256 (), 4862 (__mmask8) -1); 4863 } 4864 4865 static __inline__ __m256i __DEFAULT_FN_ATTRS 4866 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 4867 __m256i __B) 4868 { 4869 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A, 4870 (__v4di) __B, 4871 (__v4di) __W, 4872 (__mmask8) __U); 4873 } 4874 4875 static __inline__ __m256i __DEFAULT_FN_ATTRS 4876 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 4877 { 4878 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A, 4879 (__v4di) __B, 4880 (__v4di) 4881 _mm256_setzero_si256 (), 4882 (__mmask8) __U); 4883 } 4884 4885 #define _mm_ror_epi32(A, B) __extension__ ({ \ 4886 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \ 4887 (__v4si)_mm_setzero_si128(), \ 4888 (__mmask8)-1); }) 4889 4890 #define _mm_mask_ror_epi32(W, U, A, B) __extension__ ({ \ 4891 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \ 4892 (__v4si)(__m128i)(W), (__mmask8)(U)); }) 4893 4894 #define _mm_maskz_ror_epi32(U, A, B) __extension__ ({ \ 4895 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \ 4896 (__v4si)_mm_setzero_si128(), \ 4897 (__mmask8)(U)); }) 4898 4899 #define _mm256_ror_epi32(A, B) __extension__ ({ \ 4900 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \ 4901 (__v8si)_mm256_setzero_si256(), \ 4902 (__mmask8)-1); }) 4903 4904 #define _mm256_mask_ror_epi32(W, U, A, B) __extension__ ({ \ 4905 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \ 4906 (__v8si)(__m256i)(W), (__mmask8)(U)); }) 4907 4908 #define _mm256_maskz_ror_epi32(U, A, B) __extension__ ({ \ 4909 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \ 4910 (__v8si)_mm256_setzero_si256(), \ 4911 (__mmask8)(U)); }) 4912 4913 #define _mm_ror_epi64(A, B) __extension__ ({ \ 4914 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \ 4915 (__v2di)_mm_setzero_di(), \ 4916 (__mmask8)-1); }) 4917 4918 #define _mm_mask_ror_epi64(W, U, A, B) __extension__ ({ \ 4919 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \ 4920 (__v2di)(__m128i)(W), (__mmask8)(U)); }) 4921 4922 #define _mm_maskz_ror_epi64(U, A, B) __extension__ ({ \ 4923 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \ 4924 (__v2di)_mm_setzero_di(), \ 4925 (__mmask8)(U)); }) 4926 4927 #define _mm256_ror_epi64(A, B) __extension__ ({ \ 4928 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \ 4929 (__v4di)_mm256_setzero_si256(), \ 4930 (__mmask8)-1); }) 4931 4932 #define _mm256_mask_ror_epi64(W, U, A, B) __extension__ ({ \ 4933 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \ 4934 (__v4di)(__m256i)(W), (__mmask8)(U)); }) 4935 4936 #define _mm256_maskz_ror_epi64(U, A, B) __extension__ ({ \ 4937 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \ 4938 (__v4di)_mm256_setzero_si256(), \ 4939 (__mmask8)(U)); }) 4940 4941 static __inline__ __m128i __DEFAULT_FN_ATTRS 4942 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4943 { 4944 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4945 (__v4si)_mm_sll_epi32(__A, __B), 4946 (__v4si)__W); 4947 } 4948 4949 static __inline__ __m128i __DEFAULT_FN_ATTRS 4950 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) 4951 { 4952 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4953 (__v4si)_mm_sll_epi32(__A, __B), 4954 (__v4si)_mm_setzero_si128()); 4955 } 4956 4957 static __inline__ __m256i __DEFAULT_FN_ATTRS 4958 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4959 { 4960 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4961 (__v8si)_mm256_sll_epi32(__A, __B), 4962 (__v8si)__W); 4963 } 4964 4965 static __inline__ __m256i __DEFAULT_FN_ATTRS 4966 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) 4967 { 4968 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4969 (__v8si)_mm256_sll_epi32(__A, __B), 4970 (__v8si)_mm256_setzero_si256()); 4971 } 4972 4973 static __inline__ __m128i __DEFAULT_FN_ATTRS 4974 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) 4975 { 4976 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4977 (__v4si)_mm_slli_epi32(__A, __B), 4978 (__v4si)__W); 4979 } 4980 4981 static __inline__ __m128i __DEFAULT_FN_ATTRS 4982 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B) 4983 { 4984 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4985 (__v4si)_mm_slli_epi32(__A, __B), 4986 (__v4si)_mm_setzero_si128()); 4987 } 4988 4989 static __inline__ __m256i __DEFAULT_FN_ATTRS 4990 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) 4991 { 4992 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4993 (__v8si)_mm256_slli_epi32(__A, __B), 4994 (__v8si)__W); 4995 } 4996 4997 static __inline__ __m256i __DEFAULT_FN_ATTRS 4998 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B) 4999 { 5000 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5001 (__v8si)_mm256_slli_epi32(__A, __B), 5002 (__v8si)_mm256_setzero_si256()); 5003 } 5004 5005 static __inline__ __m128i __DEFAULT_FN_ATTRS 5006 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 5007 { 5008 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5009 (__v2di)_mm_sll_epi64(__A, __B), 5010 (__v2di)__W); 5011 } 5012 5013 static __inline__ __m128i __DEFAULT_FN_ATTRS 5014 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) 5015 { 5016 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5017 (__v2di)_mm_sll_epi64(__A, __B), 5018 (__v2di)_mm_setzero_di()); 5019 } 5020 5021 static __inline__ __m256i __DEFAULT_FN_ATTRS 5022 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 5023 { 5024 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5025 (__v4di)_mm256_sll_epi64(__A, __B), 5026 (__v4di)__W); 5027 } 5028 5029 static __inline__ __m256i __DEFAULT_FN_ATTRS 5030 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) 5031 { 5032 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5033 (__v4di)_mm256_sll_epi64(__A, __B), 5034 (__v4di)_mm256_setzero_si256()); 5035 } 5036 5037 static __inline__ __m128i __DEFAULT_FN_ATTRS 5038 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B) 5039 { 5040 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5041 (__v2di)_mm_slli_epi64(__A, __B), 5042 (__v2di)__W); 5043 } 5044 5045 static __inline__ __m128i __DEFAULT_FN_ATTRS 5046 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B) 5047 { 5048 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5049 (__v2di)_mm_slli_epi64(__A, __B), 5050 (__v2di)_mm_setzero_di()); 5051 } 5052 5053 static __inline__ __m256i __DEFAULT_FN_ATTRS 5054 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B) 5055 { 5056 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5057 (__v4di)_mm256_slli_epi64(__A, __B), 5058 (__v4di)__W); 5059 } 5060 5061 static __inline__ __m256i __DEFAULT_FN_ATTRS 5062 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B) 5063 { 5064 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5065 (__v4di)_mm256_slli_epi64(__A, __B), 5066 (__v4di)_mm256_setzero_si256()); 5067 } 5068 5069 static __inline__ __m128i __DEFAULT_FN_ATTRS 5070 _mm_rorv_epi32 (__m128i __A, __m128i __B) 5071 { 5072 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A, 5073 (__v4si) __B, 5074 (__v4si) 5075 _mm_setzero_si128 (), 5076 (__mmask8) -1); 5077 } 5078 5079 static __inline__ __m128i __DEFAULT_FN_ATTRS 5080 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 5081 __m128i __B) 5082 { 5083 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A, 5084 (__v4si) __B, 5085 (__v4si) __W, 5086 (__mmask8) __U); 5087 } 5088 5089 static __inline__ __m128i __DEFAULT_FN_ATTRS 5090 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 5091 { 5092 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A, 5093 (__v4si) __B, 5094 (__v4si) 5095 _mm_setzero_si128 (), 5096 (__mmask8) __U); 5097 } 5098 5099 static __inline__ __m256i __DEFAULT_FN_ATTRS 5100 _mm256_rorv_epi32 (__m256i __A, __m256i __B) 5101 { 5102 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A, 5103 (__v8si) __B, 5104 (__v8si) 5105 _mm256_setzero_si256 (), 5106 (__mmask8) -1); 5107 } 5108 5109 static __inline__ __m256i __DEFAULT_FN_ATTRS 5110 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 5111 __m256i __B) 5112 { 5113 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A, 5114 (__v8si) __B, 5115 (__v8si) __W, 5116 (__mmask8) __U); 5117 } 5118 5119 static __inline__ __m256i __DEFAULT_FN_ATTRS 5120 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 5121 { 5122 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A, 5123 (__v8si) __B, 5124 (__v8si) 5125 _mm256_setzero_si256 (), 5126 (__mmask8) __U); 5127 } 5128 5129 static __inline__ __m128i __DEFAULT_FN_ATTRS 5130 _mm_rorv_epi64 (__m128i __A, __m128i __B) 5131 { 5132 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, 5133 (__v2di) __B, 5134 (__v2di) 5135 _mm_setzero_di (), 5136 (__mmask8) -1); 5137 } 5138 5139 static __inline__ __m128i __DEFAULT_FN_ATTRS 5140 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 5141 __m128i __B) 5142 { 5143 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, 5144 (__v2di) __B, 5145 (__v2di) __W, 5146 (__mmask8) __U); 5147 } 5148 5149 static __inline__ __m128i __DEFAULT_FN_ATTRS 5150 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 5151 { 5152 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, 5153 (__v2di) __B, 5154 (__v2di) 5155 _mm_setzero_di (), 5156 (__mmask8) __U); 5157 } 5158 5159 static __inline__ __m256i __DEFAULT_FN_ATTRS 5160 _mm256_rorv_epi64 (__m256i __A, __m256i __B) 5161 { 5162 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A, 5163 (__v4di) __B, 5164 (__v4di) 5165 _mm256_setzero_si256 (), 5166 (__mmask8) -1); 5167 } 5168 5169 static __inline__ __m256i __DEFAULT_FN_ATTRS 5170 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 5171 __m256i __B) 5172 { 5173 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A, 5174 (__v4di) __B, 5175 (__v4di) __W, 5176 (__mmask8) __U); 5177 } 5178 5179 static __inline__ __m256i __DEFAULT_FN_ATTRS 5180 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 5181 { 5182 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A, 5183 (__v4di) __B, 5184 (__v4di) 5185 _mm256_setzero_si256 (), 5186 (__mmask8) __U); 5187 } 5188 5189 static __inline__ __m128i __DEFAULT_FN_ATTRS 5190 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5191 { 5192 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5193 (__v2di)_mm_sllv_epi64(__X, __Y), 5194 (__v2di)__W); 5195 } 5196 5197 static __inline__ __m128i __DEFAULT_FN_ATTRS 5198 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 5199 { 5200 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5201 (__v2di)_mm_sllv_epi64(__X, __Y), 5202 (__v2di)_mm_setzero_di()); 5203 } 5204 5205 static __inline__ __m256i __DEFAULT_FN_ATTRS 5206 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5207 { 5208 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5209 (__v4di)_mm256_sllv_epi64(__X, __Y), 5210 (__v4di)__W); 5211 } 5212 5213 static __inline__ __m256i __DEFAULT_FN_ATTRS 5214 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) 5215 { 5216 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5217 (__v4di)_mm256_sllv_epi64(__X, __Y), 5218 (__v4di)_mm256_setzero_si256()); 5219 } 5220 5221 static __inline__ __m128i __DEFAULT_FN_ATTRS 5222 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5223 { 5224 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5225 (__v4si)_mm_sllv_epi32(__X, __Y), 5226 (__v4si)__W); 5227 } 5228 5229 static __inline__ __m128i __DEFAULT_FN_ATTRS 5230 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 5231 { 5232 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5233 (__v4si)_mm_sllv_epi32(__X, __Y), 5234 (__v4si)_mm_setzero_si128()); 5235 } 5236 5237 static __inline__ __m256i __DEFAULT_FN_ATTRS 5238 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5239 { 5240 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5241 (__v8si)_mm256_sllv_epi32(__X, __Y), 5242 (__v8si)__W); 5243 } 5244 5245 static __inline__ __m256i __DEFAULT_FN_ATTRS 5246 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 5247 { 5248 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5249 (__v8si)_mm256_sllv_epi32(__X, __Y), 5250 (__v8si)_mm256_setzero_si256()); 5251 } 5252 5253 static __inline__ __m128i __DEFAULT_FN_ATTRS 5254 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5255 { 5256 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5257 (__v2di)_mm_srlv_epi64(__X, __Y), 5258 (__v2di)__W); 5259 } 5260 5261 static __inline__ __m128i __DEFAULT_FN_ATTRS 5262 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 5263 { 5264 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5265 (__v2di)_mm_srlv_epi64(__X, __Y), 5266 (__v2di)_mm_setzero_di()); 5267 } 5268 5269 static __inline__ __m256i __DEFAULT_FN_ATTRS 5270 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5271 { 5272 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5273 (__v4di)_mm256_srlv_epi64(__X, __Y), 5274 (__v4di)__W); 5275 } 5276 5277 static __inline__ __m256i __DEFAULT_FN_ATTRS 5278 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) 5279 { 5280 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5281 (__v4di)_mm256_srlv_epi64(__X, __Y), 5282 (__v4di)_mm256_setzero_si256()); 5283 } 5284 5285 static __inline__ __m128i __DEFAULT_FN_ATTRS 5286 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5287 { 5288 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5289 (__v4si)_mm_srlv_epi32(__X, __Y), 5290 (__v4si)__W); 5291 } 5292 5293 static __inline__ __m128i __DEFAULT_FN_ATTRS 5294 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 5295 { 5296 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5297 (__v4si)_mm_srlv_epi32(__X, __Y), 5298 (__v4si)_mm_setzero_si128()); 5299 } 5300 5301 static __inline__ __m256i __DEFAULT_FN_ATTRS 5302 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5303 { 5304 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5305 (__v8si)_mm256_srlv_epi32(__X, __Y), 5306 (__v8si)__W); 5307 } 5308 5309 static __inline__ __m256i __DEFAULT_FN_ATTRS 5310 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 5311 { 5312 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5313 (__v8si)_mm256_srlv_epi32(__X, __Y), 5314 (__v8si)_mm256_setzero_si256()); 5315 } 5316 5317 static __inline__ __m128i __DEFAULT_FN_ATTRS 5318 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 5319 { 5320 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5321 (__v4si)_mm_srl_epi32(__A, __B), 5322 (__v4si)__W); 5323 } 5324 5325 static __inline__ __m128i __DEFAULT_FN_ATTRS 5326 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) 5327 { 5328 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5329 (__v4si)_mm_srl_epi32(__A, __B), 5330 (__v4si)_mm_setzero_si128()); 5331 } 5332 5333 static __inline__ __m256i __DEFAULT_FN_ATTRS 5334 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 5335 { 5336 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5337 (__v8si)_mm256_srl_epi32(__A, __B), 5338 (__v8si)__W); 5339 } 5340 5341 static __inline__ __m256i __DEFAULT_FN_ATTRS 5342 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) 5343 { 5344 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5345 (__v8si)_mm256_srl_epi32(__A, __B), 5346 (__v8si)_mm256_setzero_si256()); 5347 } 5348 5349 static __inline__ __m128i __DEFAULT_FN_ATTRS 5350 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) 5351 { 5352 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5353 (__v4si)_mm_srli_epi32(__A, __B), 5354 (__v4si)__W); 5355 } 5356 5357 static __inline__ __m128i __DEFAULT_FN_ATTRS 5358 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B) 5359 { 5360 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5361 (__v4si)_mm_srli_epi32(__A, __B), 5362 (__v4si)_mm_setzero_si128()); 5363 } 5364 5365 static __inline__ __m256i __DEFAULT_FN_ATTRS 5366 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) 5367 { 5368 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5369 (__v8si)_mm256_srli_epi32(__A, __B), 5370 (__v8si)__W); 5371 } 5372 5373 static __inline__ __m256i __DEFAULT_FN_ATTRS 5374 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B) 5375 { 5376 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5377 (__v8si)_mm256_srli_epi32(__A, __B), 5378 (__v8si)_mm256_setzero_si256()); 5379 } 5380 5381 static __inline__ __m128i __DEFAULT_FN_ATTRS 5382 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 5383 { 5384 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5385 (__v2di)_mm_srl_epi64(__A, __B), 5386 (__v2di)__W); 5387 } 5388 5389 static __inline__ __m128i __DEFAULT_FN_ATTRS 5390 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) 5391 { 5392 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5393 (__v2di)_mm_srl_epi64(__A, __B), 5394 (__v2di)_mm_setzero_di()); 5395 } 5396 5397 static __inline__ __m256i __DEFAULT_FN_ATTRS 5398 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 5399 { 5400 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5401 (__v4di)_mm256_srl_epi64(__A, __B), 5402 (__v4di)__W); 5403 } 5404 5405 static __inline__ __m256i __DEFAULT_FN_ATTRS 5406 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) 5407 { 5408 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5409 (__v4di)_mm256_srl_epi64(__A, __B), 5410 (__v4di)_mm256_setzero_si256()); 5411 } 5412 5413 static __inline__ __m128i __DEFAULT_FN_ATTRS 5414 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B) 5415 { 5416 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5417 (__v2di)_mm_srli_epi64(__A, __B), 5418 (__v2di)__W); 5419 } 5420 5421 static __inline__ __m128i __DEFAULT_FN_ATTRS 5422 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B) 5423 { 5424 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5425 (__v2di)_mm_srli_epi64(__A, __B), 5426 (__v2di)_mm_setzero_di()); 5427 } 5428 5429 static __inline__ __m256i __DEFAULT_FN_ATTRS 5430 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B) 5431 { 5432 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5433 (__v4di)_mm256_srli_epi64(__A, __B), 5434 (__v4di)__W); 5435 } 5436 5437 static __inline__ __m256i __DEFAULT_FN_ATTRS 5438 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B) 5439 { 5440 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5441 (__v4di)_mm256_srli_epi64(__A, __B), 5442 (__v4di)_mm256_setzero_si256()); 5443 } 5444 5445 static __inline__ __m128i __DEFAULT_FN_ATTRS 5446 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5447 { 5448 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5449 (__v4si)_mm_srav_epi32(__X, __Y), 5450 (__v4si)__W); 5451 } 5452 5453 static __inline__ __m128i __DEFAULT_FN_ATTRS 5454 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 5455 { 5456 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5457 (__v4si)_mm_srav_epi32(__X, __Y), 5458 (__v4si)_mm_setzero_si128()); 5459 } 5460 5461 static __inline__ __m256i __DEFAULT_FN_ATTRS 5462 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5463 { 5464 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5465 (__v8si)_mm256_srav_epi32(__X, __Y), 5466 (__v8si)__W); 5467 } 5468 5469 static __inline__ __m256i __DEFAULT_FN_ATTRS 5470 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 5471 { 5472 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5473 (__v8si)_mm256_srav_epi32(__X, __Y), 5474 (__v8si)_mm256_setzero_si256()); 5475 } 5476 5477 static __inline__ __m128i __DEFAULT_FN_ATTRS 5478 _mm_srav_epi64(__m128i __X, __m128i __Y) 5479 { 5480 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y); 5481 } 5482 5483 static __inline__ __m128i __DEFAULT_FN_ATTRS 5484 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5485 { 5486 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5487 (__v2di)_mm_srav_epi64(__X, __Y), 5488 (__v2di)__W); 5489 } 5490 5491 static __inline__ __m128i __DEFAULT_FN_ATTRS 5492 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 5493 { 5494 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5495 (__v2di)_mm_srav_epi64(__X, __Y), 5496 (__v2di)_mm_setzero_di()); 5497 } 5498 5499 static __inline__ __m256i __DEFAULT_FN_ATTRS 5500 _mm256_srav_epi64(__m256i __X, __m256i __Y) 5501 { 5502 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y); 5503 } 5504 5505 static __inline__ __m256i __DEFAULT_FN_ATTRS 5506 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5507 { 5508 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5509 (__v4di)_mm256_srav_epi64(__X, __Y), 5510 (__v4di)__W); 5511 } 5512 5513 static __inline__ __m256i __DEFAULT_FN_ATTRS 5514 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) 5515 { 5516 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5517 (__v4di)_mm256_srav_epi64(__X, __Y), 5518 (__v4di)_mm256_setzero_si256()); 5519 } 5520 5521 static __inline__ __m128i __DEFAULT_FN_ATTRS 5522 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 5523 { 5524 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 5525 (__v4si) __A, 5526 (__v4si) __W); 5527 } 5528 5529 static __inline__ __m128i __DEFAULT_FN_ATTRS 5530 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A) 5531 { 5532 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 5533 (__v4si) __A, 5534 (__v4si) _mm_setzero_si128 ()); 5535 } 5536 5537 5538 static __inline__ __m256i __DEFAULT_FN_ATTRS 5539 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 5540 { 5541 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 5542 (__v8si) __A, 5543 (__v8si) __W); 5544 } 5545 5546 static __inline__ __m256i __DEFAULT_FN_ATTRS 5547 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A) 5548 { 5549 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 5550 (__v8si) __A, 5551 (__v8si) _mm256_setzero_si256 ()); 5552 } 5553 5554 static __inline__ __m128i __DEFAULT_FN_ATTRS 5555 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) 5556 { 5557 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, 5558 (__v4si) __W, 5559 (__mmask8) 5560 __U); 5561 } 5562 5563 static __inline__ __m128i __DEFAULT_FN_ATTRS 5564 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P) 5565 { 5566 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, 5567 (__v4si) 5568 _mm_setzero_si128 (), 5569 (__mmask8) 5570 __U); 5571 } 5572 5573 static __inline__ __m256i __DEFAULT_FN_ATTRS 5574 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P) 5575 { 5576 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, 5577 (__v8si) __W, 5578 (__mmask8) 5579 __U); 5580 } 5581 5582 static __inline__ __m256i __DEFAULT_FN_ATTRS 5583 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P) 5584 { 5585 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, 5586 (__v8si) 5587 _mm256_setzero_si256 (), 5588 (__mmask8) 5589 __U); 5590 } 5591 5592 static __inline__ void __DEFAULT_FN_ATTRS 5593 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A) 5594 { 5595 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P, 5596 (__v4si) __A, 5597 (__mmask8) __U); 5598 } 5599 5600 static __inline__ void __DEFAULT_FN_ATTRS 5601 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) 5602 { 5603 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P, 5604 (__v8si) __A, 5605 (__mmask8) __U); 5606 } 5607 5608 static __inline__ __m128i __DEFAULT_FN_ATTRS 5609 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 5610 { 5611 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 5612 (__v2di) __A, 5613 (__v2di) __W); 5614 } 5615 5616 static __inline__ __m128i __DEFAULT_FN_ATTRS 5617 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) 5618 { 5619 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 5620 (__v2di) __A, 5621 (__v2di) _mm_setzero_di ()); 5622 } 5623 5624 static __inline__ __m256i __DEFAULT_FN_ATTRS 5625 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 5626 { 5627 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 5628 (__v4di) __A, 5629 (__v4di) __W); 5630 } 5631 5632 static __inline__ __m256i __DEFAULT_FN_ATTRS 5633 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A) 5634 { 5635 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 5636 (__v4di) __A, 5637 (__v4di) _mm256_setzero_si256 ()); 5638 } 5639 5640 static __inline__ __m128i __DEFAULT_FN_ATTRS 5641 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) 5642 { 5643 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, 5644 (__v2di) __W, 5645 (__mmask8) 5646 __U); 5647 } 5648 5649 static __inline__ __m128i __DEFAULT_FN_ATTRS 5650 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P) 5651 { 5652 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, 5653 (__v2di) 5654 _mm_setzero_di (), 5655 (__mmask8) 5656 __U); 5657 } 5658 5659 static __inline__ __m256i __DEFAULT_FN_ATTRS 5660 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) 5661 { 5662 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, 5663 (__v4di) __W, 5664 (__mmask8) 5665 __U); 5666 } 5667 5668 static __inline__ __m256i __DEFAULT_FN_ATTRS 5669 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P) 5670 { 5671 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, 5672 (__v4di) 5673 _mm256_setzero_si256 (), 5674 (__mmask8) 5675 __U); 5676 } 5677 5678 static __inline__ void __DEFAULT_FN_ATTRS 5679 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A) 5680 { 5681 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P, 5682 (__v2di) __A, 5683 (__mmask8) __U); 5684 } 5685 5686 static __inline__ void __DEFAULT_FN_ATTRS 5687 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A) 5688 { 5689 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P, 5690 (__v4di) __A, 5691 (__mmask8) __U); 5692 } 5693 5694 static __inline__ __m128d __DEFAULT_FN_ATTRS 5695 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A) 5696 { 5697 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5698 (__v2df)_mm_movedup_pd(__A), 5699 (__v2df)__W); 5700 } 5701 5702 static __inline__ __m128d __DEFAULT_FN_ATTRS 5703 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A) 5704 { 5705 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5706 (__v2df)_mm_movedup_pd(__A), 5707 (__v2df)_mm_setzero_pd()); 5708 } 5709 5710 static __inline__ __m256d __DEFAULT_FN_ATTRS 5711 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A) 5712 { 5713 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5714 (__v4df)_mm256_movedup_pd(__A), 5715 (__v4df)__W); 5716 } 5717 5718 static __inline__ __m256d __DEFAULT_FN_ATTRS 5719 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) 5720 { 5721 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5722 (__v4df)_mm256_movedup_pd(__A), 5723 (__v4df)_mm256_setzero_pd()); 5724 } 5725 5726 5727 #define _mm_mask_set1_epi32(O, M, A) __extension__ ({ \ 5728 (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \ 5729 (__v4si)(__m128i)(O), \ 5730 (__mmask8)(M)); }) 5731 5732 #define _mm_maskz_set1_epi32(M, A) __extension__ ({ \ 5733 (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \ 5734 (__v4si)_mm_setzero_si128(), \ 5735 (__mmask8)(M)); }) 5736 5737 #define _mm256_mask_set1_epi32(O, M, A) __extension__ ({ \ 5738 (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \ 5739 (__v8si)(__m256i)(O), \ 5740 (__mmask8)(M)); }) 5741 5742 #define _mm256_maskz_set1_epi32(M, A) __extension__ ({ \ 5743 (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \ 5744 (__v8si)_mm256_setzero_si256(), \ 5745 (__mmask8)(M)); }) 5746 5747 #ifdef __x86_64__ 5748 static __inline__ __m128i __DEFAULT_FN_ATTRS 5749 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) 5750 { 5751 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O, 5752 __M); 5753 } 5754 5755 static __inline__ __m128i __DEFAULT_FN_ATTRS 5756 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A) 5757 { 5758 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, 5759 (__v2di) 5760 _mm_setzero_si128 (), 5761 __M); 5762 } 5763 5764 static __inline__ __m256i __DEFAULT_FN_ATTRS 5765 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) 5766 { 5767 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O, 5768 __M); 5769 } 5770 5771 static __inline__ __m256i __DEFAULT_FN_ATTRS 5772 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) 5773 { 5774 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, 5775 (__v4di) 5776 _mm256_setzero_si256 (), 5777 __M); 5778 } 5779 #endif 5780 5781 #define _mm_fixupimm_pd(A, B, C, imm) __extension__ ({ \ 5782 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 5783 (__v2df)(__m128d)(B), \ 5784 (__v2di)(__m128i)(C), (int)(imm), \ 5785 (__mmask8)-1); }) 5786 5787 #define _mm_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \ 5788 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 5789 (__v2df)(__m128d)(B), \ 5790 (__v2di)(__m128i)(C), (int)(imm), \ 5791 (__mmask8)(U)); }) 5792 5793 #define _mm_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \ 5794 (__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \ 5795 (__v2df)(__m128d)(B), \ 5796 (__v2di)(__m128i)(C), \ 5797 (int)(imm), (__mmask8)(U)); }) 5798 5799 #define _mm256_fixupimm_pd(A, B, C, imm) __extension__ ({ \ 5800 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 5801 (__v4df)(__m256d)(B), \ 5802 (__v4di)(__m256i)(C), (int)(imm), \ 5803 (__mmask8)-1); }) 5804 5805 #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \ 5806 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 5807 (__v4df)(__m256d)(B), \ 5808 (__v4di)(__m256i)(C), (int)(imm), \ 5809 (__mmask8)(U)); }) 5810 5811 #define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \ 5812 (__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \ 5813 (__v4df)(__m256d)(B), \ 5814 (__v4di)(__m256i)(C), \ 5815 (int)(imm), (__mmask8)(U)); }) 5816 5817 #define _mm_fixupimm_ps(A, B, C, imm) __extension__ ({ \ 5818 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 5819 (__v4sf)(__m128)(B), \ 5820 (__v4si)(__m128i)(C), (int)(imm), \ 5821 (__mmask8)-1); }) 5822 5823 #define _mm_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \ 5824 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 5825 (__v4sf)(__m128)(B), \ 5826 (__v4si)(__m128i)(C), (int)(imm), \ 5827 (__mmask8)(U)); }) 5828 5829 #define _mm_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \ 5830 (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \ 5831 (__v4sf)(__m128)(B), \ 5832 (__v4si)(__m128i)(C), (int)(imm), \ 5833 (__mmask8)(U)); }) 5834 5835 #define _mm256_fixupimm_ps(A, B, C, imm) __extension__ ({ \ 5836 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 5837 (__v8sf)(__m256)(B), \ 5838 (__v8si)(__m256i)(C), (int)(imm), \ 5839 (__mmask8)-1); }) 5840 5841 #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \ 5842 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 5843 (__v8sf)(__m256)(B), \ 5844 (__v8si)(__m256i)(C), (int)(imm), \ 5845 (__mmask8)(U)); }) 5846 5847 #define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \ 5848 (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \ 5849 (__v8sf)(__m256)(B), \ 5850 (__v8si)(__m256i)(C), (int)(imm), \ 5851 (__mmask8)(U)); }) 5852 5853 static __inline__ __m128d __DEFAULT_FN_ATTRS 5854 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) 5855 { 5856 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, 5857 (__v2df) __W, 5858 (__mmask8) __U); 5859 } 5860 5861 static __inline__ __m128d __DEFAULT_FN_ATTRS 5862 _mm_maskz_load_pd (__mmask8 __U, void const *__P) 5863 { 5864 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, 5865 (__v2df) 5866 _mm_setzero_pd (), 5867 (__mmask8) __U); 5868 } 5869 5870 static __inline__ __m256d __DEFAULT_FN_ATTRS 5871 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P) 5872 { 5873 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, 5874 (__v4df) __W, 5875 (__mmask8) __U); 5876 } 5877 5878 static __inline__ __m256d __DEFAULT_FN_ATTRS 5879 _mm256_maskz_load_pd (__mmask8 __U, void const *__P) 5880 { 5881 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, 5882 (__v4df) 5883 _mm256_setzero_pd (), 5884 (__mmask8) __U); 5885 } 5886 5887 static __inline__ __m128 __DEFAULT_FN_ATTRS 5888 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P) 5889 { 5890 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, 5891 (__v4sf) __W, 5892 (__mmask8) __U); 5893 } 5894 5895 static __inline__ __m128 __DEFAULT_FN_ATTRS 5896 _mm_maskz_load_ps (__mmask8 __U, void const *__P) 5897 { 5898 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, 5899 (__v4sf) 5900 _mm_setzero_ps (), 5901 (__mmask8) __U); 5902 } 5903 5904 static __inline__ __m256 __DEFAULT_FN_ATTRS 5905 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P) 5906 { 5907 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, 5908 (__v8sf) __W, 5909 (__mmask8) __U); 5910 } 5911 5912 static __inline__ __m256 __DEFAULT_FN_ATTRS 5913 _mm256_maskz_load_ps (__mmask8 __U, void const *__P) 5914 { 5915 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, 5916 (__v8sf) 5917 _mm256_setzero_ps (), 5918 (__mmask8) __U); 5919 } 5920 5921 static __inline__ __m128i __DEFAULT_FN_ATTRS 5922 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) 5923 { 5924 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, 5925 (__v2di) __W, 5926 (__mmask8) __U); 5927 } 5928 5929 static __inline__ __m128i __DEFAULT_FN_ATTRS 5930 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 5931 { 5932 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, 5933 (__v2di) 5934 _mm_setzero_si128 (), 5935 (__mmask8) __U); 5936 } 5937 5938 static __inline__ __m256i __DEFAULT_FN_ATTRS 5939 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) 5940 { 5941 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, 5942 (__v4di) __W, 5943 (__mmask8) __U); 5944 } 5945 5946 static __inline__ __m256i __DEFAULT_FN_ATTRS 5947 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 5948 { 5949 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, 5950 (__v4di) 5951 _mm256_setzero_si256 (), 5952 (__mmask8) __U); 5953 } 5954 5955 static __inline__ __m128i __DEFAULT_FN_ATTRS 5956 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) 5957 { 5958 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, 5959 (__v4si) __W, 5960 (__mmask8) __U); 5961 } 5962 5963 static __inline__ __m128i __DEFAULT_FN_ATTRS 5964 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 5965 { 5966 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, 5967 (__v4si) 5968 _mm_setzero_si128 (), 5969 (__mmask8) __U); 5970 } 5971 5972 static __inline__ __m256i __DEFAULT_FN_ATTRS 5973 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) 5974 { 5975 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, 5976 (__v8si) __W, 5977 (__mmask8) __U); 5978 } 5979 5980 static __inline__ __m256i __DEFAULT_FN_ATTRS 5981 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 5982 { 5983 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, 5984 (__v8si) 5985 _mm256_setzero_si256 (), 5986 (__mmask8) __U); 5987 } 5988 5989 static __inline__ __m128d __DEFAULT_FN_ATTRS 5990 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) 5991 { 5992 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, 5993 (__v2df) __W, 5994 (__mmask8) __U); 5995 } 5996 5997 static __inline__ __m128d __DEFAULT_FN_ATTRS 5998 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P) 5999 { 6000 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, 6001 (__v2df) 6002 _mm_setzero_pd (), 6003 (__mmask8) __U); 6004 } 6005 6006 static __inline__ __m256d __DEFAULT_FN_ATTRS 6007 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) 6008 { 6009 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, 6010 (__v4df) __W, 6011 (__mmask8) __U); 6012 } 6013 6014 static __inline__ __m256d __DEFAULT_FN_ATTRS 6015 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P) 6016 { 6017 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, 6018 (__v4df) 6019 _mm256_setzero_pd (), 6020 (__mmask8) __U); 6021 } 6022 6023 static __inline__ __m128 __DEFAULT_FN_ATTRS 6024 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) 6025 { 6026 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, 6027 (__v4sf) __W, 6028 (__mmask8) __U); 6029 } 6030 6031 static __inline__ __m128 __DEFAULT_FN_ATTRS 6032 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P) 6033 { 6034 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, 6035 (__v4sf) 6036 _mm_setzero_ps (), 6037 (__mmask8) __U); 6038 } 6039 6040 static __inline__ __m256 __DEFAULT_FN_ATTRS 6041 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) 6042 { 6043 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, 6044 (__v8sf) __W, 6045 (__mmask8) __U); 6046 } 6047 6048 static __inline__ __m256 __DEFAULT_FN_ATTRS 6049 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P) 6050 { 6051 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, 6052 (__v8sf) 6053 _mm256_setzero_ps (), 6054 (__mmask8) __U); 6055 } 6056 6057 static __inline__ void __DEFAULT_FN_ATTRS 6058 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A) 6059 { 6060 __builtin_ia32_storeapd128_mask ((__v2df *) __P, 6061 (__v2df) __A, 6062 (__mmask8) __U); 6063 } 6064 6065 static __inline__ void __DEFAULT_FN_ATTRS 6066 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A) 6067 { 6068 __builtin_ia32_storeapd256_mask ((__v4df *) __P, 6069 (__v4df) __A, 6070 (__mmask8) __U); 6071 } 6072 6073 static __inline__ void __DEFAULT_FN_ATTRS 6074 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A) 6075 { 6076 __builtin_ia32_storeaps128_mask ((__v4sf *) __P, 6077 (__v4sf) __A, 6078 (__mmask8) __U); 6079 } 6080 6081 static __inline__ void __DEFAULT_FN_ATTRS 6082 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A) 6083 { 6084 __builtin_ia32_storeaps256_mask ((__v8sf *) __P, 6085 (__v8sf) __A, 6086 (__mmask8) __U); 6087 } 6088 6089 static __inline__ void __DEFAULT_FN_ATTRS 6090 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) 6091 { 6092 __builtin_ia32_storedqudi128_mask ((__v2di *) __P, 6093 (__v2di) __A, 6094 (__mmask8) __U); 6095 } 6096 6097 static __inline__ void __DEFAULT_FN_ATTRS 6098 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) 6099 { 6100 __builtin_ia32_storedqudi256_mask ((__v4di *) __P, 6101 (__v4di) __A, 6102 (__mmask8) __U); 6103 } 6104 6105 static __inline__ void __DEFAULT_FN_ATTRS 6106 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) 6107 { 6108 __builtin_ia32_storedqusi128_mask ((__v4si *) __P, 6109 (__v4si) __A, 6110 (__mmask8) __U); 6111 } 6112 6113 static __inline__ void __DEFAULT_FN_ATTRS 6114 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) 6115 { 6116 __builtin_ia32_storedqusi256_mask ((__v8si *) __P, 6117 (__v8si) __A, 6118 (__mmask8) __U); 6119 } 6120 6121 static __inline__ void __DEFAULT_FN_ATTRS 6122 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A) 6123 { 6124 __builtin_ia32_storeupd128_mask ((__v2df *) __P, 6125 (__v2df) __A, 6126 (__mmask8) __U); 6127 } 6128 6129 static __inline__ void __DEFAULT_FN_ATTRS 6130 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A) 6131 { 6132 __builtin_ia32_storeupd256_mask ((__v4df *) __P, 6133 (__v4df) __A, 6134 (__mmask8) __U); 6135 } 6136 6137 static __inline__ void __DEFAULT_FN_ATTRS 6138 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A) 6139 { 6140 __builtin_ia32_storeups128_mask ((__v4sf *) __P, 6141 (__v4sf) __A, 6142 (__mmask8) __U); 6143 } 6144 6145 static __inline__ void __DEFAULT_FN_ATTRS 6146 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) 6147 { 6148 __builtin_ia32_storeups256_mask ((__v8sf *) __P, 6149 (__v8sf) __A, 6150 (__mmask8) __U); 6151 } 6152 6153 6154 static __inline__ __m128d __DEFAULT_FN_ATTRS 6155 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 6156 { 6157 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6158 (__v2df)_mm_unpackhi_pd(__A, __B), 6159 (__v2df)__W); 6160 } 6161 6162 static __inline__ __m128d __DEFAULT_FN_ATTRS 6163 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) 6164 { 6165 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6166 (__v2df)_mm_unpackhi_pd(__A, __B), 6167 (__v2df)_mm_setzero_pd()); 6168 } 6169 6170 static __inline__ __m256d __DEFAULT_FN_ATTRS 6171 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 6172 { 6173 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6174 (__v4df)_mm256_unpackhi_pd(__A, __B), 6175 (__v4df)__W); 6176 } 6177 6178 static __inline__ __m256d __DEFAULT_FN_ATTRS 6179 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) 6180 { 6181 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6182 (__v4df)_mm256_unpackhi_pd(__A, __B), 6183 (__v4df)_mm256_setzero_pd()); 6184 } 6185 6186 static __inline__ __m128 __DEFAULT_FN_ATTRS 6187 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 6188 { 6189 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6190 (__v4sf)_mm_unpackhi_ps(__A, __B), 6191 (__v4sf)__W); 6192 } 6193 6194 static __inline__ __m128 __DEFAULT_FN_ATTRS 6195 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) 6196 { 6197 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6198 (__v4sf)_mm_unpackhi_ps(__A, __B), 6199 (__v4sf)_mm_setzero_ps()); 6200 } 6201 6202 static __inline__ __m256 __DEFAULT_FN_ATTRS 6203 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 6204 { 6205 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6206 (__v8sf)_mm256_unpackhi_ps(__A, __B), 6207 (__v8sf)__W); 6208 } 6209 6210 static __inline__ __m256 __DEFAULT_FN_ATTRS 6211 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) 6212 { 6213 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6214 (__v8sf)_mm256_unpackhi_ps(__A, __B), 6215 (__v8sf)_mm256_setzero_ps()); 6216 } 6217 6218 static __inline__ __m128d __DEFAULT_FN_ATTRS 6219 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 6220 { 6221 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6222 (__v2df)_mm_unpacklo_pd(__A, __B), 6223 (__v2df)__W); 6224 } 6225 6226 static __inline__ __m128d __DEFAULT_FN_ATTRS 6227 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) 6228 { 6229 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6230 (__v2df)_mm_unpacklo_pd(__A, __B), 6231 (__v2df)_mm_setzero_pd()); 6232 } 6233 6234 static __inline__ __m256d __DEFAULT_FN_ATTRS 6235 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 6236 { 6237 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6238 (__v4df)_mm256_unpacklo_pd(__A, __B), 6239 (__v4df)__W); 6240 } 6241 6242 static __inline__ __m256d __DEFAULT_FN_ATTRS 6243 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) 6244 { 6245 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6246 (__v4df)_mm256_unpacklo_pd(__A, __B), 6247 (__v4df)_mm256_setzero_pd()); 6248 } 6249 6250 static __inline__ __m128 __DEFAULT_FN_ATTRS 6251 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 6252 { 6253 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6254 (__v4sf)_mm_unpacklo_ps(__A, __B), 6255 (__v4sf)__W); 6256 } 6257 6258 static __inline__ __m128 __DEFAULT_FN_ATTRS 6259 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) 6260 { 6261 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6262 (__v4sf)_mm_unpacklo_ps(__A, __B), 6263 (__v4sf)_mm_setzero_ps()); 6264 } 6265 6266 static __inline__ __m256 __DEFAULT_FN_ATTRS 6267 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 6268 { 6269 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6270 (__v8sf)_mm256_unpacklo_ps(__A, __B), 6271 (__v8sf)__W); 6272 } 6273 6274 static __inline__ __m256 __DEFAULT_FN_ATTRS 6275 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) 6276 { 6277 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6278 (__v8sf)_mm256_unpacklo_ps(__A, __B), 6279 (__v8sf)_mm256_setzero_ps()); 6280 } 6281 6282 static __inline__ __m128d __DEFAULT_FN_ATTRS 6283 _mm_rcp14_pd (__m128d __A) 6284 { 6285 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 6286 (__v2df) 6287 _mm_setzero_pd (), 6288 (__mmask8) -1); 6289 } 6290 6291 static __inline__ __m128d __DEFAULT_FN_ATTRS 6292 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A) 6293 { 6294 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 6295 (__v2df) __W, 6296 (__mmask8) __U); 6297 } 6298 6299 static __inline__ __m128d __DEFAULT_FN_ATTRS 6300 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A) 6301 { 6302 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 6303 (__v2df) 6304 _mm_setzero_pd (), 6305 (__mmask8) __U); 6306 } 6307 6308 static __inline__ __m256d __DEFAULT_FN_ATTRS 6309 _mm256_rcp14_pd (__m256d __A) 6310 { 6311 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 6312 (__v4df) 6313 _mm256_setzero_pd (), 6314 (__mmask8) -1); 6315 } 6316 6317 static __inline__ __m256d __DEFAULT_FN_ATTRS 6318 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A) 6319 { 6320 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 6321 (__v4df) __W, 6322 (__mmask8) __U); 6323 } 6324 6325 static __inline__ __m256d __DEFAULT_FN_ATTRS 6326 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A) 6327 { 6328 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 6329 (__v4df) 6330 _mm256_setzero_pd (), 6331 (__mmask8) __U); 6332 } 6333 6334 static __inline__ __m128 __DEFAULT_FN_ATTRS 6335 _mm_rcp14_ps (__m128 __A) 6336 { 6337 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 6338 (__v4sf) 6339 _mm_setzero_ps (), 6340 (__mmask8) -1); 6341 } 6342 6343 static __inline__ __m128 __DEFAULT_FN_ATTRS 6344 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A) 6345 { 6346 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 6347 (__v4sf) __W, 6348 (__mmask8) __U); 6349 } 6350 6351 static __inline__ __m128 __DEFAULT_FN_ATTRS 6352 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A) 6353 { 6354 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 6355 (__v4sf) 6356 _mm_setzero_ps (), 6357 (__mmask8) __U); 6358 } 6359 6360 static __inline__ __m256 __DEFAULT_FN_ATTRS 6361 _mm256_rcp14_ps (__m256 __A) 6362 { 6363 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6364 (__v8sf) 6365 _mm256_setzero_ps (), 6366 (__mmask8) -1); 6367 } 6368 6369 static __inline__ __m256 __DEFAULT_FN_ATTRS 6370 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A) 6371 { 6372 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6373 (__v8sf) __W, 6374 (__mmask8) __U); 6375 } 6376 6377 static __inline__ __m256 __DEFAULT_FN_ATTRS 6378 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) 6379 { 6380 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6381 (__v8sf) 6382 _mm256_setzero_ps (), 6383 (__mmask8) __U); 6384 } 6385 6386 #define _mm_mask_permute_pd(W, U, X, C) __extension__ ({ \ 6387 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6388 (__v2df)_mm_permute_pd((X), (C)), \ 6389 (__v2df)(__m128d)(W)); }) 6390 6391 #define _mm_maskz_permute_pd(U, X, C) __extension__ ({ \ 6392 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6393 (__v2df)_mm_permute_pd((X), (C)), \ 6394 (__v2df)_mm_setzero_pd()); }) 6395 6396 #define _mm256_mask_permute_pd(W, U, X, C) __extension__ ({ \ 6397 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6398 (__v4df)_mm256_permute_pd((X), (C)), \ 6399 (__v4df)(__m256d)(W)); }) 6400 6401 #define _mm256_maskz_permute_pd(U, X, C) __extension__ ({ \ 6402 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6403 (__v4df)_mm256_permute_pd((X), (C)), \ 6404 (__v4df)_mm256_setzero_pd()); }) 6405 6406 #define _mm_mask_permute_ps(W, U, X, C) __extension__ ({ \ 6407 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6408 (__v4sf)_mm_permute_ps((X), (C)), \ 6409 (__v4sf)(__m128)(W)); }) 6410 6411 #define _mm_maskz_permute_ps(U, X, C) __extension__ ({ \ 6412 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6413 (__v4sf)_mm_permute_ps((X), (C)), \ 6414 (__v4sf)_mm_setzero_ps()); }) 6415 6416 #define _mm256_mask_permute_ps(W, U, X, C) __extension__ ({ \ 6417 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6418 (__v8sf)_mm256_permute_ps((X), (C)), \ 6419 (__v8sf)(__m256)(W)); }) 6420 6421 #define _mm256_maskz_permute_ps(U, X, C) __extension__ ({ \ 6422 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6423 (__v8sf)_mm256_permute_ps((X), (C)), \ 6424 (__v8sf)_mm256_setzero_ps()); }) 6425 6426 static __inline__ __m128d __DEFAULT_FN_ATTRS 6427 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) 6428 { 6429 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6430 (__v2df)_mm_permutevar_pd(__A, __C), 6431 (__v2df)__W); 6432 } 6433 6434 static __inline__ __m128d __DEFAULT_FN_ATTRS 6435 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) 6436 { 6437 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6438 (__v2df)_mm_permutevar_pd(__A, __C), 6439 (__v2df)_mm_setzero_pd()); 6440 } 6441 6442 static __inline__ __m256d __DEFAULT_FN_ATTRS 6443 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) 6444 { 6445 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6446 (__v4df)_mm256_permutevar_pd(__A, __C), 6447 (__v4df)__W); 6448 } 6449 6450 static __inline__ __m256d __DEFAULT_FN_ATTRS 6451 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) 6452 { 6453 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6454 (__v4df)_mm256_permutevar_pd(__A, __C), 6455 (__v4df)_mm256_setzero_pd()); 6456 } 6457 6458 static __inline__ __m128 __DEFAULT_FN_ATTRS 6459 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) 6460 { 6461 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6462 (__v4sf)_mm_permutevar_ps(__A, __C), 6463 (__v4sf)__W); 6464 } 6465 6466 static __inline__ __m128 __DEFAULT_FN_ATTRS 6467 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) 6468 { 6469 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6470 (__v4sf)_mm_permutevar_ps(__A, __C), 6471 (__v4sf)_mm_setzero_ps()); 6472 } 6473 6474 static __inline__ __m256 __DEFAULT_FN_ATTRS 6475 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) 6476 { 6477 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6478 (__v8sf)_mm256_permutevar_ps(__A, __C), 6479 (__v8sf)__W); 6480 } 6481 6482 static __inline__ __m256 __DEFAULT_FN_ATTRS 6483 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) 6484 { 6485 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6486 (__v8sf)_mm256_permutevar_ps(__A, __C), 6487 (__v8sf)_mm256_setzero_ps()); 6488 } 6489 6490 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6491 _mm_test_epi32_mask (__m128i __A, __m128i __B) 6492 { 6493 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A, 6494 (__v4si) __B, 6495 (__mmask8) -1); 6496 } 6497 6498 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6499 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 6500 { 6501 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A, 6502 (__v4si) __B, __U); 6503 } 6504 6505 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6506 _mm256_test_epi32_mask (__m256i __A, __m256i __B) 6507 { 6508 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A, 6509 (__v8si) __B, 6510 (__mmask8) -1); 6511 } 6512 6513 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6514 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 6515 { 6516 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A, 6517 (__v8si) __B, __U); 6518 } 6519 6520 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6521 _mm_test_epi64_mask (__m128i __A, __m128i __B) 6522 { 6523 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A, 6524 (__v2di) __B, 6525 (__mmask8) -1); 6526 } 6527 6528 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6529 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 6530 { 6531 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A, 6532 (__v2di) __B, __U); 6533 } 6534 6535 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6536 _mm256_test_epi64_mask (__m256i __A, __m256i __B) 6537 { 6538 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A, 6539 (__v4di) __B, 6540 (__mmask8) -1); 6541 } 6542 6543 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6544 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 6545 { 6546 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A, 6547 (__v4di) __B, __U); 6548 } 6549 6550 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6551 _mm_testn_epi32_mask (__m128i __A, __m128i __B) 6552 { 6553 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A, 6554 (__v4si) __B, 6555 (__mmask8) -1); 6556 } 6557 6558 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6559 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 6560 { 6561 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A, 6562 (__v4si) __B, __U); 6563 } 6564 6565 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6566 _mm256_testn_epi32_mask (__m256i __A, __m256i __B) 6567 { 6568 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A, 6569 (__v8si) __B, 6570 (__mmask8) -1); 6571 } 6572 6573 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6574 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 6575 { 6576 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A, 6577 (__v8si) __B, __U); 6578 } 6579 6580 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6581 _mm_testn_epi64_mask (__m128i __A, __m128i __B) 6582 { 6583 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A, 6584 (__v2di) __B, 6585 (__mmask8) -1); 6586 } 6587 6588 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6589 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 6590 { 6591 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A, 6592 (__v2di) __B, __U); 6593 } 6594 6595 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6596 _mm256_testn_epi64_mask (__m256i __A, __m256i __B) 6597 { 6598 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A, 6599 (__v4di) __B, 6600 (__mmask8) -1); 6601 } 6602 6603 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6604 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 6605 { 6606 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A, 6607 (__v4di) __B, __U); 6608 } 6609 6610 6611 6612 static __inline__ __m128i __DEFAULT_FN_ATTRS 6613 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6614 { 6615 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6616 (__v4si)_mm_unpackhi_epi32(__A, __B), 6617 (__v4si)__W); 6618 } 6619 6620 static __inline__ __m128i __DEFAULT_FN_ATTRS 6621 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6622 { 6623 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6624 (__v4si)_mm_unpackhi_epi32(__A, __B), 6625 (__v4si)_mm_setzero_si128()); 6626 } 6627 6628 static __inline__ __m256i __DEFAULT_FN_ATTRS 6629 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6630 { 6631 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6632 (__v8si)_mm256_unpackhi_epi32(__A, __B), 6633 (__v8si)__W); 6634 } 6635 6636 static __inline__ __m256i __DEFAULT_FN_ATTRS 6637 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) 6638 { 6639 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6640 (__v8si)_mm256_unpackhi_epi32(__A, __B), 6641 (__v8si)_mm256_setzero_si256()); 6642 } 6643 6644 static __inline__ __m128i __DEFAULT_FN_ATTRS 6645 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6646 { 6647 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6648 (__v2di)_mm_unpackhi_epi64(__A, __B), 6649 (__v2di)__W); 6650 } 6651 6652 static __inline__ __m128i __DEFAULT_FN_ATTRS 6653 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6654 { 6655 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6656 (__v2di)_mm_unpackhi_epi64(__A, __B), 6657 (__v2di)_mm_setzero_di()); 6658 } 6659 6660 static __inline__ __m256i __DEFAULT_FN_ATTRS 6661 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6662 { 6663 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6664 (__v4di)_mm256_unpackhi_epi64(__A, __B), 6665 (__v4di)__W); 6666 } 6667 6668 static __inline__ __m256i __DEFAULT_FN_ATTRS 6669 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6670 { 6671 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6672 (__v4di)_mm256_unpackhi_epi64(__A, __B), 6673 (__v4di)_mm256_setzero_si256()); 6674 } 6675 6676 static __inline__ __m128i __DEFAULT_FN_ATTRS 6677 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6678 { 6679 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6680 (__v4si)_mm_unpacklo_epi32(__A, __B), 6681 (__v4si)__W); 6682 } 6683 6684 static __inline__ __m128i __DEFAULT_FN_ATTRS 6685 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6686 { 6687 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6688 (__v4si)_mm_unpacklo_epi32(__A, __B), 6689 (__v4si)_mm_setzero_si128()); 6690 } 6691 6692 static __inline__ __m256i __DEFAULT_FN_ATTRS 6693 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6694 { 6695 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6696 (__v8si)_mm256_unpacklo_epi32(__A, __B), 6697 (__v8si)__W); 6698 } 6699 6700 static __inline__ __m256i __DEFAULT_FN_ATTRS 6701 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) 6702 { 6703 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6704 (__v8si)_mm256_unpacklo_epi32(__A, __B), 6705 (__v8si)_mm256_setzero_si256()); 6706 } 6707 6708 static __inline__ __m128i __DEFAULT_FN_ATTRS 6709 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6710 { 6711 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6712 (__v2di)_mm_unpacklo_epi64(__A, __B), 6713 (__v2di)__W); 6714 } 6715 6716 static __inline__ __m128i __DEFAULT_FN_ATTRS 6717 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6718 { 6719 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6720 (__v2di)_mm_unpacklo_epi64(__A, __B), 6721 (__v2di)_mm_setzero_di()); 6722 } 6723 6724 static __inline__ __m256i __DEFAULT_FN_ATTRS 6725 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6726 { 6727 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6728 (__v4di)_mm256_unpacklo_epi64(__A, __B), 6729 (__v4di)__W); 6730 } 6731 6732 static __inline__ __m256i __DEFAULT_FN_ATTRS 6733 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6734 { 6735 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6736 (__v4di)_mm256_unpacklo_epi64(__A, __B), 6737 (__v4di)_mm256_setzero_si256()); 6738 } 6739 6740 static __inline__ __m128i __DEFAULT_FN_ATTRS 6741 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6742 { 6743 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6744 (__v4si)_mm_sra_epi32(__A, __B), 6745 (__v4si)__W); 6746 } 6747 6748 static __inline__ __m128i __DEFAULT_FN_ATTRS 6749 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6750 { 6751 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6752 (__v4si)_mm_sra_epi32(__A, __B), 6753 (__v4si)_mm_setzero_si128()); 6754 } 6755 6756 static __inline__ __m256i __DEFAULT_FN_ATTRS 6757 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 6758 { 6759 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6760 (__v8si)_mm256_sra_epi32(__A, __B), 6761 (__v8si)__W); 6762 } 6763 6764 static __inline__ __m256i __DEFAULT_FN_ATTRS 6765 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) 6766 { 6767 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6768 (__v8si)_mm256_sra_epi32(__A, __B), 6769 (__v8si)_mm256_setzero_si256()); 6770 } 6771 6772 static __inline__ __m128i __DEFAULT_FN_ATTRS 6773 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) 6774 { 6775 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6776 (__v4si)_mm_srai_epi32(__A, __B), 6777 (__v4si)__W); 6778 } 6779 6780 static __inline__ __m128i __DEFAULT_FN_ATTRS 6781 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B) 6782 { 6783 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6784 (__v4si)_mm_srai_epi32(__A, __B), 6785 (__v4si)_mm_setzero_si128()); 6786 } 6787 6788 static __inline__ __m256i __DEFAULT_FN_ATTRS 6789 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) 6790 { 6791 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6792 (__v8si)_mm256_srai_epi32(__A, __B), 6793 (__v8si)__W); 6794 } 6795 6796 static __inline__ __m256i __DEFAULT_FN_ATTRS 6797 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B) 6798 { 6799 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6800 (__v8si)_mm256_srai_epi32(__A, __B), 6801 (__v8si)_mm256_setzero_si256()); 6802 } 6803 6804 static __inline__ __m128i __DEFAULT_FN_ATTRS 6805 _mm_sra_epi64(__m128i __A, __m128i __B) 6806 { 6807 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B); 6808 } 6809 6810 static __inline__ __m128i __DEFAULT_FN_ATTRS 6811 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6812 { 6813 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6814 (__v2di)_mm_sra_epi64(__A, __B), \ 6815 (__v2di)__W); 6816 } 6817 6818 static __inline__ __m128i __DEFAULT_FN_ATTRS 6819 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6820 { 6821 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6822 (__v2di)_mm_sra_epi64(__A, __B), \ 6823 (__v2di)_mm_setzero_di()); 6824 } 6825 6826 static __inline__ __m256i __DEFAULT_FN_ATTRS 6827 _mm256_sra_epi64(__m256i __A, __m128i __B) 6828 { 6829 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B); 6830 } 6831 6832 static __inline__ __m256i __DEFAULT_FN_ATTRS 6833 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 6834 { 6835 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6836 (__v4di)_mm256_sra_epi64(__A, __B), \ 6837 (__v4di)__W); 6838 } 6839 6840 static __inline__ __m256i __DEFAULT_FN_ATTRS 6841 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) 6842 { 6843 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6844 (__v4di)_mm256_sra_epi64(__A, __B), \ 6845 (__v4di)_mm256_setzero_si256()); 6846 } 6847 6848 static __inline__ __m128i __DEFAULT_FN_ATTRS 6849 _mm_srai_epi64(__m128i __A, int __imm) 6850 { 6851 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm); 6852 } 6853 6854 static __inline__ __m128i __DEFAULT_FN_ATTRS 6855 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm) 6856 { 6857 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6858 (__v2di)_mm_srai_epi64(__A, __imm), \ 6859 (__v2di)__W); 6860 } 6861 6862 static __inline__ __m128i __DEFAULT_FN_ATTRS 6863 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm) 6864 { 6865 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6866 (__v2di)_mm_srai_epi64(__A, __imm), \ 6867 (__v2di)_mm_setzero_di()); 6868 } 6869 6870 static __inline__ __m256i __DEFAULT_FN_ATTRS 6871 _mm256_srai_epi64(__m256i __A, int __imm) 6872 { 6873 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm); 6874 } 6875 6876 static __inline__ __m256i __DEFAULT_FN_ATTRS 6877 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm) 6878 { 6879 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6880 (__v4di)_mm256_srai_epi64(__A, __imm), \ 6881 (__v4di)__W); 6882 } 6883 6884 static __inline__ __m256i __DEFAULT_FN_ATTRS 6885 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm) 6886 { 6887 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6888 (__v4di)_mm256_srai_epi64(__A, __imm), \ 6889 (__v4di)_mm256_setzero_si256()); 6890 } 6891 6892 #define _mm_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ 6893 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ 6894 (__v4si)(__m128i)(B), \ 6895 (__v4si)(__m128i)(C), (int)(imm), \ 6896 (__mmask8)-1); }) 6897 6898 #define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \ 6899 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ 6900 (__v4si)(__m128i)(B), \ 6901 (__v4si)(__m128i)(C), (int)(imm), \ 6902 (__mmask8)(U)); }) 6903 6904 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \ 6905 (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \ 6906 (__v4si)(__m128i)(B), \ 6907 (__v4si)(__m128i)(C), (int)(imm), \ 6908 (__mmask8)(U)); }) 6909 6910 #define _mm256_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ 6911 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ 6912 (__v8si)(__m256i)(B), \ 6913 (__v8si)(__m256i)(C), (int)(imm), \ 6914 (__mmask8)-1); }) 6915 6916 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \ 6917 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ 6918 (__v8si)(__m256i)(B), \ 6919 (__v8si)(__m256i)(C), (int)(imm), \ 6920 (__mmask8)(U)); }) 6921 6922 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \ 6923 (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \ 6924 (__v8si)(__m256i)(B), \ 6925 (__v8si)(__m256i)(C), (int)(imm), \ 6926 (__mmask8)(U)); }) 6927 6928 #define _mm_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \ 6929 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ 6930 (__v2di)(__m128i)(B), \ 6931 (__v2di)(__m128i)(C), (int)(imm), \ 6932 (__mmask8)-1); }) 6933 6934 #define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \ 6935 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ 6936 (__v2di)(__m128i)(B), \ 6937 (__v2di)(__m128i)(C), (int)(imm), \ 6938 (__mmask8)(U)); }) 6939 6940 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \ 6941 (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \ 6942 (__v2di)(__m128i)(B), \ 6943 (__v2di)(__m128i)(C), (int)(imm), \ 6944 (__mmask8)(U)); }) 6945 6946 #define _mm256_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \ 6947 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ 6948 (__v4di)(__m256i)(B), \ 6949 (__v4di)(__m256i)(C), (int)(imm), \ 6950 (__mmask8)-1); }) 6951 6952 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \ 6953 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ 6954 (__v4di)(__m256i)(B), \ 6955 (__v4di)(__m256i)(C), (int)(imm), \ 6956 (__mmask8)(U)); }) 6957 6958 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \ 6959 (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \ 6960 (__v4di)(__m256i)(B), \ 6961 (__v4di)(__m256i)(C), (int)(imm), \ 6962 (__mmask8)(U)); }) 6963 6964 6965 6966 #define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \ 6967 (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \ 6968 (__v8sf)(__m256)(B), (int)(imm), \ 6969 (__v8sf)_mm256_setzero_ps(), \ 6970 (__mmask8)-1); }) 6971 6972 #define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \ 6973 (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \ 6974 (__v8sf)(__m256)(B), (int)(imm), \ 6975 (__v8sf)(__m256)(W), \ 6976 (__mmask8)(U)); }) 6977 6978 #define _mm256_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \ 6979 (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \ 6980 (__v8sf)(__m256)(B), (int)(imm), \ 6981 (__v8sf)_mm256_setzero_ps(), \ 6982 (__mmask8)(U)); }) 6983 6984 #define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \ 6985 (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \ 6986 (__v4df)(__m256d)(B), \ 6987 (int)(imm), \ 6988 (__v4df)_mm256_setzero_pd(), \ 6989 (__mmask8)-1); }) 6990 6991 #define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \ 6992 (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \ 6993 (__v4df)(__m256d)(B), \ 6994 (int)(imm), \ 6995 (__v4df)(__m256d)(W), \ 6996 (__mmask8)(U)); }) 6997 6998 #define _mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \ 6999 (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \ 7000 (__v4df)(__m256d)(B), \ 7001 (int)(imm), \ 7002 (__v4df)_mm256_setzero_pd(), \ 7003 (__mmask8)(U)); }) 7004 7005 #define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \ 7006 (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \ 7007 (__v8si)(__m256i)(B), \ 7008 (int)(imm), \ 7009 (__v8si)_mm256_setzero_si256(), \ 7010 (__mmask8)-1); }) 7011 7012 #define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \ 7013 (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \ 7014 (__v8si)(__m256i)(B), \ 7015 (int)(imm), \ 7016 (__v8si)(__m256i)(W), \ 7017 (__mmask8)(U)); }) 7018 7019 #define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \ 7020 (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \ 7021 (__v8si)(__m256i)(B), \ 7022 (int)(imm), \ 7023 (__v8si)_mm256_setzero_si256(), \ 7024 (__mmask8)(U)); }) 7025 7026 #define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \ 7027 (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \ 7028 (__v4di)(__m256i)(B), \ 7029 (int)(imm), \ 7030 (__v4di)_mm256_setzero_si256(), \ 7031 (__mmask8)-1); }) 7032 7033 #define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \ 7034 (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \ 7035 (__v4di)(__m256i)(B), \ 7036 (int)(imm), \ 7037 (__v4di)(__m256i)(W), \ 7038 (__mmask8)(U)); }) 7039 7040 #define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \ 7041 (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \ 7042 (__v4di)(__m256i)(B), \ 7043 (int)(imm), \ 7044 (__v4di)_mm256_setzero_si256(), \ 7045 (__mmask8)(U)); }) 7046 7047 #define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ 7048 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 7049 (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 7050 (__v2df)(__m128d)(W)); }) 7051 7052 #define _mm_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ 7053 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 7054 (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 7055 (__v2df)_mm_setzero_pd()); }) 7056 7057 #define _mm256_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ 7058 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 7059 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 7060 (__v4df)(__m256d)(W)); }) 7061 7062 #define _mm256_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ 7063 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 7064 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 7065 (__v4df)_mm256_setzero_pd()); }) 7066 7067 #define _mm_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ 7068 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 7069 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 7070 (__v4sf)(__m128)(W)); }) 7071 7072 #define _mm_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ 7073 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 7074 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 7075 (__v4sf)_mm_setzero_ps()); }) 7076 7077 #define _mm256_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ 7078 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 7079 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 7080 (__v8sf)(__m256)(W)); }) 7081 7082 #define _mm256_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ 7083 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 7084 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 7085 (__v8sf)_mm256_setzero_ps()); }) 7086 7087 static __inline__ __m128d __DEFAULT_FN_ATTRS 7088 _mm_rsqrt14_pd (__m128d __A) 7089 { 7090 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 7091 (__v2df) 7092 _mm_setzero_pd (), 7093 (__mmask8) -1); 7094 } 7095 7096 static __inline__ __m128d __DEFAULT_FN_ATTRS 7097 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A) 7098 { 7099 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 7100 (__v2df) __W, 7101 (__mmask8) __U); 7102 } 7103 7104 static __inline__ __m128d __DEFAULT_FN_ATTRS 7105 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A) 7106 { 7107 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 7108 (__v2df) 7109 _mm_setzero_pd (), 7110 (__mmask8) __U); 7111 } 7112 7113 static __inline__ __m256d __DEFAULT_FN_ATTRS 7114 _mm256_rsqrt14_pd (__m256d __A) 7115 { 7116 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 7117 (__v4df) 7118 _mm256_setzero_pd (), 7119 (__mmask8) -1); 7120 } 7121 7122 static __inline__ __m256d __DEFAULT_FN_ATTRS 7123 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A) 7124 { 7125 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 7126 (__v4df) __W, 7127 (__mmask8) __U); 7128 } 7129 7130 static __inline__ __m256d __DEFAULT_FN_ATTRS 7131 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A) 7132 { 7133 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 7134 (__v4df) 7135 _mm256_setzero_pd (), 7136 (__mmask8) __U); 7137 } 7138 7139 static __inline__ __m128 __DEFAULT_FN_ATTRS 7140 _mm_rsqrt14_ps (__m128 __A) 7141 { 7142 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 7143 (__v4sf) 7144 _mm_setzero_ps (), 7145 (__mmask8) -1); 7146 } 7147 7148 static __inline__ __m128 __DEFAULT_FN_ATTRS 7149 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A) 7150 { 7151 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 7152 (__v4sf) __W, 7153 (__mmask8) __U); 7154 } 7155 7156 static __inline__ __m128 __DEFAULT_FN_ATTRS 7157 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A) 7158 { 7159 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 7160 (__v4sf) 7161 _mm_setzero_ps (), 7162 (__mmask8) __U); 7163 } 7164 7165 static __inline__ __m256 __DEFAULT_FN_ATTRS 7166 _mm256_rsqrt14_ps (__m256 __A) 7167 { 7168 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 7169 (__v8sf) 7170 _mm256_setzero_ps (), 7171 (__mmask8) -1); 7172 } 7173 7174 static __inline__ __m256 __DEFAULT_FN_ATTRS 7175 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A) 7176 { 7177 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 7178 (__v8sf) __W, 7179 (__mmask8) __U); 7180 } 7181 7182 static __inline__ __m256 __DEFAULT_FN_ATTRS 7183 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) 7184 { 7185 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 7186 (__v8sf) 7187 _mm256_setzero_ps (), 7188 (__mmask8) __U); 7189 } 7190 7191 static __inline__ __m256 __DEFAULT_FN_ATTRS 7192 _mm256_broadcast_f32x4(__m128 __A) 7193 { 7194 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 7195 0, 1, 2, 3, 0, 1, 2, 3); 7196 } 7197 7198 static __inline__ __m256 __DEFAULT_FN_ATTRS 7199 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) 7200 { 7201 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 7202 (__v8sf)_mm256_broadcast_f32x4(__A), 7203 (__v8sf)__O); 7204 } 7205 7206 static __inline__ __m256 __DEFAULT_FN_ATTRS 7207 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) 7208 { 7209 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 7210 (__v8sf)_mm256_broadcast_f32x4(__A), 7211 (__v8sf)_mm256_setzero_ps()); 7212 } 7213 7214 static __inline__ __m256i __DEFAULT_FN_ATTRS 7215 _mm256_broadcast_i32x4(__m128i __A) 7216 { 7217 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 7218 0, 1, 2, 3, 0, 1, 2, 3); 7219 } 7220 7221 static __inline__ __m256i __DEFAULT_FN_ATTRS 7222 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) 7223 { 7224 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 7225 (__v8si)_mm256_broadcast_i32x4(__A), 7226 (__v8si)__O); 7227 } 7228 7229 static __inline__ __m256i __DEFAULT_FN_ATTRS 7230 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) 7231 { 7232 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 7233 (__v8si)_mm256_broadcast_i32x4(__A), 7234 (__v8si)_mm256_setzero_si256()); 7235 } 7236 7237 static __inline__ __m256d __DEFAULT_FN_ATTRS 7238 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) 7239 { 7240 return (__m256d)__builtin_ia32_selectpd_256(__M, 7241 (__v4df) _mm256_broadcastsd_pd(__A), 7242 (__v4df) __O); 7243 } 7244 7245 static __inline__ __m256d __DEFAULT_FN_ATTRS 7246 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 7247 { 7248 return (__m256d)__builtin_ia32_selectpd_256(__M, 7249 (__v4df) _mm256_broadcastsd_pd(__A), 7250 (__v4df) _mm256_setzero_pd()); 7251 } 7252 7253 static __inline__ __m128 __DEFAULT_FN_ATTRS 7254 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) 7255 { 7256 return (__m128)__builtin_ia32_selectps_128(__M, 7257 (__v4sf) _mm_broadcastss_ps(__A), 7258 (__v4sf) __O); 7259 } 7260 7261 static __inline__ __m128 __DEFAULT_FN_ATTRS 7262 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 7263 { 7264 return (__m128)__builtin_ia32_selectps_128(__M, 7265 (__v4sf) _mm_broadcastss_ps(__A), 7266 (__v4sf) _mm_setzero_ps()); 7267 } 7268 7269 static __inline__ __m256 __DEFAULT_FN_ATTRS 7270 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) 7271 { 7272 return (__m256)__builtin_ia32_selectps_256(__M, 7273 (__v8sf) _mm256_broadcastss_ps(__A), 7274 (__v8sf) __O); 7275 } 7276 7277 static __inline__ __m256 __DEFAULT_FN_ATTRS 7278 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 7279 { 7280 return (__m256)__builtin_ia32_selectps_256(__M, 7281 (__v8sf) _mm256_broadcastss_ps(__A), 7282 (__v8sf) _mm256_setzero_ps()); 7283 } 7284 7285 static __inline__ __m128i __DEFAULT_FN_ATTRS 7286 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7287 { 7288 return (__m128i)__builtin_ia32_selectd_128(__M, 7289 (__v4si) _mm_broadcastd_epi32(__A), 7290 (__v4si) __O); 7291 } 7292 7293 static __inline__ __m128i __DEFAULT_FN_ATTRS 7294 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 7295 { 7296 return (__m128i)__builtin_ia32_selectd_128(__M, 7297 (__v4si) _mm_broadcastd_epi32(__A), 7298 (__v4si) _mm_setzero_si128()); 7299 } 7300 7301 static __inline__ __m256i __DEFAULT_FN_ATTRS 7302 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) 7303 { 7304 return (__m256i)__builtin_ia32_selectd_256(__M, 7305 (__v8si) _mm256_broadcastd_epi32(__A), 7306 (__v8si) __O); 7307 } 7308 7309 static __inline__ __m256i __DEFAULT_FN_ATTRS 7310 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 7311 { 7312 return (__m256i)__builtin_ia32_selectd_256(__M, 7313 (__v8si) _mm256_broadcastd_epi32(__A), 7314 (__v8si) _mm256_setzero_si256()); 7315 } 7316 7317 static __inline__ __m128i __DEFAULT_FN_ATTRS 7318 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) 7319 { 7320 return (__m128i)__builtin_ia32_selectq_128(__M, 7321 (__v2di) _mm_broadcastq_epi64(__A), 7322 (__v2di) __O); 7323 } 7324 7325 static __inline__ __m128i __DEFAULT_FN_ATTRS 7326 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 7327 { 7328 return (__m128i)__builtin_ia32_selectq_128(__M, 7329 (__v2di) _mm_broadcastq_epi64(__A), 7330 (__v2di) _mm_setzero_si128()); 7331 } 7332 7333 static __inline__ __m256i __DEFAULT_FN_ATTRS 7334 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) 7335 { 7336 return (__m256i)__builtin_ia32_selectq_256(__M, 7337 (__v4di) _mm256_broadcastq_epi64(__A), 7338 (__v4di) __O); 7339 } 7340 7341 static __inline__ __m256i __DEFAULT_FN_ATTRS 7342 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 7343 { 7344 return (__m256i)__builtin_ia32_selectq_256(__M, 7345 (__v4di) _mm256_broadcastq_epi64(__A), 7346 (__v4di) _mm256_setzero_si256()); 7347 } 7348 7349 static __inline__ __m128i __DEFAULT_FN_ATTRS 7350 _mm_cvtsepi32_epi8 (__m128i __A) 7351 { 7352 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 7353 (__v16qi)_mm_undefined_si128(), 7354 (__mmask8) -1); 7355 } 7356 7357 static __inline__ __m128i __DEFAULT_FN_ATTRS 7358 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7359 { 7360 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 7361 (__v16qi) __O, __M); 7362 } 7363 7364 static __inline__ __m128i __DEFAULT_FN_ATTRS 7365 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A) 7366 { 7367 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 7368 (__v16qi) _mm_setzero_si128 (), 7369 __M); 7370 } 7371 7372 static __inline__ void __DEFAULT_FN_ATTRS 7373 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7374 { 7375 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7376 } 7377 7378 static __inline__ __m128i __DEFAULT_FN_ATTRS 7379 _mm256_cvtsepi32_epi8 (__m256i __A) 7380 { 7381 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 7382 (__v16qi)_mm_undefined_si128(), 7383 (__mmask8) -1); 7384 } 7385 7386 static __inline__ __m128i __DEFAULT_FN_ATTRS 7387 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7388 { 7389 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 7390 (__v16qi) __O, __M); 7391 } 7392 7393 static __inline__ __m128i __DEFAULT_FN_ATTRS 7394 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A) 7395 { 7396 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 7397 (__v16qi) _mm_setzero_si128 (), 7398 __M); 7399 } 7400 7401 static __inline__ void __DEFAULT_FN_ATTRS 7402 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7403 { 7404 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 7405 } 7406 7407 static __inline__ __m128i __DEFAULT_FN_ATTRS 7408 _mm_cvtsepi32_epi16 (__m128i __A) 7409 { 7410 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7411 (__v8hi)_mm_setzero_si128 (), 7412 (__mmask8) -1); 7413 } 7414 7415 static __inline__ __m128i __DEFAULT_FN_ATTRS 7416 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7417 { 7418 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7419 (__v8hi)__O, 7420 __M); 7421 } 7422 7423 static __inline__ __m128i __DEFAULT_FN_ATTRS 7424 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A) 7425 { 7426 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7427 (__v8hi) _mm_setzero_si128 (), 7428 __M); 7429 } 7430 7431 static __inline__ void __DEFAULT_FN_ATTRS 7432 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7433 { 7434 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7435 } 7436 7437 static __inline__ __m128i __DEFAULT_FN_ATTRS 7438 _mm256_cvtsepi32_epi16 (__m256i __A) 7439 { 7440 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7441 (__v8hi)_mm_undefined_si128(), 7442 (__mmask8) -1); 7443 } 7444 7445 static __inline__ __m128i __DEFAULT_FN_ATTRS 7446 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7447 { 7448 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7449 (__v8hi) __O, __M); 7450 } 7451 7452 static __inline__ __m128i __DEFAULT_FN_ATTRS 7453 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A) 7454 { 7455 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7456 (__v8hi) _mm_setzero_si128 (), 7457 __M); 7458 } 7459 7460 static __inline__ void __DEFAULT_FN_ATTRS 7461 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7462 { 7463 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7464 } 7465 7466 static __inline__ __m128i __DEFAULT_FN_ATTRS 7467 _mm_cvtsepi64_epi8 (__m128i __A) 7468 { 7469 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7470 (__v16qi)_mm_undefined_si128(), 7471 (__mmask8) -1); 7472 } 7473 7474 static __inline__ __m128i __DEFAULT_FN_ATTRS 7475 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7476 { 7477 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7478 (__v16qi) __O, __M); 7479 } 7480 7481 static __inline__ __m128i __DEFAULT_FN_ATTRS 7482 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A) 7483 { 7484 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7485 (__v16qi) _mm_setzero_si128 (), 7486 __M); 7487 } 7488 7489 static __inline__ void __DEFAULT_FN_ATTRS 7490 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7491 { 7492 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7493 } 7494 7495 static __inline__ __m128i __DEFAULT_FN_ATTRS 7496 _mm256_cvtsepi64_epi8 (__m256i __A) 7497 { 7498 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7499 (__v16qi)_mm_undefined_si128(), 7500 (__mmask8) -1); 7501 } 7502 7503 static __inline__ __m128i __DEFAULT_FN_ATTRS 7504 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7505 { 7506 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7507 (__v16qi) __O, __M); 7508 } 7509 7510 static __inline__ __m128i __DEFAULT_FN_ATTRS 7511 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A) 7512 { 7513 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7514 (__v16qi) _mm_setzero_si128 (), 7515 __M); 7516 } 7517 7518 static __inline__ void __DEFAULT_FN_ATTRS 7519 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7520 { 7521 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7522 } 7523 7524 static __inline__ __m128i __DEFAULT_FN_ATTRS 7525 _mm_cvtsepi64_epi32 (__m128i __A) 7526 { 7527 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7528 (__v4si)_mm_undefined_si128(), 7529 (__mmask8) -1); 7530 } 7531 7532 static __inline__ __m128i __DEFAULT_FN_ATTRS 7533 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7534 { 7535 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7536 (__v4si) __O, __M); 7537 } 7538 7539 static __inline__ __m128i __DEFAULT_FN_ATTRS 7540 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A) 7541 { 7542 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7543 (__v4si) _mm_setzero_si128 (), 7544 __M); 7545 } 7546 7547 static __inline__ void __DEFAULT_FN_ATTRS 7548 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7549 { 7550 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7551 } 7552 7553 static __inline__ __m128i __DEFAULT_FN_ATTRS 7554 _mm256_cvtsepi64_epi32 (__m256i __A) 7555 { 7556 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7557 (__v4si)_mm_undefined_si128(), 7558 (__mmask8) -1); 7559 } 7560 7561 static __inline__ __m128i __DEFAULT_FN_ATTRS 7562 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7563 { 7564 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7565 (__v4si)__O, 7566 __M); 7567 } 7568 7569 static __inline__ __m128i __DEFAULT_FN_ATTRS 7570 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A) 7571 { 7572 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7573 (__v4si) _mm_setzero_si128 (), 7574 __M); 7575 } 7576 7577 static __inline__ void __DEFAULT_FN_ATTRS 7578 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7579 { 7580 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7581 } 7582 7583 static __inline__ __m128i __DEFAULT_FN_ATTRS 7584 _mm_cvtsepi64_epi16 (__m128i __A) 7585 { 7586 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7587 (__v8hi)_mm_undefined_si128(), 7588 (__mmask8) -1); 7589 } 7590 7591 static __inline__ __m128i __DEFAULT_FN_ATTRS 7592 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7593 { 7594 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7595 (__v8hi) __O, __M); 7596 } 7597 7598 static __inline__ __m128i __DEFAULT_FN_ATTRS 7599 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A) 7600 { 7601 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7602 (__v8hi) _mm_setzero_si128 (), 7603 __M); 7604 } 7605 7606 static __inline__ void __DEFAULT_FN_ATTRS 7607 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7608 { 7609 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7610 } 7611 7612 static __inline__ __m128i __DEFAULT_FN_ATTRS 7613 _mm256_cvtsepi64_epi16 (__m256i __A) 7614 { 7615 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7616 (__v8hi)_mm_undefined_si128(), 7617 (__mmask8) -1); 7618 } 7619 7620 static __inline__ __m128i __DEFAULT_FN_ATTRS 7621 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7622 { 7623 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7624 (__v8hi) __O, __M); 7625 } 7626 7627 static __inline__ __m128i __DEFAULT_FN_ATTRS 7628 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A) 7629 { 7630 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7631 (__v8hi) _mm_setzero_si128 (), 7632 __M); 7633 } 7634 7635 static __inline__ void __DEFAULT_FN_ATTRS 7636 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7637 { 7638 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7639 } 7640 7641 static __inline__ __m128i __DEFAULT_FN_ATTRS 7642 _mm_cvtusepi32_epi8 (__m128i __A) 7643 { 7644 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7645 (__v16qi)_mm_undefined_si128(), 7646 (__mmask8) -1); 7647 } 7648 7649 static __inline__ __m128i __DEFAULT_FN_ATTRS 7650 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7651 { 7652 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7653 (__v16qi) __O, 7654 __M); 7655 } 7656 7657 static __inline__ __m128i __DEFAULT_FN_ATTRS 7658 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A) 7659 { 7660 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7661 (__v16qi) _mm_setzero_si128 (), 7662 __M); 7663 } 7664 7665 static __inline__ void __DEFAULT_FN_ATTRS 7666 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7667 { 7668 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7669 } 7670 7671 static __inline__ __m128i __DEFAULT_FN_ATTRS 7672 _mm256_cvtusepi32_epi8 (__m256i __A) 7673 { 7674 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7675 (__v16qi)_mm_undefined_si128(), 7676 (__mmask8) -1); 7677 } 7678 7679 static __inline__ __m128i __DEFAULT_FN_ATTRS 7680 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7681 { 7682 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7683 (__v16qi) __O, 7684 __M); 7685 } 7686 7687 static __inline__ __m128i __DEFAULT_FN_ATTRS 7688 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A) 7689 { 7690 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7691 (__v16qi) _mm_setzero_si128 (), 7692 __M); 7693 } 7694 7695 static __inline__ void __DEFAULT_FN_ATTRS 7696 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7697 { 7698 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M); 7699 } 7700 7701 static __inline__ __m128i __DEFAULT_FN_ATTRS 7702 _mm_cvtusepi32_epi16 (__m128i __A) 7703 { 7704 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7705 (__v8hi)_mm_undefined_si128(), 7706 (__mmask8) -1); 7707 } 7708 7709 static __inline__ __m128i __DEFAULT_FN_ATTRS 7710 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7711 { 7712 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7713 (__v8hi) __O, __M); 7714 } 7715 7716 static __inline__ __m128i __DEFAULT_FN_ATTRS 7717 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A) 7718 { 7719 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7720 (__v8hi) _mm_setzero_si128 (), 7721 __M); 7722 } 7723 7724 static __inline__ void __DEFAULT_FN_ATTRS 7725 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7726 { 7727 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7728 } 7729 7730 static __inline__ __m128i __DEFAULT_FN_ATTRS 7731 _mm256_cvtusepi32_epi16 (__m256i __A) 7732 { 7733 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7734 (__v8hi) _mm_undefined_si128(), 7735 (__mmask8) -1); 7736 } 7737 7738 static __inline__ __m128i __DEFAULT_FN_ATTRS 7739 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7740 { 7741 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7742 (__v8hi) __O, __M); 7743 } 7744 7745 static __inline__ __m128i __DEFAULT_FN_ATTRS 7746 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A) 7747 { 7748 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7749 (__v8hi) _mm_setzero_si128 (), 7750 __M); 7751 } 7752 7753 static __inline__ void __DEFAULT_FN_ATTRS 7754 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7755 { 7756 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7757 } 7758 7759 static __inline__ __m128i __DEFAULT_FN_ATTRS 7760 _mm_cvtusepi64_epi8 (__m128i __A) 7761 { 7762 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7763 (__v16qi)_mm_undefined_si128(), 7764 (__mmask8) -1); 7765 } 7766 7767 static __inline__ __m128i __DEFAULT_FN_ATTRS 7768 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7769 { 7770 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7771 (__v16qi) __O, 7772 __M); 7773 } 7774 7775 static __inline__ __m128i __DEFAULT_FN_ATTRS 7776 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A) 7777 { 7778 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7779 (__v16qi) _mm_setzero_si128 (), 7780 __M); 7781 } 7782 7783 static __inline__ void __DEFAULT_FN_ATTRS 7784 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7785 { 7786 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7787 } 7788 7789 static __inline__ __m128i __DEFAULT_FN_ATTRS 7790 _mm256_cvtusepi64_epi8 (__m256i __A) 7791 { 7792 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7793 (__v16qi)_mm_undefined_si128(), 7794 (__mmask8) -1); 7795 } 7796 7797 static __inline__ __m128i __DEFAULT_FN_ATTRS 7798 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7799 { 7800 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7801 (__v16qi) __O, 7802 __M); 7803 } 7804 7805 static __inline__ __m128i __DEFAULT_FN_ATTRS 7806 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A) 7807 { 7808 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7809 (__v16qi) _mm_setzero_si128 (), 7810 __M); 7811 } 7812 7813 static __inline__ void __DEFAULT_FN_ATTRS 7814 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7815 { 7816 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7817 } 7818 7819 static __inline__ __m128i __DEFAULT_FN_ATTRS 7820 _mm_cvtusepi64_epi32 (__m128i __A) 7821 { 7822 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7823 (__v4si)_mm_undefined_si128(), 7824 (__mmask8) -1); 7825 } 7826 7827 static __inline__ __m128i __DEFAULT_FN_ATTRS 7828 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7829 { 7830 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7831 (__v4si) __O, __M); 7832 } 7833 7834 static __inline__ __m128i __DEFAULT_FN_ATTRS 7835 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A) 7836 { 7837 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7838 (__v4si) _mm_setzero_si128 (), 7839 __M); 7840 } 7841 7842 static __inline__ void __DEFAULT_FN_ATTRS 7843 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7844 { 7845 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7846 } 7847 7848 static __inline__ __m128i __DEFAULT_FN_ATTRS 7849 _mm256_cvtusepi64_epi32 (__m256i __A) 7850 { 7851 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7852 (__v4si)_mm_undefined_si128(), 7853 (__mmask8) -1); 7854 } 7855 7856 static __inline__ __m128i __DEFAULT_FN_ATTRS 7857 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7858 { 7859 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7860 (__v4si) __O, __M); 7861 } 7862 7863 static __inline__ __m128i __DEFAULT_FN_ATTRS 7864 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A) 7865 { 7866 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7867 (__v4si) _mm_setzero_si128 (), 7868 __M); 7869 } 7870 7871 static __inline__ void __DEFAULT_FN_ATTRS 7872 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7873 { 7874 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7875 } 7876 7877 static __inline__ __m128i __DEFAULT_FN_ATTRS 7878 _mm_cvtusepi64_epi16 (__m128i __A) 7879 { 7880 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7881 (__v8hi)_mm_undefined_si128(), 7882 (__mmask8) -1); 7883 } 7884 7885 static __inline__ __m128i __DEFAULT_FN_ATTRS 7886 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7887 { 7888 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7889 (__v8hi) __O, __M); 7890 } 7891 7892 static __inline__ __m128i __DEFAULT_FN_ATTRS 7893 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A) 7894 { 7895 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7896 (__v8hi) _mm_setzero_si128 (), 7897 __M); 7898 } 7899 7900 static __inline__ void __DEFAULT_FN_ATTRS 7901 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7902 { 7903 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7904 } 7905 7906 static __inline__ __m128i __DEFAULT_FN_ATTRS 7907 _mm256_cvtusepi64_epi16 (__m256i __A) 7908 { 7909 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7910 (__v8hi)_mm_undefined_si128(), 7911 (__mmask8) -1); 7912 } 7913 7914 static __inline__ __m128i __DEFAULT_FN_ATTRS 7915 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7916 { 7917 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7918 (__v8hi) __O, __M); 7919 } 7920 7921 static __inline__ __m128i __DEFAULT_FN_ATTRS 7922 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A) 7923 { 7924 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7925 (__v8hi) _mm_setzero_si128 (), 7926 __M); 7927 } 7928 7929 static __inline__ void __DEFAULT_FN_ATTRS 7930 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7931 { 7932 return __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7933 } 7934 7935 static __inline__ __m128i __DEFAULT_FN_ATTRS 7936 _mm_cvtepi32_epi8 (__m128i __A) 7937 { 7938 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 7939 (__v16qi)_mm_undefined_si128(), 7940 (__mmask8) -1); 7941 } 7942 7943 static __inline__ __m128i __DEFAULT_FN_ATTRS 7944 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7945 { 7946 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 7947 (__v16qi) __O, __M); 7948 } 7949 7950 static __inline__ __m128i __DEFAULT_FN_ATTRS 7951 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A) 7952 { 7953 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 7954 (__v16qi) 7955 _mm_setzero_si128 (), 7956 __M); 7957 } 7958 7959 static __inline__ void __DEFAULT_FN_ATTRS 7960 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7961 { 7962 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7963 } 7964 7965 static __inline__ __m128i __DEFAULT_FN_ATTRS 7966 _mm256_cvtepi32_epi8 (__m256i __A) 7967 { 7968 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 7969 (__v16qi)_mm_undefined_si128(), 7970 (__mmask8) -1); 7971 } 7972 7973 static __inline__ __m128i __DEFAULT_FN_ATTRS 7974 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7975 { 7976 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 7977 (__v16qi) __O, __M); 7978 } 7979 7980 static __inline__ __m128i __DEFAULT_FN_ATTRS 7981 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A) 7982 { 7983 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 7984 (__v16qi) _mm_setzero_si128 (), 7985 __M); 7986 } 7987 7988 static __inline__ void __DEFAULT_FN_ATTRS 7989 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7990 { 7991 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 7992 } 7993 7994 static __inline__ __m128i __DEFAULT_FN_ATTRS 7995 _mm_cvtepi32_epi16 (__m128i __A) 7996 { 7997 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 7998 (__v8hi) _mm_setzero_si128 (), 7999 (__mmask8) -1); 8000 } 8001 8002 static __inline__ __m128i __DEFAULT_FN_ATTRS 8003 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 8004 { 8005 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 8006 (__v8hi) __O, __M); 8007 } 8008 8009 static __inline__ __m128i __DEFAULT_FN_ATTRS 8010 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A) 8011 { 8012 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 8013 (__v8hi) _mm_setzero_si128 (), 8014 __M); 8015 } 8016 8017 static __inline__ void __DEFAULT_FN_ATTRS 8018 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 8019 { 8020 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 8021 } 8022 8023 static __inline__ __m128i __DEFAULT_FN_ATTRS 8024 _mm256_cvtepi32_epi16 (__m256i __A) 8025 { 8026 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 8027 (__v8hi)_mm_setzero_si128 (), 8028 (__mmask8) -1); 8029 } 8030 8031 static __inline__ __m128i __DEFAULT_FN_ATTRS 8032 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 8033 { 8034 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 8035 (__v8hi) __O, __M); 8036 } 8037 8038 static __inline__ __m128i __DEFAULT_FN_ATTRS 8039 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A) 8040 { 8041 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 8042 (__v8hi) _mm_setzero_si128 (), 8043 __M); 8044 } 8045 8046 static __inline__ void __DEFAULT_FN_ATTRS 8047 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 8048 { 8049 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 8050 } 8051 8052 static __inline__ __m128i __DEFAULT_FN_ATTRS 8053 _mm_cvtepi64_epi8 (__m128i __A) 8054 { 8055 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 8056 (__v16qi) _mm_undefined_si128(), 8057 (__mmask8) -1); 8058 } 8059 8060 static __inline__ __m128i __DEFAULT_FN_ATTRS 8061 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 8062 { 8063 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 8064 (__v16qi) __O, __M); 8065 } 8066 8067 static __inline__ __m128i __DEFAULT_FN_ATTRS 8068 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A) 8069 { 8070 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 8071 (__v16qi) _mm_setzero_si128 (), 8072 __M); 8073 } 8074 8075 static __inline__ void __DEFAULT_FN_ATTRS 8076 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 8077 { 8078 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 8079 } 8080 8081 static __inline__ __m128i __DEFAULT_FN_ATTRS 8082 _mm256_cvtepi64_epi8 (__m256i __A) 8083 { 8084 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 8085 (__v16qi) _mm_undefined_si128(), 8086 (__mmask8) -1); 8087 } 8088 8089 static __inline__ __m128i __DEFAULT_FN_ATTRS 8090 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 8091 { 8092 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 8093 (__v16qi) __O, __M); 8094 } 8095 8096 static __inline__ __m128i __DEFAULT_FN_ATTRS 8097 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A) 8098 { 8099 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 8100 (__v16qi) _mm_setzero_si128 (), 8101 __M); 8102 } 8103 8104 static __inline__ void __DEFAULT_FN_ATTRS 8105 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 8106 { 8107 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 8108 } 8109 8110 static __inline__ __m128i __DEFAULT_FN_ATTRS 8111 _mm_cvtepi64_epi32 (__m128i __A) 8112 { 8113 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 8114 (__v4si)_mm_undefined_si128(), 8115 (__mmask8) -1); 8116 } 8117 8118 static __inline__ __m128i __DEFAULT_FN_ATTRS 8119 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 8120 { 8121 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 8122 (__v4si) __O, __M); 8123 } 8124 8125 static __inline__ __m128i __DEFAULT_FN_ATTRS 8126 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A) 8127 { 8128 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 8129 (__v4si) _mm_setzero_si128 (), 8130 __M); 8131 } 8132 8133 static __inline__ void __DEFAULT_FN_ATTRS 8134 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 8135 { 8136 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 8137 } 8138 8139 static __inline__ __m128i __DEFAULT_FN_ATTRS 8140 _mm256_cvtepi64_epi32 (__m256i __A) 8141 { 8142 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, 8143 (__v4si) _mm_undefined_si128(), 8144 (__mmask8) -1); 8145 } 8146 8147 static __inline__ __m128i __DEFAULT_FN_ATTRS 8148 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 8149 { 8150 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, 8151 (__v4si) __O, __M); 8152 } 8153 8154 static __inline__ __m128i __DEFAULT_FN_ATTRS 8155 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A) 8156 { 8157 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, 8158 (__v4si) _mm_setzero_si128 (), 8159 __M); 8160 } 8161 8162 static __inline__ void __DEFAULT_FN_ATTRS 8163 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 8164 { 8165 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 8166 } 8167 8168 static __inline__ __m128i __DEFAULT_FN_ATTRS 8169 _mm_cvtepi64_epi16 (__m128i __A) 8170 { 8171 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 8172 (__v8hi) _mm_undefined_si128(), 8173 (__mmask8) -1); 8174 } 8175 8176 static __inline__ __m128i __DEFAULT_FN_ATTRS 8177 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 8178 { 8179 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 8180 (__v8hi)__O, 8181 __M); 8182 } 8183 8184 static __inline__ __m128i __DEFAULT_FN_ATTRS 8185 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A) 8186 { 8187 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 8188 (__v8hi) _mm_setzero_si128 (), 8189 __M); 8190 } 8191 8192 static __inline__ void __DEFAULT_FN_ATTRS 8193 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 8194 { 8195 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 8196 } 8197 8198 static __inline__ __m128i __DEFAULT_FN_ATTRS 8199 _mm256_cvtepi64_epi16 (__m256i __A) 8200 { 8201 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 8202 (__v8hi)_mm_undefined_si128(), 8203 (__mmask8) -1); 8204 } 8205 8206 static __inline__ __m128i __DEFAULT_FN_ATTRS 8207 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 8208 { 8209 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 8210 (__v8hi) __O, __M); 8211 } 8212 8213 static __inline__ __m128i __DEFAULT_FN_ATTRS 8214 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A) 8215 { 8216 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 8217 (__v8hi) _mm_setzero_si128 (), 8218 __M); 8219 } 8220 8221 static __inline__ void __DEFAULT_FN_ATTRS 8222 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 8223 { 8224 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 8225 } 8226 8227 #define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \ 8228 (__m128)__builtin_shufflevector((__v8sf)(__m256)(A), \ 8229 (__v8sf)_mm256_undefined_ps(), \ 8230 ((imm) & 1) ? 4 : 0, \ 8231 ((imm) & 1) ? 5 : 1, \ 8232 ((imm) & 1) ? 6 : 2, \ 8233 ((imm) & 1) ? 7 : 3); }) 8234 8235 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \ 8236 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 8237 (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \ 8238 (__v4sf)(W)); }) 8239 8240 #define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \ 8241 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 8242 (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \ 8243 (__v4sf)_mm_setzero_ps()); }) 8244 8245 #define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \ 8246 (__m128i)__builtin_shufflevector((__v8si)(__m256)(A), \ 8247 (__v8si)_mm256_undefined_si256(), \ 8248 ((imm) & 1) ? 4 : 0, \ 8249 ((imm) & 1) ? 5 : 1, \ 8250 ((imm) & 1) ? 6 : 2, \ 8251 ((imm) & 1) ? 7 : 3); }) 8252 8253 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \ 8254 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8255 (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \ 8256 (__v4si)(W)); }) 8257 8258 #define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \ 8259 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8260 (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \ 8261 (__v4si)_mm_setzero_si128()); }) 8262 8263 #define _mm256_insertf32x4(A, B, imm) __extension__ ({ \ 8264 (__m256)__builtin_shufflevector((__v8sf)(A), \ 8265 (__v8sf)_mm256_castps128_ps256((__m128)(B)), \ 8266 ((imm) & 0x1) ? 0 : 8, \ 8267 ((imm) & 0x1) ? 1 : 9, \ 8268 ((imm) & 0x1) ? 2 : 10, \ 8269 ((imm) & 0x1) ? 3 : 11, \ 8270 ((imm) & 0x1) ? 8 : 4, \ 8271 ((imm) & 0x1) ? 9 : 5, \ 8272 ((imm) & 0x1) ? 10 : 6, \ 8273 ((imm) & 0x1) ? 11 : 7); }) 8274 8275 #define _mm256_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \ 8276 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 8277 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ 8278 (__v8sf)(W)); }) 8279 8280 #define _mm256_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \ 8281 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 8282 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ 8283 (__v8sf)_mm256_setzero_ps()); }) 8284 8285 #define _mm256_inserti32x4(A, B, imm) __extension__ ({ \ 8286 (__m256i)__builtin_shufflevector((__v8si)(A), \ 8287 (__v8si)_mm256_castsi128_si256((__m128i)(B)), \ 8288 ((imm) & 0x1) ? 0 : 8, \ 8289 ((imm) & 0x1) ? 1 : 9, \ 8290 ((imm) & 0x1) ? 2 : 10, \ 8291 ((imm) & 0x1) ? 3 : 11, \ 8292 ((imm) & 0x1) ? 8 : 4, \ 8293 ((imm) & 0x1) ? 9 : 5, \ 8294 ((imm) & 0x1) ? 10 : 6, \ 8295 ((imm) & 0x1) ? 11 : 7); }) 8296 8297 #define _mm256_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \ 8298 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8299 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ 8300 (__v8si)(W)); }) 8301 8302 #define _mm256_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \ 8303 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8304 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ 8305 (__v8si)_mm256_setzero_si256()); }) 8306 8307 #define _mm_getmant_pd(A, B, C) __extension__({\ 8308 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 8309 (int)(((C)<<2) | (B)), \ 8310 (__v2df)_mm_setzero_pd(), \ 8311 (__mmask8)-1); }) 8312 8313 #define _mm_mask_getmant_pd(W, U, A, B, C) __extension__({\ 8314 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 8315 (int)(((C)<<2) | (B)), \ 8316 (__v2df)(__m128d)(W), \ 8317 (__mmask8)(U)); }) 8318 8319 #define _mm_maskz_getmant_pd(U, A, B, C) __extension__({\ 8320 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 8321 (int)(((C)<<2) | (B)), \ 8322 (__v2df)_mm_setzero_pd(), \ 8323 (__mmask8)(U)); }) 8324 8325 #define _mm256_getmant_pd(A, B, C) __extension__ ({ \ 8326 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 8327 (int)(((C)<<2) | (B)), \ 8328 (__v4df)_mm256_setzero_pd(), \ 8329 (__mmask8)-1); }) 8330 8331 #define _mm256_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \ 8332 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 8333 (int)(((C)<<2) | (B)), \ 8334 (__v4df)(__m256d)(W), \ 8335 (__mmask8)(U)); }) 8336 8337 #define _mm256_maskz_getmant_pd(U, A, B, C) __extension__ ({ \ 8338 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 8339 (int)(((C)<<2) | (B)), \ 8340 (__v4df)_mm256_setzero_pd(), \ 8341 (__mmask8)(U)); }) 8342 8343 #define _mm_getmant_ps(A, B, C) __extension__ ({ \ 8344 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 8345 (int)(((C)<<2) | (B)), \ 8346 (__v4sf)_mm_setzero_ps(), \ 8347 (__mmask8)-1); }) 8348 8349 #define _mm_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \ 8350 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 8351 (int)(((C)<<2) | (B)), \ 8352 (__v4sf)(__m128)(W), \ 8353 (__mmask8)(U)); }) 8354 8355 #define _mm_maskz_getmant_ps(U, A, B, C) __extension__ ({ \ 8356 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 8357 (int)(((C)<<2) | (B)), \ 8358 (__v4sf)_mm_setzero_ps(), \ 8359 (__mmask8)(U)); }) 8360 8361 #define _mm256_getmant_ps(A, B, C) __extension__ ({ \ 8362 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 8363 (int)(((C)<<2) | (B)), \ 8364 (__v8sf)_mm256_setzero_ps(), \ 8365 (__mmask8)-1); }) 8366 8367 #define _mm256_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \ 8368 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 8369 (int)(((C)<<2) | (B)), \ 8370 (__v8sf)(__m256)(W), \ 8371 (__mmask8)(U)); }) 8372 8373 #define _mm256_maskz_getmant_ps(U, A, B, C) __extension__ ({ \ 8374 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 8375 (int)(((C)<<2) | (B)), \ 8376 (__v8sf)_mm256_setzero_ps(), \ 8377 (__mmask8)(U)); }) 8378 8379 #define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8380 (__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \ 8381 (double const *)(addr), \ 8382 (__v2di)(__m128i)(index), \ 8383 (__mmask8)(mask), (int)(scale)); }) 8384 8385 #define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8386 (__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \ 8387 (long long const *)(addr), \ 8388 (__v2di)(__m128i)(index), \ 8389 (__mmask8)(mask), (int)(scale)); }) 8390 8391 #define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8392 (__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \ 8393 (double const *)(addr), \ 8394 (__v4di)(__m256i)(index), \ 8395 (__mmask8)(mask), (int)(scale)); }) 8396 8397 #define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8398 (__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \ 8399 (long long const *)(addr), \ 8400 (__v4di)(__m256i)(index), \ 8401 (__mmask8)(mask), (int)(scale)); }) 8402 8403 #define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8404 (__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \ 8405 (float const *)(addr), \ 8406 (__v2di)(__m128i)(index), \ 8407 (__mmask8)(mask), (int)(scale)); }) 8408 8409 #define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8410 (__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \ 8411 (int const *)(addr), \ 8412 (__v2di)(__m128i)(index), \ 8413 (__mmask8)(mask), (int)(scale)); }) 8414 8415 #define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8416 (__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \ 8417 (float const *)(addr), \ 8418 (__v4di)(__m256i)(index), \ 8419 (__mmask8)(mask), (int)(scale)); }) 8420 8421 #define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8422 (__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \ 8423 (int const *)(addr), \ 8424 (__v4di)(__m256i)(index), \ 8425 (__mmask8)(mask), (int)(scale)); }) 8426 8427 #define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8428 (__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \ 8429 (double const *)(addr), \ 8430 (__v4si)(__m128i)(index), \ 8431 (__mmask8)(mask), (int)(scale)); }) 8432 8433 #define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8434 (__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \ 8435 (long long const *)(addr), \ 8436 (__v4si)(__m128i)(index), \ 8437 (__mmask8)(mask), (int)(scale)); }) 8438 8439 #define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8440 (__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \ 8441 (double const *)(addr), \ 8442 (__v4si)(__m128i)(index), \ 8443 (__mmask8)(mask), (int)(scale)); }) 8444 8445 #define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8446 (__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \ 8447 (long long const *)(addr), \ 8448 (__v4si)(__m128i)(index), \ 8449 (__mmask8)(mask), (int)(scale)); }) 8450 8451 #define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8452 (__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \ 8453 (float const *)(addr), \ 8454 (__v4si)(__m128i)(index), \ 8455 (__mmask8)(mask), (int)(scale)); }) 8456 8457 #define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8458 (__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \ 8459 (int const *)(addr), \ 8460 (__v4si)(__m128i)(index), \ 8461 (__mmask8)(mask), (int)(scale)); }) 8462 8463 #define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8464 (__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \ 8465 (float const *)(addr), \ 8466 (__v8si)(__m256i)(index), \ 8467 (__mmask8)(mask), (int)(scale)); }) 8468 8469 #define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8470 (__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \ 8471 (int const *)(addr), \ 8472 (__v8si)(__m256i)(index), \ 8473 (__mmask8)(mask), (int)(scale)); }) 8474 8475 #define _mm256_permutex_pd(X, C) __extension__ ({ \ 8476 (__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \ 8477 (__v4df)_mm256_undefined_pd(), \ 8478 ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ 8479 ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) 8480 8481 #define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \ 8482 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 8483 (__v4df)_mm256_permutex_pd((X), (C)), \ 8484 (__v4df)(__m256d)(W)); }) 8485 8486 #define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \ 8487 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 8488 (__v4df)_mm256_permutex_pd((X), (C)), \ 8489 (__v4df)_mm256_setzero_pd()); }) 8490 8491 #define _mm256_permutex_epi64(X, C) __extension__ ({ \ 8492 (__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \ 8493 (__v4di)_mm256_undefined_si256(), \ 8494 ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ 8495 ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) 8496 8497 #define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \ 8498 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8499 (__v4di)_mm256_permutex_epi64((X), (C)), \ 8500 (__v4di)(__m256i)(W)); }) 8501 8502 #define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \ 8503 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8504 (__v4di)_mm256_permutex_epi64((X), (C)), \ 8505 (__v4di)_mm256_setzero_si256()); }) 8506 8507 static __inline__ __m256d __DEFAULT_FN_ATTRS 8508 _mm256_permutexvar_pd (__m256i __X, __m256d __Y) 8509 { 8510 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, 8511 (__v4di) __X, 8512 (__v4df) _mm256_undefined_si256 (), 8513 (__mmask8) -1); 8514 } 8515 8516 static __inline__ __m256d __DEFAULT_FN_ATTRS 8517 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, 8518 __m256d __Y) 8519 { 8520 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, 8521 (__v4di) __X, 8522 (__v4df) __W, 8523 (__mmask8) __U); 8524 } 8525 8526 static __inline__ __m256d __DEFAULT_FN_ATTRS 8527 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) 8528 { 8529 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, 8530 (__v4di) __X, 8531 (__v4df) _mm256_setzero_pd (), 8532 (__mmask8) __U); 8533 } 8534 8535 static __inline__ __m256i __DEFAULT_FN_ATTRS 8536 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) 8537 { 8538 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, 8539 (__v4di) __X, 8540 (__v4di) _mm256_setzero_si256 (), 8541 (__mmask8) __M); 8542 } 8543 8544 static __inline__ __m256i __DEFAULT_FN_ATTRS 8545 _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y) 8546 { 8547 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, 8548 (__v4di) __X, 8549 (__v4di) _mm256_undefined_si256 (), 8550 (__mmask8) -1); 8551 } 8552 8553 static __inline__ __m256i __DEFAULT_FN_ATTRS 8554 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, 8555 __m256i __Y) 8556 { 8557 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, 8558 (__v4di) __X, 8559 (__v4di) __W, 8560 __M); 8561 } 8562 8563 static __inline__ __m256 __DEFAULT_FN_ATTRS 8564 _mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X, 8565 __m256 __Y) 8566 { 8567 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, 8568 (__v8si) __X, 8569 (__v8sf) __W, 8570 (__mmask8) __U); 8571 } 8572 8573 static __inline__ __m256 __DEFAULT_FN_ATTRS 8574 _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y) 8575 { 8576 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, 8577 (__v8si) __X, 8578 (__v8sf) _mm256_setzero_ps (), 8579 (__mmask8) __U); 8580 } 8581 8582 static __inline__ __m256 __DEFAULT_FN_ATTRS 8583 _mm256_permutexvar_ps (__m256i __X, __m256 __Y) 8584 { 8585 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, 8586 (__v8si) __X, 8587 (__v8sf) _mm256_undefined_si256 (), 8588 (__mmask8) -1); 8589 } 8590 8591 static __inline__ __m256i __DEFAULT_FN_ATTRS 8592 _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) 8593 { 8594 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, 8595 (__v8si) __X, 8596 (__v8si) _mm256_setzero_si256 (), 8597 __M); 8598 } 8599 8600 static __inline__ __m256i __DEFAULT_FN_ATTRS 8601 _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X, 8602 __m256i __Y) 8603 { 8604 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, 8605 (__v8si) __X, 8606 (__v8si) __W, 8607 (__mmask8) __M); 8608 } 8609 8610 static __inline__ __m256i __DEFAULT_FN_ATTRS 8611 _mm256_permutexvar_epi32 (__m256i __X, __m256i __Y) 8612 { 8613 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, 8614 (__v8si) __X, 8615 (__v8si) _mm256_undefined_si256(), 8616 (__mmask8) -1); 8617 } 8618 8619 #define _mm_alignr_epi32(A, B, imm) __extension__ ({ \ 8620 (__m128i)__builtin_shufflevector((__v4si)(__m128i)(B), \ 8621 (__v4si)(__m128i)(A), \ 8622 ((int)(imm) & 0x3) + 0, \ 8623 ((int)(imm) & 0x3) + 1, \ 8624 ((int)(imm) & 0x3) + 2, \ 8625 ((int)(imm) & 0x3) + 3); }) 8626 8627 #define _mm_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \ 8628 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8629 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ 8630 (__v4si)(__m128i)(W)); }) 8631 8632 #define _mm_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \ 8633 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8634 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ 8635 (__v4si)_mm_setzero_si128()); }) 8636 8637 #define _mm256_alignr_epi32(A, B, imm) __extension__ ({ \ 8638 (__m256i)__builtin_shufflevector((__v8si)(__m256i)(B), \ 8639 (__v8si)(__m256i)(A), \ 8640 ((int)(imm) & 0x7) + 0, \ 8641 ((int)(imm) & 0x7) + 1, \ 8642 ((int)(imm) & 0x7) + 2, \ 8643 ((int)(imm) & 0x7) + 3, \ 8644 ((int)(imm) & 0x7) + 4, \ 8645 ((int)(imm) & 0x7) + 5, \ 8646 ((int)(imm) & 0x7) + 6, \ 8647 ((int)(imm) & 0x7) + 7); }) 8648 8649 #define _mm256_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \ 8650 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8651 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ 8652 (__v8si)(__m256i)(W)); }) 8653 8654 #define _mm256_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \ 8655 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8656 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ 8657 (__v8si)_mm256_setzero_si256()); }) 8658 8659 #define _mm_alignr_epi64(A, B, imm) __extension__ ({ \ 8660 (__m128i)__builtin_shufflevector((__v2di)(__m128i)(B), \ 8661 (__v2di)(__m128i)(A), \ 8662 ((int)(imm) & 0x1) + 0, \ 8663 ((int)(imm) & 0x1) + 1); }) 8664 8665 #define _mm_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \ 8666 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 8667 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ 8668 (__v2di)(__m128i)(W)); }) 8669 8670 #define _mm_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \ 8671 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 8672 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ 8673 (__v2di)_mm_setzero_di()); }) 8674 8675 #define _mm256_alignr_epi64(A, B, imm) __extension__ ({ \ 8676 (__m256i)__builtin_shufflevector((__v4di)(__m256i)(B), \ 8677 (__v4di)(__m256i)(A), \ 8678 ((int)(imm) & 0x3) + 0, \ 8679 ((int)(imm) & 0x3) + 1, \ 8680 ((int)(imm) & 0x3) + 2, \ 8681 ((int)(imm) & 0x3) + 3); }) 8682 8683 #define _mm256_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \ 8684 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8685 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ 8686 (__v4di)(__m256i)(W)); }) 8687 8688 #define _mm256_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \ 8689 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8690 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ 8691 (__v4di)_mm256_setzero_si256()); }) 8692 8693 static __inline__ __m128 __DEFAULT_FN_ATTRS 8694 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) 8695 { 8696 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8697 (__v4sf)_mm_movehdup_ps(__A), 8698 (__v4sf)__W); 8699 } 8700 8701 static __inline__ __m128 __DEFAULT_FN_ATTRS 8702 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) 8703 { 8704 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8705 (__v4sf)_mm_movehdup_ps(__A), 8706 (__v4sf)_mm_setzero_ps()); 8707 } 8708 8709 static __inline__ __m256 __DEFAULT_FN_ATTRS 8710 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) 8711 { 8712 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8713 (__v8sf)_mm256_movehdup_ps(__A), 8714 (__v8sf)__W); 8715 } 8716 8717 static __inline__ __m256 __DEFAULT_FN_ATTRS 8718 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) 8719 { 8720 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8721 (__v8sf)_mm256_movehdup_ps(__A), 8722 (__v8sf)_mm256_setzero_ps()); 8723 } 8724 8725 static __inline__ __m128 __DEFAULT_FN_ATTRS 8726 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) 8727 { 8728 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8729 (__v4sf)_mm_moveldup_ps(__A), 8730 (__v4sf)__W); 8731 } 8732 8733 static __inline__ __m128 __DEFAULT_FN_ATTRS 8734 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) 8735 { 8736 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8737 (__v4sf)_mm_moveldup_ps(__A), 8738 (__v4sf)_mm_setzero_ps()); 8739 } 8740 8741 static __inline__ __m256 __DEFAULT_FN_ATTRS 8742 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) 8743 { 8744 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8745 (__v8sf)_mm256_moveldup_ps(__A), 8746 (__v8sf)__W); 8747 } 8748 8749 static __inline__ __m256 __DEFAULT_FN_ATTRS 8750 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) 8751 { 8752 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8753 (__v8sf)_mm256_moveldup_ps(__A), 8754 (__v8sf)_mm256_setzero_ps()); 8755 } 8756 8757 #define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__({\ 8758 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8759 (__v8si)_mm256_shuffle_epi32((A), (I)), \ 8760 (__v8si)(__m256i)(W)); }) 8761 8762 #define _mm256_maskz_shuffle_epi32(U, A, I) __extension__({\ 8763 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8764 (__v8si)_mm256_shuffle_epi32((A), (I)), \ 8765 (__v8si)_mm256_setzero_si256()); }) 8766 8767 #define _mm_mask_shuffle_epi32(W, U, A, I) __extension__({\ 8768 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8769 (__v4si)_mm_shuffle_epi32((A), (I)), \ 8770 (__v4si)(__m128i)(W)); }) 8771 8772 #define _mm_maskz_shuffle_epi32(U, A, I) __extension__({\ 8773 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8774 (__v4si)_mm_shuffle_epi32((A), (I)), \ 8775 (__v4si)_mm_setzero_si128()); }) 8776 8777 static __inline__ __m128d __DEFAULT_FN_ATTRS 8778 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) 8779 { 8780 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 8781 (__v2df) __A, 8782 (__v2df) __W); 8783 } 8784 8785 static __inline__ __m128d __DEFAULT_FN_ATTRS 8786 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A) 8787 { 8788 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 8789 (__v2df) __A, 8790 (__v2df) _mm_setzero_pd ()); 8791 } 8792 8793 static __inline__ __m256d __DEFAULT_FN_ATTRS 8794 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) 8795 { 8796 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 8797 (__v4df) __A, 8798 (__v4df) __W); 8799 } 8800 8801 static __inline__ __m256d __DEFAULT_FN_ATTRS 8802 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) 8803 { 8804 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 8805 (__v4df) __A, 8806 (__v4df) _mm256_setzero_pd ()); 8807 } 8808 8809 static __inline__ __m128 __DEFAULT_FN_ATTRS 8810 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) 8811 { 8812 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 8813 (__v4sf) __A, 8814 (__v4sf) __W); 8815 } 8816 8817 static __inline__ __m128 __DEFAULT_FN_ATTRS 8818 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A) 8819 { 8820 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 8821 (__v4sf) __A, 8822 (__v4sf) _mm_setzero_ps ()); 8823 } 8824 8825 static __inline__ __m256 __DEFAULT_FN_ATTRS 8826 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) 8827 { 8828 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 8829 (__v8sf) __A, 8830 (__v8sf) __W); 8831 } 8832 8833 static __inline__ __m256 __DEFAULT_FN_ATTRS 8834 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) 8835 { 8836 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 8837 (__v8sf) __A, 8838 (__v8sf) _mm256_setzero_ps ()); 8839 } 8840 8841 static __inline__ __m128 __DEFAULT_FN_ATTRS 8842 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A) 8843 { 8844 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 8845 (__v4sf) __W, 8846 (__mmask8) __U); 8847 } 8848 8849 static __inline__ __m128 __DEFAULT_FN_ATTRS 8850 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 8851 { 8852 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 8853 (__v4sf) 8854 _mm_setzero_ps (), 8855 (__mmask8) __U); 8856 } 8857 8858 static __inline__ __m256 __DEFAULT_FN_ATTRS 8859 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A) 8860 { 8861 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 8862 (__v8sf) __W, 8863 (__mmask8) __U); 8864 } 8865 8866 static __inline__ __m256 __DEFAULT_FN_ATTRS 8867 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 8868 { 8869 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 8870 (__v8sf) 8871 _mm256_setzero_ps (), 8872 (__mmask8) __U); 8873 } 8874 8875 static __inline __m128i __DEFAULT_FN_ATTRS 8876 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A) 8877 { 8878 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, 8879 (__v8hi) __W, 8880 (__mmask8) __U); 8881 } 8882 8883 static __inline __m128i __DEFAULT_FN_ATTRS 8884 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A) 8885 { 8886 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, 8887 (__v8hi) _mm_setzero_si128 (), 8888 (__mmask8) __U); 8889 } 8890 8891 #define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \ 8892 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 8893 (__v8hi)(__m128i)(W), \ 8894 (__mmask8)(U)); }) 8895 8896 #define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \ 8897 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 8898 (__v8hi)_mm_setzero_si128(), \ 8899 (__mmask8)(U)); }) 8900 8901 static __inline __m128i __DEFAULT_FN_ATTRS 8902 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A) 8903 { 8904 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, 8905 (__v8hi) __W, 8906 (__mmask8) __U); 8907 } 8908 8909 static __inline __m128i __DEFAULT_FN_ATTRS 8910 _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A) 8911 { 8912 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, 8913 (__v8hi) _mm_setzero_si128(), 8914 (__mmask8) __U); 8915 } 8916 #define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \ 8917 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 8918 (__v8hi)(__m128i)(W), \ 8919 (__mmask8)(U)); }) 8920 8921 #define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \ 8922 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 8923 (__v8hi)_mm_setzero_si128(), \ 8924 (__mmask8)(U)); }) 8925 8926 8927 #undef __DEFAULT_FN_ATTRS 8928 8929 #endif /* __AVX512VLINTRIN_H */ 8930