1 /*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24 #ifndef __IMMINTRIN_H 25 #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead." 26 #endif 27 28 #ifndef __AVX512VLDQINTRIN_H 29 #define __AVX512VLDQINTRIN_H 30 31 /* Define the default attributes for the functions in this file. */ 32 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"))) 33 34 static __inline__ __m256i __DEFAULT_FN_ATTRS 35 _mm256_mullo_epi64 (__m256i __A, __m256i __B) { 36 return (__m256i) ((__v4du) __A * (__v4du) __B); 37 } 38 39 static __inline__ __m256i __DEFAULT_FN_ATTRS 40 _mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { 41 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 42 (__v4di) __B, 43 (__v4di) __W, 44 (__mmask8) __U); 45 } 46 47 static __inline__ __m256i __DEFAULT_FN_ATTRS 48 _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { 49 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 50 (__v4di) __B, 51 (__v4di) 52 _mm256_setzero_si256 (), 53 (__mmask8) __U); 54 } 55 56 static __inline__ __m128i __DEFAULT_FN_ATTRS 57 _mm_mullo_epi64 (__m128i __A, __m128i __B) { 58 return (__m128i) ((__v2du) __A * (__v2du) __B); 59 } 60 61 static __inline__ __m128i __DEFAULT_FN_ATTRS 62 _mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 63 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 64 (__v2di) __B, 65 (__v2di) __W, 66 (__mmask8) __U); 67 } 68 69 static __inline__ __m128i __DEFAULT_FN_ATTRS 70 _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { 71 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 72 (__v2di) __B, 73 (__v2di) 74 _mm_setzero_si128 (), 75 (__mmask8) __U); 76 } 77 78 static __inline__ __m256d __DEFAULT_FN_ATTRS 79 _mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 80 return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 81 (__v4df) __B, 82 (__v4df) __W, 83 (__mmask8) __U); 84 } 85 86 static __inline__ __m256d __DEFAULT_FN_ATTRS 87 _mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) { 88 return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 89 (__v4df) __B, 90 (__v4df) 91 _mm256_setzero_pd (), 92 (__mmask8) __U); 93 } 94 95 static __inline__ __m128d __DEFAULT_FN_ATTRS 96 _mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 97 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 98 (__v2df) __B, 99 (__v2df) __W, 100 (__mmask8) __U); 101 } 102 103 static __inline__ __m128d __DEFAULT_FN_ATTRS 104 _mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) { 105 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 106 (__v2df) __B, 107 (__v2df) 108 _mm_setzero_pd (), 109 (__mmask8) __U); 110 } 111 112 static __inline__ __m256 __DEFAULT_FN_ATTRS 113 _mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 114 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 115 (__v8sf) __B, 116 (__v8sf) __W, 117 (__mmask8) __U); 118 } 119 120 static __inline__ __m256 __DEFAULT_FN_ATTRS 121 _mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) { 122 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 123 (__v8sf) __B, 124 (__v8sf) 125 _mm256_setzero_ps (), 126 (__mmask8) __U); 127 } 128 129 static __inline__ __m128 __DEFAULT_FN_ATTRS 130 _mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 131 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 132 (__v4sf) __B, 133 (__v4sf) __W, 134 (__mmask8) __U); 135 } 136 137 static __inline__ __m128 __DEFAULT_FN_ATTRS 138 _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) { 139 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 140 (__v4sf) __B, 141 (__v4sf) 142 _mm_setzero_ps (), 143 (__mmask8) __U); 144 } 145 146 static __inline__ __m256d __DEFAULT_FN_ATTRS 147 _mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 148 return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, 149 (__v4df) __B, 150 (__v4df) __W, 151 (__mmask8) __U); 152 } 153 154 static __inline__ __m256d __DEFAULT_FN_ATTRS 155 _mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) { 156 return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, 157 (__v4df) __B, 158 (__v4df) 159 _mm256_setzero_pd (), 160 (__mmask8) __U); 161 } 162 163 static __inline__ __m128d __DEFAULT_FN_ATTRS 164 _mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 165 return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, 166 (__v2df) __B, 167 (__v2df) __W, 168 (__mmask8) __U); 169 } 170 171 static __inline__ __m128d __DEFAULT_FN_ATTRS 172 _mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) { 173 return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, 174 (__v2df) __B, 175 (__v2df) 176 _mm_setzero_pd (), 177 (__mmask8) __U); 178 } 179 180 static __inline__ __m256 __DEFAULT_FN_ATTRS 181 _mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 182 return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, 183 (__v8sf) __B, 184 (__v8sf) __W, 185 (__mmask8) __U); 186 } 187 188 static __inline__ __m256 __DEFAULT_FN_ATTRS 189 _mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) { 190 return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, 191 (__v8sf) __B, 192 (__v8sf) 193 _mm256_setzero_ps (), 194 (__mmask8) __U); 195 } 196 197 static __inline__ __m128 __DEFAULT_FN_ATTRS 198 _mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 199 return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, 200 (__v4sf) __B, 201 (__v4sf) __W, 202 (__mmask8) __U); 203 } 204 205 static __inline__ __m128 __DEFAULT_FN_ATTRS 206 _mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) { 207 return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, 208 (__v4sf) __B, 209 (__v4sf) 210 _mm_setzero_ps (), 211 (__mmask8) __U); 212 } 213 214 static __inline__ __m256d __DEFAULT_FN_ATTRS 215 _mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, 216 __m256d __B) { 217 return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, 218 (__v4df) __B, 219 (__v4df) __W, 220 (__mmask8) __U); 221 } 222 223 static __inline__ __m256d __DEFAULT_FN_ATTRS 224 _mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) { 225 return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, 226 (__v4df) __B, 227 (__v4df) 228 _mm256_setzero_pd (), 229 (__mmask8) __U); 230 } 231 232 static __inline__ __m128d __DEFAULT_FN_ATTRS 233 _mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 234 return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, 235 (__v2df) __B, 236 (__v2df) __W, 237 (__mmask8) __U); 238 } 239 240 static __inline__ __m128d __DEFAULT_FN_ATTRS 241 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { 242 return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, 243 (__v2df) __B, 244 (__v2df) 245 _mm_setzero_pd (), 246 (__mmask8) __U); 247 } 248 249 static __inline__ __m256 __DEFAULT_FN_ATTRS 250 _mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 251 return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, 252 (__v8sf) __B, 253 (__v8sf) __W, 254 (__mmask8) __U); 255 } 256 257 static __inline__ __m256 __DEFAULT_FN_ATTRS 258 _mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) { 259 return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, 260 (__v8sf) __B, 261 (__v8sf) 262 _mm256_setzero_ps (), 263 (__mmask8) __U); 264 } 265 266 static __inline__ __m128 __DEFAULT_FN_ATTRS 267 _mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 268 return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, 269 (__v4sf) __B, 270 (__v4sf) __W, 271 (__mmask8) __U); 272 } 273 274 static __inline__ __m128 __DEFAULT_FN_ATTRS 275 _mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) { 276 return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, 277 (__v4sf) __B, 278 (__v4sf) 279 _mm_setzero_ps (), 280 (__mmask8) __U); 281 } 282 283 static __inline__ __m256d __DEFAULT_FN_ATTRS 284 _mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 285 return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, 286 (__v4df) __B, 287 (__v4df) __W, 288 (__mmask8) __U); 289 } 290 291 static __inline__ __m256d __DEFAULT_FN_ATTRS 292 _mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) { 293 return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, 294 (__v4df) __B, 295 (__v4df) 296 _mm256_setzero_pd (), 297 (__mmask8) __U); 298 } 299 300 static __inline__ __m128d __DEFAULT_FN_ATTRS 301 _mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 302 return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, 303 (__v2df) __B, 304 (__v2df) __W, 305 (__mmask8) __U); 306 } 307 308 static __inline__ __m128d __DEFAULT_FN_ATTRS 309 _mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) { 310 return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, 311 (__v2df) __B, 312 (__v2df) 313 _mm_setzero_pd (), 314 (__mmask8) __U); 315 } 316 317 static __inline__ __m256 __DEFAULT_FN_ATTRS 318 _mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 319 return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, 320 (__v8sf) __B, 321 (__v8sf) __W, 322 (__mmask8) __U); 323 } 324 325 static __inline__ __m256 __DEFAULT_FN_ATTRS 326 _mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) { 327 return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, 328 (__v8sf) __B, 329 (__v8sf) 330 _mm256_setzero_ps (), 331 (__mmask8) __U); 332 } 333 334 static __inline__ __m128 __DEFAULT_FN_ATTRS 335 _mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 336 return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, 337 (__v4sf) __B, 338 (__v4sf) __W, 339 (__mmask8) __U); 340 } 341 342 static __inline__ __m128 __DEFAULT_FN_ATTRS 343 _mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) { 344 return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, 345 (__v4sf) __B, 346 (__v4sf) 347 _mm_setzero_ps (), 348 (__mmask8) __U); 349 } 350 351 static __inline__ __m128i __DEFAULT_FN_ATTRS 352 _mm_cvtpd_epi64 (__m128d __A) { 353 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 354 (__v2di) _mm_setzero_si128(), 355 (__mmask8) -1); 356 } 357 358 static __inline__ __m128i __DEFAULT_FN_ATTRS 359 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { 360 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 361 (__v2di) __W, 362 (__mmask8) __U); 363 } 364 365 static __inline__ __m128i __DEFAULT_FN_ATTRS 366 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) { 367 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 368 (__v2di) _mm_setzero_si128(), 369 (__mmask8) __U); 370 } 371 372 static __inline__ __m256i __DEFAULT_FN_ATTRS 373 _mm256_cvtpd_epi64 (__m256d __A) { 374 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 375 (__v4di) _mm256_setzero_si256(), 376 (__mmask8) -1); 377 } 378 379 static __inline__ __m256i __DEFAULT_FN_ATTRS 380 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { 381 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 382 (__v4di) __W, 383 (__mmask8) __U); 384 } 385 386 static __inline__ __m256i __DEFAULT_FN_ATTRS 387 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) { 388 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 389 (__v4di) _mm256_setzero_si256(), 390 (__mmask8) __U); 391 } 392 393 static __inline__ __m128i __DEFAULT_FN_ATTRS 394 _mm_cvtpd_epu64 (__m128d __A) { 395 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 396 (__v2di) _mm_setzero_si128(), 397 (__mmask8) -1); 398 } 399 400 static __inline__ __m128i __DEFAULT_FN_ATTRS 401 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { 402 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 403 (__v2di) __W, 404 (__mmask8) __U); 405 } 406 407 static __inline__ __m128i __DEFAULT_FN_ATTRS 408 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) { 409 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 410 (__v2di) _mm_setzero_si128(), 411 (__mmask8) __U); 412 } 413 414 static __inline__ __m256i __DEFAULT_FN_ATTRS 415 _mm256_cvtpd_epu64 (__m256d __A) { 416 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 417 (__v4di) _mm256_setzero_si256(), 418 (__mmask8) -1); 419 } 420 421 static __inline__ __m256i __DEFAULT_FN_ATTRS 422 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { 423 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 424 (__v4di) __W, 425 (__mmask8) __U); 426 } 427 428 static __inline__ __m256i __DEFAULT_FN_ATTRS 429 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) { 430 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 431 (__v4di) _mm256_setzero_si256(), 432 (__mmask8) __U); 433 } 434 435 static __inline__ __m128i __DEFAULT_FN_ATTRS 436 _mm_cvtps_epi64 (__m128 __A) { 437 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 438 (__v2di) _mm_setzero_si128(), 439 (__mmask8) -1); 440 } 441 442 static __inline__ __m128i __DEFAULT_FN_ATTRS 443 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { 444 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 445 (__v2di) __W, 446 (__mmask8) __U); 447 } 448 449 static __inline__ __m128i __DEFAULT_FN_ATTRS 450 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { 451 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 452 (__v2di) _mm_setzero_si128(), 453 (__mmask8) __U); 454 } 455 456 static __inline__ __m256i __DEFAULT_FN_ATTRS 457 _mm256_cvtps_epi64 (__m128 __A) { 458 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 459 (__v4di) _mm256_setzero_si256(), 460 (__mmask8) -1); 461 } 462 463 static __inline__ __m256i __DEFAULT_FN_ATTRS 464 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { 465 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 466 (__v4di) __W, 467 (__mmask8) __U); 468 } 469 470 static __inline__ __m256i __DEFAULT_FN_ATTRS 471 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { 472 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 473 (__v4di) _mm256_setzero_si256(), 474 (__mmask8) __U); 475 } 476 477 static __inline__ __m128i __DEFAULT_FN_ATTRS 478 _mm_cvtps_epu64 (__m128 __A) { 479 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 480 (__v2di) _mm_setzero_si128(), 481 (__mmask8) -1); 482 } 483 484 static __inline__ __m128i __DEFAULT_FN_ATTRS 485 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { 486 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 487 (__v2di) __W, 488 (__mmask8) __U); 489 } 490 491 static __inline__ __m128i __DEFAULT_FN_ATTRS 492 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { 493 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 494 (__v2di) _mm_setzero_si128(), 495 (__mmask8) __U); 496 } 497 498 static __inline__ __m256i __DEFAULT_FN_ATTRS 499 _mm256_cvtps_epu64 (__m128 __A) { 500 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 501 (__v4di) _mm256_setzero_si256(), 502 (__mmask8) -1); 503 } 504 505 static __inline__ __m256i __DEFAULT_FN_ATTRS 506 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { 507 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 508 (__v4di) __W, 509 (__mmask8) __U); 510 } 511 512 static __inline__ __m256i __DEFAULT_FN_ATTRS 513 _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { 514 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 515 (__v4di) _mm256_setzero_si256(), 516 (__mmask8) __U); 517 } 518 519 static __inline__ __m128d __DEFAULT_FN_ATTRS 520 _mm_cvtepi64_pd (__m128i __A) { 521 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, 522 (__v2df) _mm_setzero_pd(), 523 (__mmask8) -1); 524 } 525 526 static __inline__ __m128d __DEFAULT_FN_ATTRS 527 _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) { 528 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, 529 (__v2df) __W, 530 (__mmask8) __U); 531 } 532 533 static __inline__ __m128d __DEFAULT_FN_ATTRS 534 _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) { 535 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, 536 (__v2df) _mm_setzero_pd(), 537 (__mmask8) __U); 538 } 539 540 static __inline__ __m256d __DEFAULT_FN_ATTRS 541 _mm256_cvtepi64_pd (__m256i __A) { 542 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, 543 (__v4df) _mm256_setzero_pd(), 544 (__mmask8) -1); 545 } 546 547 static __inline__ __m256d __DEFAULT_FN_ATTRS 548 _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) { 549 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, 550 (__v4df) __W, 551 (__mmask8) __U); 552 } 553 554 static __inline__ __m256d __DEFAULT_FN_ATTRS 555 _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) { 556 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, 557 (__v4df) _mm256_setzero_pd(), 558 (__mmask8) __U); 559 } 560 561 static __inline__ __m128 __DEFAULT_FN_ATTRS 562 _mm_cvtepi64_ps (__m128i __A) { 563 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 564 (__v4sf) _mm_setzero_ps(), 565 (__mmask8) -1); 566 } 567 568 static __inline__ __m128 __DEFAULT_FN_ATTRS 569 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) { 570 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 571 (__v4sf) __W, 572 (__mmask8) __U); 573 } 574 575 static __inline__ __m128 __DEFAULT_FN_ATTRS 576 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) { 577 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 578 (__v4sf) _mm_setzero_ps(), 579 (__mmask8) __U); 580 } 581 582 static __inline__ __m128 __DEFAULT_FN_ATTRS 583 _mm256_cvtepi64_ps (__m256i __A) { 584 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, 585 (__v4sf) _mm_setzero_ps(), 586 (__mmask8) -1); 587 } 588 589 static __inline__ __m128 __DEFAULT_FN_ATTRS 590 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) { 591 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, 592 (__v4sf) __W, 593 (__mmask8) __U); 594 } 595 596 static __inline__ __m128 __DEFAULT_FN_ATTRS 597 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) { 598 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, 599 (__v4sf) _mm_setzero_ps(), 600 (__mmask8) __U); 601 } 602 603 static __inline__ __m128i __DEFAULT_FN_ATTRS 604 _mm_cvttpd_epi64 (__m128d __A) { 605 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 606 (__v2di) _mm_setzero_si128(), 607 (__mmask8) -1); 608 } 609 610 static __inline__ __m128i __DEFAULT_FN_ATTRS 611 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { 612 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 613 (__v2di) __W, 614 (__mmask8) __U); 615 } 616 617 static __inline__ __m128i __DEFAULT_FN_ATTRS 618 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) { 619 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 620 (__v2di) _mm_setzero_si128(), 621 (__mmask8) __U); 622 } 623 624 static __inline__ __m256i __DEFAULT_FN_ATTRS 625 _mm256_cvttpd_epi64 (__m256d __A) { 626 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 627 (__v4di) _mm256_setzero_si256(), 628 (__mmask8) -1); 629 } 630 631 static __inline__ __m256i __DEFAULT_FN_ATTRS 632 _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { 633 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 634 (__v4di) __W, 635 (__mmask8) __U); 636 } 637 638 static __inline__ __m256i __DEFAULT_FN_ATTRS 639 _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) { 640 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 641 (__v4di) _mm256_setzero_si256(), 642 (__mmask8) __U); 643 } 644 645 static __inline__ __m128i __DEFAULT_FN_ATTRS 646 _mm_cvttpd_epu64 (__m128d __A) { 647 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 648 (__v2di) _mm_setzero_si128(), 649 (__mmask8) -1); 650 } 651 652 static __inline__ __m128i __DEFAULT_FN_ATTRS 653 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { 654 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 655 (__v2di) __W, 656 (__mmask8) __U); 657 } 658 659 static __inline__ __m128i __DEFAULT_FN_ATTRS 660 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) { 661 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 662 (__v2di) _mm_setzero_si128(), 663 (__mmask8) __U); 664 } 665 666 static __inline__ __m256i __DEFAULT_FN_ATTRS 667 _mm256_cvttpd_epu64 (__m256d __A) { 668 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 669 (__v4di) _mm256_setzero_si256(), 670 (__mmask8) -1); 671 } 672 673 static __inline__ __m256i __DEFAULT_FN_ATTRS 674 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { 675 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 676 (__v4di) __W, 677 (__mmask8) __U); 678 } 679 680 static __inline__ __m256i __DEFAULT_FN_ATTRS 681 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) { 682 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 683 (__v4di) _mm256_setzero_si256(), 684 (__mmask8) __U); 685 } 686 687 static __inline__ __m128i __DEFAULT_FN_ATTRS 688 _mm_cvttps_epi64 (__m128 __A) { 689 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 690 (__v2di) _mm_setzero_si128(), 691 (__mmask8) -1); 692 } 693 694 static __inline__ __m128i __DEFAULT_FN_ATTRS 695 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { 696 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 697 (__v2di) __W, 698 (__mmask8) __U); 699 } 700 701 static __inline__ __m128i __DEFAULT_FN_ATTRS 702 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { 703 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 704 (__v2di) _mm_setzero_si128(), 705 (__mmask8) __U); 706 } 707 708 static __inline__ __m256i __DEFAULT_FN_ATTRS 709 _mm256_cvttps_epi64 (__m128 __A) { 710 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 711 (__v4di) _mm256_setzero_si256(), 712 (__mmask8) -1); 713 } 714 715 static __inline__ __m256i __DEFAULT_FN_ATTRS 716 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { 717 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 718 (__v4di) __W, 719 (__mmask8) __U); 720 } 721 722 static __inline__ __m256i __DEFAULT_FN_ATTRS 723 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { 724 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 725 (__v4di) _mm256_setzero_si256(), 726 (__mmask8) __U); 727 } 728 729 static __inline__ __m128i __DEFAULT_FN_ATTRS 730 _mm_cvttps_epu64 (__m128 __A) { 731 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 732 (__v2di) _mm_setzero_si128(), 733 (__mmask8) -1); 734 } 735 736 static __inline__ __m128i __DEFAULT_FN_ATTRS 737 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { 738 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 739 (__v2di) __W, 740 (__mmask8) __U); 741 } 742 743 static __inline__ __m128i __DEFAULT_FN_ATTRS 744 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { 745 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 746 (__v2di) _mm_setzero_si128(), 747 (__mmask8) __U); 748 } 749 750 static __inline__ __m256i __DEFAULT_FN_ATTRS 751 _mm256_cvttps_epu64 (__m128 __A) { 752 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 753 (__v4di) _mm256_setzero_si256(), 754 (__mmask8) -1); 755 } 756 757 static __inline__ __m256i __DEFAULT_FN_ATTRS 758 _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { 759 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 760 (__v4di) __W, 761 (__mmask8) __U); 762 } 763 764 static __inline__ __m256i __DEFAULT_FN_ATTRS 765 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { 766 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 767 (__v4di) _mm256_setzero_si256(), 768 (__mmask8) __U); 769 } 770 771 static __inline__ __m128d __DEFAULT_FN_ATTRS 772 _mm_cvtepu64_pd (__m128i __A) { 773 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, 774 (__v2df) _mm_setzero_pd(), 775 (__mmask8) -1); 776 } 777 778 static __inline__ __m128d __DEFAULT_FN_ATTRS 779 _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) { 780 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, 781 (__v2df) __W, 782 (__mmask8) __U); 783 } 784 785 static __inline__ __m128d __DEFAULT_FN_ATTRS 786 _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) { 787 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, 788 (__v2df) _mm_setzero_pd(), 789 (__mmask8) __U); 790 } 791 792 static __inline__ __m256d __DEFAULT_FN_ATTRS 793 _mm256_cvtepu64_pd (__m256i __A) { 794 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, 795 (__v4df) _mm256_setzero_pd(), 796 (__mmask8) -1); 797 } 798 799 static __inline__ __m256d __DEFAULT_FN_ATTRS 800 _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) { 801 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, 802 (__v4df) __W, 803 (__mmask8) __U); 804 } 805 806 static __inline__ __m256d __DEFAULT_FN_ATTRS 807 _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) { 808 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, 809 (__v4df) _mm256_setzero_pd(), 810 (__mmask8) __U); 811 } 812 813 static __inline__ __m128 __DEFAULT_FN_ATTRS 814 _mm_cvtepu64_ps (__m128i __A) { 815 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 816 (__v4sf) _mm_setzero_ps(), 817 (__mmask8) -1); 818 } 819 820 static __inline__ __m128 __DEFAULT_FN_ATTRS 821 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) { 822 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 823 (__v4sf) __W, 824 (__mmask8) __U); 825 } 826 827 static __inline__ __m128 __DEFAULT_FN_ATTRS 828 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) { 829 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 830 (__v4sf) _mm_setzero_ps(), 831 (__mmask8) __U); 832 } 833 834 static __inline__ __m128 __DEFAULT_FN_ATTRS 835 _mm256_cvtepu64_ps (__m256i __A) { 836 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, 837 (__v4sf) _mm_setzero_ps(), 838 (__mmask8) -1); 839 } 840 841 static __inline__ __m128 __DEFAULT_FN_ATTRS 842 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) { 843 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, 844 (__v4sf) __W, 845 (__mmask8) __U); 846 } 847 848 static __inline__ __m128 __DEFAULT_FN_ATTRS 849 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) { 850 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, 851 (__v4sf) _mm_setzero_ps(), 852 (__mmask8) __U); 853 } 854 855 #define _mm_range_pd(A, B, C) __extension__ ({ \ 856 (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ 857 (__v2df)(__m128d)(B), (int)(C), \ 858 (__v2df)_mm_setzero_pd(), \ 859 (__mmask8)-1); }) 860 861 #define _mm_mask_range_pd(W, U, A, B, C) __extension__ ({ \ 862 (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ 863 (__v2df)(__m128d)(B), (int)(C), \ 864 (__v2df)(__m128d)(W), \ 865 (__mmask8)(U)); }) 866 867 #define _mm_maskz_range_pd(U, A, B, C) __extension__ ({ \ 868 (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ 869 (__v2df)(__m128d)(B), (int)(C), \ 870 (__v2df)_mm_setzero_pd(), \ 871 (__mmask8)(U)); }) 872 873 #define _mm256_range_pd(A, B, C) __extension__ ({ \ 874 (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ 875 (__v4df)(__m256d)(B), (int)(C), \ 876 (__v4df)_mm256_setzero_pd(), \ 877 (__mmask8)-1); }) 878 879 #define _mm256_mask_range_pd(W, U, A, B, C) __extension__ ({ \ 880 (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ 881 (__v4df)(__m256d)(B), (int)(C), \ 882 (__v4df)(__m256d)(W), \ 883 (__mmask8)(U)); }) 884 885 #define _mm256_maskz_range_pd(U, A, B, C) __extension__ ({ \ 886 (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ 887 (__v4df)(__m256d)(B), (int)(C), \ 888 (__v4df)_mm256_setzero_pd(), \ 889 (__mmask8)(U)); }) 890 891 #define _mm_range_ps(A, B, C) __extension__ ({ \ 892 (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ 893 (__v4sf)(__m128)(B), (int)(C), \ 894 (__v4sf)_mm_setzero_ps(), \ 895 (__mmask8)-1); }) 896 897 #define _mm_mask_range_ps(W, U, A, B, C) __extension__ ({ \ 898 (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ 899 (__v4sf)(__m128)(B), (int)(C), \ 900 (__v4sf)(__m128)(W), (__mmask8)(U)); }) 901 902 #define _mm_maskz_range_ps(U, A, B, C) __extension__ ({ \ 903 (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ 904 (__v4sf)(__m128)(B), (int)(C), \ 905 (__v4sf)_mm_setzero_ps(), \ 906 (__mmask8)(U)); }) 907 908 #define _mm256_range_ps(A, B, C) __extension__ ({ \ 909 (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ 910 (__v8sf)(__m256)(B), (int)(C), \ 911 (__v8sf)_mm256_setzero_ps(), \ 912 (__mmask8)-1); }) 913 914 #define _mm256_mask_range_ps(W, U, A, B, C) __extension__ ({ \ 915 (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ 916 (__v8sf)(__m256)(B), (int)(C), \ 917 (__v8sf)(__m256)(W), (__mmask8)(U)); }) 918 919 #define _mm256_maskz_range_ps(U, A, B, C) __extension__ ({ \ 920 (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ 921 (__v8sf)(__m256)(B), (int)(C), \ 922 (__v8sf)_mm256_setzero_ps(), \ 923 (__mmask8)(U)); }) 924 925 #define _mm_reduce_pd(A, B) __extension__ ({ \ 926 (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ 927 (__v2df)_mm_setzero_pd(), \ 928 (__mmask8)-1); }) 929 930 #define _mm_mask_reduce_pd(W, U, A, B) __extension__ ({ \ 931 (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ 932 (__v2df)(__m128d)(W), \ 933 (__mmask8)(U)); }) 934 935 #define _mm_maskz_reduce_pd(U, A, B) __extension__ ({ \ 936 (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ 937 (__v2df)_mm_setzero_pd(), \ 938 (__mmask8)(U)); }) 939 940 #define _mm256_reduce_pd(A, B) __extension__ ({ \ 941 (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ 942 (__v4df)_mm256_setzero_pd(), \ 943 (__mmask8)-1); }) 944 945 #define _mm256_mask_reduce_pd(W, U, A, B) __extension__ ({ \ 946 (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ 947 (__v4df)(__m256d)(W), \ 948 (__mmask8)(U)); }) 949 950 #define _mm256_maskz_reduce_pd(U, A, B) __extension__ ({ \ 951 (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ 952 (__v4df)_mm256_setzero_pd(), \ 953 (__mmask8)(U)); }) 954 955 #define _mm_reduce_ps(A, B) __extension__ ({ \ 956 (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 957 (__v4sf)_mm_setzero_ps(), \ 958 (__mmask8)-1); }) 959 960 #define _mm_mask_reduce_ps(W, U, A, B) __extension__ ({ \ 961 (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 962 (__v4sf)(__m128)(W), \ 963 (__mmask8)(U)); }) 964 965 #define _mm_maskz_reduce_ps(U, A, B) __extension__ ({ \ 966 (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 967 (__v4sf)_mm_setzero_ps(), \ 968 (__mmask8)(U)); }) 969 970 #define _mm256_reduce_ps(A, B) __extension__ ({ \ 971 (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ 972 (__v8sf)_mm256_setzero_ps(), \ 973 (__mmask8)-1); }) 974 975 #define _mm256_mask_reduce_ps(W, U, A, B) __extension__ ({ \ 976 (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ 977 (__v8sf)(__m256)(W), \ 978 (__mmask8)(U)); }) 979 980 #define _mm256_maskz_reduce_ps(U, A, B) __extension__ ({ \ 981 (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ 982 (__v8sf)_mm256_setzero_ps(), \ 983 (__mmask8)(U)); }) 984 985 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 986 _mm_movepi32_mask (__m128i __A) 987 { 988 return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A); 989 } 990 991 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 992 _mm256_movepi32_mask (__m256i __A) 993 { 994 return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A); 995 } 996 997 static __inline__ __m128i __DEFAULT_FN_ATTRS 998 _mm_movm_epi32 (__mmask8 __A) 999 { 1000 return (__m128i) __builtin_ia32_cvtmask2d128 (__A); 1001 } 1002 1003 static __inline__ __m256i __DEFAULT_FN_ATTRS 1004 _mm256_movm_epi32 (__mmask8 __A) 1005 { 1006 return (__m256i) __builtin_ia32_cvtmask2d256 (__A); 1007 } 1008 1009 static __inline__ __m128i __DEFAULT_FN_ATTRS 1010 _mm_movm_epi64 (__mmask8 __A) 1011 { 1012 return (__m128i) __builtin_ia32_cvtmask2q128 (__A); 1013 } 1014 1015 static __inline__ __m256i __DEFAULT_FN_ATTRS 1016 _mm256_movm_epi64 (__mmask8 __A) 1017 { 1018 return (__m256i) __builtin_ia32_cvtmask2q256 (__A); 1019 } 1020 1021 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 1022 _mm_movepi64_mask (__m128i __A) 1023 { 1024 return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A); 1025 } 1026 1027 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 1028 _mm256_movepi64_mask (__m256i __A) 1029 { 1030 return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A); 1031 } 1032 1033 static __inline__ __m256 __DEFAULT_FN_ATTRS 1034 _mm256_broadcast_f32x2 (__m128 __A) 1035 { 1036 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, 1037 (__v8sf)_mm256_undefined_ps(), 1038 (__mmask8) -1); 1039 } 1040 1041 static __inline__ __m256 __DEFAULT_FN_ATTRS 1042 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) 1043 { 1044 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, 1045 (__v8sf) __O, 1046 __M); 1047 } 1048 1049 static __inline__ __m256 __DEFAULT_FN_ATTRS 1050 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) 1051 { 1052 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, 1053 (__v8sf) _mm256_setzero_ps (), 1054 __M); 1055 } 1056 1057 static __inline__ __m256d __DEFAULT_FN_ATTRS 1058 _mm256_broadcast_f64x2 (__m128d __A) 1059 { 1060 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A, 1061 (__v4df)_mm256_undefined_pd(), 1062 (__mmask8) -1); 1063 } 1064 1065 static __inline__ __m256d __DEFAULT_FN_ATTRS 1066 _mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A) 1067 { 1068 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A, 1069 (__v4df) __O, 1070 __M); 1071 } 1072 1073 static __inline__ __m256d __DEFAULT_FN_ATTRS 1074 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) 1075 { 1076 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A, 1077 (__v4df) _mm256_setzero_ps (), 1078 __M); 1079 } 1080 1081 static __inline__ __m128i __DEFAULT_FN_ATTRS 1082 _mm_broadcast_i32x2 (__m128i __A) 1083 { 1084 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A, 1085 (__v4si)_mm_undefined_si128(), 1086 (__mmask8) -1); 1087 } 1088 1089 static __inline__ __m128i __DEFAULT_FN_ATTRS 1090 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) 1091 { 1092 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A, 1093 (__v4si) __O, 1094 __M); 1095 } 1096 1097 static __inline__ __m128i __DEFAULT_FN_ATTRS 1098 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) 1099 { 1100 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A, 1101 (__v4si) _mm_setzero_si128 (), 1102 __M); 1103 } 1104 1105 static __inline__ __m256i __DEFAULT_FN_ATTRS 1106 _mm256_broadcast_i32x2 (__m128i __A) 1107 { 1108 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A, 1109 (__v8si)_mm256_undefined_si256(), 1110 (__mmask8) -1); 1111 } 1112 1113 static __inline__ __m256i __DEFAULT_FN_ATTRS 1114 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) 1115 { 1116 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A, 1117 (__v8si) __O, 1118 __M); 1119 } 1120 1121 static __inline__ __m256i __DEFAULT_FN_ATTRS 1122 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) 1123 { 1124 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A, 1125 (__v8si) _mm256_setzero_si256 (), 1126 __M); 1127 } 1128 1129 static __inline__ __m256i __DEFAULT_FN_ATTRS 1130 _mm256_broadcast_i64x2 (__m128i __A) 1131 { 1132 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A, 1133 (__v4di)_mm256_undefined_si256(), 1134 (__mmask8) -1); 1135 } 1136 1137 static __inline__ __m256i __DEFAULT_FN_ATTRS 1138 _mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A) 1139 { 1140 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A, 1141 (__v4di) __O, 1142 __M); 1143 } 1144 1145 static __inline__ __m256i __DEFAULT_FN_ATTRS 1146 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) 1147 { 1148 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A, 1149 (__v4di) _mm256_setzero_si256 (), 1150 __M); 1151 } 1152 1153 #define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \ 1154 (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ 1155 (int)(imm), \ 1156 (__v2df)_mm_setzero_pd(), \ 1157 (__mmask8)-1); }) 1158 1159 #define _mm256_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \ 1160 (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ 1161 (int)(imm), \ 1162 (__v2df)(__m128d)(W), \ 1163 (__mmask8)(U)); }) 1164 1165 #define _mm256_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \ 1166 (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ 1167 (int)(imm), \ 1168 (__v2df)_mm_setzero_pd(), \ 1169 (__mmask8)(U)); }) 1170 1171 #define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \ 1172 (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ 1173 (int)(imm), \ 1174 (__v2di)_mm_setzero_di(), \ 1175 (__mmask8)-1); }) 1176 1177 #define _mm256_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \ 1178 (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ 1179 (int)(imm), \ 1180 (__v2di)(__m128i)(W), \ 1181 (__mmask8)(U)); }) 1182 1183 #define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \ 1184 (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ 1185 (int)(imm), \ 1186 (__v2di)_mm_setzero_di(), \ 1187 (__mmask8)(U)); }) 1188 1189 #define _mm256_insertf64x2(A, B, imm) __extension__ ({ \ 1190 (__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \ 1191 (__v2df)(__m128d)(B), \ 1192 (int)(imm), \ 1193 (__v4df)_mm256_setzero_pd(), \ 1194 (__mmask8)-1); }) 1195 1196 #define _mm256_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \ 1197 (__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \ 1198 (__v2df)(__m128d)(B), \ 1199 (int)(imm), \ 1200 (__v4df)(__m256d)(W), \ 1201 (__mmask8)(U)); }) 1202 1203 #define _mm256_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \ 1204 (__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \ 1205 (__v2df)(__m128d)(B), \ 1206 (int)(imm), \ 1207 (__v4df)_mm256_setzero_pd(), \ 1208 (__mmask8)(U)); }) 1209 1210 #define _mm256_inserti64x2(A, B, imm) __extension__ ({ \ 1211 (__m256i)__builtin_ia32_inserti64x2_256_mask((__v4di)(__m256i)(A), \ 1212 (__v2di)(__m128i)(B), \ 1213 (int)(imm), \ 1214 (__v4di)_mm256_setzero_si256(), \ 1215 (__mmask8)-1); }) 1216 1217 #define _mm256_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \ 1218 (__m256i)__builtin_ia32_inserti64x2_256_mask((__v4di)(__m256i)(A), \ 1219 (__v2di)(__m128i)(B), \ 1220 (int)(imm), \ 1221 (__v4di)(__m256i)(W), \ 1222 (__mmask8)(U)); }) 1223 1224 #define _mm256_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \ 1225 (__m256i)__builtin_ia32_inserti64x2_256_mask((__v4di)(__m256i)(A), \ 1226 (__v2di)(__m128i)(B), \ 1227 (int)(imm), \ 1228 (__v4di)_mm256_setzero_si256(), \ 1229 (__mmask8)(U)); }) 1230 1231 #define _mm_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \ 1232 (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ 1233 (__mmask8)(U)); }) 1234 1235 #define _mm_fpclass_pd_mask(A, imm) __extension__ ({ \ 1236 (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ 1237 (__mmask8)-1); }) 1238 1239 #define _mm256_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \ 1240 (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ 1241 (__mmask8)(U)); }) 1242 1243 #define _mm256_fpclass_pd_mask(A, imm) __extension__ ({ \ 1244 (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ 1245 (__mmask8)-1); }) 1246 1247 #define _mm_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \ 1248 (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ 1249 (__mmask8)(U)); }) 1250 1251 #define _mm_fpclass_ps_mask(A, imm) __extension__ ({ \ 1252 (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ 1253 (__mmask8)-1); }) 1254 1255 #define _mm256_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \ 1256 (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ 1257 (__mmask8)(U)); }) 1258 1259 #define _mm256_fpclass_ps_mask(A, imm) __extension__ ({ \ 1260 (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ 1261 (__mmask8)-1); }) 1262 1263 #undef __DEFAULT_FN_ATTRS 1264 1265 #endif 1266