1 /*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24 #ifndef __IMMINTRIN_H 25 #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead." 26 #endif 27 28 #ifndef __AVX512VLDQINTRIN_H 29 #define __AVX512VLDQINTRIN_H 30 31 /* Define the default attributes for the functions in this file. */ 32 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"))) 33 34 static __inline__ __m256i __DEFAULT_FN_ATTRS 35 _mm256_mullo_epi64 (__m256i __A, __m256i __B) { 36 return (__m256i) ((__v4du) __A * (__v4du) __B); 37 } 38 39 static __inline__ __m256i __DEFAULT_FN_ATTRS 40 _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { 41 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 42 (__v4di)_mm256_mullo_epi64(__A, __B), 43 (__v4di)__W); 44 } 45 46 static __inline__ __m256i __DEFAULT_FN_ATTRS 47 _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { 48 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 49 (__v4di)_mm256_mullo_epi64(__A, __B), 50 (__v4di)_mm256_setzero_si256()); 51 } 52 53 static __inline__ __m128i __DEFAULT_FN_ATTRS 54 _mm_mullo_epi64 (__m128i __A, __m128i __B) { 55 return (__m128i) ((__v2du) __A * (__v2du) __B); 56 } 57 58 static __inline__ __m128i __DEFAULT_FN_ATTRS 59 _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 60 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 61 (__v2di)_mm_mullo_epi64(__A, __B), 62 (__v2di)__W); 63 } 64 65 static __inline__ __m128i __DEFAULT_FN_ATTRS 66 _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { 67 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 68 (__v2di)_mm_mullo_epi64(__A, __B), 69 (__v2di)_mm_setzero_si128()); 70 } 71 72 static __inline__ __m256d __DEFAULT_FN_ATTRS 73 _mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 74 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 75 (__v4df)_mm256_andnot_pd(__A, __B), 76 (__v4df)__W); 77 } 78 79 static __inline__ __m256d __DEFAULT_FN_ATTRS 80 _mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) { 81 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 82 (__v4df)_mm256_andnot_pd(__A, __B), 83 (__v4df)_mm256_setzero_pd()); 84 } 85 86 static __inline__ __m128d __DEFAULT_FN_ATTRS 87 _mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 88 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 89 (__v2df)_mm_andnot_pd(__A, __B), 90 (__v2df)__W); 91 } 92 93 static __inline__ __m128d __DEFAULT_FN_ATTRS 94 _mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) { 95 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 96 (__v2df)_mm_andnot_pd(__A, __B), 97 (__v2df)_mm_setzero_pd()); 98 } 99 100 static __inline__ __m256 __DEFAULT_FN_ATTRS 101 _mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 102 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 103 (__v8sf)_mm256_andnot_ps(__A, __B), 104 (__v8sf)__W); 105 } 106 107 static __inline__ __m256 __DEFAULT_FN_ATTRS 108 _mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) { 109 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 110 (__v8sf)_mm256_andnot_ps(__A, __B), 111 (__v8sf)_mm256_setzero_ps()); 112 } 113 114 static __inline__ __m128 __DEFAULT_FN_ATTRS 115 _mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 116 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 117 (__v4sf)_mm_andnot_ps(__A, __B), 118 (__v4sf)__W); 119 } 120 121 static __inline__ __m128 __DEFAULT_FN_ATTRS 122 _mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) { 123 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 124 (__v4sf)_mm_andnot_ps(__A, __B), 125 (__v4sf)_mm_setzero_ps()); 126 } 127 128 static __inline__ __m256d __DEFAULT_FN_ATTRS 129 _mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 130 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 131 (__v4df)_mm256_and_pd(__A, __B), 132 (__v4df)__W); 133 } 134 135 static __inline__ __m256d __DEFAULT_FN_ATTRS 136 _mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) { 137 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 138 (__v4df)_mm256_and_pd(__A, __B), 139 (__v4df)_mm256_setzero_pd()); 140 } 141 142 static __inline__ __m128d __DEFAULT_FN_ATTRS 143 _mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 144 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 145 (__v2df)_mm_and_pd(__A, __B), 146 (__v2df)__W); 147 } 148 149 static __inline__ __m128d __DEFAULT_FN_ATTRS 150 _mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) { 151 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 152 (__v2df)_mm_and_pd(__A, __B), 153 (__v2df)_mm_setzero_pd()); 154 } 155 156 static __inline__ __m256 __DEFAULT_FN_ATTRS 157 _mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 158 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 159 (__v8sf)_mm256_and_ps(__A, __B), 160 (__v8sf)__W); 161 } 162 163 static __inline__ __m256 __DEFAULT_FN_ATTRS 164 _mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) { 165 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 166 (__v8sf)_mm256_and_ps(__A, __B), 167 (__v8sf)_mm256_setzero_ps()); 168 } 169 170 static __inline__ __m128 __DEFAULT_FN_ATTRS 171 _mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 172 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 173 (__v4sf)_mm_and_ps(__A, __B), 174 (__v4sf)__W); 175 } 176 177 static __inline__ __m128 __DEFAULT_FN_ATTRS 178 _mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) { 179 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 180 (__v4sf)_mm_and_ps(__A, __B), 181 (__v4sf)_mm_setzero_ps()); 182 } 183 184 static __inline__ __m256d __DEFAULT_FN_ATTRS 185 _mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 186 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 187 (__v4df)_mm256_xor_pd(__A, __B), 188 (__v4df)__W); 189 } 190 191 static __inline__ __m256d __DEFAULT_FN_ATTRS 192 _mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) { 193 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 194 (__v4df)_mm256_xor_pd(__A, __B), 195 (__v4df)_mm256_setzero_pd()); 196 } 197 198 static __inline__ __m128d __DEFAULT_FN_ATTRS 199 _mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 200 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 201 (__v2df)_mm_xor_pd(__A, __B), 202 (__v2df)__W); 203 } 204 205 static __inline__ __m128d __DEFAULT_FN_ATTRS 206 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { 207 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 208 (__v2df)_mm_xor_pd(__A, __B), 209 (__v2df)_mm_setzero_pd()); 210 } 211 212 static __inline__ __m256 __DEFAULT_FN_ATTRS 213 _mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 214 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 215 (__v8sf)_mm256_xor_ps(__A, __B), 216 (__v8sf)__W); 217 } 218 219 static __inline__ __m256 __DEFAULT_FN_ATTRS 220 _mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) { 221 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 222 (__v8sf)_mm256_xor_ps(__A, __B), 223 (__v8sf)_mm256_setzero_ps()); 224 } 225 226 static __inline__ __m128 __DEFAULT_FN_ATTRS 227 _mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 228 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 229 (__v4sf)_mm_xor_ps(__A, __B), 230 (__v4sf)__W); 231 } 232 233 static __inline__ __m128 __DEFAULT_FN_ATTRS 234 _mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) { 235 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 236 (__v4sf)_mm_xor_ps(__A, __B), 237 (__v4sf)_mm_setzero_ps()); 238 } 239 240 static __inline__ __m256d __DEFAULT_FN_ATTRS 241 _mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 242 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 243 (__v4df)_mm256_or_pd(__A, __B), 244 (__v4df)__W); 245 } 246 247 static __inline__ __m256d __DEFAULT_FN_ATTRS 248 _mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) { 249 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 250 (__v4df)_mm256_or_pd(__A, __B), 251 (__v4df)_mm256_setzero_pd()); 252 } 253 254 static __inline__ __m128d __DEFAULT_FN_ATTRS 255 _mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 256 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 257 (__v2df)_mm_or_pd(__A, __B), 258 (__v2df)__W); 259 } 260 261 static __inline__ __m128d __DEFAULT_FN_ATTRS 262 _mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) { 263 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 264 (__v2df)_mm_or_pd(__A, __B), 265 (__v2df)_mm_setzero_pd()); 266 } 267 268 static __inline__ __m256 __DEFAULT_FN_ATTRS 269 _mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 270 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 271 (__v8sf)_mm256_or_ps(__A, __B), 272 (__v8sf)__W); 273 } 274 275 static __inline__ __m256 __DEFAULT_FN_ATTRS 276 _mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) { 277 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 278 (__v8sf)_mm256_or_ps(__A, __B), 279 (__v8sf)_mm256_setzero_ps()); 280 } 281 282 static __inline__ __m128 __DEFAULT_FN_ATTRS 283 _mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 284 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 285 (__v4sf)_mm_or_ps(__A, __B), 286 (__v4sf)__W); 287 } 288 289 static __inline__ __m128 __DEFAULT_FN_ATTRS 290 _mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) { 291 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 292 (__v4sf)_mm_or_ps(__A, __B), 293 (__v4sf)_mm_setzero_ps()); 294 } 295 296 static __inline__ __m128i __DEFAULT_FN_ATTRS 297 _mm_cvtpd_epi64 (__m128d __A) { 298 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 299 (__v2di) _mm_setzero_si128(), 300 (__mmask8) -1); 301 } 302 303 static __inline__ __m128i __DEFAULT_FN_ATTRS 304 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { 305 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 306 (__v2di) __W, 307 (__mmask8) __U); 308 } 309 310 static __inline__ __m128i __DEFAULT_FN_ATTRS 311 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) { 312 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 313 (__v2di) _mm_setzero_si128(), 314 (__mmask8) __U); 315 } 316 317 static __inline__ __m256i __DEFAULT_FN_ATTRS 318 _mm256_cvtpd_epi64 (__m256d __A) { 319 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 320 (__v4di) _mm256_setzero_si256(), 321 (__mmask8) -1); 322 } 323 324 static __inline__ __m256i __DEFAULT_FN_ATTRS 325 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { 326 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 327 (__v4di) __W, 328 (__mmask8) __U); 329 } 330 331 static __inline__ __m256i __DEFAULT_FN_ATTRS 332 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) { 333 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 334 (__v4di) _mm256_setzero_si256(), 335 (__mmask8) __U); 336 } 337 338 static __inline__ __m128i __DEFAULT_FN_ATTRS 339 _mm_cvtpd_epu64 (__m128d __A) { 340 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 341 (__v2di) _mm_setzero_si128(), 342 (__mmask8) -1); 343 } 344 345 static __inline__ __m128i __DEFAULT_FN_ATTRS 346 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { 347 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 348 (__v2di) __W, 349 (__mmask8) __U); 350 } 351 352 static __inline__ __m128i __DEFAULT_FN_ATTRS 353 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) { 354 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 355 (__v2di) _mm_setzero_si128(), 356 (__mmask8) __U); 357 } 358 359 static __inline__ __m256i __DEFAULT_FN_ATTRS 360 _mm256_cvtpd_epu64 (__m256d __A) { 361 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 362 (__v4di) _mm256_setzero_si256(), 363 (__mmask8) -1); 364 } 365 366 static __inline__ __m256i __DEFAULT_FN_ATTRS 367 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { 368 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 369 (__v4di) __W, 370 (__mmask8) __U); 371 } 372 373 static __inline__ __m256i __DEFAULT_FN_ATTRS 374 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) { 375 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 376 (__v4di) _mm256_setzero_si256(), 377 (__mmask8) __U); 378 } 379 380 static __inline__ __m128i __DEFAULT_FN_ATTRS 381 _mm_cvtps_epi64 (__m128 __A) { 382 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 383 (__v2di) _mm_setzero_si128(), 384 (__mmask8) -1); 385 } 386 387 static __inline__ __m128i __DEFAULT_FN_ATTRS 388 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { 389 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 390 (__v2di) __W, 391 (__mmask8) __U); 392 } 393 394 static __inline__ __m128i __DEFAULT_FN_ATTRS 395 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { 396 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 397 (__v2di) _mm_setzero_si128(), 398 (__mmask8) __U); 399 } 400 401 static __inline__ __m256i __DEFAULT_FN_ATTRS 402 _mm256_cvtps_epi64 (__m128 __A) { 403 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 404 (__v4di) _mm256_setzero_si256(), 405 (__mmask8) -1); 406 } 407 408 static __inline__ __m256i __DEFAULT_FN_ATTRS 409 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { 410 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 411 (__v4di) __W, 412 (__mmask8) __U); 413 } 414 415 static __inline__ __m256i __DEFAULT_FN_ATTRS 416 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { 417 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 418 (__v4di) _mm256_setzero_si256(), 419 (__mmask8) __U); 420 } 421 422 static __inline__ __m128i __DEFAULT_FN_ATTRS 423 _mm_cvtps_epu64 (__m128 __A) { 424 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 425 (__v2di) _mm_setzero_si128(), 426 (__mmask8) -1); 427 } 428 429 static __inline__ __m128i __DEFAULT_FN_ATTRS 430 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { 431 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 432 (__v2di) __W, 433 (__mmask8) __U); 434 } 435 436 static __inline__ __m128i __DEFAULT_FN_ATTRS 437 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { 438 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 439 (__v2di) _mm_setzero_si128(), 440 (__mmask8) __U); 441 } 442 443 static __inline__ __m256i __DEFAULT_FN_ATTRS 444 _mm256_cvtps_epu64 (__m128 __A) { 445 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 446 (__v4di) _mm256_setzero_si256(), 447 (__mmask8) -1); 448 } 449 450 static __inline__ __m256i __DEFAULT_FN_ATTRS 451 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { 452 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 453 (__v4di) __W, 454 (__mmask8) __U); 455 } 456 457 static __inline__ __m256i __DEFAULT_FN_ATTRS 458 _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { 459 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 460 (__v4di) _mm256_setzero_si256(), 461 (__mmask8) __U); 462 } 463 464 static __inline__ __m128d __DEFAULT_FN_ATTRS 465 _mm_cvtepi64_pd (__m128i __A) { 466 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, 467 (__v2df) _mm_setzero_pd(), 468 (__mmask8) -1); 469 } 470 471 static __inline__ __m128d __DEFAULT_FN_ATTRS 472 _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) { 473 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, 474 (__v2df) __W, 475 (__mmask8) __U); 476 } 477 478 static __inline__ __m128d __DEFAULT_FN_ATTRS 479 _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) { 480 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, 481 (__v2df) _mm_setzero_pd(), 482 (__mmask8) __U); 483 } 484 485 static __inline__ __m256d __DEFAULT_FN_ATTRS 486 _mm256_cvtepi64_pd (__m256i __A) { 487 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, 488 (__v4df) _mm256_setzero_pd(), 489 (__mmask8) -1); 490 } 491 492 static __inline__ __m256d __DEFAULT_FN_ATTRS 493 _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) { 494 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, 495 (__v4df) __W, 496 (__mmask8) __U); 497 } 498 499 static __inline__ __m256d __DEFAULT_FN_ATTRS 500 _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) { 501 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, 502 (__v4df) _mm256_setzero_pd(), 503 (__mmask8) __U); 504 } 505 506 static __inline__ __m128 __DEFAULT_FN_ATTRS 507 _mm_cvtepi64_ps (__m128i __A) { 508 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 509 (__v4sf) _mm_setzero_ps(), 510 (__mmask8) -1); 511 } 512 513 static __inline__ __m128 __DEFAULT_FN_ATTRS 514 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) { 515 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 516 (__v4sf) __W, 517 (__mmask8) __U); 518 } 519 520 static __inline__ __m128 __DEFAULT_FN_ATTRS 521 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) { 522 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 523 (__v4sf) _mm_setzero_ps(), 524 (__mmask8) __U); 525 } 526 527 static __inline__ __m128 __DEFAULT_FN_ATTRS 528 _mm256_cvtepi64_ps (__m256i __A) { 529 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, 530 (__v4sf) _mm_setzero_ps(), 531 (__mmask8) -1); 532 } 533 534 static __inline__ __m128 __DEFAULT_FN_ATTRS 535 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) { 536 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, 537 (__v4sf) __W, 538 (__mmask8) __U); 539 } 540 541 static __inline__ __m128 __DEFAULT_FN_ATTRS 542 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) { 543 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, 544 (__v4sf) _mm_setzero_ps(), 545 (__mmask8) __U); 546 } 547 548 static __inline__ __m128i __DEFAULT_FN_ATTRS 549 _mm_cvttpd_epi64 (__m128d __A) { 550 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 551 (__v2di) _mm_setzero_si128(), 552 (__mmask8) -1); 553 } 554 555 static __inline__ __m128i __DEFAULT_FN_ATTRS 556 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { 557 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 558 (__v2di) __W, 559 (__mmask8) __U); 560 } 561 562 static __inline__ __m128i __DEFAULT_FN_ATTRS 563 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) { 564 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 565 (__v2di) _mm_setzero_si128(), 566 (__mmask8) __U); 567 } 568 569 static __inline__ __m256i __DEFAULT_FN_ATTRS 570 _mm256_cvttpd_epi64 (__m256d __A) { 571 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 572 (__v4di) _mm256_setzero_si256(), 573 (__mmask8) -1); 574 } 575 576 static __inline__ __m256i __DEFAULT_FN_ATTRS 577 _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { 578 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 579 (__v4di) __W, 580 (__mmask8) __U); 581 } 582 583 static __inline__ __m256i __DEFAULT_FN_ATTRS 584 _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) { 585 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 586 (__v4di) _mm256_setzero_si256(), 587 (__mmask8) __U); 588 } 589 590 static __inline__ __m128i __DEFAULT_FN_ATTRS 591 _mm_cvttpd_epu64 (__m128d __A) { 592 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 593 (__v2di) _mm_setzero_si128(), 594 (__mmask8) -1); 595 } 596 597 static __inline__ __m128i __DEFAULT_FN_ATTRS 598 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { 599 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 600 (__v2di) __W, 601 (__mmask8) __U); 602 } 603 604 static __inline__ __m128i __DEFAULT_FN_ATTRS 605 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) { 606 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 607 (__v2di) _mm_setzero_si128(), 608 (__mmask8) __U); 609 } 610 611 static __inline__ __m256i __DEFAULT_FN_ATTRS 612 _mm256_cvttpd_epu64 (__m256d __A) { 613 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 614 (__v4di) _mm256_setzero_si256(), 615 (__mmask8) -1); 616 } 617 618 static __inline__ __m256i __DEFAULT_FN_ATTRS 619 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { 620 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 621 (__v4di) __W, 622 (__mmask8) __U); 623 } 624 625 static __inline__ __m256i __DEFAULT_FN_ATTRS 626 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) { 627 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 628 (__v4di) _mm256_setzero_si256(), 629 (__mmask8) __U); 630 } 631 632 static __inline__ __m128i __DEFAULT_FN_ATTRS 633 _mm_cvttps_epi64 (__m128 __A) { 634 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 635 (__v2di) _mm_setzero_si128(), 636 (__mmask8) -1); 637 } 638 639 static __inline__ __m128i __DEFAULT_FN_ATTRS 640 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { 641 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 642 (__v2di) __W, 643 (__mmask8) __U); 644 } 645 646 static __inline__ __m128i __DEFAULT_FN_ATTRS 647 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { 648 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 649 (__v2di) _mm_setzero_si128(), 650 (__mmask8) __U); 651 } 652 653 static __inline__ __m256i __DEFAULT_FN_ATTRS 654 _mm256_cvttps_epi64 (__m128 __A) { 655 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 656 (__v4di) _mm256_setzero_si256(), 657 (__mmask8) -1); 658 } 659 660 static __inline__ __m256i __DEFAULT_FN_ATTRS 661 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { 662 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 663 (__v4di) __W, 664 (__mmask8) __U); 665 } 666 667 static __inline__ __m256i __DEFAULT_FN_ATTRS 668 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { 669 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 670 (__v4di) _mm256_setzero_si256(), 671 (__mmask8) __U); 672 } 673 674 static __inline__ __m128i __DEFAULT_FN_ATTRS 675 _mm_cvttps_epu64 (__m128 __A) { 676 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 677 (__v2di) _mm_setzero_si128(), 678 (__mmask8) -1); 679 } 680 681 static __inline__ __m128i __DEFAULT_FN_ATTRS 682 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { 683 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 684 (__v2di) __W, 685 (__mmask8) __U); 686 } 687 688 static __inline__ __m128i __DEFAULT_FN_ATTRS 689 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { 690 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 691 (__v2di) _mm_setzero_si128(), 692 (__mmask8) __U); 693 } 694 695 static __inline__ __m256i __DEFAULT_FN_ATTRS 696 _mm256_cvttps_epu64 (__m128 __A) { 697 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 698 (__v4di) _mm256_setzero_si256(), 699 (__mmask8) -1); 700 } 701 702 static __inline__ __m256i __DEFAULT_FN_ATTRS 703 _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { 704 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 705 (__v4di) __W, 706 (__mmask8) __U); 707 } 708 709 static __inline__ __m256i __DEFAULT_FN_ATTRS 710 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { 711 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 712 (__v4di) _mm256_setzero_si256(), 713 (__mmask8) __U); 714 } 715 716 static __inline__ __m128d __DEFAULT_FN_ATTRS 717 _mm_cvtepu64_pd (__m128i __A) { 718 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, 719 (__v2df) _mm_setzero_pd(), 720 (__mmask8) -1); 721 } 722 723 static __inline__ __m128d __DEFAULT_FN_ATTRS 724 _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) { 725 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, 726 (__v2df) __W, 727 (__mmask8) __U); 728 } 729 730 static __inline__ __m128d __DEFAULT_FN_ATTRS 731 _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) { 732 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, 733 (__v2df) _mm_setzero_pd(), 734 (__mmask8) __U); 735 } 736 737 static __inline__ __m256d __DEFAULT_FN_ATTRS 738 _mm256_cvtepu64_pd (__m256i __A) { 739 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, 740 (__v4df) _mm256_setzero_pd(), 741 (__mmask8) -1); 742 } 743 744 static __inline__ __m256d __DEFAULT_FN_ATTRS 745 _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) { 746 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, 747 (__v4df) __W, 748 (__mmask8) __U); 749 } 750 751 static __inline__ __m256d __DEFAULT_FN_ATTRS 752 _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) { 753 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, 754 (__v4df) _mm256_setzero_pd(), 755 (__mmask8) __U); 756 } 757 758 static __inline__ __m128 __DEFAULT_FN_ATTRS 759 _mm_cvtepu64_ps (__m128i __A) { 760 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 761 (__v4sf) _mm_setzero_ps(), 762 (__mmask8) -1); 763 } 764 765 static __inline__ __m128 __DEFAULT_FN_ATTRS 766 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) { 767 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 768 (__v4sf) __W, 769 (__mmask8) __U); 770 } 771 772 static __inline__ __m128 __DEFAULT_FN_ATTRS 773 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) { 774 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 775 (__v4sf) _mm_setzero_ps(), 776 (__mmask8) __U); 777 } 778 779 static __inline__ __m128 __DEFAULT_FN_ATTRS 780 _mm256_cvtepu64_ps (__m256i __A) { 781 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, 782 (__v4sf) _mm_setzero_ps(), 783 (__mmask8) -1); 784 } 785 786 static __inline__ __m128 __DEFAULT_FN_ATTRS 787 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) { 788 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, 789 (__v4sf) __W, 790 (__mmask8) __U); 791 } 792 793 static __inline__ __m128 __DEFAULT_FN_ATTRS 794 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) { 795 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, 796 (__v4sf) _mm_setzero_ps(), 797 (__mmask8) __U); 798 } 799 800 #define _mm_range_pd(A, B, C) __extension__ ({ \ 801 (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ 802 (__v2df)(__m128d)(B), (int)(C), \ 803 (__v2df)_mm_setzero_pd(), \ 804 (__mmask8)-1); }) 805 806 #define _mm_mask_range_pd(W, U, A, B, C) __extension__ ({ \ 807 (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ 808 (__v2df)(__m128d)(B), (int)(C), \ 809 (__v2df)(__m128d)(W), \ 810 (__mmask8)(U)); }) 811 812 #define _mm_maskz_range_pd(U, A, B, C) __extension__ ({ \ 813 (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ 814 (__v2df)(__m128d)(B), (int)(C), \ 815 (__v2df)_mm_setzero_pd(), \ 816 (__mmask8)(U)); }) 817 818 #define _mm256_range_pd(A, B, C) __extension__ ({ \ 819 (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ 820 (__v4df)(__m256d)(B), (int)(C), \ 821 (__v4df)_mm256_setzero_pd(), \ 822 (__mmask8)-1); }) 823 824 #define _mm256_mask_range_pd(W, U, A, B, C) __extension__ ({ \ 825 (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ 826 (__v4df)(__m256d)(B), (int)(C), \ 827 (__v4df)(__m256d)(W), \ 828 (__mmask8)(U)); }) 829 830 #define _mm256_maskz_range_pd(U, A, B, C) __extension__ ({ \ 831 (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ 832 (__v4df)(__m256d)(B), (int)(C), \ 833 (__v4df)_mm256_setzero_pd(), \ 834 (__mmask8)(U)); }) 835 836 #define _mm_range_ps(A, B, C) __extension__ ({ \ 837 (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ 838 (__v4sf)(__m128)(B), (int)(C), \ 839 (__v4sf)_mm_setzero_ps(), \ 840 (__mmask8)-1); }) 841 842 #define _mm_mask_range_ps(W, U, A, B, C) __extension__ ({ \ 843 (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ 844 (__v4sf)(__m128)(B), (int)(C), \ 845 (__v4sf)(__m128)(W), (__mmask8)(U)); }) 846 847 #define _mm_maskz_range_ps(U, A, B, C) __extension__ ({ \ 848 (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ 849 (__v4sf)(__m128)(B), (int)(C), \ 850 (__v4sf)_mm_setzero_ps(), \ 851 (__mmask8)(U)); }) 852 853 #define _mm256_range_ps(A, B, C) __extension__ ({ \ 854 (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ 855 (__v8sf)(__m256)(B), (int)(C), \ 856 (__v8sf)_mm256_setzero_ps(), \ 857 (__mmask8)-1); }) 858 859 #define _mm256_mask_range_ps(W, U, A, B, C) __extension__ ({ \ 860 (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ 861 (__v8sf)(__m256)(B), (int)(C), \ 862 (__v8sf)(__m256)(W), (__mmask8)(U)); }) 863 864 #define _mm256_maskz_range_ps(U, A, B, C) __extension__ ({ \ 865 (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ 866 (__v8sf)(__m256)(B), (int)(C), \ 867 (__v8sf)_mm256_setzero_ps(), \ 868 (__mmask8)(U)); }) 869 870 #define _mm_reduce_pd(A, B) __extension__ ({ \ 871 (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ 872 (__v2df)_mm_setzero_pd(), \ 873 (__mmask8)-1); }) 874 875 #define _mm_mask_reduce_pd(W, U, A, B) __extension__ ({ \ 876 (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ 877 (__v2df)(__m128d)(W), \ 878 (__mmask8)(U)); }) 879 880 #define _mm_maskz_reduce_pd(U, A, B) __extension__ ({ \ 881 (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ 882 (__v2df)_mm_setzero_pd(), \ 883 (__mmask8)(U)); }) 884 885 #define _mm256_reduce_pd(A, B) __extension__ ({ \ 886 (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ 887 (__v4df)_mm256_setzero_pd(), \ 888 (__mmask8)-1); }) 889 890 #define _mm256_mask_reduce_pd(W, U, A, B) __extension__ ({ \ 891 (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ 892 (__v4df)(__m256d)(W), \ 893 (__mmask8)(U)); }) 894 895 #define _mm256_maskz_reduce_pd(U, A, B) __extension__ ({ \ 896 (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ 897 (__v4df)_mm256_setzero_pd(), \ 898 (__mmask8)(U)); }) 899 900 #define _mm_reduce_ps(A, B) __extension__ ({ \ 901 (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 902 (__v4sf)_mm_setzero_ps(), \ 903 (__mmask8)-1); }) 904 905 #define _mm_mask_reduce_ps(W, U, A, B) __extension__ ({ \ 906 (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 907 (__v4sf)(__m128)(W), \ 908 (__mmask8)(U)); }) 909 910 #define _mm_maskz_reduce_ps(U, A, B) __extension__ ({ \ 911 (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 912 (__v4sf)_mm_setzero_ps(), \ 913 (__mmask8)(U)); }) 914 915 #define _mm256_reduce_ps(A, B) __extension__ ({ \ 916 (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ 917 (__v8sf)_mm256_setzero_ps(), \ 918 (__mmask8)-1); }) 919 920 #define _mm256_mask_reduce_ps(W, U, A, B) __extension__ ({ \ 921 (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ 922 (__v8sf)(__m256)(W), \ 923 (__mmask8)(U)); }) 924 925 #define _mm256_maskz_reduce_ps(U, A, B) __extension__ ({ \ 926 (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ 927 (__v8sf)_mm256_setzero_ps(), \ 928 (__mmask8)(U)); }) 929 930 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 931 _mm_movepi32_mask (__m128i __A) 932 { 933 return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A); 934 } 935 936 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 937 _mm256_movepi32_mask (__m256i __A) 938 { 939 return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A); 940 } 941 942 static __inline__ __m128i __DEFAULT_FN_ATTRS 943 _mm_movm_epi32 (__mmask8 __A) 944 { 945 return (__m128i) __builtin_ia32_cvtmask2d128 (__A); 946 } 947 948 static __inline__ __m256i __DEFAULT_FN_ATTRS 949 _mm256_movm_epi32 (__mmask8 __A) 950 { 951 return (__m256i) __builtin_ia32_cvtmask2d256 (__A); 952 } 953 954 static __inline__ __m128i __DEFAULT_FN_ATTRS 955 _mm_movm_epi64 (__mmask8 __A) 956 { 957 return (__m128i) __builtin_ia32_cvtmask2q128 (__A); 958 } 959 960 static __inline__ __m256i __DEFAULT_FN_ATTRS 961 _mm256_movm_epi64 (__mmask8 __A) 962 { 963 return (__m256i) __builtin_ia32_cvtmask2q256 (__A); 964 } 965 966 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 967 _mm_movepi64_mask (__m128i __A) 968 { 969 return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A); 970 } 971 972 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 973 _mm256_movepi64_mask (__m256i __A) 974 { 975 return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A); 976 } 977 978 static __inline__ __m256 __DEFAULT_FN_ATTRS 979 _mm256_broadcast_f32x2 (__m128 __A) 980 { 981 return (__m256)__builtin_shufflevector((__v4sf)__A, 982 (__v4sf)_mm_undefined_ps(), 983 0, 1, 0, 1, 0, 1, 0, 1); 984 } 985 986 static __inline__ __m256 __DEFAULT_FN_ATTRS 987 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) 988 { 989 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 990 (__v8sf)_mm256_broadcast_f32x2(__A), 991 (__v8sf)__O); 992 } 993 994 static __inline__ __m256 __DEFAULT_FN_ATTRS 995 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) 996 { 997 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 998 (__v8sf)_mm256_broadcast_f32x2(__A), 999 (__v8sf)_mm256_setzero_ps()); 1000 } 1001 1002 static __inline__ __m256d __DEFAULT_FN_ATTRS 1003 _mm256_broadcast_f64x2(__m128d __A) 1004 { 1005 return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, 1006 0, 1, 0, 1); 1007 } 1008 1009 static __inline__ __m256d __DEFAULT_FN_ATTRS 1010 _mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) 1011 { 1012 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, 1013 (__v4df)_mm256_broadcast_f64x2(__A), 1014 (__v4df)__O); 1015 } 1016 1017 static __inline__ __m256d __DEFAULT_FN_ATTRS 1018 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) 1019 { 1020 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, 1021 (__v4df)_mm256_broadcast_f64x2(__A), 1022 (__v4df)_mm256_setzero_pd()); 1023 } 1024 1025 static __inline__ __m128i __DEFAULT_FN_ATTRS 1026 _mm_broadcast_i32x2 (__m128i __A) 1027 { 1028 return (__m128i)__builtin_shufflevector((__v4si)__A, 1029 (__v4si)_mm_undefined_si128(), 1030 0, 1, 0, 1); 1031 } 1032 1033 static __inline__ __m128i __DEFAULT_FN_ATTRS 1034 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) 1035 { 1036 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 1037 (__v4si)_mm_broadcast_i32x2(__A), 1038 (__v4si)__O); 1039 } 1040 1041 static __inline__ __m128i __DEFAULT_FN_ATTRS 1042 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) 1043 { 1044 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 1045 (__v4si)_mm_broadcast_i32x2(__A), 1046 (__v4si)_mm_setzero_si128()); 1047 } 1048 1049 static __inline__ __m256i __DEFAULT_FN_ATTRS 1050 _mm256_broadcast_i32x2 (__m128i __A) 1051 { 1052 return (__m256i)__builtin_shufflevector((__v4si)__A, 1053 (__v4si)_mm_undefined_si128(), 1054 0, 1, 0, 1, 0, 1, 0, 1); 1055 } 1056 1057 static __inline__ __m256i __DEFAULT_FN_ATTRS 1058 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) 1059 { 1060 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 1061 (__v8si)_mm256_broadcast_i32x2(__A), 1062 (__v8si)__O); 1063 } 1064 1065 static __inline__ __m256i __DEFAULT_FN_ATTRS 1066 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) 1067 { 1068 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 1069 (__v8si)_mm256_broadcast_i32x2(__A), 1070 (__v8si)_mm256_setzero_si256()); 1071 } 1072 1073 static __inline__ __m256i __DEFAULT_FN_ATTRS 1074 _mm256_broadcast_i64x2(__m128i __A) 1075 { 1076 return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, 1077 0, 1, 0, 1); 1078 } 1079 1080 static __inline__ __m256i __DEFAULT_FN_ATTRS 1081 _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) 1082 { 1083 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 1084 (__v4di)_mm256_broadcast_i64x2(__A), 1085 (__v4di)__O); 1086 } 1087 1088 static __inline__ __m256i __DEFAULT_FN_ATTRS 1089 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) 1090 { 1091 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 1092 (__v4di)_mm256_broadcast_i64x2(__A), 1093 (__v4di)_mm256_setzero_si256()); 1094 } 1095 1096 #define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \ 1097 (__m128d)__builtin_shufflevector((__v4df)(__m256d)(A), \ 1098 (__v4df)_mm256_undefined_pd(), \ 1099 ((imm) & 1) ? 2 : 0, \ 1100 ((imm) & 1) ? 3 : 1); }) 1101 1102 #define _mm256_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \ 1103 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 1104 (__v2df)_mm256_extractf64x2_pd((A), (imm)), \ 1105 (__v2df)(W)); }) 1106 1107 #define _mm256_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \ 1108 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 1109 (__v2df)_mm256_extractf64x2_pd((A), (imm)), \ 1110 (__v2df)_mm_setzero_pd()); }) 1111 1112 #define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \ 1113 (__m128i)__builtin_shufflevector((__v4di)(__m256i)(A), \ 1114 (__v4di)_mm256_undefined_si256(), \ 1115 ((imm) & 1) ? 2 : 0, \ 1116 ((imm) & 1) ? 3 : 1); }) 1117 1118 #define _mm256_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \ 1119 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 1120 (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \ 1121 (__v2di)(W)); }) 1122 1123 #define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \ 1124 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 1125 (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \ 1126 (__v2di)_mm_setzero_di()); }) 1127 1128 #define _mm256_insertf64x2(A, B, imm) __extension__ ({ \ 1129 (__m256d)__builtin_shufflevector((__v4df)(A), \ 1130 (__v4df)_mm256_castpd128_pd256((__m128d)(B)), \ 1131 ((imm) & 0x1) ? 0 : 4, \ 1132 ((imm) & 0x1) ? 1 : 5, \ 1133 ((imm) & 0x1) ? 4 : 2, \ 1134 ((imm) & 0x1) ? 5 : 3); }) 1135 1136 #define _mm256_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \ 1137 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 1138 (__v4df)_mm256_insertf64x2((A), (B), (imm)), \ 1139 (__v4df)(W)); }) 1140 1141 #define _mm256_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \ 1142 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 1143 (__v4df)_mm256_insertf64x2((A), (B), (imm)), \ 1144 (__v4df)_mm256_setzero_pd()); }) 1145 1146 #define _mm256_inserti64x2(A, B, imm) __extension__ ({ \ 1147 (__m256i)__builtin_shufflevector((__v4di)(A), \ 1148 (__v4di)_mm256_castsi128_si256((__m128i)(B)), \ 1149 ((imm) & 0x1) ? 0 : 4, \ 1150 ((imm) & 0x1) ? 1 : 5, \ 1151 ((imm) & 0x1) ? 4 : 2, \ 1152 ((imm) & 0x1) ? 5 : 3); }) 1153 1154 #define _mm256_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \ 1155 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 1156 (__v4di)_mm256_inserti64x2((A), (B), (imm)), \ 1157 (__v4di)(W)); }) 1158 1159 #define _mm256_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \ 1160 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 1161 (__v4di)_mm256_inserti64x2((A), (B), (imm)), \ 1162 (__v4di)_mm256_setzero_si256()); }) 1163 1164 #define _mm_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \ 1165 (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ 1166 (__mmask8)(U)); }) 1167 1168 #define _mm_fpclass_pd_mask(A, imm) __extension__ ({ \ 1169 (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ 1170 (__mmask8)-1); }) 1171 1172 #define _mm256_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \ 1173 (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ 1174 (__mmask8)(U)); }) 1175 1176 #define _mm256_fpclass_pd_mask(A, imm) __extension__ ({ \ 1177 (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ 1178 (__mmask8)-1); }) 1179 1180 #define _mm_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \ 1181 (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ 1182 (__mmask8)(U)); }) 1183 1184 #define _mm_fpclass_ps_mask(A, imm) __extension__ ({ \ 1185 (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ 1186 (__mmask8)-1); }) 1187 1188 #define _mm256_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \ 1189 (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ 1190 (__mmask8)(U)); }) 1191 1192 #define _mm256_fpclass_ps_mask(A, imm) __extension__ ({ \ 1193 (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ 1194 (__mmask8)-1); }) 1195 1196 #undef __DEFAULT_FN_ATTRS 1197 1198 #endif 1199