1 /*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24 #ifndef __IMMINTRIN_H 25 #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead." 26 #endif 27 28 #ifndef __AVX512DQINTRIN_H 29 #define __AVX512DQINTRIN_H 30 31 /* Define the default attributes for the functions in this file. 
*/ 32 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"))) 33 34 static __inline__ __m512i __DEFAULT_FN_ATTRS 35 _mm512_mullo_epi64 (__m512i __A, __m512i __B) { 36 return (__m512i) ((__v8du) __A * (__v8du) __B); 37 } 38 39 static __inline__ __m512i __DEFAULT_FN_ATTRS 40 _mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { 41 return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A, 42 (__v8di) __B, 43 (__v8di) __W, 44 (__mmask8) __U); 45 } 46 47 static __inline__ __m512i __DEFAULT_FN_ATTRS 48 _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { 49 return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A, 50 (__v8di) __B, 51 (__v8di) 52 _mm512_setzero_si512 (), 53 (__mmask8) __U); 54 } 55 56 static __inline__ __m512d __DEFAULT_FN_ATTRS 57 _mm512_xor_pd (__m512d __A, __m512d __B) { 58 return (__m512d) ((__v8du) __A ^ (__v8du) __B); 59 } 60 61 static __inline__ __m512d __DEFAULT_FN_ATTRS 62 _mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 63 return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, 64 (__v8df) __B, 65 (__v8df) __W, 66 (__mmask8) __U); 67 } 68 69 static __inline__ __m512d __DEFAULT_FN_ATTRS 70 _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) { 71 return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, 72 (__v8df) __B, 73 (__v8df) 74 _mm512_setzero_pd (), 75 (__mmask8) __U); 76 } 77 78 static __inline__ __m512 __DEFAULT_FN_ATTRS 79 _mm512_xor_ps (__m512 __A, __m512 __B) { 80 return (__m512) ((__v16su) __A ^ (__v16su) __B); 81 } 82 83 static __inline__ __m512 __DEFAULT_FN_ATTRS 84 _mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 85 return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, 86 (__v16sf) __B, 87 (__v16sf) __W, 88 (__mmask16) __U); 89 } 90 91 static __inline__ __m512 __DEFAULT_FN_ATTRS 92 _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) { 93 return (__m512) 
__builtin_ia32_xorps512_mask ((__v16sf) __A, 94 (__v16sf) __B, 95 (__v16sf) 96 _mm512_setzero_ps (), 97 (__mmask16) __U); 98 } 99 100 static __inline__ __m512d __DEFAULT_FN_ATTRS 101 _mm512_or_pd (__m512d __A, __m512d __B) { 102 return (__m512d) ((__v8du) __A | (__v8du) __B); 103 } 104 105 static __inline__ __m512d __DEFAULT_FN_ATTRS 106 _mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 107 return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, 108 (__v8df) __B, 109 (__v8df) __W, 110 (__mmask8) __U); 111 } 112 113 static __inline__ __m512d __DEFAULT_FN_ATTRS 114 _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) { 115 return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, 116 (__v8df) __B, 117 (__v8df) 118 _mm512_setzero_pd (), 119 (__mmask8) __U); 120 } 121 122 static __inline__ __m512 __DEFAULT_FN_ATTRS 123 _mm512_or_ps (__m512 __A, __m512 __B) { 124 return (__m512) ((__v16su) __A | (__v16su) __B); 125 } 126 127 static __inline__ __m512 __DEFAULT_FN_ATTRS 128 _mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 129 return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, 130 (__v16sf) __B, 131 (__v16sf) __W, 132 (__mmask16) __U); 133 } 134 135 static __inline__ __m512 __DEFAULT_FN_ATTRS 136 _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) { 137 return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, 138 (__v16sf) __B, 139 (__v16sf) 140 _mm512_setzero_ps (), 141 (__mmask16) __U); 142 } 143 144 static __inline__ __m512d __DEFAULT_FN_ATTRS 145 _mm512_and_pd (__m512d __A, __m512d __B) { 146 return (__m512d) ((__v8du) __A & (__v8du) __B); 147 } 148 149 static __inline__ __m512d __DEFAULT_FN_ATTRS 150 _mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 151 return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, 152 (__v8df) __B, 153 (__v8df) __W, 154 (__mmask8) __U); 155 } 156 157 static __inline__ __m512d __DEFAULT_FN_ATTRS 158 _mm512_maskz_and_pd (__mmask8 __U, 
__m512d __A, __m512d __B) { 159 return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, 160 (__v8df) __B, 161 (__v8df) 162 _mm512_setzero_pd (), 163 (__mmask8) __U); 164 } 165 166 static __inline__ __m512 __DEFAULT_FN_ATTRS 167 _mm512_and_ps (__m512 __A, __m512 __B) { 168 return (__m512) ((__v16su) __A & (__v16su) __B); 169 } 170 171 static __inline__ __m512 __DEFAULT_FN_ATTRS 172 _mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 173 return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, 174 (__v16sf) __B, 175 (__v16sf) __W, 176 (__mmask16) __U); 177 } 178 179 static __inline__ __m512 __DEFAULT_FN_ATTRS 180 _mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) { 181 return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, 182 (__v16sf) __B, 183 (__v16sf) 184 _mm512_setzero_ps (), 185 (__mmask16) __U); 186 } 187 188 static __inline__ __m512d __DEFAULT_FN_ATTRS 189 _mm512_andnot_pd (__m512d __A, __m512d __B) { 190 return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, 191 (__v8df) __B, 192 (__v8df) 193 _mm512_setzero_pd (), 194 (__mmask8) -1); 195 } 196 197 static __inline__ __m512d __DEFAULT_FN_ATTRS 198 _mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 199 return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, 200 (__v8df) __B, 201 (__v8df) __W, 202 (__mmask8) __U); 203 } 204 205 static __inline__ __m512d __DEFAULT_FN_ATTRS 206 _mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) { 207 return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, 208 (__v8df) __B, 209 (__v8df) 210 _mm512_setzero_pd (), 211 (__mmask8) __U); 212 } 213 214 static __inline__ __m512 __DEFAULT_FN_ATTRS 215 _mm512_andnot_ps (__m512 __A, __m512 __B) { 216 return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, 217 (__v16sf) __B, 218 (__v16sf) 219 _mm512_setzero_ps (), 220 (__mmask16) -1); 221 } 222 223 static __inline__ __m512 __DEFAULT_FN_ATTRS 224 _mm512_mask_andnot_ps (__m512 __W, 
__mmask16 __U, __m512 __A, __m512 __B) { 225 return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, 226 (__v16sf) __B, 227 (__v16sf) __W, 228 (__mmask16) __U); 229 } 230 231 static __inline__ __m512 __DEFAULT_FN_ATTRS 232 _mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) { 233 return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, 234 (__v16sf) __B, 235 (__v16sf) 236 _mm512_setzero_ps (), 237 (__mmask16) __U); 238 } 239 240 static __inline__ __m512i __DEFAULT_FN_ATTRS 241 _mm512_cvtpd_epi64 (__m512d __A) { 242 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 243 (__v8di) _mm512_setzero_si512(), 244 (__mmask8) -1, 245 _MM_FROUND_CUR_DIRECTION); 246 } 247 248 static __inline__ __m512i __DEFAULT_FN_ATTRS 249 _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) { 250 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 251 (__v8di) __W, 252 (__mmask8) __U, 253 _MM_FROUND_CUR_DIRECTION); 254 } 255 256 static __inline__ __m512i __DEFAULT_FN_ATTRS 257 _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) { 258 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 259 (__v8di) _mm512_setzero_si512(), 260 (__mmask8) __U, 261 _MM_FROUND_CUR_DIRECTION); 262 } 263 264 #define _mm512_cvt_roundpd_epi64(A, R) __extension__ ({ \ 265 (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ 266 (__v8di)_mm512_setzero_si512(), \ 267 (__mmask8)-1, (int)(R)); }) 268 269 #define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) __extension__ ({ \ 270 (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ 271 (__v8di)(__m512i)(W), \ 272 (__mmask8)(U), (int)(R)); }) 273 274 #define _mm512_maskz_cvt_roundpd_epi64(U, A, R) __extension__ ({ \ 275 (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ 276 (__v8di)_mm512_setzero_si512(), \ 277 (__mmask8)(U), (int)(R)); }) 278 279 static __inline__ __m512i __DEFAULT_FN_ATTRS 280 _mm512_cvtpd_epu64 (__m512d __A) { 281 return (__m512i) 
__builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 282 (__v8di) _mm512_setzero_si512(), 283 (__mmask8) -1, 284 _MM_FROUND_CUR_DIRECTION); 285 } 286 287 static __inline__ __m512i __DEFAULT_FN_ATTRS 288 _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) { 289 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 290 (__v8di) __W, 291 (__mmask8) __U, 292 _MM_FROUND_CUR_DIRECTION); 293 } 294 295 static __inline__ __m512i __DEFAULT_FN_ATTRS 296 _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) { 297 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 298 (__v8di) _mm512_setzero_si512(), 299 (__mmask8) __U, 300 _MM_FROUND_CUR_DIRECTION); 301 } 302 303 #define _mm512_cvt_roundpd_epu64(A, R) __extension__ ({ \ 304 (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ 305 (__v8di)_mm512_setzero_si512(), \ 306 (__mmask8)-1, (int)(R)); }) 307 308 #define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) __extension__ ({ \ 309 (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ 310 (__v8di)(__m512i)(W), \ 311 (__mmask8)(U), (int)(R)); }) 312 313 #define _mm512_maskz_cvt_roundpd_epu64(U, A, R) __extension__ ({ \ 314 (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ 315 (__v8di)_mm512_setzero_si512(), \ 316 (__mmask8)(U), (int)(R)); }) 317 318 static __inline__ __m512i __DEFAULT_FN_ATTRS 319 _mm512_cvtps_epi64 (__m256 __A) { 320 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 321 (__v8di) _mm512_setzero_si512(), 322 (__mmask8) -1, 323 _MM_FROUND_CUR_DIRECTION); 324 } 325 326 static __inline__ __m512i __DEFAULT_FN_ATTRS 327 _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) { 328 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 329 (__v8di) __W, 330 (__mmask8) __U, 331 _MM_FROUND_CUR_DIRECTION); 332 } 333 334 static __inline__ __m512i __DEFAULT_FN_ATTRS 335 _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) { 336 return (__m512i) __builtin_ia32_cvtps2qq512_mask 
((__v8sf) __A, 337 (__v8di) _mm512_setzero_si512(), 338 (__mmask8) __U, 339 _MM_FROUND_CUR_DIRECTION); 340 } 341 342 #define _mm512_cvt_roundps_epi64(A, R) __extension__ ({ \ 343 (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ 344 (__v8di)_mm512_setzero_si512(), \ 345 (__mmask8)-1, (int)(R)); }) 346 347 #define _mm512_mask_cvt_roundps_epi64(W, U, A, R) __extension__ ({ \ 348 (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ 349 (__v8di)(__m512i)(W), \ 350 (__mmask8)(U), (int)(R)); }) 351 352 #define _mm512_maskz_cvt_roundps_epi64(U, A, R) __extension__ ({ \ 353 (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ 354 (__v8di)_mm512_setzero_si512(), \ 355 (__mmask8)(U), (int)(R)); }) 356 357 static __inline__ __m512i __DEFAULT_FN_ATTRS 358 _mm512_cvtps_epu64 (__m256 __A) { 359 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 360 (__v8di) _mm512_setzero_si512(), 361 (__mmask8) -1, 362 _MM_FROUND_CUR_DIRECTION); 363 } 364 365 static __inline__ __m512i __DEFAULT_FN_ATTRS 366 _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) { 367 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 368 (__v8di) __W, 369 (__mmask8) __U, 370 _MM_FROUND_CUR_DIRECTION); 371 } 372 373 static __inline__ __m512i __DEFAULT_FN_ATTRS 374 _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) { 375 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 376 (__v8di) _mm512_setzero_si512(), 377 (__mmask8) __U, 378 _MM_FROUND_CUR_DIRECTION); 379 } 380 381 #define _mm512_cvt_roundps_epu64(A, R) __extension__ ({ \ 382 (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ 383 (__v8di)_mm512_setzero_si512(), \ 384 (__mmask8)-1, (int)(R)); }) 385 386 #define _mm512_mask_cvt_roundps_epu64(W, U, A, R) __extension__ ({ \ 387 (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ 388 (__v8di)(__m512i)(W), \ 389 (__mmask8)(U), (int)(R)); }) 390 391 #define _mm512_maskz_cvt_roundps_epu64(U, A, R) 
__extension__ ({ \ 392 (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ 393 (__v8di)_mm512_setzero_si512(), \ 394 (__mmask8)(U), (int)(R)); }) 395 396 397 static __inline__ __m512d __DEFAULT_FN_ATTRS 398 _mm512_cvtepi64_pd (__m512i __A) { 399 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, 400 (__v8df) _mm512_setzero_pd(), 401 (__mmask8) -1, 402 _MM_FROUND_CUR_DIRECTION); 403 } 404 405 static __inline__ __m512d __DEFAULT_FN_ATTRS 406 _mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) { 407 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, 408 (__v8df) __W, 409 (__mmask8) __U, 410 _MM_FROUND_CUR_DIRECTION); 411 } 412 413 static __inline__ __m512d __DEFAULT_FN_ATTRS 414 _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) { 415 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, 416 (__v8df) _mm512_setzero_pd(), 417 (__mmask8) __U, 418 _MM_FROUND_CUR_DIRECTION); 419 } 420 421 #define _mm512_cvt_roundepi64_pd(A, R) __extension__ ({ \ 422 (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ 423 (__v8df)_mm512_setzero_pd(), \ 424 (__mmask8)-1, (int)(R)); }) 425 426 #define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) __extension__ ({ \ 427 (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ 428 (__v8df)(__m512d)(W), \ 429 (__mmask8)(U), (int)(R)); }) 430 431 #define _mm512_maskz_cvt_roundepi64_pd(U, A, R) __extension__ ({ \ 432 (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ 433 (__v8df)_mm512_setzero_pd(), \ 434 (__mmask8)(U), (int)(R)); }) 435 436 static __inline__ __m256 __DEFAULT_FN_ATTRS 437 _mm512_cvtepi64_ps (__m512i __A) { 438 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 439 (__v8sf) _mm256_setzero_ps(), 440 (__mmask8) -1, 441 _MM_FROUND_CUR_DIRECTION); 442 } 443 444 static __inline__ __m256 __DEFAULT_FN_ATTRS 445 _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) { 446 return (__m256) __builtin_ia32_cvtqq2ps512_mask 
((__v8di) __A, 447 (__v8sf) __W, 448 (__mmask8) __U, 449 _MM_FROUND_CUR_DIRECTION); 450 } 451 452 static __inline__ __m256 __DEFAULT_FN_ATTRS 453 _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) { 454 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 455 (__v8sf) _mm256_setzero_ps(), 456 (__mmask8) __U, 457 _MM_FROUND_CUR_DIRECTION); 458 } 459 460 #define _mm512_cvt_roundepi64_ps(A, R) __extension__ ({ \ 461 (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ 462 (__v8sf)_mm256_setzero_ps(), \ 463 (__mmask8)-1, (int)(R)); }) 464 465 #define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) __extension__ ({ \ 466 (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ 467 (__v8sf)(__m256)(W), (__mmask8)(U), \ 468 (int)(R)); }) 469 470 #define _mm512_maskz_cvt_roundepi64_ps(U, A, R) __extension__ ({ \ 471 (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ 472 (__v8sf)_mm256_setzero_ps(), \ 473 (__mmask8)(U), (int)(R)); }) 474 475 476 static __inline__ __m512i __DEFAULT_FN_ATTRS 477 _mm512_cvttpd_epi64 (__m512d __A) { 478 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 479 (__v8di) _mm512_setzero_si512(), 480 (__mmask8) -1, 481 _MM_FROUND_CUR_DIRECTION); 482 } 483 484 static __inline__ __m512i __DEFAULT_FN_ATTRS 485 _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) { 486 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 487 (__v8di) __W, 488 (__mmask8) __U, 489 _MM_FROUND_CUR_DIRECTION); 490 } 491 492 static __inline__ __m512i __DEFAULT_FN_ATTRS 493 _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) { 494 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 495 (__v8di) _mm512_setzero_si512(), 496 (__mmask8) __U, 497 _MM_FROUND_CUR_DIRECTION); 498 } 499 500 #define _mm512_cvtt_roundpd_epi64(A, R) __extension__ ({ \ 501 (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ 502 (__v8di)_mm512_setzero_si512(), \ 503 (__mmask8)-1, (int)(R)); }) 504 505 
#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) __extension__ ({ \ 506 (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ 507 (__v8di)(__m512i)(W), \ 508 (__mmask8)(U), (int)(R)); }) 509 510 #define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) __extension__ ({ \ 511 (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ 512 (__v8di)_mm512_setzero_si512(), \ 513 (__mmask8)(U), (int)(R)); }) 514 515 static __inline__ __m512i __DEFAULT_FN_ATTRS 516 _mm512_cvttpd_epu64 (__m512d __A) { 517 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 518 (__v8di) _mm512_setzero_si512(), 519 (__mmask8) -1, 520 _MM_FROUND_CUR_DIRECTION); 521 } 522 523 static __inline__ __m512i __DEFAULT_FN_ATTRS 524 _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) { 525 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 526 (__v8di) __W, 527 (__mmask8) __U, 528 _MM_FROUND_CUR_DIRECTION); 529 } 530 531 static __inline__ __m512i __DEFAULT_FN_ATTRS 532 _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) { 533 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 534 (__v8di) _mm512_setzero_si512(), 535 (__mmask8) __U, 536 _MM_FROUND_CUR_DIRECTION); 537 } 538 539 #define _mm512_cvtt_roundpd_epu64(A, R) __extension__ ({ \ 540 (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ 541 (__v8di)_mm512_setzero_si512(), \ 542 (__mmask8)-1, (int)(R)); }) 543 544 #define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) __extension__ ({ \ 545 (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ 546 (__v8di)(__m512i)(W), \ 547 (__mmask8)(U), (int)(R)); }) 548 549 #define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) __extension__ ({ \ 550 (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ 551 (__v8di)_mm512_setzero_si512(), \ 552 (__mmask8)(U), (int)(R)); }) 553 554 static __inline__ __m512i __DEFAULT_FN_ATTRS 555 _mm512_cvttps_epi64 (__m256 __A) { 556 return (__m512i) 
__builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 557 (__v8di) _mm512_setzero_si512(), 558 (__mmask8) -1, 559 _MM_FROUND_CUR_DIRECTION); 560 } 561 562 static __inline__ __m512i __DEFAULT_FN_ATTRS 563 _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) { 564 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 565 (__v8di) __W, 566 (__mmask8) __U, 567 _MM_FROUND_CUR_DIRECTION); 568 } 569 570 static __inline__ __m512i __DEFAULT_FN_ATTRS 571 _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) { 572 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 573 (__v8di) _mm512_setzero_si512(), 574 (__mmask8) __U, 575 _MM_FROUND_CUR_DIRECTION); 576 } 577 578 #define _mm512_cvtt_roundps_epi64(A, R) __extension__ ({ \ 579 (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ 580 (__v8di)_mm512_setzero_si512(), \ 581 (__mmask8)-1, (int)(R)); }) 582 583 #define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) __extension__ ({ \ 584 (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ 585 (__v8di)(__m512i)(W), \ 586 (__mmask8)(U), (int)(R)); }) 587 588 #define _mm512_maskz_cvtt_roundps_epi64(U, A, R) __extension__ ({ \ 589 (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ 590 (__v8di)_mm512_setzero_si512(), \ 591 (__mmask8)(U), (int)(R)); }) 592 593 static __inline__ __m512i __DEFAULT_FN_ATTRS 594 _mm512_cvttps_epu64 (__m256 __A) { 595 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 596 (__v8di) _mm512_setzero_si512(), 597 (__mmask8) -1, 598 _MM_FROUND_CUR_DIRECTION); 599 } 600 601 static __inline__ __m512i __DEFAULT_FN_ATTRS 602 _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) { 603 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 604 (__v8di) __W, 605 (__mmask8) __U, 606 _MM_FROUND_CUR_DIRECTION); 607 } 608 609 static __inline__ __m512i __DEFAULT_FN_ATTRS 610 _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) { 611 return (__m512i) 
__builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 612 (__v8di) _mm512_setzero_si512(), 613 (__mmask8) __U, 614 _MM_FROUND_CUR_DIRECTION); 615 } 616 617 #define _mm512_cvtt_roundps_epu64(A, R) __extension__ ({ \ 618 (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ 619 (__v8di)_mm512_setzero_si512(), \ 620 (__mmask8)-1, (int)(R)); }) 621 622 #define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) __extension__ ({ \ 623 (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ 624 (__v8di)(__m512i)(W), \ 625 (__mmask8)(U), (int)(R)); }) 626 627 #define _mm512_maskz_cvtt_roundps_epu64(U, A, R) __extension__ ({ \ 628 (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ 629 (__v8di)_mm512_setzero_si512(), \ 630 (__mmask8)(U), (int)(R)); }) 631 632 static __inline__ __m512d __DEFAULT_FN_ATTRS 633 _mm512_cvtepu64_pd (__m512i __A) { 634 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, 635 (__v8df) _mm512_setzero_pd(), 636 (__mmask8) -1, 637 _MM_FROUND_CUR_DIRECTION); 638 } 639 640 static __inline__ __m512d __DEFAULT_FN_ATTRS 641 _mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) { 642 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, 643 (__v8df) __W, 644 (__mmask8) __U, 645 _MM_FROUND_CUR_DIRECTION); 646 } 647 648 static __inline__ __m512d __DEFAULT_FN_ATTRS 649 _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) { 650 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, 651 (__v8df) _mm512_setzero_pd(), 652 (__mmask8) __U, 653 _MM_FROUND_CUR_DIRECTION); 654 } 655 656 #define _mm512_cvt_roundepu64_pd(A, R) __extension__ ({ \ 657 (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ 658 (__v8df)_mm512_setzero_pd(), \ 659 (__mmask8)-1, (int)(R)); }) 660 661 #define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) __extension__ ({ \ 662 (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ 663 (__v8df)(__m512d)(W), \ 664 (__mmask8)(U), (int)(R)); }) 665 666 667 #define 
_mm512_maskz_cvt_roundepu64_pd(U, A, R) __extension__ ({ \ 668 (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ 669 (__v8df)_mm512_setzero_pd(), \ 670 (__mmask8)(U), (int)(R)); }) 671 672 673 static __inline__ __m256 __DEFAULT_FN_ATTRS 674 _mm512_cvtepu64_ps (__m512i __A) { 675 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 676 (__v8sf) _mm256_setzero_ps(), 677 (__mmask8) -1, 678 _MM_FROUND_CUR_DIRECTION); 679 } 680 681 static __inline__ __m256 __DEFAULT_FN_ATTRS 682 _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) { 683 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 684 (__v8sf) __W, 685 (__mmask8) __U, 686 _MM_FROUND_CUR_DIRECTION); 687 } 688 689 static __inline__ __m256 __DEFAULT_FN_ATTRS 690 _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) { 691 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 692 (__v8sf) _mm256_setzero_ps(), 693 (__mmask8) __U, 694 _MM_FROUND_CUR_DIRECTION); 695 } 696 697 #define _mm512_cvt_roundepu64_ps(A, R) __extension__ ({ \ 698 (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ 699 (__v8sf)_mm256_setzero_ps(), \ 700 (__mmask8)-1, (int)(R)); }) 701 702 #define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) __extension__ ({ \ 703 (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ 704 (__v8sf)(__m256)(W), (__mmask8)(U), \ 705 (int)(R)); }) 706 707 #define _mm512_maskz_cvt_roundepu64_ps(U, A, R) __extension__ ({ \ 708 (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ 709 (__v8sf)_mm256_setzero_ps(), \ 710 (__mmask8)(U), (int)(R)); }) 711 712 #define _mm512_range_pd(A, B, C) __extension__ ({ \ 713 (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 714 (__v8df)(__m512d)(B), (int)(C), \ 715 (__v8df)_mm512_setzero_pd(), \ 716 (__mmask8)-1, \ 717 _MM_FROUND_CUR_DIRECTION); }) 718 719 #define _mm512_mask_range_pd(W, U, A, B, C) __extension__ ({ \ 720 (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 
721 (__v8df)(__m512d)(B), (int)(C), \ 722 (__v8df)(__m512d)(W), (__mmask8)(U), \ 723 _MM_FROUND_CUR_DIRECTION); }) 724 725 #define _mm512_maskz_range_pd(U, A, B, C) __extension__ ({ \ 726 (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 727 (__v8df)(__m512d)(B), (int)(C), \ 728 (__v8df)_mm512_setzero_pd(), \ 729 (__mmask8)(U), \ 730 _MM_FROUND_CUR_DIRECTION); }) 731 732 #define _mm512_range_round_pd(A, B, C, R) __extension__ ({ \ 733 (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 734 (__v8df)(__m512d)(B), (int)(C), \ 735 (__v8df)_mm512_setzero_pd(), \ 736 (__mmask8)-1, (int)(R)); }) 737 738 #define _mm512_mask_range_round_pd(W, U, A, B, C, R) __extension__ ({ \ 739 (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 740 (__v8df)(__m512d)(B), (int)(C), \ 741 (__v8df)(__m512d)(W), (__mmask8)(U), \ 742 (int)(R)); }) 743 744 #define _mm512_maskz_range_round_pd(U, A, B, C, R) __extension__ ({ \ 745 (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 746 (__v8df)(__m512d)(B), (int)(C), \ 747 (__v8df)_mm512_setzero_pd(), \ 748 (__mmask8)(U), (int)(R)); }) 749 750 #define _mm512_range_ps(A, B, C) __extension__ ({ \ 751 (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 752 (__v16sf)(__m512)(B), (int)(C), \ 753 (__v16sf)_mm512_setzero_ps(), \ 754 (__mmask16)-1, \ 755 _MM_FROUND_CUR_DIRECTION); }) 756 757 #define _mm512_mask_range_ps(W, U, A, B, C) __extension__ ({ \ 758 (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 759 (__v16sf)(__m512)(B), (int)(C), \ 760 (__v16sf)(__m512)(W), (__mmask16)(U), \ 761 _MM_FROUND_CUR_DIRECTION); }) 762 763 #define _mm512_maskz_range_ps(U, A, B, C) __extension__ ({ \ 764 (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 765 (__v16sf)(__m512)(B), (int)(C), \ 766 (__v16sf)_mm512_setzero_ps(), \ 767 (__mmask16)(U), \ 768 _MM_FROUND_CUR_DIRECTION); }) 769 770 #define _mm512_range_round_ps(A, B, C, R) __extension__ ({ \ 771 
(__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 772 (__v16sf)(__m512)(B), (int)(C), \ 773 (__v16sf)_mm512_setzero_ps(), \ 774 (__mmask16)-1, (int)(R)); }) 775 776 #define _mm512_mask_range_round_ps(W, U, A, B, C, R) __extension__ ({ \ 777 (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 778 (__v16sf)(__m512)(B), (int)(C), \ 779 (__v16sf)(__m512)(W), (__mmask16)(U), \ 780 (int)(R)); }) 781 782 #define _mm512_maskz_range_round_ps(U, A, B, C, R) __extension__ ({ \ 783 (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 784 (__v16sf)(__m512)(B), (int)(C), \ 785 (__v16sf)_mm512_setzero_ps(), \ 786 (__mmask16)(U), (int)(R)); }) 787 788 #define _mm_range_round_ss(A, B, C, R) __extension__ ({ \ 789 (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ 790 (__v4sf)(__m128)(B), \ 791 (__v4sf)_mm_setzero_ps(), \ 792 (__mmask8) -1, (int)(C),\ 793 (int)(R)); }) 794 795 #define _mm_range_ss(A ,B , C) _mm_range_round_ss(A, B, C ,_MM_FROUND_CUR_DIRECTION) 796 797 #define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \ 798 (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ 799 (__v4sf)(__m128)(B), \ 800 (__v4sf)(__m128)(W),\ 801 (__mmask8)(U), (int)(C),\ 802 (int)(R)); }) 803 804 #define _mm_mask_range_ss(W , U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C , _MM_FROUND_CUR_DIRECTION) 805 806 #define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \ 807 (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ 808 (__v4sf)(__m128)(B), \ 809 (__v4sf)_mm_setzero_ps(), \ 810 (__mmask8)(U), (int)(C),\ 811 (int)(R)); }) 812 813 #define _mm_maskz_range_ss(U, A ,B , C) _mm_maskz_range_round_ss(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) 814 815 #define _mm_range_round_sd(A, B, C, R) __extension__ ({ \ 816 (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ 817 (__v2df)(__m128d)(B), \ 818 (__v2df)_mm_setzero_pd(), \ 819 (__mmask8) -1, (int)(C),\ 820 (int)(R)); }) 821 822 
/*
 * Scalar double-precision range macros. The forms without "_round_" invoke
 * the matching "_round_" macro with _MM_FROUND_CUR_DIRECTION.
 */
#define _mm_range_sd(A, B, C) \
  _mm_range_round_sd(A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R)); })

#define _mm_mask_range_sd(W, U, A, B, C) \
  _mm_mask_range_round_sd(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R)); })

#define _mm_maskz_range_sd(U, A, B, C) \
  _mm_maskz_range_round_sd(U, A, B, C, _MM_FROUND_CUR_DIRECTION)

/*
 * ---- 512-bit reduce ------------------------------------------------------
 * B is forwarded as the immediate operand of the reduce builtin. W/U are the
 * pass-through vector and write mask; maskz forms pass an all-zero
 * pass-through. The forms without "_round_" invoke the "_round_" macro with
 * _MM_FROUND_CUR_DIRECTION.
 */
#define _mm512_reduce_pd(A, B) \
  _mm512_reduce_round_pd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_reduce_pd(W, U, A, B) \
  _mm512_mask_reduce_round_pd((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_reduce_pd(U, A, B) \
  _mm512_maskz_reduce_round_pd((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm512_reduce_ps(A, B) \
  _mm512_reduce_round_ps((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_reduce_ps(W, U, A, B) \
  _mm512_mask_reduce_round_ps((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_reduce_ps(U, A, B) \
  _mm512_maskz_reduce_round_ps((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm512_reduce_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_reduce_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_reduce_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_reduce_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_reduce_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_reduce_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })

/* ---- 128-bit scalar reduce, single precision ---------------------------- */

#define _mm_reduce_ss(A, B, C) \
  _mm_reduce_round_ss((A), (B), (C), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_reduce_ss(W, U, A, B, C) \
  _mm_mask_reduce_round_ss((W), (U), (A), (B), (C), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_reduce_ss(U, A, B, C) \
  _mm_maskz_reduce_round_ss((U), (A), (B), (C), _MM_FROUND_CUR_DIRECTION)

#define _mm_reduce_round_ss(A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), (int)(R)); })

#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), (int)(R)); })

#define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), (int)(R)); })

/* ---- 128-bit scalar reduce, double precision ---------------------------- */

#define _mm_reduce_sd(A, B, C) \
  _mm_reduce_round_sd((A), (B), (C), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_reduce_sd(W, U, A, B, C) \
  _mm_mask_reduce_round_sd((W), (U), (A), (B), (C), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_reduce_sd(U, A, B, C) \
  _mm_maskz_reduce_round_sd((U), (A), (B), (C), _MM_FROUND_CUR_DIRECTION)

#define _mm_reduce_round_sd(A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), (int)(R)); })

#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) __extension__ ({ \ 972 (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 973 (__v2df)(__m128d)(B), \ 974 (__v2df)(__m128d)(W), (__mmask8)(U), \ 975 (int)(C), (int)(R)); }) 976 977 #define _mm_maskz_reduce_round_sd(U, A, B, C, R) __extension__ ({ \ 978 (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 979 (__v2df)(__m128d)(B), \ 980 (__v2df)_mm_setzero_pd(), \ 981 (__mmask8)(U), (int)(C), (int)(R)); }) 982 983 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 984 _mm512_movepi32_mask (__m512i __A) 985 { 986 return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A); 987 } 988 989 static __inline__ __m512i __DEFAULT_FN_ATTRS 990 _mm512_movm_epi32 (__mmask16 __A) 991 { 992 return (__m512i) __builtin_ia32_cvtmask2d512 (__A); 993 } 994 995 static __inline__ __m512i __DEFAULT_FN_ATTRS 996 _mm512_movm_epi64 (__mmask8 __A) 997 { 998 return (__m512i) __builtin_ia32_cvtmask2q512 (__A); 999 } 1000 1001 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 1002 _mm512_movepi64_mask (__m512i __A) 1003 { 1004 return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A); 1005 } 1006 1007 1008 static __inline__ __m512 __DEFAULT_FN_ATTRS 1009 _mm512_broadcast_f32x2 (__m128 __A) 1010 { 1011 return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, 1012 (__v16sf)_mm512_undefined_ps(), 1013 (__mmask16) -1); 1014 } 1015 1016 static __inline__ __m512 __DEFAULT_FN_ATTRS 1017 _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A) 1018 { 1019 return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, 1020 (__v16sf) 1021 __O, __M); 1022 } 1023 1024 static __inline__ __m512 __DEFAULT_FN_ATTRS 1025 _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A) 1026 { 1027 return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, 1028 (__v16sf)_mm512_setzero_ps (), 1029 __M); 1030 } 1031 1032 static __inline__ __m512 __DEFAULT_FN_ATTRS 1033 _mm512_broadcast_f32x8 (__m256 __A) 1034 { 
1035 return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, 1036 _mm512_undefined_ps(), 1037 (__mmask16) -1); 1038 } 1039 1040 static __inline__ __m512 __DEFAULT_FN_ATTRS 1041 _mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A) 1042 { 1043 return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, 1044 (__v16sf)__O, 1045 __M); 1046 } 1047 1048 static __inline__ __m512 __DEFAULT_FN_ATTRS 1049 _mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A) 1050 { 1051 return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, 1052 (__v16sf)_mm512_setzero_ps (), 1053 __M); 1054 } 1055 1056 static __inline__ __m512d __DEFAULT_FN_ATTRS 1057 _mm512_broadcast_f64x2 (__m128d __A) 1058 { 1059 return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A, 1060 (__v8df)_mm512_undefined_pd(), 1061 (__mmask8) -1); 1062 } 1063 1064 static __inline__ __m512d __DEFAULT_FN_ATTRS 1065 _mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A) 1066 { 1067 return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A, 1068 (__v8df) 1069 __O, __M); 1070 } 1071 1072 static __inline__ __m512d __DEFAULT_FN_ATTRS 1073 _mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) 1074 { 1075 return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A, 1076 (__v8df)_mm512_setzero_ps (), 1077 __M); 1078 } 1079 1080 static __inline__ __m512i __DEFAULT_FN_ATTRS 1081 _mm512_broadcast_i32x2 (__m128i __A) 1082 { 1083 return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A, 1084 (__v16si)_mm512_setzero_si512(), 1085 (__mmask16) -1); 1086 } 1087 1088 static __inline__ __m512i __DEFAULT_FN_ATTRS 1089 _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) 1090 { 1091 return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A, 1092 (__v16si) 1093 __O, __M); 1094 } 1095 1096 static __inline__ __m512i __DEFAULT_FN_ATTRS 1097 _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) 1098 { 
1099 return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A, 1100 (__v16si)_mm512_setzero_si512 (), 1101 __M); 1102 } 1103 1104 static __inline__ __m512i __DEFAULT_FN_ATTRS 1105 _mm512_broadcast_i32x8 (__m256i __A) 1106 { 1107 return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A, 1108 (__v16si)_mm512_setzero_si512(), 1109 (__mmask16) -1); 1110 } 1111 1112 static __inline__ __m512i __DEFAULT_FN_ATTRS 1113 _mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A) 1114 { 1115 return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A, 1116 (__v16si)__O, 1117 __M); 1118 } 1119 1120 static __inline__ __m512i __DEFAULT_FN_ATTRS 1121 _mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A) 1122 { 1123 return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A, 1124 (__v16si) 1125 _mm512_setzero_si512 (), 1126 __M); 1127 } 1128 1129 static __inline__ __m512i __DEFAULT_FN_ATTRS 1130 _mm512_broadcast_i64x2 (__m128i __A) 1131 { 1132 return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A, 1133 (__v8di)_mm512_setzero_si512(), 1134 (__mmask8) -1); 1135 } 1136 1137 static __inline__ __m512i __DEFAULT_FN_ATTRS 1138 _mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A) 1139 { 1140 return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A, 1141 (__v8di) 1142 __O, __M); 1143 } 1144 1145 static __inline__ __m512i __DEFAULT_FN_ATTRS 1146 _mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) 1147 { 1148 return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A, 1149 (__v8di)_mm512_setzero_si512 (), 1150 __M); 1151 } 1152 1153 #define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \ 1154 (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ 1155 (__v8sf)_mm256_setzero_ps(), \ 1156 (__mmask8)-1); }) 1157 1158 #define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \ 1159 (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), 
(int)(imm), \ 1160 (__v8sf)(__m256)(W), \ 1161 (__mmask8)(U)); }) 1162 1163 #define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \ 1164 (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ 1165 (__v8sf)_mm256_setzero_ps(), \ 1166 (__mmask8)(U)); }) 1167 1168 #define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \ 1169 (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ 1170 (int)(imm), \ 1171 (__v2df)_mm_setzero_pd(), \ 1172 (__mmask8)-1); }) 1173 1174 #define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \ 1175 (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ 1176 (int)(imm), \ 1177 (__v2df)(__m128d)(W), \ 1178 (__mmask8)(U)); }) 1179 1180 #define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \ 1181 (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ 1182 (int)(imm), \ 1183 (__v2df)_mm_setzero_pd(), \ 1184 (__mmask8)(U)); }) 1185 1186 #define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \ 1187 (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ 1188 (__v8si)_mm256_setzero_si256(), \ 1189 (__mmask8)-1); }) 1190 1191 #define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \ 1192 (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ 1193 (__v8si)(__m256i)(W), \ 1194 (__mmask8)(U)); }) 1195 1196 #define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \ 1197 (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ 1198 (__v8si)_mm256_setzero_si256(), \ 1199 (__mmask8)(U)); }) 1200 1201 #define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \ 1202 (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ 1203 (int)(imm), \ 1204 (__v2di)_mm_setzero_di(), \ 1205 (__mmask8)-1); }) 1206 1207 #define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \ 1208 (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ 
1209 (int)(imm), \ 1210 (__v2di)(__m128i)(W), \ 1211 (__mmask8)(U)); }) 1212 1213 #define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \ 1214 (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ 1215 (int)(imm), \ 1216 (__v2di)_mm_setzero_di(), \ 1217 (__mmask8)(U)); }) 1218 1219 #define _mm512_insertf32x8(A, B, imm) __extension__ ({ \ 1220 (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \ 1221 (__v8sf)(__m256)(B), (int)(imm), \ 1222 (__v16sf)_mm512_setzero_ps(), \ 1223 (__mmask16)-1); }) 1224 1225 #define _mm512_mask_insertf32x8(W, U, A, B, imm) __extension__ ({ \ 1226 (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \ 1227 (__v8sf)(__m256)(B), (int)(imm), \ 1228 (__v16sf)(__m512)(W), \ 1229 (__mmask16)(U)); }) 1230 1231 #define _mm512_maskz_insertf32x8(U, A, B, imm) __extension__ ({ \ 1232 (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \ 1233 (__v8sf)(__m256)(B), (int)(imm), \ 1234 (__v16sf)_mm512_setzero_ps(), \ 1235 (__mmask16)(U)); }) 1236 1237 #define _mm512_insertf64x2(A, B, imm) __extension__ ({ \ 1238 (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \ 1239 (__v2df)(__m128d)(B), \ 1240 (int)(imm), \ 1241 (__v8df)_mm512_setzero_pd(), \ 1242 (__mmask8)-1); }) 1243 1244 #define _mm512_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \ 1245 (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \ 1246 (__v2df)(__m128d)(B), \ 1247 (int)(imm), \ 1248 (__v8df)(__m512d)(W), \ 1249 (__mmask8)(U)); }) 1250 1251 #define _mm512_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \ 1252 (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \ 1253 (__v2df)(__m128d)(B), \ 1254 (int)(imm), \ 1255 (__v8df)_mm512_setzero_pd(), \ 1256 (__mmask8)(U)); }) 1257 1258 #define _mm512_inserti32x8(A, B, imm) __extension__ ({ \ 1259 (__m512i)__builtin_ia32_inserti32x8_mask((__v16si)(__m512i)(A), \ 1260 (__v8si)(__m256i)(B), (int)(imm), \ 1261 
(__v16si)_mm512_setzero_si512(), \ 1262 (__mmask16)-1); }) 1263 1264 #define _mm512_mask_inserti32x8(W, U, A, B, imm) __extension__ ({ \ 1265 (__m512i)__builtin_ia32_inserti32x8_mask((__v16si)(__m512i)(A), \ 1266 (__v8si)(__m256i)(B), (int)(imm), \ 1267 (__v16si)(__m512i)(W), \ 1268 (__mmask16)(U)); }) 1269 1270 #define _mm512_maskz_inserti32x8(U, A, B, imm) __extension__ ({ \ 1271 (__m512i)__builtin_ia32_inserti32x8_mask((__v16si)(__m512i)(A), \ 1272 (__v8si)(__m256i)(B), (int)(imm), \ 1273 (__v16si)_mm512_setzero_si512(), \ 1274 (__mmask16)(U)); }) 1275 1276 #define _mm512_inserti64x2(A, B, imm) __extension__ ({ \ 1277 (__m512i)__builtin_ia32_inserti64x2_512_mask((__v8di)(__m512i)(A), \ 1278 (__v2di)(__m128i)(B), \ 1279 (int)(imm), \ 1280 (__v8di)_mm512_setzero_si512(), \ 1281 (__mmask8)-1); }) 1282 1283 #define _mm512_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \ 1284 (__m512i)__builtin_ia32_inserti64x2_512_mask((__v8di)(__m512i)(A), \ 1285 (__v2di)(__m128i)(B), \ 1286 (int)(imm), \ 1287 (__v8di)(__m512i)(W), \ 1288 (__mmask8)(U)); }) 1289 1290 #define _mm512_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \ 1291 (__m512i)__builtin_ia32_inserti64x2_512_mask((__v8di)(__m512i)(A), \ 1292 (__v2di)(__m128i)(B), \ 1293 (int)(imm), \ 1294 (__v8di)_mm512_setzero_si512(), \ 1295 (__mmask8)(U)); }) 1296 1297 #define _mm512_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \ 1298 (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ 1299 (int)(imm), (__mmask16)(U)); }) 1300 1301 #define _mm512_fpclass_ps_mask(A, imm) __extension__ ({ \ 1302 (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ 1303 (int)(imm), (__mmask16)-1); }) 1304 1305 #define _mm512_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \ 1306 (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ 1307 (__mmask8)(U)); }) 1308 1309 #define _mm512_fpclass_pd_mask(A, imm) __extension__ ({ \ 1310 
(__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ 1311 (__mmask8)-1); }) 1312 1313 #define _mm_fpclass_sd_mask(A, imm) __extension__ ({ \ 1314 (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ 1315 (__mmask8)-1); }) 1316 1317 #define _mm_mask_fpclass_sd_mask(U, A, imm) __extension__ ({ \ 1318 (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ 1319 (__mmask8)(U)); }) 1320 1321 #define _mm_fpclass_ss_mask(A, imm) __extension__ ({ \ 1322 (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ 1323 (__mmask8)-1); }) 1324 1325 #define _mm_mask_fpclass_ss_mask(U, A, imm) __extension__ ({ \ 1326 (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ 1327 (__mmask8)(U)); }) 1328 1329 #undef __DEFAULT_FN_ATTRS 1330 1331 #endif 1332