1 /*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24 #ifndef __IMMINTRIN_H 25 #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead." 26 #endif 27 28 #ifndef __AVX512DQINTRIN_H 29 #define __AVX512DQINTRIN_H 30 31 /* Define the default attributes for the functions in this file. 
 */
/* All functions in this file are always_inline wrappers that require the
   AVX512DQ target feature. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))

/* Low 64 bits of packed 64-bit integer multiplies.  The mask_ variant
   merges unselected lanes from __W; the maskz_ variant zeroes them
   (see the selectq builtin below). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
  return (__m512i) ((__v8du) __A * (__v8du) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_mullo_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_mullo_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Bitwise XOR of packed double-precision values (performed on the raw
   64-bit lanes), with merge- and zero-masking variants. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_xor_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A ^ (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_xor_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_xor_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Bitwise XOR of packed single-precision values (raw 32-bit lanes). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_xor_ps (__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A ^ (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_xor_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_xor_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

/* Bitwise OR of packed double-precision values (raw 64-bit lanes). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_or_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A | (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_or_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_or_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Bitwise OR of packed single-precision values (raw 32-bit lanes). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_or_ps(__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A | (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_or_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_or_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

/* Bitwise AND of packed double-precision values (raw 64-bit lanes). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_and_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A & (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_and_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_and_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Bitwise AND of packed single-precision values (raw 32-bit lanes). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_and_ps(__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A & (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_and_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_and_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

/* AND-NOT: ~__A & __B on the raw 64-bit lanes of packed doubles.  Note
   the first operand is the one complemented. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_andnot_pd(__m512d __A, __m512d __B) {
  return (__m512d)(~(__v8du)__A & (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_andnot_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_andnot_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* AND-NOT: ~__A & __B on the raw 32-bit lanes of packed floats. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_andnot_ps(__m512 __A, __m512 __B) {
  return (__m512)(~(__v16su)__A & (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_andnot_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_andnot_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

/* Convert packed doubles to packed signed 64-bit integers, using the
   current rounding direction.  The _cvt_round* macros take an explicit
   rounding-mode immediate R instead. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epi64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                                                    (__v8di) _mm512_setzero_si512(),
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                                                    (__v8di) __W,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                                                    (__v8di) _mm512_setzero_si512(),
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundpd_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)(U), (int)(R)); })

/* Convert packed doubles to packed unsigned 64-bit integers. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epu64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                                                     (__v8di) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundpd_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })

/* Convert 8 packed floats (a 256-bit vector) to packed signed 64-bit
   integers (a 512-bit vector). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epi64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                                                    (__v8di) _mm512_setzero_si512(),
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                                                    (__v8di) __W,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                                                    (__v8di) _mm512_setzero_si512(),
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundps_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)(U), (int)(R)); })

/* Convert 8 packed floats to packed unsigned 64-bit integers. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epu64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                                                     (__v8di) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundps_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })


/* Convert packed signed 64-bit integers to packed doubles. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepi64_pd (__m512i __A) {
  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
                                                    (__v8df) _mm512_setzero_pd(),
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
                                                    (__v8df) __W,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
                                                    (__v8df) _mm512_setzero_pd(),
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepi64_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })

/* Convert packed signed 64-bit integers to 8 packed floats (the result
   narrows to a 256-bit vector). */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_cvtepi64_ps (__m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
                                                   (__v8sf) _mm256_setzero_ps(),
                                                   (__mmask8) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
                                                   (__v8sf) __W,
                                                   (__mmask8) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
                                                   (__v8sf) _mm256_setzero_ps(),
                                                   (__mmask8) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepi64_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })


/* Truncating ("cvtt") conversion of packed doubles to packed signed
   64-bit integers. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epi64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                                                     (__v8di) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundpd_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })

/* Truncating conversion of packed doubles to packed unsigned 64-bit
   integers. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epu64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                                                      (__v8di) _mm512_setzero_si512(),
                                                      (__mmask8) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                                                      (__v8di) __W,
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                                                      (__v8di) _mm512_setzero_si512(),
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundpd_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)); })

/* Truncating conversion of 8 packed floats to packed signed 64-bit
   integers. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epi64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                                                     (__v8di) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundps_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })

/* Truncating conversion of 8 packed floats to packed unsigned 64-bit
   integers. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epu64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                                                      (__v8di) _mm512_setzero_si512(),
                                                      (__mmask8) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                                                      (__v8di) __W,
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                                                      (__v8di) _mm512_setzero_si512(),
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundps_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)); })

/* Convert packed unsigned 64-bit integers to packed doubles. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepu64_pd (__m512i __A) {
  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
                                                     (__v8df) _mm512_setzero_pd(),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
                                                     (__v8df) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
                                                     (__v8df) _mm512_setzero_pd(),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepu64_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)); })


#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })


/* Convert packed unsigned 64-bit integers to 8 packed floats (256-bit
   result). */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_cvtepu64_ps (__m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                                                    (__v8sf) _mm256_setzero_ps(),
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                                                    (__v8sf) __W,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                                                    (__v8sf) _mm256_setzero_ps(),
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepu64_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)); })

#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)); })

/* VRANGEPD/VRANGEPS family.  C is the range-operation immediate passed
   straight to the builtin; the *_round_* forms also take an explicit
   rounding-mode immediate R, the others use the current direction. */
#define _mm512_range_pd(A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_range_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_range_pd(U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_range_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm512_mask_range_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm512_maskz_range_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), (int)(R)); })

#define _mm512_range_ps(A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_range_ps(W, U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_range_ps(U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm512_range_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, (int)(R)); })

#define _mm512_mask_range_round_ps(W, U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
                                         (int)(R)); })

#define _mm512_maskz_range_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), (int)(R)); })

/* Scalar VRANGESS/VRANGESD.  Note the scalar builtins take the
   immediate C *after* the mask, unlike the packed forms above.  The
   non-round macros delegate with _MM_FROUND_CUR_DIRECTION. */
#define _mm_range_round_ss(A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8) -1, (int)(C),\
                                               (int)(R)); })

#define _mm_range_ss(A, B, C) _mm_range_round_ss(A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W),\
                                               (__mmask8)(U), (int)(C),\
                                               (int)(R)); })

#define _mm_mask_range_ss(W, U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(C),\
                                               (int)(R)); })

#define _mm_maskz_range_ss(U, A, B, C) _mm_maskz_range_round_ss(U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_range_round_sd(A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8) -1, (int)(C),\
                                                (int)(R)); })

#define _mm_range_sd(A, B, C) _mm_range_round_sd(A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W),\
                                                (__mmask8)(U), (int)(C),\
                                                (int)(R)); })

#define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(C),\
                                                (int)(R)); })

#define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C, _MM_FROUND_CUR_DIRECTION)

/* VREDUCEPD/VREDUCEPS family.  B is the reduce-operation immediate. */
#define _mm512_reduce_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_reduce_pd(W, U, A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_reduce_pd(U, A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_reduce_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_reduce_ps(W, U, A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_reduce_ps(U, A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_reduce_round_pd(A, B, R) __extension__ ({\
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_reduce_round_pd(W, U, A, B, R) __extension__ ({\
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_reduce_round_pd(U, A, B, R) __extension__ ({\
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_reduce_round_ps(A, B, R) __extension__ ({\
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_reduce_round_ps(W, U, A, B, R) __extension__ ({\
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_reduce_round_ps(U, A, B, R) __extension__ ({\
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })

/* Scalar VREDUCESS/VREDUCESD.  Like the scalar range builtins, the
   immediate C follows the mask argument. */
#define _mm_reduce_ss(A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), \
                                       _MM_FROUND_CUR_DIRECTION); })

#define _mm_reduce_round_ss(A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), (int)(R)); })

#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), (int)(R)); })

#define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), (int)(R)); })

#define _mm_reduce_sd(A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_reduce_sd(W, U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
                                        (int)(C), _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_reduce_sd(U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION); })

#define _mm_reduce_round_sd(A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), (int)(R)); })

#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
                                        (int)(C), (int)(R)); })

#define _mm_maskz_reduce_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (int)(C), (int)(R)); })

/* Conversions between vectors and mask registers (VPMOVD2M/VPMOVM2D
   and the 64-bit counterparts), via the cvt*2mask*/cvtmask2* builtins. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_movepi32_mask (__m512i __A)
{
  return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_movm_epi32 (__mmask16 __A)
{
  return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_movm_epi64 (__mmask8 __A)
{
  return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_movepi64_mask (__m512i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
}


/* Broadcast the low two float elements of __A across all eight 64-bit
   pairs of the 512-bit result (shuffle indices 0,1 repeated). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcast_f32x2 (__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf)__A,
                                         (__v4sf)_mm_undefined_ps(),
                                         0, 1, 0, 1, 0, 1, 0, 1,
                                         0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x2(__A),
                                             (__v16sf)__O);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x2(__A),
                                             (__v16sf)_mm512_setzero_ps());
}

/* Duplicate the 256-bit vector __A into both halves of a 512-bit
   result (shuffle indices 0..7 repeated). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcast_f32x8(__m256 __A)
{
  return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
                                         0, 1, 2, 3, 4, 5, 6, 7,
                                         0, 1, 2, 3, 4, 5, 6, 7);
}

/* NOTE(review): definition continues past the visible portion of the
   file. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f32x8(__m512
__O, __mmask16 __M, __m256 __A) 1008 { 1009 return (__m512)__builtin_ia32_selectps_512((__mmask8)__M, 1010 (__v16sf)_mm512_broadcast_f32x8(__A), 1011 (__v16sf)__O); 1012 } 1013 1014 static __inline__ __m512 __DEFAULT_FN_ATTRS 1015 _mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) 1016 { 1017 return (__m512)__builtin_ia32_selectps_512((__mmask8)__M, 1018 (__v16sf)_mm512_broadcast_f32x8(__A), 1019 (__v16sf)_mm512_setzero_ps()); 1020 } 1021 1022 static __inline__ __m512d __DEFAULT_FN_ATTRS 1023 _mm512_broadcast_f64x2(__m128d __A) 1024 { 1025 return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, 1026 0, 1, 0, 1, 0, 1, 0, 1); 1027 } 1028 1029 static __inline__ __m512d __DEFAULT_FN_ATTRS 1030 _mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) 1031 { 1032 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, 1033 (__v8df)_mm512_broadcast_f64x2(__A), 1034 (__v8df)__O); 1035 } 1036 1037 static __inline__ __m512d __DEFAULT_FN_ATTRS 1038 _mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) 1039 { 1040 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, 1041 (__v8df)_mm512_broadcast_f64x2(__A), 1042 (__v8df)_mm512_setzero_pd()); 1043 } 1044 1045 static __inline__ __m512i __DEFAULT_FN_ATTRS 1046 _mm512_broadcast_i32x2 (__m128i __A) 1047 { 1048 return (__m512i)__builtin_shufflevector((__v4si)__A, 1049 (__v4si)_mm_undefined_si128(), 1050 0, 1, 0, 1, 0, 1, 0, 1, 1051 0, 1, 0, 1, 0, 1, 0, 1); 1052 } 1053 1054 static __inline__ __m512i __DEFAULT_FN_ATTRS 1055 _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) 1056 { 1057 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 1058 (__v16si)_mm512_broadcast_i32x2(__A), 1059 (__v16si)__O); 1060 } 1061 1062 static __inline__ __m512i __DEFAULT_FN_ATTRS 1063 _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) 1064 { 1065 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 1066 (__v16si)_mm512_broadcast_i32x2(__A), 1067 (__v16si)_mm512_setzero_si512()); 
1068 } 1069 1070 static __inline__ __m512i __DEFAULT_FN_ATTRS 1071 _mm512_broadcast_i32x8(__m256i __A) 1072 { 1073 return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A, 1074 0, 1, 2, 3, 4, 5, 6, 7, 1075 0, 1, 2, 3, 4, 5, 6, 7); 1076 } 1077 1078 static __inline__ __m512i __DEFAULT_FN_ATTRS 1079 _mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) 1080 { 1081 return (__m512i)__builtin_ia32_selectd_512((__mmask8)__M, 1082 (__v16si)_mm512_broadcast_i32x8(__A), 1083 (__v16si)__O); 1084 } 1085 1086 static __inline__ __m512i __DEFAULT_FN_ATTRS 1087 _mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) 1088 { 1089 return (__m512i)__builtin_ia32_selectd_512((__mmask8)__M, 1090 (__v16si)_mm512_broadcast_i32x8(__A), 1091 (__v16si)_mm512_setzero_si512()); 1092 } 1093 1094 static __inline__ __m512i __DEFAULT_FN_ATTRS 1095 _mm512_broadcast_i64x2(__m128i __A) 1096 { 1097 return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, 1098 0, 1, 0, 1, 0, 1, 0, 1); 1099 } 1100 1101 static __inline__ __m512i __DEFAULT_FN_ATTRS 1102 _mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) 1103 { 1104 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1105 (__v8di)_mm512_broadcast_i64x2(__A), 1106 (__v8di)__O); 1107 } 1108 1109 static __inline__ __m512i __DEFAULT_FN_ATTRS 1110 _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) 1111 { 1112 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1113 (__v8di)_mm512_broadcast_i64x2(__A), 1114 (__v8di)_mm512_setzero_si512()); 1115 } 1116 1117 #define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \ 1118 (__m256)__builtin_shufflevector((__v16sf)(__m512)(A), \ 1119 (__v16sf)_mm512_undefined_ps(), \ 1120 ((imm) & 1) ? 8 : 0, \ 1121 ((imm) & 1) ? 9 : 1, \ 1122 ((imm) & 1) ? 10 : 2, \ 1123 ((imm) & 1) ? 11 : 3, \ 1124 ((imm) & 1) ? 12 : 4, \ 1125 ((imm) & 1) ? 13 : 5, \ 1126 ((imm) & 1) ? 14 : 6, \ 1127 ((imm) & 1) ? 
15 : 7); }) 1128 1129 #define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \ 1130 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 1131 (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \ 1132 (__v8sf)(W)); }) 1133 1134 #define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \ 1135 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 1136 (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \ 1137 (__v8sf)_mm256_setzero_ps()); }) 1138 1139 #define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \ 1140 (__m128d)__builtin_shufflevector((__v8df)(__m512d)(A), \ 1141 (__v8df)_mm512_undefined_pd(), \ 1142 0 + ((imm) & 0x3) * 2, \ 1143 1 + ((imm) & 0x3) * 2); }) 1144 1145 #define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \ 1146 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 1147 (__v2df)_mm512_extractf64x2_pd((A), (imm)), \ 1148 (__v2df)(W)); }) 1149 1150 #define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \ 1151 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 1152 (__v2df)_mm512_extractf64x2_pd((A), (imm)), \ 1153 (__v2df)_mm_setzero_pd()); }) 1154 1155 #define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \ 1156 (__m256i)__builtin_shufflevector((__v16si)(__m512i)(A), \ 1157 (__v16si)_mm512_undefined_epi32(), \ 1158 ((imm) & 1) ? 8 : 0, \ 1159 ((imm) & 1) ? 9 : 1, \ 1160 ((imm) & 1) ? 10 : 2, \ 1161 ((imm) & 1) ? 11 : 3, \ 1162 ((imm) & 1) ? 12 : 4, \ 1163 ((imm) & 1) ? 13 : 5, \ 1164 ((imm) & 1) ? 14 : 6, \ 1165 ((imm) & 1) ? 
15 : 7); }) 1166 1167 #define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \ 1168 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 1169 (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \ 1170 (__v8si)(W)); }) 1171 1172 #define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \ 1173 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 1174 (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \ 1175 (__v8si)_mm256_setzero_si256()); }) 1176 1177 #define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \ 1178 (__m128i)__builtin_shufflevector((__v8di)(__m512i)(A), \ 1179 (__v8di)_mm512_undefined_epi32(), \ 1180 0 + ((imm) & 0x3) * 2, \ 1181 1 + ((imm) & 0x3) * 2); }) 1182 1183 #define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \ 1184 (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \ 1185 (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \ 1186 (__v2di)(W)); }) 1187 1188 #define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \ 1189 (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \ 1190 (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \ 1191 (__v2di)_mm_setzero_di()); }) 1192 1193 #define _mm512_insertf32x8(A, B, imm) __extension__ ({ \ 1194 (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \ 1195 (__v16sf)_mm512_castps256_ps512((__m256)(B)),\ 1196 ((imm) & 0x1) ? 0 : 16, \ 1197 ((imm) & 0x1) ? 1 : 17, \ 1198 ((imm) & 0x1) ? 2 : 18, \ 1199 ((imm) & 0x1) ? 3 : 19, \ 1200 ((imm) & 0x1) ? 4 : 20, \ 1201 ((imm) & 0x1) ? 5 : 21, \ 1202 ((imm) & 0x1) ? 6 : 22, \ 1203 ((imm) & 0x1) ? 7 : 23, \ 1204 ((imm) & 0x1) ? 16 : 8, \ 1205 ((imm) & 0x1) ? 17 : 9, \ 1206 ((imm) & 0x1) ? 18 : 10, \ 1207 ((imm) & 0x1) ? 19 : 11, \ 1208 ((imm) & 0x1) ? 20 : 12, \ 1209 ((imm) & 0x1) ? 21 : 13, \ 1210 ((imm) & 0x1) ? 22 : 14, \ 1211 ((imm) & 0x1) ? 
23 : 15); }) 1212 1213 #define _mm512_mask_insertf32x8(W, U, A, B, imm) __extension__ ({ \ 1214 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 1215 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ 1216 (__v16sf)(W)); }) 1217 1218 #define _mm512_maskz_insertf32x8(U, A, B, imm) __extension__ ({ \ 1219 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 1220 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ 1221 (__v16sf)_mm512_setzero_ps()); }) 1222 1223 #define _mm512_insertf64x2(A, B, imm) __extension__ ({ \ 1224 (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ 1225 (__v8df)_mm512_castpd128_pd512((__m128d)(B)),\ 1226 (((imm) & 0x3) == 0) ? 8 : 0, \ 1227 (((imm) & 0x3) == 0) ? 9 : 1, \ 1228 (((imm) & 0x3) == 1) ? 8 : 2, \ 1229 (((imm) & 0x3) == 1) ? 9 : 3, \ 1230 (((imm) & 0x3) == 2) ? 8 : 4, \ 1231 (((imm) & 0x3) == 2) ? 9 : 5, \ 1232 (((imm) & 0x3) == 3) ? 8 : 6, \ 1233 (((imm) & 0x3) == 3) ? 9 : 7); }) 1234 1235 #define _mm512_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \ 1236 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 1237 (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ 1238 (__v8df)(W)); }) 1239 1240 #define _mm512_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \ 1241 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 1242 (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ 1243 (__v8df)_mm512_setzero_pd()); }) 1244 1245 #define _mm512_inserti32x8(A, B, imm) __extension__ ({ \ 1246 (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ 1247 (__v16si)_mm512_castsi256_si512((__m256i)(B)),\ 1248 ((imm) & 0x1) ? 0 : 16, \ 1249 ((imm) & 0x1) ? 1 : 17, \ 1250 ((imm) & 0x1) ? 2 : 18, \ 1251 ((imm) & 0x1) ? 3 : 19, \ 1252 ((imm) & 0x1) ? 4 : 20, \ 1253 ((imm) & 0x1) ? 5 : 21, \ 1254 ((imm) & 0x1) ? 6 : 22, \ 1255 ((imm) & 0x1) ? 7 : 23, \ 1256 ((imm) & 0x1) ? 16 : 8, \ 1257 ((imm) & 0x1) ? 17 : 9, \ 1258 ((imm) & 0x1) ? 18 : 10, \ 1259 ((imm) & 0x1) ? 19 : 11, \ 1260 ((imm) & 0x1) ? 20 : 12, \ 1261 ((imm) & 0x1) ? 
21 : 13, \ 1262 ((imm) & 0x1) ? 22 : 14, \ 1263 ((imm) & 0x1) ? 23 : 15); }) 1264 1265 #define _mm512_mask_inserti32x8(W, U, A, B, imm) __extension__ ({ \ 1266 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 1267 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \ 1268 (__v16si)(W)); }) 1269 1270 #define _mm512_maskz_inserti32x8(U, A, B, imm) __extension__ ({ \ 1271 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 1272 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \ 1273 (__v16si)_mm512_setzero_si512()); }) 1274 1275 #define _mm512_inserti64x2(A, B, imm) __extension__ ({ \ 1276 (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \ 1277 (__v8di)_mm512_castsi128_si512((__m128i)(B)),\ 1278 (((imm) & 0x3) == 0) ? 8 : 0, \ 1279 (((imm) & 0x3) == 0) ? 9 : 1, \ 1280 (((imm) & 0x3) == 1) ? 8 : 2, \ 1281 (((imm) & 0x3) == 1) ? 9 : 3, \ 1282 (((imm) & 0x3) == 2) ? 8 : 4, \ 1283 (((imm) & 0x3) == 2) ? 9 : 5, \ 1284 (((imm) & 0x3) == 3) ? 8 : 6, \ 1285 (((imm) & 0x3) == 3) ? 9 : 7); }) 1286 1287 #define _mm512_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \ 1288 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 1289 (__v8di)_mm512_inserti64x2((A), (B), (imm)), \ 1290 (__v8di)(W)); }) 1291 1292 #define _mm512_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \ 1293 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 1294 (__v8di)_mm512_inserti64x2((A), (B), (imm)), \ 1295 (__v8di)_mm512_setzero_si512()); }) 1296 1297 #define _mm512_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \ 1298 (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ 1299 (int)(imm), (__mmask16)(U)); }) 1300 1301 #define _mm512_fpclass_ps_mask(A, imm) __extension__ ({ \ 1302 (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ 1303 (int)(imm), (__mmask16)-1); }) 1304 1305 #define _mm512_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \ 1306 (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ 1307 (__mmask8)(U)); }) 1308 1309 
#define _mm512_fpclass_pd_mask(A, imm) __extension__ ({ \ 1310 (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ 1311 (__mmask8)-1); }) 1312 1313 #define _mm_fpclass_sd_mask(A, imm) __extension__ ({ \ 1314 (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ 1315 (__mmask8)-1); }) 1316 1317 #define _mm_mask_fpclass_sd_mask(U, A, imm) __extension__ ({ \ 1318 (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ 1319 (__mmask8)(U)); }) 1320 1321 #define _mm_fpclass_ss_mask(A, imm) __extension__ ({ \ 1322 (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ 1323 (__mmask8)-1); }) 1324 1325 #define _mm_mask_fpclass_ss_mask(U, A, imm) __extension__ ({ \ 1326 (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ 1327 (__mmask8)(U)); }) 1328 1329 #undef __DEFAULT_FN_ATTRS 1330 1331 #endif 1332