/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
/* This header is only meant to be used through <immintrin.h>, the single
 * public entry point for the x86 SIMD intrinsics. */
#ifndef __IMMINTRIN_H
#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512FINTRIN_H
#define __AVX512FINTRIN_H

/* Internal 512-bit (64-byte) element-typed vector types.  Naming is
 * __v<count><element-suffix>; each fills exactly 64 bytes. */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* Public 512-bit vector types: float, double, and generic integer. */
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Write-mask types: one bit per lane (8 x 64-bit lanes, 16 x 32-bit lanes). */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;

/* Rounding mode macros.
*/
/* Rounding-mode immediates for the explicit-rounding ("_round") intrinsic
 * forms.  _MM_FROUND_CUR_DIRECTION selects the mode currently in MXCSR. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04

/* Constants for integer comparison predicates */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,  /* predicate value 3 is intentionally not exposed */
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;

/* Selectors for 32-bit-lane shuffles: _MM_PERM_WXYZ encodes the four source
 * positions W,X,Y,Z (A=0, B=1, C=2, D=3) as an 8-bit immediate, two bits per
 * position, W in the top two bits (value = W*64 + X*16 + Y*4 + Z). */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;

/* Normalization intervals for the getmant (get-mantissa) intrinsics. */
typedef enum
{
  _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

/* Sign control for the getmant intrinsics. */
typedef enum
{
  _MM_MANT_SIGN_src,  /* sign = sign(SRC)             */
  _MM_MANT_SIGN_zero, /* sign = 0                     */
  _MM_MANT_SIGN_nan   /* DEST = NaN if sign(SRC) = 1  */
} _MM_MANTISSA_SIGN_ENUM;

/* Define the default attributes for the functions in this file.
*/
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))

/* Create vectors with repeated elements */

/* Returns a 512-bit integer vector with all bits zero. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_setzero_si512(void)
{
  return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}

#define _mm512_setzero_epi32 _mm512_setzero_si512

/* "Undefined" vectors: contents are unspecified.  Cheaper than zeroing when
 * the result is known to be fully overwritten before use. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_undefined_pd(void)
{
  return (__m512d)__builtin_ia32_undef512();
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_undefined(void)
{
  return (__m512)__builtin_ia32_undef512();
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_undefined_ps(void)
{
  return (__m512)__builtin_ia32_undef512();
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_undefined_epi32(void)
{
  return (__m512i)__builtin_ia32_undef512();
}

/* Broadcast the low 32-bit element of __A to all 16 lanes (shuffle index 0
 * repeated; the second shuffle operand is never selected). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcastd_epi32 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si) __A,
                                          (__v4si)_mm_undefined_si128(),
                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}

/* Merge-masked broadcast: lanes whose mask bit is set receive the broadcast
 * value; the remaining lanes keep the corresponding lane of __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si) _mm512_broadcastd_epi32(__A),
                                             (__v16si) __O);
}

/* Zero-masked broadcast: lanes whose mask bit is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si) _mm512_broadcastd_epi32(__A),
                                             (__v16si) _mm512_setzero_si512());
}

/* Broadcast the low 64-bit element of __A to all 8 lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcastq_epi64 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v2di) __A,
                                          (__v2di) _mm_undefined_si128(),
                                          0, 0, 0, 0, 0, 0, 0, 0);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512(__M,
                                             (__v8di) _mm512_broadcastq_epi64(__A),
                                             (__v8di) __O);

}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512(__M,
                                             (__v8di) _mm512_broadcastq_epi64(__A),
                                             (__v8di) _mm512_setzero_si512());
}

/* Zero-masked broadcast of a 32-bit scalar to all 16 lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
{
  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
                                                           (__v16si)
                                                           _mm512_setzero_si512 (),
                                                           __M);
}

/* Zero-masked broadcast of a 64-bit scalar.  On 32-bit targets there is no
 * 64-bit GPR form, so the memory-operand builtin is used instead. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
{
#ifdef __x86_64__
  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
                                                           (__v8di)
                                                           _mm512_setzero_si512 (),
                                                           __M);
#else
  return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
                                                           (__v8di)
                                                           _mm512_setzero_si512 (),
                                                           __M);
#endif
}

/* All-zero float/double vectors. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_setzero_ps(void)
{
  return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

#define _mm512_setzero _mm512_setzero_ps

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_setzero_pd(void)
{
  return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

/* set1: splat one scalar across every lane of the vector. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_set1_ps(float __w)
{
  return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
                   __w, __w, __w, __w, __w, __w, __w, __w };
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_set1_pd(double __w)
{
  return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi8(char __w)
{
  return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w };
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi16(short __w)
{
  return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w };
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi32(int __s)
{
  return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
                             __s, __s, __s, __s, __s, __s, __s, __s };
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi64(long long __d)
{
  return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
}

/* Broadcast the low float of __A to all 16 lanes. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcastss_ps(__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf) __A,
                                         (__v4sf)_mm_undefined_ps(),
                                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}

/* set4: repeat the four arguments across the vector.  Arguments are given
 * highest-element-first, so element 0 of each group of four is __D. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
{
  return (__m512i)(__v16si)
    { __D, __C, __B, __A, __D, __C, __B, __A,
      __D, __C, __B, __A, __D, __C, __B, __A };
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set4_epi64 (long long __A, long long __B, long long __C,
                   long long __D)
{
  return (__m512i) (__v8di)
    { __D, __C, __B, __A, __D, __C, __B, __A };
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_set4_pd (double __A, double __B, double __C, double __D)
{
  return (__m512d)
    { __D, __C, __B, __A, __D, __C, __B, __A };
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_set4_ps
(float __A, float __B, float __C, float __D)
{
  return (__m512)
    { __D, __C, __B, __A, __D, __C, __B, __A,
      __D, __C, __B, __A, __D, __C, __B, __A };
}

/* setr4: same repeating pattern as set4 but with the arguments given in
 * memory (lowest-element-first) order. */
#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3)                \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3)                \
  _mm512_set4_ps((e3),(e2),(e1),(e0))

/* Broadcast the low double of __A to all 8 lanes. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_broadcastsd_pd(__m128d __A)
{
  return (__m512d)__builtin_shufflevector((__v2df) __A,
                                          (__v2df) _mm_undefined_pd(),
                                          0, 0, 0, 0, 0, 0, 0, 0);
}

/* Cast between vector types */
/* Widening casts keep the low elements and leave the upper elements
 * undefined (-1 shuffle indices); narrowing casts return the low part;
 * same-width casts reinterpret the bits with no conversion. */

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castpd256_pd512(__m256d __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castps256_ps512(__m256 __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
                                 -1, -1, -1, -1, -1, -1, -1, -1);
}

static __inline __m128d __DEFAULT_FN_ATTRS
_mm512_castpd512_pd128(__m512d __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1);
}

static __inline __m256d __DEFAULT_FN_ATTRS
_mm512_castpd512_pd256 (__m512d __A)
{
  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
}

static __inline __m128 __DEFAULT_FN_ATTRS
_mm512_castps512_ps128(__m512 __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
}

static __inline __m256 __DEFAULT_FN_ATTRS
_mm512_castps512_ps256 (__m512 __A)
{
  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castpd_ps (__m512d __A)
{
  return (__m512) (__A);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_castpd_si512 (__m512d __A)
{
  return (__m512i) (__A);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_castpd128_pd512 (__m128d __A)
{
  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castps_pd (__m512 __A)
{
  return (__m512d) (__A);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_castps_si512 (__m512 __A)
{
  return (__m512i) (__A);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_castps128_ps512 (__m128 __A)
{
  return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_castsi128_si512 (__m128i __A)
{
  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_castsi256_si512 (__m256i __A)
{
  return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castsi512_ps (__m512i __A)
{
  return (__m512) (__A);
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castsi512_pd (__m512i __A)
{
  return (__m512d) (__A);
}

static __inline __m128i __DEFAULT_FN_ATTRS
_mm512_castsi512_si128 (__m512i __A)
{
  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
}

static __inline __m256i __DEFAULT_FN_ATTRS
_mm512_castsi512_si256 (__m512i __A)
{
  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
}

/* Plain integer <-> 16-bit mask conversions. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_int2mask(int __a)
{
  return (__mmask16)__a;
}

static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask2int(__mmask16 __a)
{
  return (int)__a;
}

/* Bitwise operators */
static __inline__ __m512i
__DEFAULT_FN_ATTRS
_mm512_and_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a & (__v16su)__b);
}

/* mask_* variants merge with __src where the mask bit is clear;
 * maskz_* variants zero those lanes (implemented via mask_* over zero). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si) _mm512_and_epi32(__a, __b),
                                             (__v16si) __src);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
                                         __k, __a, __b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
                                               (__v8di) _mm512_and_epi64(__a, __b),
                                               (__v8di) __src);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
                                         __k, __a, __b);
}

/* andnot: (~A) & B — note the first operand is the one complemented. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_si512 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_andnot_epi32(__A, __B),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
                                           __U, __A, __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_andnot_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
                                           __U, __A, __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a | (__v16su)__b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si)_mm512_or_epi32(__a, __b),
                                             (__v16si)__src);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_or_epi64(__a, __b),
                                             (__v8di)__src);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a ^ (__v16su)__b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si)_mm512_xor_epi32(__a, __b),
                                             (__v16si)__src);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_xor_epi64(__a, __b),
                                             (__v8di)__src);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
}

/* Whole-register (element-size-agnostic) bitwise ops. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}

/* Arithmetic */

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_add_pd(__m512d __a, __m512d __b)
{
  return
(__m512d)((__v8df)__a + (__v8df)__b);
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_add_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a + (__v16sf)__b);
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mul_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a * (__v8df)__b);
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mul_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a * (__v16sf)__b);
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_sub_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a - (__v8df)__b);
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_sub_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a - (__v16sf)__b);
}

/* Integer add/sub use the *unsigned* element vectors so lane overflow wraps
 * (signed overflow would be undefined behavior in C). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A + (__v8du) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A - (__v8du) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A + (__v16su) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A - (__v16su) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* max with an explicit rounding-mode immediate R (one of the _MM_FROUND_*
 * values); these must be macros because R has to be a compile-time constant
 * for the builtin. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

#define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
(__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

#define _mm512_max_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_undefined_pd(), \
                                        (__mmask8)-1, (int)(R)); })

/* Non-_round forms pass _MM_FROUND_CUR_DIRECTION, i.e. the current MXCSR
 * rounding mode.  The pass-through operand of the unmasked form is
 * irrelevant since the mask is all ones. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_max_pd(__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
                                                 (__v8df) __B,
                                                 (__v8df)
                                                 _mm512_setzero_pd (),
                                                 (__mmask8) -1,
                                                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
                                                 (__v8df) __B,
                                                 (__v8df) __W,
                                                 (__mmask8) __U,
                                                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
                                                 (__v8df) __B,
                                                 (__v8df)
                                                 _mm512_setzero_pd (),
                                                 (__mmask8) __U,
                                                 _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

#define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })

#define _mm512_max_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_undefined_ps(), \
                                       (__mmask16)-1, (int)(R)); })

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_max_ps(__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
                                                (__v16sf) __B,
                                                (__v16sf)
                                                _mm512_setzero_ps (),
                                                (__mmask16) -1,
                                                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
                                                (__v16sf) __B,
                                                (__v16sf) __W,
                                                (__mmask16) __U,
                                                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
                                                (__v16sf) __B,
                                                (__v16sf)
                                                _mm512_setzero_ps (),
                                                (__mmask16) __U,
                                                _MM_FROUND_CUR_DIRECTION);
}

/* Scalar (low-element) max; upper elements pass through from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf) __W,
                                                   (__mmask8) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf) _mm_setzero_ps (),
                                                   (__mmask8) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

#define _mm_max_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
978 (__v4sf)(__m128)(B), \ 979 (__v4sf)_mm_setzero_ps(), \ 980 (__mmask8)(U), (int)(R)); }) 981 982 static __inline__ __m128d __DEFAULT_FN_ATTRS 983 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 984 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, 985 (__v2df) __B, 986 (__v2df) __W, 987 (__mmask8) __U, 988 _MM_FROUND_CUR_DIRECTION); 989 } 990 991 static __inline__ __m128d __DEFAULT_FN_ATTRS 992 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) { 993 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, 994 (__v2df) __B, 995 (__v2df) _mm_setzero_pd (), 996 (__mmask8) __U, 997 _MM_FROUND_CUR_DIRECTION); 998 } 999 1000 #define _mm_max_round_sd(A, B, R) __extension__ ({ \ 1001 (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 1002 (__v2df)(__m128d)(B), \ 1003 (__v2df)_mm_setzero_pd(), \ 1004 (__mmask8)-1, (int)(R)); }) 1005 1006 #define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \ 1007 (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 1008 (__v2df)(__m128d)(B), \ 1009 (__v2df)(__m128d)(W), \ 1010 (__mmask8)(U), (int)(R)); }) 1011 1012 #define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \ 1013 (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 1014 (__v2df)(__m128d)(B), \ 1015 (__v2df)_mm_setzero_pd(), \ 1016 (__mmask8)(U), (int)(R)); }) 1017 1018 static __inline __m512i 1019 __DEFAULT_FN_ATTRS 1020 _mm512_max_epi32(__m512i __A, __m512i __B) 1021 { 1022 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 1023 (__v16si) __B, 1024 (__v16si) 1025 _mm512_setzero_si512 (), 1026 (__mmask16) -1); 1027 } 1028 1029 static __inline__ __m512i __DEFAULT_FN_ATTRS 1030 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1031 { 1032 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 1033 (__v16si) __B, 1034 (__v16si) __W, __M); 1035 } 1036 1037 static __inline__ __m512i __DEFAULT_FN_ATTRS 1038 _mm512_maskz_max_epi32 
 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, (__v16si) __B,
      (__v16si) _mm512_setzero_si512 (), __M);
}

/* Packed unsigned 32-bit integer max. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epu32(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, (__v16si) __B,
      (__v16si) _mm512_setzero_si512 (), (__mmask16) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, (__v16si) __B,
      (__v16si) __W, __M);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, (__v16si) __B,
      (__v16si) _mm512_setzero_si512 (), __M);
}

/* Packed signed 64-bit integer max (8 lanes, __mmask8). */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epi64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, (__v8di) __B,
      (__v8di) _mm512_setzero_si512 (), (__mmask8) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, (__v8di) __B,
      (__v8di) __W, __M);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, (__v8di) __B,
      (__v8di) _mm512_setzero_si512 (), __M);
}

/* Packed unsigned 64-bit integer max. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epu64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, (__v8di) __B,
      (__v8di) _mm512_setzero_si512 (), (__mmask8) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, (__v8di) __B,
      (__v8di) __W, __M);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, (__v8di) __B,
      (__v8di) _mm512_setzero_si512 (), __M);
}

/* Packed double-precision min with explicit rounding control. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

#define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

#define _mm512_min_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_undefined_pd(), \
                                        (__mmask8)-1, (int)(R)); })

/* Packed double-precision min, current rounding mode. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_min_pd(__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, (__v8df) __B,
      (__v8df) _mm512_setzero_pd (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, (__v8df) __B,
      (__v8df) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

/* Packed single-precision min with explicit rounding control. */
#define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })

#define _mm512_min_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_undefined_ps(), \
                                       (__mmask16)-1, (int)(R)); })

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, (__v8df) __B,
      (__v8df) _mm512_setzero_pd (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

/* Packed single-precision min, current rounding mode. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_min_ps(__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, (__v16sf) __B,
      (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, (__v16sf) __B,
      (__v16sf) __W, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, (__v16sf) __B,
      (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

/* Scalar single-precision min (low element). */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) _mm_setzero_ps (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

#define _mm_min_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })

/* Scalar double-precision min (low element). */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, (__v2df) __B,
      (__v2df) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, (__v2df) __B,
      (__v2df) _mm_setzero_pd (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

#define _mm_min_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })

/* Packed signed 32-bit integer min. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epi32(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, (__v16si) __B,
      (__v16si) _mm512_setzero_si512 (), (__mmask16) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, (__v16si) __B,
      (__v16si) __W, __M);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, (__v16si) __B,
      (__v16si) _mm512_setzero_si512 (), __M);
}

/* Packed unsigned 32-bit integer min. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epu32(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, (__v16si) __B,
      (__v16si) _mm512_setzero_si512 (), (__mmask16) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, (__v16si) __B,
      (__v16si) __W, __M);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, (__v16si) __B,
      (__v16si) _mm512_setzero_si512 (), __M);
}

/* Packed signed 64-bit integer min. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epi64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, (__v8di) __B,
      (__v8di) _mm512_setzero_si512 (), (__mmask8) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, (__v8di) __B,
      (__v8di) __W, __M);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, (__v8di) __B,
      (__v8di) _mm512_setzero_si512 (), __M);
}

/* Packed unsigned 64-bit integer min. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epu64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, (__v8di) __B,
      (__v8di) _mm512_setzero_si512 (), (__mmask8) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, (__v8di) __B,
      (__v8di) __W, __M);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, (__v8di) __B,
      (__v8di) _mm512_setzero_si512 (), __M);
}

/* Signed widening multiply: the even (low) 32-bit lane of each 64-bit
   pair, producing full 64-bit products. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  /* Masked forms compose the unmasked op with a per-lane select so the
     backend can fold the mask into the instruction. */
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epi32(__X, __Y),
                                             (__v8di)__W);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epi32(__X, __Y),
                                             (__v8di)_mm512_setzero_si512 ());
}

/* Unsigned widening multiply of the even 32-bit lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epu32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epu32(__X, __Y),
                                             (__v8di)__W);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epu32(__X, __Y),
                                             (__v8di)_mm512_setzero_si512 ());
}

/* Low 32 bits of each 32x32 product; unsigned vector arithmetic so lane
   overflow wraps without signed-overflow UB. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A * (__v16su) __B);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_mullo_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_mullo_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Packed double-precision square root, explicit-rounding forms. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
                                         (int)(R)); })

#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(U), (int)(R)); })

#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_undefined_pd(), \
                                         (__mmask8)-1, (int)(R)); })

/* Packed double-precision square root, current rounding mode. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_sqrt_pd(__m512d __a)
{
  return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
      (__v8df) _mm512_setzero_pd (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
      (__v8df) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
      (__v8df) _mm512_setzero_pd (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

/* Packed single-precision square root. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
                                        (int)(R)); })

#define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(U), (int)(R)); })

#define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_undefined_ps(), \
                                        (__mmask16)-1, (int)(R)); })

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_sqrt_ps(__m512 __a)
{
  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
      (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
      (__v16sf) __W, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
      (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

/* Approximate reciprocal square root (about 14 bits of precision;
   no rounding argument — result is an approximation by design). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rsqrt14_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
      (__v8df) _mm512_setzero_pd (), (__mmask8) -1);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
      (__v8df) __W, (__mmask8) __U);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
      (__v8df) _mm512_setzero_pd (), (__mmask8) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_rsqrt14_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
      (__v16sf) _mm512_setzero_ps (), (__mmask16) -1);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
      (__v16sf) __W, (__mmask16) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
      (__v16sf) _mm512_setzero_ps (), (__mmask16) __U);
}

/* Scalar rsqrt14: builtin computes the low element from __B with upper
   lanes carried through from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rsqrt14_ss(__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) _mm_setzero_ps (), (__mmask8) -1);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) __W, (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) _mm_setzero_ps (), (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rsqrt14_sd(__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A, (__v2df) __B,
      (__v2df) _mm_setzero_pd (), (__mmask8) -1);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, (__v2df) __B,
      (__v2df) __W, (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, (__v2df) __B,
      (__v2df) _mm_setzero_pd (), (__mmask8) __U);
}

/* Approximate reciprocal (about 14 bits of precision). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rcp14_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
      (__v8df) _mm512_setzero_pd (), (__mmask8) -1);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
      (__v8df) __W, (__mmask8) __U);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
      (__v8df) _mm512_setzero_pd (), (__mmask8) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_rcp14_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
      (__v16sf) _mm512_setzero_ps (), (__mmask16) -1);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
      (__v16sf) __W, (__mmask16) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
      (__v16sf) _mm512_setzero_ps (), (__mmask16) __U);
}

/* Scalar rcp14: builtin computes the low element from __B with upper
   lanes carried through from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rcp14_ss(__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) _mm_setzero_ps (), (__mmask8) -1);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) __W, (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) _mm_setzero_ps (), (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rcp14_sd(__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A, (__v2df) __B,
      (__v2df) _mm_setzero_pd (), (__mmask8) -1);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, (__v2df) __B,
      (__v2df) __W, (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, (__v2df) __B,
      (__v2df) _mm_setzero_pd (), (__mmask8) __U);
}

/* floor/ceil via rndscale with a fixed rounding-direction immediate;
   in the unmasked forms the source also serves as the pass-through
   operand since the all-ones mask never selects it. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_floor_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
      _MM_FROUND_FLOOR, (__v16sf) __A, -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
      _MM_FROUND_FLOOR, (__v16sf) __W, __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_floor_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
      _MM_FROUND_FLOOR, (__v8df) __A, -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
      _MM_FROUND_FLOOR, (__v8df) __W, __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
      _MM_FROUND_CEIL, (__v16sf) __W, __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_ceil_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
      _MM_FROUND_CEIL, (__v16sf) __A, -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_ceil_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
      _MM_FROUND_CEIL, (__v8df) __A, -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
      _MM_FROUND_CEIL, (__v8df) __W, __U, _MM_FROUND_CUR_DIRECTION);
}

/* Packed 64-bit integer absolute value. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_abs_epi64(__m512i __A)
{
  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
      (__v8di) _mm512_setzero_si512 (), (__mmask8) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
      (__v8di) __W, (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
      (__v8di) _mm512_setzero_si512 (), (__mmask8) __U);
}

/* Packed 32-bit integer absolute value. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_abs_epi32(__m512i __A)
{
  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
      (__v16si) _mm512_setzero_si512 (), (__mmask16) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
      (__v16si) __W, (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
      (__v16si) _mm512_setzero_si512 (), (__mmask16) __U);
}

/* Scalar single-precision add (low element). */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) _mm_setzero_ps (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

#define _mm_add_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })

/* Scalar double-precision add (low element). */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_add_sd(__m128d __W,
 __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A, (__v2df) __B,
      (__v2df) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A, (__v2df) __B,
      (__v2df) _mm_setzero_pd (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

#define _mm_add_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })

/* Masked packed add: unmasked add composed with a per-lane select so
   the backend can fold the mask into the instruction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_add_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_add_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_add_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_add_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

/* Packed add with explicit rounding control. */
#define _mm512_add_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

#define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

#define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

#define _mm512_add_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })

#define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

#define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })

/* Scalar single-precision subtract (low element). */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) _mm_setzero_ps (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

#define _mm_sub_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })

/* Scalar double-precision subtract (low element). */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A, (__v2df) __B,
      (__v2df) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A, (__v2df) __B,
      (__v2df) _mm_setzero_pd (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

#define _mm_sub_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U),
(int)(R)); }) 2105 2106 #define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \ 2107 (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2108 (__v2df)(__m128d)(B), \ 2109 (__v2df)_mm_setzero_pd(), \ 2110 (__mmask8)(U), (int)(R)); }) 2111 2112 static __inline__ __m512d __DEFAULT_FN_ATTRS 2113 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2114 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2115 (__v8df)_mm512_sub_pd(__A, __B), 2116 (__v8df)__W); 2117 } 2118 2119 static __inline__ __m512d __DEFAULT_FN_ATTRS 2120 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2121 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2122 (__v8df)_mm512_sub_pd(__A, __B), 2123 (__v8df)_mm512_setzero_pd()); 2124 } 2125 2126 static __inline__ __m512 __DEFAULT_FN_ATTRS 2127 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2128 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2129 (__v16sf)_mm512_sub_ps(__A, __B), 2130 (__v16sf)__W); 2131 } 2132 2133 static __inline__ __m512 __DEFAULT_FN_ATTRS 2134 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2135 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2136 (__v16sf)_mm512_sub_ps(__A, __B), 2137 (__v16sf)_mm512_setzero_ps()); 2138 } 2139 2140 #define _mm512_sub_round_pd(A, B, R) __extension__ ({ \ 2141 (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \ 2142 (__v8df)(__m512d)(B), \ 2143 (__v8df)_mm512_setzero_pd(), \ 2144 (__mmask8)-1, (int)(R)); }) 2145 2146 #define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \ 2147 (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \ 2148 (__v8df)(__m512d)(B), \ 2149 (__v8df)(__m512d)(W), (__mmask8)(U), \ 2150 (int)(R)); }) 2151 2152 #define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \ 2153 (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \ 2154 (__v8df)(__m512d)(B), \ 2155 (__v8df)_mm512_setzero_pd(), \ 2156 
(__mmask8)(U), (int)(R)); }) 2157 2158 #define _mm512_sub_round_ps(A, B, R) __extension__ ({ \ 2159 (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \ 2160 (__v16sf)(__m512)(B), \ 2161 (__v16sf)_mm512_setzero_ps(), \ 2162 (__mmask16)-1, (int)(R)); }) 2163 2164 #define _mm512_mask_sub_round_ps(W, U, A, B, R) __extension__ ({ \ 2165 (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \ 2166 (__v16sf)(__m512)(B), \ 2167 (__v16sf)(__m512)(W), (__mmask16)(U), \ 2168 (int)(R)); }); 2169 2170 #define _mm512_maskz_sub_round_ps(U, A, B, R) __extension__ ({ \ 2171 (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \ 2172 (__v16sf)(__m512)(B), \ 2173 (__v16sf)_mm512_setzero_ps(), \ 2174 (__mmask16)(U), (int)(R)); }); 2175 2176 static __inline__ __m128 __DEFAULT_FN_ATTRS 2177 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 2178 return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A, 2179 (__v4sf) __B, 2180 (__v4sf) __W, 2181 (__mmask8) __U, 2182 _MM_FROUND_CUR_DIRECTION); 2183 } 2184 2185 static __inline__ __m128 __DEFAULT_FN_ATTRS 2186 _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) { 2187 return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A, 2188 (__v4sf) __B, 2189 (__v4sf) _mm_setzero_ps (), 2190 (__mmask8) __U, 2191 _MM_FROUND_CUR_DIRECTION); 2192 } 2193 #define _mm_mul_round_ss(A, B, R) __extension__ ({ \ 2194 (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2195 (__v4sf)(__m128)(B), \ 2196 (__v4sf)_mm_setzero_ps(), \ 2197 (__mmask8)-1, (int)(R)); }) 2198 2199 #define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \ 2200 (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2201 (__v4sf)(__m128)(B), \ 2202 (__v4sf)(__m128)(W), (__mmask8)(U), \ 2203 (int)(R)); }) 2204 2205 #define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \ 2206 (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2207 (__v4sf)(__m128)(B), \ 2208 (__v4sf)_mm_setzero_ps(), \ 2209 
(__mmask8)(U), (int)(R)); }) 2210 2211 static __inline__ __m128d __DEFAULT_FN_ATTRS 2212 _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 2213 return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A, 2214 (__v2df) __B, 2215 (__v2df) __W, 2216 (__mmask8) __U, 2217 _MM_FROUND_CUR_DIRECTION); 2218 } 2219 2220 static __inline__ __m128d __DEFAULT_FN_ATTRS 2221 _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) { 2222 return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A, 2223 (__v2df) __B, 2224 (__v2df) _mm_setzero_pd (), 2225 (__mmask8) __U, 2226 _MM_FROUND_CUR_DIRECTION); 2227 } 2228 2229 #define _mm_mul_round_sd(A, B, R) __extension__ ({ \ 2230 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2231 (__v2df)(__m128d)(B), \ 2232 (__v2df)_mm_setzero_pd(), \ 2233 (__mmask8)-1, (int)(R)); }) 2234 2235 #define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \ 2236 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2237 (__v2df)(__m128d)(B), \ 2238 (__v2df)(__m128d)(W), \ 2239 (__mmask8)(U), (int)(R)); }) 2240 2241 #define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \ 2242 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2243 (__v2df)(__m128d)(B), \ 2244 (__v2df)_mm_setzero_pd(), \ 2245 (__mmask8)(U), (int)(R)); }) 2246 2247 static __inline__ __m512d __DEFAULT_FN_ATTRS 2248 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2249 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2250 (__v8df)_mm512_mul_pd(__A, __B), 2251 (__v8df)__W); 2252 } 2253 2254 static __inline__ __m512d __DEFAULT_FN_ATTRS 2255 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2256 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2257 (__v8df)_mm512_mul_pd(__A, __B), 2258 (__v8df)_mm512_setzero_pd()); 2259 } 2260 2261 static __inline__ __m512 __DEFAULT_FN_ATTRS 2262 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2263 return 
(__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2264 (__v16sf)_mm512_mul_ps(__A, __B), 2265 (__v16sf)__W); 2266 } 2267 2268 static __inline__ __m512 __DEFAULT_FN_ATTRS 2269 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2270 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2271 (__v16sf)_mm512_mul_ps(__A, __B), 2272 (__v16sf)_mm512_setzero_ps()); 2273 } 2274 2275 #define _mm512_mul_round_pd(A, B, R) __extension__ ({ \ 2276 (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ 2277 (__v8df)(__m512d)(B), \ 2278 (__v8df)_mm512_setzero_pd(), \ 2279 (__mmask8)-1, (int)(R)); }) 2280 2281 #define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \ 2282 (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ 2283 (__v8df)(__m512d)(B), \ 2284 (__v8df)(__m512d)(W), (__mmask8)(U), \ 2285 (int)(R)); }) 2286 2287 #define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \ 2288 (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ 2289 (__v8df)(__m512d)(B), \ 2290 (__v8df)_mm512_setzero_pd(), \ 2291 (__mmask8)(U), (int)(R)); }) 2292 2293 #define _mm512_mul_round_ps(A, B, R) __extension__ ({ \ 2294 (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ 2295 (__v16sf)(__m512)(B), \ 2296 (__v16sf)_mm512_setzero_ps(), \ 2297 (__mmask16)-1, (int)(R)); }) 2298 2299 #define _mm512_mask_mul_round_ps(W, U, A, B, R) __extension__ ({ \ 2300 (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ 2301 (__v16sf)(__m512)(B), \ 2302 (__v16sf)(__m512)(W), (__mmask16)(U), \ 2303 (int)(R)); }); 2304 2305 #define _mm512_maskz_mul_round_ps(U, A, B, R) __extension__ ({ \ 2306 (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ 2307 (__v16sf)(__m512)(B), \ 2308 (__v16sf)_mm512_setzero_ps(), \ 2309 (__mmask16)(U), (int)(R)); }); 2310 2311 static __inline__ __m128 __DEFAULT_FN_ATTRS 2312 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 2313 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A, 
2314 (__v4sf) __B, 2315 (__v4sf) __W, 2316 (__mmask8) __U, 2317 _MM_FROUND_CUR_DIRECTION); 2318 } 2319 2320 static __inline__ __m128 __DEFAULT_FN_ATTRS 2321 _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) { 2322 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A, 2323 (__v4sf) __B, 2324 (__v4sf) _mm_setzero_ps (), 2325 (__mmask8) __U, 2326 _MM_FROUND_CUR_DIRECTION); 2327 } 2328 2329 #define _mm_div_round_ss(A, B, R) __extension__ ({ \ 2330 (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2331 (__v4sf)(__m128)(B), \ 2332 (__v4sf)_mm_setzero_ps(), \ 2333 (__mmask8)-1, (int)(R)); }) 2334 2335 #define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \ 2336 (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2337 (__v4sf)(__m128)(B), \ 2338 (__v4sf)(__m128)(W), (__mmask8)(U), \ 2339 (int)(R)); }) 2340 2341 #define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \ 2342 (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2343 (__v4sf)(__m128)(B), \ 2344 (__v4sf)_mm_setzero_ps(), \ 2345 (__mmask8)(U), (int)(R)); }) 2346 2347 static __inline__ __m128d __DEFAULT_FN_ATTRS 2348 _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 2349 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A, 2350 (__v2df) __B, 2351 (__v2df) __W, 2352 (__mmask8) __U, 2353 _MM_FROUND_CUR_DIRECTION); 2354 } 2355 2356 static __inline__ __m128d __DEFAULT_FN_ATTRS 2357 _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { 2358 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A, 2359 (__v2df) __B, 2360 (__v2df) _mm_setzero_pd (), 2361 (__mmask8) __U, 2362 _MM_FROUND_CUR_DIRECTION); 2363 } 2364 2365 #define _mm_div_round_sd(A, B, R) __extension__ ({ \ 2366 (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2367 (__v2df)(__m128d)(B), \ 2368 (__v2df)_mm_setzero_pd(), \ 2369 (__mmask8)-1, (int)(R)); }) 2370 2371 #define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \ 2372 
(__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2373 (__v2df)(__m128d)(B), \ 2374 (__v2df)(__m128d)(W), \ 2375 (__mmask8)(U), (int)(R)); }) 2376 2377 #define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \ 2378 (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2379 (__v2df)(__m128d)(B), \ 2380 (__v2df)_mm_setzero_pd(), \ 2381 (__mmask8)(U), (int)(R)); }) 2382 2383 static __inline __m512d __DEFAULT_FN_ATTRS 2384 _mm512_div_pd(__m512d __a, __m512d __b) 2385 { 2386 return (__m512d)((__v8df)__a/(__v8df)__b); 2387 } 2388 2389 static __inline__ __m512d __DEFAULT_FN_ATTRS 2390 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2391 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2392 (__v8df)_mm512_div_pd(__A, __B), 2393 (__v8df)__W); 2394 } 2395 2396 static __inline__ __m512d __DEFAULT_FN_ATTRS 2397 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2398 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2399 (__v8df)_mm512_div_pd(__A, __B), 2400 (__v8df)_mm512_setzero_pd()); 2401 } 2402 2403 static __inline __m512 __DEFAULT_FN_ATTRS 2404 _mm512_div_ps(__m512 __a, __m512 __b) 2405 { 2406 return (__m512)((__v16sf)__a/(__v16sf)__b); 2407 } 2408 2409 static __inline__ __m512 __DEFAULT_FN_ATTRS 2410 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2411 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2412 (__v16sf)_mm512_div_ps(__A, __B), 2413 (__v16sf)__W); 2414 } 2415 2416 static __inline__ __m512 __DEFAULT_FN_ATTRS 2417 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2418 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2419 (__v16sf)_mm512_div_ps(__A, __B), 2420 (__v16sf)_mm512_setzero_ps()); 2421 } 2422 2423 #define _mm512_div_round_pd(A, B, R) __extension__ ({ \ 2424 (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \ 2425 (__v8df)(__m512d)(B), \ 2426 (__v8df)_mm512_setzero_pd(), \ 2427 (__mmask8)-1, (int)(R)); }) 
#define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

#define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

#define _mm512_div_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })

/* NOTE: no trailing ';' after "})": these statement-expression macros must
   stay usable inside larger expressions (function arguments, '?:', etc.). */
#define _mm512_mask_div_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

#define _mm512_maskz_div_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })

/* roundscale: round each element to a number of fraction bits given by the
 * immediate operand.  The immediate must be a compile-time constant, hence
 * macros.  Argument order of the masked forms is (dest, mask, src, imm),
 * matching the rest of this file. */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                         (__v16sf)(__m512)(A), (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({\
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({\
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         (int)(R)); })

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), (int)(R)); })

#define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, (int)(R)); })

#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                          (__v8df)(__m512d)(A), (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({\
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({\
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          (int)(R)); })

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), (int)(R)); })

#define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, (int)(R)); })
/*
 * Fused multiply-add family.  The variant is selected by negating operands of
 * the same vfmadd builtin:
 *   fmadd:  (A *  B) + C        fmsub:  (A *  B) - C
 *   fnmadd: (-A * B) + C        fnmsub: (-A * B) - C
 * _mask / _mask3 / _maskz pick the builtin flavor; per the AVX-512 intrinsic
 * conventions the merge source differs between _mask and _mask3 and _maskz
 * zeroes inactive lanes (NOTE(review): semantics live inside the builtins,
 * not visible here — confirm against the Intel intrinsics guide).
 * *_round_* macros take an immediate rounding mode R; the plain functions use
 * _MM_FROUND_CUR_DIRECTION.
 */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), (__mmask8)-1, (int)(R)); })

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })

/* fmsub: C is negated. */
#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), (__mmask8)-1, (int)(R)); })

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })

/* fnmadd: A is negated. */
#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), (__mmask8)-1, (int)(R)); })

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })

/* fnmsub: both A and C are negated. */
#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), (__mmask8)-1, (int)(R)); })

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })

/* Function forms of the above, using the current rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
      (__v8df) __B, (__v8df) __C, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
      (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
      (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
      (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
      (__v8df) __B, -(__v8df) __C, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
      (__v8df) __B, -(__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
      (__v8df) __B, -(__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
      (__v8df) __B, (__v8df) __C, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
      (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
      (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
      (__v8df) __B, -(__v8df) __C, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
      (__v8df) __B, -(__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

/* Single-precision counterparts of the FMA family above. */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), (__mmask16)-1, (int)(R)); })

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), (__mmask16)(U), (int)(R)); })

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), (__mmask16)(U), (int)(R)); })

#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), (__mmask16)-1, (int)(R)); })

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), (__mmask16)(U), (int)(R)); })

#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), (__mmask16)-1, (int)(R)); })

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), (__mmask16)(U), (int)(R)); })

#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), (__mmask16)-1, (int)(R)); })

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), (__mmask16)(U), (int)(R)); })

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
      (__v16sf) __B, (__v16sf) __C, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
      (__v16sf) __B, (__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
      (__v16sf) __B, (__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
      (__v16sf) __B, (__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
      (__v16sf) __B, -(__v16sf) __C, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
      (__v16sf) __B, -(__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
      (__v16sf) __B, -(__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
      (__v16sf) __B, (__v16sf) __C, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
      (__v16sf) __B, (__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
      (__v16sf) __B, (__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
      (__v16sf) __B, -(__v16sf) __C, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
      (__v16sf) __B, -(__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

/* fmaddsub / fmsubadd: alternating add/subtract FMA (vfmaddsub builtins);
 * fmsubadd is expressed as fmaddsub with C negated. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), (__mmask8)-1, (int)(R)); })

#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })

#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), (__mmask8)-1, (int)(R)); })

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
      (__v8df) __B, (__v8df) __C, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
      (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
      (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}
/* Remaining double-precision fmaddsub/fmsubadd function forms (see the
 * comment block introducing the FMA family above for naming conventions). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
      (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  /* fmsubadd = fmaddsub with C negated. */
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
      (__v8df) __B, -(__v8df) __C, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
      (__v8df) __B, -(__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
      (__v8df) __B, -(__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

/* Single-precision fmaddsub with explicit rounding mode R (immediate). */
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), (__mmask16)-1, (int)(R)); })

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), (__mmask16)(U), (int)(R)); })

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), (__mmask16)(U), (int)(R)); })
/* Zero-masking single-precision fmaddsub with explicit rounding. */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })


/* fmsubadd (ps): fmaddsub builtin with C negated; all-lanes mask. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R)); })


#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R)); })


#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })


/* Non-rounding single-precision fmaddsub/fmsubadd wrappers; rounding is the
   current MXCSR direction. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      (__v16sf) __C,
                                                      (__mmask16) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      (__v16sf) __C,
                                                      (__mmask16) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
                                                       (__v16sf) __B,
                                                       (__v16sf) __C,
                                                       (__mmask16) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
                                                       (__v16sf) __B,
                                                       (__v16sf) __C,
                                                       (__mmask16) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      -(__v16sf) __C,
                                                      (__mmask16) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      -(__v16sf) __C,
                                                      (__mmask16) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
                                                       (__v16sf) __B,
                                                       -(__v16sf) __C,
                                                       (__mmask16) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/* mask3 fmsub (pd): dedicated vfmsubpd512_mask3 builtin — note C is NOT
   negated here, unlike the fmsubadd emulation above. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* mask3 fmsub (ps) with explicit rounding. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })
/* mask3 fmsub (ps), current rounding direction. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   (__v16sf) __C,
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* mask3 fmsubadd: dedicated vfmsubadd*_mask3 builtins (the only fmsubadd
   forms that cannot be expressed by negating C of fmaddsub, because the
   preserved-through-mask source is C itself). */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)); })


static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
                                                        (__v8df) __B,
                                                        (__v8df) __C,
                                                        (__mmask8) __U,
                                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })


static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
                                                       (__v16sf) __B,
                                                       (__v16sf) __C,
                                                       (__mmask16) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking fnmadd (pd): dedicated vfnmaddpd512_mask builtin. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}
/* Merge-masking fnmadd (ps) with explicit rounding. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* fnmsub (pd): merge-masking (_mask, preserves A) and mask3 (_mask3,
   preserves C) forms with explicit rounding. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)); })


static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
                                                      (__v8df) __B,
                                                      (__v8df) __C,
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/* fnmsub (ps) round-mode macros and current-direction wrappers. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)); })


static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
                                                     (__v16sf) __B,
                                                     (__v16sf) __C,
                                                     (__mmask16) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}



/* Vector permutations */

/* Two-source permute: note the builtin takes the index vector __I FIRST, then
   the two data sources — the intrinsic argument order (A, I, B) is rotated. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
                                                       /* idx */ ,
                                                       (__v16si) __A,
                                                       (__v16si) __B,
                                                       (__mmask16) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
                                __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
                                                       /* idx */ ,
                                                       (__v16si) __A,
                                                       (__v16si) __B,
                                                       (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
                                 __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
                                                        /* idx */ ,
                                                        (__v16si) __A,
                                                        (__v16si) __B,
                                                        (__mmask16) __U);
}

/* 64-bit-element two-source permute, same index-first builtin convention. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
                                                       /* idx */ ,
                                                       (__v8di) __A,
                                                       (__v8di) __B,
                                                       (__mmask8) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
                                __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
                                                       /* idx */ ,
                                                       (__v8di) __A,
                                                       (__v8di) __B,
                                                       (__mmask8) __U);
}


static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
                                 __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
                                                        /* idx */ ,
                                                        (__v8di) __A,
                                                        (__v8di) __B,
                                                        (__mmask8) __U);
}

/* valignq emulated via shufflevector over the concatenation {B, A}; the shift
   count is reduced modulo 8 by the (I) & 0x7 mask. */
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \
                                   (__v8di)(__m512i)(A), \
                                   ((int)(I) & 0x7) + 0, \
                                   ((int)(I) & 0x7) + 1, \
                                   ((int)(I) & 0x7) + 2, \
                                   ((int)(I) & 0x7) + 3, \
                                   ((int)(I) & 0x7) + 4, \
                                   ((int)(I) & 0x7) + 5, \
                                   ((int)(I) & 0x7) + 6, \
                                   ((int)(I) & 0x7) + 7); })

/* Mask forms layer a select over the unmasked result. */
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)); })

#define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()); })

/* valignd: 32-bit-element variant, shift reduced modulo 16. */
#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \
                                   (__v16si)(__m512i)(A), \
                                   ((int)(I) & 0xf) + 0, \
                                   ((int)(I) & 0xf) + 1, \
                                   ((int)(I) & 0xf) + 2, \
                                   ((int)(I) & 0xf) + 3, \
                                   ((int)(I) & 0xf) + 4, \
                                   ((int)(I) & 0xf) + 5, \
                                   ((int)(I) & 0xf) + 6, \
                                   ((int)(I) & 0xf) + 7, \
                                   ((int)(I) & 0xf) + 8, \
                                   ((int)(I) & 0xf) + 9, \
                                   ((int)(I) & 0xf) + 10, \
                                   ((int)(I) & 0xf) + 11, \
                                   ((int)(I) & 0xf) + 12, \
                                   ((int)(I) & 0xf) + 13, \
                                   ((int)(I) & 0xf) + 14, \
                                   ((int)(I) & 0xf) + 15); })

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)); })

#define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()); })
/* Vector Extract */

/* Extract 256-bit lane: bit 0 of I picks the high (indices 4..7) or low
   (0..3) half of the 8 doubles. */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
  (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   ((I) & 1) ? 4 : 0, \
                                   ((I) & 1) ? 5 : 1, \
                                   ((I) & 1) ? 6 : 2, \
                                   ((I) & 1) ? 7 : 3); })

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                   (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
                                   (__v4df)(W)); })

#define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                   (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
                                   (__v4df)_mm256_setzero_pd()); })

/* Extract 128-bit lane: I & 0x3 selects one of four 4-float groups. */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
  (__m128)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)_mm512_undefined_ps(), \
                                  0 + ((I) & 0x3) * 4, \
                                  1 + ((I) & 0x3) * 4, \
                                  2 + ((I) & 0x3) * 4, \
                                  3 + ((I) & 0x3) * 4); })

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                   (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
                                   (__v4sf)(W)); })

#define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                   (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
                                   (__v4sf)_mm_setzero_ps()); })

/* Vector Blend */

/* Per-lane select: mask bit set -> element from __W, clear -> from __A. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
                                                (__v8df) __W,
                                                (__v8df) __A);
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
                                               (__v16sf) __W,
                                               (__v16sf) __A);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                                               (__v8di) __W,
                                               (__v8di) __A);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                                               (__v16si) __W,
                                               (__v16si) __A);
}

/* Compare */

/* Float compare producing a 16-bit lane mask; P is a _CMP_* predicate and R a
   rounding/SAE control. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)(U), (int)(R)); })

/* Non-rounding forms forward with the current MXCSR direction. */
#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Named-predicate shorthands over _mm512_cmp_ps_mask. */
#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)
/* Remaining named float-compare predicates (ordered/unordered signalling per
   the _CMP_* suffix conventions). */
#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)

/* Double compare producing an 8-bit lane mask. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)(U), (int)(R)); })

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Named-predicate shorthands over _mm512_cmp_pd_mask. */
#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)

/* Conversion */

/* Truncating float -> unsigned int conversion with explicit rounding. */
#define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_undefined_epi32(), \
                                             (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)); })


static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epu32(__m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
                                                      (__v16si)
                                                      _mm512_setzero_si512 (),
                                                      (__mmask16) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
                                                      (__v16si) __W,
                                                      (__mmask16) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
                                           (__v16si) _mm512_setzero_si512 (),
                                           (__mmask16) __U,
                                           _MM_FROUND_CUR_DIRECTION);
}

/* Signed int -> float conversion with explicit rounding. */
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })

/* Unsigned int -> float conversion with explicit rounding. */
#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_cvtepu32_ps (__m512i __A)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
                                                    (__v16sf) _mm512_undefined_ps (),
                                                    (__mmask16) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
                                                    (__v16sf) __W,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
                                                    (__v16sf) _mm512_setzero_ps (),
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* 8 x i32 -> 8 x double widening conversion; exact, so no rounding argument —
   a plain vector convert plus a select for the masked forms. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepi32_pd(__m256i __A)
{
  return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepi32_pd(__A),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepi32_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

/* "lo" forms convert only the low 256 bits of a 512-bit integer vector. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepi32lo_pd(__m512i __A)
{
  return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
{
  return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_cvtepi32_ps (__m512i __A)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
                                                   (__v16sf) _mm512_undefined_ps (),
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
                                                   (__v16sf) __W,
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
                                                   (__v16sf) _mm512_setzero_ps (),
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* 8 x u32 -> 8 x double: unsigned source expressed via the __v8su cast. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepu32_pd(__m256i __A)
{
  return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepu32_pd(__A),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepu32_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepu32lo_pd(__m512i __A)
{
  return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
{
  return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
}

/* Narrowing double -> float conversion (8 lanes into a __m256). */
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_cvtpd_ps (__m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                                                   (__v8sf) _mm256_undefined_ps (),
                                                   (__mmask8) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                                                   (__v8sf) __W,
                                                   (__mmask8) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                                                   (__v8sf) _mm256_setzero_ps (),
                                                   (__mmask8) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* "pslo": narrowed 8 floats placed in the low half of a __m512, the high
   half zeroed via the shuffle with _mm256_setzero_ps(). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_cvtpd_pslo (__m512d __A)
{
  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
                (__v8sf) _mm256_setzero_ps (),
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
{
  return (__m512) __builtin_shufflevector (
                (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
                                               __U, __A),
                (__v8sf) _mm256_setzero_ps (),
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

/* float -> half (F16C-style) conversion; I is the immediate rounding control.
   NOTE(review): the mask/dest argument order here is (U=dest, W=mask) —
   historical naming quirk preserved from upstream; verify against callers. */
#define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_undefined_si256(), \
                                            (__mmask16)-1); })

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(U), \
                                            (__mmask16)(W)); })

#define _mm512_maskz_cvt_roundps_ph(W, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(W)); })

#define _mm512_cvtps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)-1); })

#define _mm512_mask_cvtps_ph(U, W, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(U), \
                                            (__mmask16)(W)); })

#define _mm512_maskz_cvtps_ph(W, A, I) __extension__ ({\
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(W)); })

/* half -> float conversion with explicit rounding. */
#define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })


static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_cvtph_ps(__m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                                                    (__v16sf)
                                                    _mm512_setzero_ps (),
                                                    (__mmask16) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                                                    (__v16sf) __W,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                                                    (__v16sf) _mm512_setzero_ps (),
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Truncating double -> signed int conversion (8 lanes into a __m256i). */
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)); })

static __inline __m256i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epi32(__m512d __a)
{
  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
                                                   (__v8si)_mm256_setzero_si256(),
                                                   (__mmask8) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
                                                     (__v8si) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
                                                     (__v8si) _mm256_setzero_si256 (),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* Truncating float -> signed int conversion. */
#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)); })

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epi32(__m512 __a)
{
  return (__m512i)
    __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
                                     (__v16si) _mm512_setzero_si512 (),
                                     (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
                                                     (__v16si) __W,
                                                     (__mmask16) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS 4049 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) 4050 { 4051 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 4052 (__v16si) _mm512_setzero_si512 (), 4053 (__mmask16) __U, 4054 _MM_FROUND_CUR_DIRECTION); 4055 } 4056 4057 #define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \ 4058 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 4059 (__v16si)_mm512_setzero_si512(), \ 4060 (__mmask16)-1, (int)(R)); }) 4061 4062 #define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \ 4063 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 4064 (__v16si)(__m512i)(W), \ 4065 (__mmask16)(U), (int)(R)); }) 4066 4067 #define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \ 4068 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 4069 (__v16si)_mm512_setzero_si512(), \ 4070 (__mmask16)(U), (int)(R)); }) 4071 4072 static __inline__ __m512i __DEFAULT_FN_ATTRS 4073 _mm512_cvtps_epi32 (__m512 __A) 4074 { 4075 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 4076 (__v16si) _mm512_undefined_epi32 (), 4077 (__mmask16) -1, 4078 _MM_FROUND_CUR_DIRECTION); 4079 } 4080 4081 static __inline__ __m512i __DEFAULT_FN_ATTRS 4082 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) 4083 { 4084 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 4085 (__v16si) __W, 4086 (__mmask16) __U, 4087 _MM_FROUND_CUR_DIRECTION); 4088 } 4089 4090 static __inline__ __m512i __DEFAULT_FN_ATTRS 4091 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) 4092 { 4093 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 4094 (__v16si) 4095 _mm512_setzero_si512 (), 4096 (__mmask16) __U, 4097 _MM_FROUND_CUR_DIRECTION); 4098 } 4099 4100 #define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \ 4101 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 4102 (__v8si)_mm256_setzero_si256(), \ 4103 (__mmask8)-1, (int)(R)); }) 4104 
4105 #define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \ 4106 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 4107 (__v8si)(__m256i)(W), \ 4108 (__mmask8)(U), (int)(R)); }) 4109 4110 #define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \ 4111 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 4112 (__v8si)_mm256_setzero_si256(), \ 4113 (__mmask8)(U), (int)(R)); }) 4114 4115 static __inline__ __m256i __DEFAULT_FN_ATTRS 4116 _mm512_cvtpd_epi32 (__m512d __A) 4117 { 4118 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 4119 (__v8si) 4120 _mm256_undefined_si256 (), 4121 (__mmask8) -1, 4122 _MM_FROUND_CUR_DIRECTION); 4123 } 4124 4125 static __inline__ __m256i __DEFAULT_FN_ATTRS 4126 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) 4127 { 4128 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 4129 (__v8si) __W, 4130 (__mmask8) __U, 4131 _MM_FROUND_CUR_DIRECTION); 4132 } 4133 4134 static __inline__ __m256i __DEFAULT_FN_ATTRS 4135 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) 4136 { 4137 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 4138 (__v8si) 4139 _mm256_setzero_si256 (), 4140 (__mmask8) __U, 4141 _MM_FROUND_CUR_DIRECTION); 4142 } 4143 4144 #define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \ 4145 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4146 (__v16si)_mm512_setzero_si512(), \ 4147 (__mmask16)-1, (int)(R)); }) 4148 4149 #define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \ 4150 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4151 (__v16si)(__m512i)(W), \ 4152 (__mmask16)(U), (int)(R)); }) 4153 4154 #define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \ 4155 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4156 (__v16si)_mm512_setzero_si512(), \ 4157 (__mmask16)(U), (int)(R)); }) 4158 4159 static __inline__ __m512i __DEFAULT_FN_ATTRS 4160 
_mm512_cvtps_epu32 ( __m512 __A) 4161 { 4162 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\ 4163 (__v16si)\ 4164 _mm512_undefined_epi32 (),\ 4165 (__mmask16) -1,\ 4166 _MM_FROUND_CUR_DIRECTION);\ 4167 } 4168 4169 static __inline__ __m512i __DEFAULT_FN_ATTRS 4170 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) 4171 { 4172 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 4173 (__v16si) __W, 4174 (__mmask16) __U, 4175 _MM_FROUND_CUR_DIRECTION); 4176 } 4177 4178 static __inline__ __m512i __DEFAULT_FN_ATTRS 4179 _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A) 4180 { 4181 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 4182 (__v16si) 4183 _mm512_setzero_si512 (), 4184 (__mmask16) __U , 4185 _MM_FROUND_CUR_DIRECTION); 4186 } 4187 4188 #define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \ 4189 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4190 (__v8si)_mm256_setzero_si256(), \ 4191 (__mmask8)-1, (int)(R)); }) 4192 4193 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \ 4194 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4195 (__v8si)(W), \ 4196 (__mmask8)(U), (int)(R)); }) 4197 4198 #define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \ 4199 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4200 (__v8si)_mm256_setzero_si256(), \ 4201 (__mmask8)(U), (int)(R)); }) 4202 4203 static __inline__ __m256i __DEFAULT_FN_ATTRS 4204 _mm512_cvtpd_epu32 (__m512d __A) 4205 { 4206 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4207 (__v8si) 4208 _mm256_undefined_si256 (), 4209 (__mmask8) -1, 4210 _MM_FROUND_CUR_DIRECTION); 4211 } 4212 4213 static __inline__ __m256i __DEFAULT_FN_ATTRS 4214 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) 4215 { 4216 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4217 (__v8si) __W, 4218 (__mmask8) __U, 4219 _MM_FROUND_CUR_DIRECTION); 4220 } 

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                                                     (__v8si)
                                                     _mm256_setzero_si256 (),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* Extract the lowest element of a 512-bit vector as a scalar.  */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_cvtsd_f64(__m512d __a)
{
  return __a[0];
}

static __inline__ float __DEFAULT_FN_ATTRS
_mm512_cvtss_f32(__m512 __a)
{
  return __a[0];
}

/* Unpack and Interleave */

/* The shuffle indices interleave the high/low halves of each 128-bit
   lane of the two operands, as VUNPCKHPD/VUNPCKLPD etc. do; the "+2",
   "+4", ... terms step the base index into each successive lane.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_unpackhi_pd(__m512d __a, __m512d __b)
{
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_unpackhi_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_unpackhi_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_unpacklo_pd(__m512d __a, __m512d __b)
{
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_unpacklo_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_unpacklo_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_unpackhi_ps(__m512 __a, __m512 __b)
{
  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                         2,    18,    3,    19,
                                         2+4,  18+4,  3+4,  19+4,
                                         2+8,  18+8,  3+8,  19+8,
                                         2+12, 18+12, 3+12, 19+12);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                             (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                             (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_unpacklo_ps(__m512 __a, __m512 __b)
{
  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                         0,    16,    1,    17,
                                         0+4,  16+4,  1+4,  17+4,
                                         0+8,  16+8,  1+8,  17+8,
                                         0+12, 16+12, 1+12, 17+12);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                             (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                             (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          2,    18,    3,    19,
                                          2+4,  18+4,  3+4,  19+4,
                                          2+8,  18+8,  3+8,  19+8,
                                          2+12, 18+12, 3+12, 19+12);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                             (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                             (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          0,    16,    1,    17,
                                          0+4,  16+4,  1+4,  17+4,
                                          0+8,  16+8,  1+8,  17+8,
                                          0+12, 16+12, 1+12, 17+12);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                             (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                             (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A,
                           __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                             (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                             (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                             (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                             (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Bit Test */

/* VPTESTMD/VPTESTMQ: mask bit i is set when (A[i] & B[i]) != 0.  */
static __inline __mmask16 __DEFAULT_FN_ATTRS
_mm512_test_epi32_mask(__m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
                                                (__v16si) __B,
                                                (__mmask16) -1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
                                                (__v16si) __B, __U);
}

static __inline __mmask8 __DEFAULT_FN_ATTRS
_mm512_test_epi64_mask(__m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
                                               (__v8di) __B,
                                               (__mmask8) -1);
}

static
__inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
}


/* SIMD load ops */

/* Unaligned loads go through the masked-load builtins; the unmasked
   variants simply pass an all-ones mask.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_loadu_si512 (void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
                                                     (__v16si)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask16) -1);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
                                                     (__v16si) __W,
                                                     (__mmask16) __U);
}


static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
                                                     (__v16si)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask16) __U);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
                                                     (__v8di) __W,
                                                     (__mmask8) __U);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
                                                     (__v8di)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask8) __U);
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
                                                  (__v16sf) __W,
                                                  (__mmask16) __U);
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) __U);
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
                                                   (__v8df) __W,
                                                   (__mmask8) __U);
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) __U);
}

/* Unmasked unaligned loads use a packed, may_alias struct so the access
   is emitted with alignment 1 and without violating strict aliasing.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_loadu_pd(void const *__p)
{
  struct __loadu_pd {
    __m512d __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_pd*)__p)->__v;
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_loadu_ps(void const *__p)
{
  struct __loadu_ps {
    __m512 __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_ps*)__p)->__v;
}

/* Aligned loads: __p must be 64-byte aligned.  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_load_ps(void const *__p)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) -1);
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
                                                  (__v16sf) __W,
                                                  (__mmask16) __U);
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) __U);
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_load_pd(void const *__p)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) -1);
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
                                                   (__v8df) __W,
                                                   (__mmask8) __U);
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) __U);
}

/* Aligned integer loads are plain dereferences of a (64-byte aligned)
   __m512i pointer.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_load_si512 (void const *__P)
{
  return *(__m512i *) __P;
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_load_epi32 (void const *__P)
{
  return *(__m512i *) __P;
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_load_epi64 (void const *__P)
{
  return *(__m512i *) __P;
}

/* SIMD store ops */

/* Unaligned stores mirror the loads: masked builtins, all-ones mask for
   the unmasked variants.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
                                     (__mmask8) __U);
}

static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_si512 (void *__P, __m512i __A)
{
  __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
                                     (__mmask16) -1);
}

static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
                                     (__mmask16) __U);
}

static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
}

static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_pd(void *__P, __m512d __A)
{
  __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1);
}

static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_ps(void *__P, __m512 __A)
{
  __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1);
}

/* Aligned stores: __P must be 64-byte aligned.  Masked forms use the
   masked-store builtins; unmasked forms are plain assignments.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
}

static __inline void __DEFAULT_FN_ATTRS
_mm512_store_pd(void *__P, __m512d __A)
{
  *(__m512d*)__P = __A;
}

static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

static __inline void __DEFAULT_FN_ATTRS
_mm512_store_ps(void *__P, __m512 __A)
{
  *(__m512*)__P = __A;
}

static __inline void __DEFAULT_FN_ATTRS
_mm512_store_si512 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

static __inline void __DEFAULT_FN_ATTRS
_mm512_store_epi32 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

static __inline void __DEFAULT_FN_ATTRS
_mm512_store_epi64 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

/* Mask ops */

/* Bitwise NOT of a 16-bit mask register (KNOTW).  */
static __inline __mmask16 __DEFAULT_FN_ATTRS
_mm512_knot(__mmask16 __M)
{
  return __builtin_ia32_knothi(__M);
}

/* Integer compare */

/* VPCMP{EQ,GT}D/Q and VPCMP[U]D/Q: the u/cmp builtins take an immediate
   predicate (0=eq, 1=lt, 2=le, 4=neq, 5=ge, 6=gt); the masked forms AND
   the comparison result with __u.  */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
                                                   (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
                                                   __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
                                                  __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
                                                  (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                               (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                               __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
                                                   (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
                                                   __u);
}

/* Unsigned and remaining signed compares; immediate predicate codes are
   0=eq, 1=lt, 2=le, 4=neq, 5=ge, 6=gt (see the comment above the compare
   group).  */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
                                                  __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
                                                  (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                               (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                               __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                               (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                               __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) { 5009 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, 5010 (__mmask8)-1); 5011 } 5012 5013 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 5014 _mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 5015 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, 5016 __u); 5017 } 5018 5019 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 5020 _mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) { 5021 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, 5022 (__mmask8)-1); 5023 } 5024 5025 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 5026 _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 5027 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, 5028 __u); 5029 } 5030 5031 static __inline__ __m512i __DEFAULT_FN_ATTRS 5032 _mm512_cvtepi8_epi32(__m128i __A) 5033 { 5034 /* This function always performs a signed extension, but __v16qi is a char 5035 which may be signed or unsigned, so use __v16qs. */ 5036 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si); 5037 } 5038 5039 static __inline__ __m512i __DEFAULT_FN_ATTRS 5040 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) 5041 { 5042 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5043 (__v16si)_mm512_cvtepi8_epi32(__A), 5044 (__v16si)__W); 5045 } 5046 5047 static __inline__ __m512i __DEFAULT_FN_ATTRS 5048 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A) 5049 { 5050 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5051 (__v16si)_mm512_cvtepi8_epi32(__A), 5052 (__v16si)_mm512_setzero_si512()); 5053 } 5054 5055 static __inline__ __m512i __DEFAULT_FN_ATTRS 5056 _mm512_cvtepi8_epi64(__m128i __A) 5057 { 5058 /* This function always performs a signed extension, but __v16qi is a char 5059 which may be signed or unsigned, so use __v16qs. 
*/ 5060 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); 5061 } 5062 5063 static __inline__ __m512i __DEFAULT_FN_ATTRS 5064 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) 5065 { 5066 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5067 (__v8di)_mm512_cvtepi8_epi64(__A), 5068 (__v8di)__W); 5069 } 5070 5071 static __inline__ __m512i __DEFAULT_FN_ATTRS 5072 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 5073 { 5074 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5075 (__v8di)_mm512_cvtepi8_epi64(__A), 5076 (__v8di)_mm512_setzero_si512 ()); 5077 } 5078 5079 static __inline__ __m512i __DEFAULT_FN_ATTRS 5080 _mm512_cvtepi32_epi64(__m256i __X) 5081 { 5082 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di); 5083 } 5084 5085 static __inline__ __m512i __DEFAULT_FN_ATTRS 5086 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) 5087 { 5088 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5089 (__v8di)_mm512_cvtepi32_epi64(__X), 5090 (__v8di)__W); 5091 } 5092 5093 static __inline__ __m512i __DEFAULT_FN_ATTRS 5094 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X) 5095 { 5096 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 5097 (__v8di)_mm512_cvtepi32_epi64(__X), 5098 (__v8di)_mm512_setzero_si512()); 5099 } 5100 5101 static __inline__ __m512i __DEFAULT_FN_ATTRS 5102 _mm512_cvtepi16_epi32(__m256i __A) 5103 { 5104 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si); 5105 } 5106 5107 static __inline__ __m512i __DEFAULT_FN_ATTRS 5108 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) 5109 { 5110 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 5111 (__v16si)_mm512_cvtepi16_epi32(__A), 5112 (__v16si)__W); 5113 } 5114 5115 static __inline__ __m512i __DEFAULT_FN_ATTRS 5116 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A) 5117 { 5118 return 
(__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepi16_epi32(__A),
                                             (__v16si)_mm512_setzero_si512 ());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi16_epi64(__m128i __A)
{
  return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi16_epi64(__A),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi16_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}

/* Zero-extending widening conversions (VPMOVZX*): the unsigned source
   vector types (__v16qu/__v16hu/__v8su/__v8hu) make
   __builtin_convertvector zero-extend rather than sign-extend. */

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu8_epi32(__m128i __A)
{
  return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepu8_epi32(__A),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepu8_epi32(__A),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu8_epi64(__m128i __A)
{
  /* Only the low 8 bytes are widened; the shuffle extracts elements 0..7. */
  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu8_epi64(__A),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu8_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu32_epi64(__m256i __X)
{
  return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu32_epi64(__X),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu32_epi64(__X),
                                             (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu16_epi32(__m256i __A)
{
  return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepu16_epi32(__A),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepu16_epi32(__A),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu16_epi64(__m128i __A)
{
  return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu16_epi64(__A),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu16_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}

/* Variable-count rotate-right (VPRORVD/VPRORVQ): each lane of __A is
   rotated right by the corresponding lane of __B. */

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rorv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
                                                  (__v16si) __B,
                                                  (__v16si)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask16) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
                                                  (__v16si) __B,
                                                  (__v16si) __W,
                                                  (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
                                                  (__v16si) __B,
                                                  (__v16si)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rorv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
                                                  (__v8di) __B,
                                                  (__v8di)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask8) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
                                                  (__v8di) __B,
                                                  (__v8di) __W,
                                                  (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
                                                  (__v8di) __B,
                                                  (__v8di)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask8) __U);
}



/* Generic integer compares with an immediate predicate p
   (macros because the predicate must be a compile-time constant). */

#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)-1); })

#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1); })

#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)(m)); })

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)); })

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)); })

/* Rotate-left by an immediate count (VPROLD/VPROLQ). */

#define _mm512_rol_epi32(a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })

#define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)(__m512i)(W), \
                                        (__mmask16)(U)); })

#define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })

#define _mm512_rol_epi64(a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })

#define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)(__m512i)(W), (__mmask8)(U)); })

#define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })

/* Variable-count rotate-left (VPROLVD/VPROLVQ). */

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
                                                  (__v16si) __B,
                                                  (__v16si)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask16) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
                                                  (__v16si) __B,
                                                  (__v16si) __W,
                                                  (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
                                                  (__v16si) __B,
                                                  (__v16si)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rolv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
                                                  (__v8di) __B,
                                                  (__v8di)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask8) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
                                                  (__v8di) __B,
                                                  (__v8di) __W,
                                                  (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
                                                  (__v8di) __B,
                                                  (__v8di)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask8) __U);
}

/* Rotate-right by an immediate count (VPRORD/VPRORQ). */

#define _mm512_ror_epi32(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })

#define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)(__m512i)(W), \
                                        (__mmask16)(U)); })

#define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })

#define _mm512_ror_epi64(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })

#define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)(__m512i)(W), (__mmask8)(U)); })

#define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)_mm512_setzero_si512(), \
(__mmask8)(U)); })

/* Shifts by an immediate count (VPSLLD/VPSLLQ/VPSRLD/VPSRLQ). */

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_slli_epi32(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_slli_epi32(__A, __B),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_slli_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_slli_epi64(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_slli_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_slli_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srli_epi32(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srli_epi32(__A, __B),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srli_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srli_epi64(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srli_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srli_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Masked aligned loads/stores (VMOVDQA32/VMOVDQA64).
   NOTE(review): the pointer casts assume 64-byte alignment of __P —
   that is the documented contract of the unsuffixed load/store forms. */

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
                                                        (__v16si) __W,
                                                        (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
                                                        (__v16si)
                                                        _mm512_setzero_si512 (),
                                                        (__mmask16) __U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
                                        (__mmask16) __U);
}

/* Masked register-to-register moves (per-lane blend/zero). */

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                                               (__v16si) __A,
                                               (__v16si) __W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                                               (__v16si) __A,
                                               (__v16si) _mm512_setzero_si512 ());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                                               (__v8di) __A,
                                               (__v8di) __W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                                               (__v8di) __A,
                                               (__v8di) _mm512_setzero_si512 ());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
                                                        (__v8di) __W,
                                                        (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
                                                        (__v8di)
                                                        _mm512_setzero_si512 (),
                                                        (__mmask8) __U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
                                        (__mmask8) __U);
}

/* MOVDDUP: duplicate each even-indexed double into the adjacent odd lane. */

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_movedup_pd (__m512d __A)
{
  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
                                          0, 0, 2, 2, 4, 4, 6, 6);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_movedup_pd(__A),
                                              (__v8df)__W);
}

static __inline__ __m512d
__DEFAULT_FN_ATTRS
_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_movedup_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

/* VFIXUPIMM: fix up special FP values per the table in __C selected by imm.
   _round variants take an explicit rounding/SAE mode R; plain forms use
   _MM_FROUND_CUR_DIRECTION. */

#define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              (int)(R)); })

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION); })

#define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1, (int)(R)); })

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U), (int)(R)); })

#define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U), \
                                             (int)(R)); })

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION); })

/* Scalar fixupimm: operate on the low element only; upper elements of A
   pass through. */

#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)); })

#define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)-1, (int)(R)); })

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)(U), (int)(R)); })

#define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)(U), \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)); })

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

/* VGETEXPSD: extract the exponent of the low double as a double. */

#define _mm_getexp_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(R)); })


static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_getexp_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
                 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) __W,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(R)); })

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) _mm_setzero_pd (),
          (__mmask8) __U,
_MM_FROUND_CUR_DIRECTION); 5855 } 5856 5857 #define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({\ 5858 (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5859 (__v2df)(__m128d)(B), \ 5860 (__v2df)_mm_setzero_pd(), \ 5861 (__mmask8)(U), (int)(R)); }) 5862 5863 #define _mm_getexp_round_ss(A, B, R) __extension__ ({ \ 5864 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5865 (__v4sf)(__m128)(B), \ 5866 (__v4sf)_mm_setzero_ps(), \ 5867 (__mmask8)-1, (int)(R)); }) 5868 5869 static __inline__ __m128 __DEFAULT_FN_ATTRS 5870 _mm_getexp_ss (__m128 __A, __m128 __B) 5871 { 5872 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, 5873 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); 5874 } 5875 5876 static __inline__ __m128 __DEFAULT_FN_ATTRS 5877 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 5878 { 5879 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, 5880 (__v4sf) __B, 5881 (__v4sf) __W, 5882 (__mmask8) __U, 5883 _MM_FROUND_CUR_DIRECTION); 5884 } 5885 5886 #define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({\ 5887 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5888 (__v4sf)(__m128)(B), \ 5889 (__v4sf)(__m128)(W), \ 5890 (__mmask8)(U), (int)(R)); }) 5891 5892 static __inline__ __m128 __DEFAULT_FN_ATTRS 5893 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) 5894 { 5895 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, 5896 (__v4sf) __B, 5897 (__v4sf) _mm_setzero_pd (), 5898 (__mmask8) __U, 5899 _MM_FROUND_CUR_DIRECTION); 5900 } 5901 5902 #define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({\ 5903 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5904 (__v4sf)(__m128)(B), \ 5905 (__v4sf)_mm_setzero_ps(), \ 5906 (__mmask8)(U), (int)(R)); }) 5907 5908 #define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \ 5909 
(__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5910 (__v2df)(__m128d)(B), \ 5911 (int)(((D)<<2) | (C)), \ 5912 (__v2df)_mm_setzero_pd(), \ 5913 (__mmask8)-1, (int)(R)); }) 5914 5915 #define _mm_getmant_sd(A, B, C, D) __extension__ ({ \ 5916 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5917 (__v2df)(__m128d)(B), \ 5918 (int)(((D)<<2) | (C)), \ 5919 (__v2df)_mm_setzero_pd(), \ 5920 (__mmask8)-1, \ 5921 _MM_FROUND_CUR_DIRECTION); }) 5922 5923 #define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({\ 5924 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5925 (__v2df)(__m128d)(B), \ 5926 (int)(((D)<<2) | (C)), \ 5927 (__v2df)(__m128d)(W), \ 5928 (__mmask8)(U), \ 5929 _MM_FROUND_CUR_DIRECTION); }) 5930 5931 #define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R)({\ 5932 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5933 (__v2df)(__m128d)(B), \ 5934 (int)(((D)<<2) | (C)), \ 5935 (__v2df)(__m128d)(W), \ 5936 (__mmask8)(U), (int)(R)); }) 5937 5938 #define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({\ 5939 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5940 (__v2df)(__m128d)(B), \ 5941 (int)(((D)<<2) | (C)), \ 5942 (__v2df)_mm_setzero_pd(), \ 5943 (__mmask8)(U), \ 5944 _MM_FROUND_CUR_DIRECTION); }) 5945 5946 #define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({\ 5947 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5948 (__v2df)(__m128d)(B), \ 5949 (int)(((D)<<2) | (C)), \ 5950 (__v2df)_mm_setzero_pd(), \ 5951 (__mmask8)(U), (int)(R)); }) 5952 5953 #define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \ 5954 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5955 (__v4sf)(__m128)(B), \ 5956 (int)(((D)<<2) | (C)), \ 5957 (__v4sf)_mm_setzero_ps(), \ 5958 (__mmask8)-1, (int)(R)); }) 5959 5960 #define _mm_getmant_ss(A, B, C, D) __extension__ ({ \ 5961 
(__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5962 (__v4sf)(__m128)(B), \ 5963 (int)(((D)<<2) | (C)), \ 5964 (__v4sf)_mm_setzero_ps(), \ 5965 (__mmask8)-1, \ 5966 _MM_FROUND_CUR_DIRECTION); }) 5967 5968 #define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({\ 5969 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5970 (__v4sf)(__m128)(B), \ 5971 (int)(((D)<<2) | (C)), \ 5972 (__v4sf)(__m128)(W), \ 5973 (__mmask8)(U), \ 5974 _MM_FROUND_CUR_DIRECTION); }) 5975 5976 #define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R)({\ 5977 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5978 (__v4sf)(__m128)(B), \ 5979 (int)(((D)<<2) | (C)), \ 5980 (__v4sf)(__m128)(W), \ 5981 (__mmask8)(U), (int)(R)); }) 5982 5983 #define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\ 5984 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5985 (__v4sf)(__m128)(B), \ 5986 (int)(((D)<<2) | (C)), \ 5987 (__v4sf)_mm_setzero_pd(), \ 5988 (__mmask8)(U), \ 5989 _MM_FROUND_CUR_DIRECTION); }) 5990 5991 #define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({\ 5992 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5993 (__v4sf)(__m128)(B), \ 5994 (int)(((D)<<2) | (C)), \ 5995 (__v4sf)_mm_setzero_ps(), \ 5996 (__mmask8)(U), (int)(R)); }) 5997 5998 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 5999 _mm512_kmov (__mmask16 __A) 6000 { 6001 return __A; 6002 } 6003 6004 #define _mm_comi_round_sd(A, B, P, R) __extension__ ({\ 6005 (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \ 6006 (int)(P), (int)(R)); }) 6007 6008 #define _mm_comi_round_ss(A, B, P, R) __extension__ ({\ 6009 (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ 6010 (int)(P), (int)(R)); }) 6011 6012 #ifdef __x86_64__ 6013 #define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \ 6014 (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); }) 6015 #endif 6016 6017 static 
__inline__ __m512i __DEFAULT_FN_ATTRS 6018 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I, 6019 __mmask16 __U, __m512i __B) 6020 { 6021 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A, 6022 (__v16si) __I 6023 /* idx */ , 6024 (__v16si) __B, 6025 (__mmask16) __U); 6026 } 6027 6028 static __inline__ __m512i __DEFAULT_FN_ATTRS 6029 _mm512_sll_epi32(__m512i __A, __m128i __B) 6030 { 6031 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B); 6032 } 6033 6034 static __inline__ __m512i __DEFAULT_FN_ATTRS 6035 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 6036 { 6037 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6038 (__v16si)_mm512_sll_epi32(__A, __B), 6039 (__v16si)__W); 6040 } 6041 6042 static __inline__ __m512i __DEFAULT_FN_ATTRS 6043 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) 6044 { 6045 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 6046 (__v16si)_mm512_sll_epi32(__A, __B), 6047 (__v16si)_mm512_setzero_si512()); 6048 } 6049 6050 static __inline__ __m512i __DEFAULT_FN_ATTRS 6051 _mm512_sll_epi64(__m512i __A, __m128i __B) 6052 { 6053 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B); 6054 } 6055 6056 static __inline__ __m512i __DEFAULT_FN_ATTRS 6057 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 6058 { 6059 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6060 (__v8di)_mm512_sll_epi64(__A, __B), 6061 (__v8di)__W); 6062 } 6063 6064 static __inline__ __m512i __DEFAULT_FN_ATTRS 6065 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) 6066 { 6067 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 6068 (__v8di)_mm512_sll_epi64(__A, __B), 6069 (__v8di)_mm512_setzero_si512()); 6070 } 6071 6072 static __inline__ __m512i __DEFAULT_FN_ATTRS 6073 _mm512_sllv_epi32(__m512i __X, __m512i __Y) 6074 { 6075 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y); 6076 } 6077 6078 
/* Masked forms of the variable 32-bit left shift: keep shifted lanes where
 * the mask bit is set, otherwise take __W (merge) or zero (maskz). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_sllv_epi32(__X, __Y),
                                            (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_sllv_epi32(__X, __Y),
                                            (__v16si)_mm512_setzero_si512());
}

/* Per-element left shift of 64-bit elements, plus masked variants. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sllv_epi64(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sllv_epi64(__X, __Y),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sllv_epi64(__X, __Y),
                                             (__v8di)_mm512_setzero_si512());
}

/* Uniform arithmetic (sign-extending) right shift of 32-bit elements by the
 * count in the low 64 bits of __B, plus masked variants. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sra_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sra_epi32(__A, __B),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sra_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Uniform arithmetic right shift of 64-bit elements (AVX-512 VPSRAQ). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sra_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sra_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sra_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Per-element arithmetic right shift of 32-bit elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srav_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srav_epi32(__X, __Y),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srav_epi32(__X, __Y),
                                             (__v16si)_mm512_setzero_si512());
}

/* Per-element arithmetic right shift of 64-bit elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srav_epi64(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srav_epi64(__X, __Y),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srav_epi64(__X, __Y),
                                             (__v8di)_mm512_setzero_si512());
}

/* Uniform logical (zero-filling) right shift of 32-bit elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srl_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srl_epi32(__A, __B),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srl_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Uniform logical right shift of 64-bit elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srl_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srl_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srl_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Per-element logical right shift of 32-bit elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srlv_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
}

/* Masked forms of the variable 32-bit logical right shift. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_srlv_epi32(__X, __Y),
                                            (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_srlv_epi32(__X, __Y),
                                            (__v16si)_mm512_setzero_si512());
}

/* Per-element logical right shift of 64-bit elements, plus masked variants. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srlv_epi64(__X, __Y),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srlv_epi64(__X, __Y),
                                             (__v8di)_mm512_setzero_si512());
}

/* VPTERNLOG: bitwise ternary logic on three sources; imm is the 8-bit truth
 * table. The _mask form merges into A where mask bits are clear; the _maskz
 * form zeroes those lanes. */
#define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                            (__v16si)(__m512i)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1); })

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                            (__v16si)(__m512i)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U)); })

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U)); })

#define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                            (__v8di)(__m512i)(B), \
                                            (__v8di)(__m512i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                            (__v8di)(__m512i)(B), \
                                            (__v8di)(__m512i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

/* Scalar double -> signed/unsigned integer conversions. The _round_ macros
 * take an explicit rounding mode R; the plain functions use the current
 * MXCSR rounding direction. 64-bit results are only available on x86-64. */
#ifdef __x86_64__
#define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
#endif

#define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })

#define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })

#define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)); })

static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvtsd_u32 (__m128d __A)
{
  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
                                                 _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
                                                  (int)(R)); })

static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvtsd_u64 (__m128d __A)
{
  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
                                                           __A,
                                                           _MM_FROUND_CUR_DIRECTION);
}
#endif

/* Scalar float -> signed/unsigned integer conversions. */
#define _mm_cvt_roundss_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })

#define _mm_cvt_roundss_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })

#ifdef __x86_64__
#define _mm_cvt_roundss_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })

#define _mm_cvt_roundss_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
#endif

#define _mm_cvt_roundss_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); })

static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvtss_u32 (__m128 __A)
{
  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
                                                 _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvt_roundss_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
                                                  (int)(R)); })

static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvtss_u64 (__m128 __A)
{
  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
                                                           __A,
                                                           _MM_FROUND_CUR_DIRECTION);
}
#endif

/* Truncating (round-toward-zero) scalar conversions; R only controls SAE. */
#define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })

#define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })

static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvttsd_i32 (__m128d __A)
{
  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
                                            _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })

#define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })

static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvttsd_i64 (__m128d __A)
{
  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
                                                  _MM_FROUND_CUR_DIRECTION);
}
#endif

#define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); })

static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvttsd_u32 (__m128d __A)
{
  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
                                                  _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
                                                   (int)(R)); })

static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvttsd_u64 (__m128d __A)
{
  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
                                                            __A,
                                                            _MM_FROUND_CUR_DIRECTION);
}
#endif

#define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })

#define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })

static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvttss_i32 (__m128 __A)
{
  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
                                            _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })

#define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })

static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvttss_i64 (__m128 __A)
{
  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
                                                  _MM_FROUND_CUR_DIRECTION);
}
#endif

#define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); })

static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvttss_u32 (__m128 __A)
{
  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
                                                  _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
                                                   (int)(R)); })

static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvttss_u64 (__m128 __A)
{
  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
                                                            __A,
                                                            _MM_FROUND_CUR_DIRECTION);
}
#endif

/* "mask2" two-source permutes: lanes with a clear mask bit keep the index
 * operand __I rather than the permuted result. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
                              __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
                                                        (__v8di) __I
                                                        /* idx */ ,
                                                        (__v8df) __B,
                                                        (__mmask8) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
                              __m512 __B)
{
  return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
                                                       (__v16si) __I
                                                       /* idx */ ,
                                                       (__v16sf) __B,
                                                       (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
                                 __mmask8 __U, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
                                                       (__v8di) __I
                                                       /* idx */ ,
                                                       (__v8di) __B,
                                                       (__mmask8) __U);
}

/* In-lane permute of doubles: one control bit per element selects within
 * each 128-bit lane, hence the 0/2/4/6 base indices. */
#define _mm512_permute_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   0 + (((C) >> 0) & 0x1), \
                                   0 + (((C) >> 1) & 0x1), \
                                   2 + (((C) >> 2) & 0x1), \
                                   2 + (((C) >> 3) & 0x1), \
                                   4 + (((C) >> 4) & 0x1), \
                                   4 + (((C) >> 5) & 0x1), \
                                   6 + (((C) >> 6) & 0x1), \
                                   6 + (((C) >> 7) & 0x1)); })

#define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permute_pd((X), (C)), \
                                       (__v8df)(__m512d)(W)); })

#define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permute_pd((X), (C)), \
                                       (__v8df)_mm512_setzero_pd()); })

/* In-lane permute of floats: the same 8-bit control is applied to every
 * 128-bit lane (two bits per element). */
#define _mm512_permute_ps(X, C) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \
                                  (__v16sf)_mm512_undefined_ps(), \
                                  0  + (((C) >> 0) & 0x3), \
                                  0  + (((C) >> 2) & 0x3), \
                                  0  + (((C) >> 4) & 0x3), \
                                  0  + (((C) >> 6) & 0x3), \
                                  4  + (((C) >> 0) & 0x3), \
                                  4  + (((C) >> 2) & 0x3), \
                                  4  + (((C) >> 4) & 0x3), \
                                  4  + (((C) >> 6) & 0x3), \
                                  8  + (((C) >> 0) & 0x3), \
                                  8  + (((C) >> 2) & 0x3), \
                                  8  + (((C) >> 4) & 0x3), \
                                  8  + (((C) >> 6) & 0x3), \
                                  12 + (((C) >> 0) & 0x3), \
                                  12 + (((C) >> 2) & 0x3), \
                                  12 + (((C) >> 4) & 0x3), \
                                  12 + (((C) >> 6) & 0x3)); })

#define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_permute_ps((X), (C)), \
                                      (__v16sf)(__m512)(W)); })

#define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_permute_ps((X), (C)), \
                                      (__v16sf)_mm512_setzero_ps()); })

/* Variable in-lane permutes: per-element selectors come from __C. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_permutevar_pd(__m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                         (__v8df)_mm512_permutevar_pd(__A, __C),
                                         (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                         (__v8df)_mm512_permutevar_pd(__A, __C),
                                         (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_permutevar_ps(__m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
                                        (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
                                        (__v16sf)_mm512_setzero_ps());
}

/* Two-source full permute of doubles selected by index vector __I; the
 * masked form merges __A (the first data operand) where bits of __U are
 * clear. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
                                                        /* idx */ ,
                                                        (__v8df) __A,
                                                        (__v8df) __B,
                                                        (__mmask8) -1);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
                                                        /* idx */ ,
                                                        (__v8df) __A,
                                                        (__v8df) __B,
                                                        (__mmask8) __U);
}

6654 static __inline__ __m512d __DEFAULT_FN_ATTRS 6655 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I, 6656 __m512d __B) 6657 { 6658 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I 6659 /* idx */ , 6660 (__v8df) __A, 6661 (__v8df) __B, 6662 (__mmask8) __U); 6663 } 6664 6665 static __inline __m512 __DEFAULT_FN_ATTRS 6666 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) 6667 { 6668 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I 6669 /* idx */ , 6670 (__v16sf) __A, 6671 (__v16sf) __B, 6672 (__mmask16) -1); 6673 } 6674 6675 static __inline__ __m512 __DEFAULT_FN_ATTRS 6676 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) 6677 { 6678 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I 6679 /* idx */ , 6680 (__v16sf) __A, 6681 (__v16sf) __B, 6682 (__mmask16) __U); 6683 } 6684 6685 static __inline__ __m512 __DEFAULT_FN_ATTRS 6686 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I, 6687 __m512 __B) 6688 { 6689 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I 6690 /* idx */ , 6691 (__v16sf) __A, 6692 (__v16sf) __B, 6693 (__mmask16) __U); 6694 } 6695 6696 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 6697 _mm512_testn_epi32_mask (__m512i __A, __m512i __B) 6698 { 6699 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, 6700 (__v16si) __B, 6701 (__mmask16) -1); 6702 } 6703 6704 static __inline__ __mmask16 __DEFAULT_FN_ATTRS 6705 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 6706 { 6707 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, 6708 (__v16si) __B, __U); 6709 } 6710 6711 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6712 _mm512_testn_epi64_mask (__m512i __A, __m512i __B) 6713 { 6714 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, 6715 (__v8di) __B, 6716 (__mmask8) -1); 6717 } 6718 6719 static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6720 
_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 6721 { 6722 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, 6723 (__v8di) __B, __U); 6724 } 6725 6726 #define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \ 6727 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6728 (__v8si)_mm256_undefined_si256(), \ 6729 (__mmask8)-1, (int)(R)); }) 6730 6731 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \ 6732 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6733 (__v8si)(__m256i)(W), \ 6734 (__mmask8)(U), (int)(R)); }) 6735 6736 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \ 6737 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6738 (__v8si)_mm256_setzero_si256(), \ 6739 (__mmask8)(U), (int)(R)); }) 6740 6741 static __inline__ __m256i __DEFAULT_FN_ATTRS 6742 _mm512_cvttpd_epu32 (__m512d __A) 6743 { 6744 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 6745 (__v8si) 6746 _mm256_undefined_si256 (), 6747 (__mmask8) -1, 6748 _MM_FROUND_CUR_DIRECTION); 6749 } 6750 6751 static __inline__ __m256i __DEFAULT_FN_ATTRS 6752 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) 6753 { 6754 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 6755 (__v8si) __W, 6756 (__mmask8) __U, 6757 _MM_FROUND_CUR_DIRECTION); 6758 } 6759 6760 static __inline__ __m256i __DEFAULT_FN_ATTRS 6761 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) 6762 { 6763 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 6764 (__v8si) 6765 _mm256_setzero_si256 (), 6766 (__mmask8) __U, 6767 _MM_FROUND_CUR_DIRECTION); 6768 } 6769 6770 #define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \ 6771 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6772 (__v2df)(__m128d)(B), \ 6773 (__v2df)_mm_setzero_pd(), \ 6774 (__mmask8)-1, (int)(imm), \ 6775 (int)(R)); }) 6776 6777 #define _mm_roundscale_sd(A, B, imm) 
__extension__ ({ \ 6778 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6779 (__v2df)(__m128d)(B), \ 6780 (__v2df)_mm_setzero_pd(), \ 6781 (__mmask8)-1, (int)(imm), \ 6782 _MM_FROUND_CUR_DIRECTION); }) 6783 6784 #define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \ 6785 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6786 (__v2df)(__m128d)(B), \ 6787 (__v2df)(__m128d)(W), \ 6788 (__mmask8)(U), (int)(imm), \ 6789 _MM_FROUND_CUR_DIRECTION); }) 6790 6791 #define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \ 6792 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6793 (__v2df)(__m128d)(B), \ 6794 (__v2df)(__m128d)(W), \ 6795 (__mmask8)(U), (int)(I), \ 6796 (int)(R)); }) 6797 6798 #define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \ 6799 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6800 (__v2df)(__m128d)(B), \ 6801 (__v2df)_mm_setzero_pd(), \ 6802 (__mmask8)(U), (int)(I), \ 6803 _MM_FROUND_CUR_DIRECTION); }) 6804 6805 #define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \ 6806 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6807 (__v2df)(__m128d)(B), \ 6808 (__v2df)_mm_setzero_pd(), \ 6809 (__mmask8)(U), (int)(I), \ 6810 (int)(R)); }) 6811 6812 #define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \ 6813 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6814 (__v4sf)(__m128)(B), \ 6815 (__v4sf)_mm_setzero_ps(), \ 6816 (__mmask8)-1, (int)(imm), \ 6817 (int)(R)); }) 6818 6819 #define _mm_roundscale_ss(A, B, imm) __extension__ ({ \ 6820 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6821 (__v4sf)(__m128)(B), \ 6822 (__v4sf)_mm_setzero_ps(), \ 6823 (__mmask8)-1, (int)(imm), \ 6824 _MM_FROUND_CUR_DIRECTION); }) 6825 6826 #define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \ 6827 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6828 
(__v4sf)(__m128)(B), \ 6829 (__v4sf)(__m128)(W), \ 6830 (__mmask8)(U), (int)(I), \ 6831 _MM_FROUND_CUR_DIRECTION); }) 6832 6833 #define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \ 6834 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6835 (__v4sf)(__m128)(B), \ 6836 (__v4sf)(__m128)(W), \ 6837 (__mmask8)(U), (int)(I), \ 6838 (int)(R)); }) 6839 6840 #define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \ 6841 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6842 (__v4sf)(__m128)(B), \ 6843 (__v4sf)_mm_setzero_ps(), \ 6844 (__mmask8)(U), (int)(I), \ 6845 _MM_FROUND_CUR_DIRECTION); }) 6846 6847 #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \ 6848 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6849 (__v4sf)(__m128)(B), \ 6850 (__v4sf)_mm_setzero_ps(), \ 6851 (__mmask8)(U), (int)(I), \ 6852 (int)(R)); }) 6853 6854 #define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \ 6855 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6856 (__v8df)(__m512d)(B), \ 6857 (__v8df)_mm512_undefined_pd(), \ 6858 (__mmask8)-1, (int)(R)); }) 6859 6860 #define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \ 6861 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6862 (__v8df)(__m512d)(B), \ 6863 (__v8df)(__m512d)(W), \ 6864 (__mmask8)(U), (int)(R)); }) 6865 6866 #define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \ 6867 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6868 (__v8df)(__m512d)(B), \ 6869 (__v8df)_mm512_setzero_pd(), \ 6870 (__mmask8)(U), (int)(R)); }) 6871 6872 static __inline__ __m512d __DEFAULT_FN_ATTRS 6873 _mm512_scalef_pd (__m512d __A, __m512d __B) 6874 { 6875 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 6876 (__v8df) __B, 6877 (__v8df) 6878 _mm512_undefined_pd (), 6879 (__mmask8) -1, 6880 _MM_FROUND_CUR_DIRECTION); 6881 } 6882 6883 static __inline__ __m512d 
__DEFAULT_FN_ATTRS 6884 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 6885 { 6886 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 6887 (__v8df) __B, 6888 (__v8df) __W, 6889 (__mmask8) __U, 6890 _MM_FROUND_CUR_DIRECTION); 6891 } 6892 6893 static __inline__ __m512d __DEFAULT_FN_ATTRS 6894 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B) 6895 { 6896 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 6897 (__v8df) __B, 6898 (__v8df) 6899 _mm512_setzero_pd (), 6900 (__mmask8) __U, 6901 _MM_FROUND_CUR_DIRECTION); 6902 } 6903 6904 #define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \ 6905 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6906 (__v16sf)(__m512)(B), \ 6907 (__v16sf)_mm512_undefined_ps(), \ 6908 (__mmask16)-1, (int)(R)); }) 6909 6910 #define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \ 6911 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6912 (__v16sf)(__m512)(B), \ 6913 (__v16sf)(__m512)(W), \ 6914 (__mmask16)(U), (int)(R)); }) 6915 6916 #define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \ 6917 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6918 (__v16sf)(__m512)(B), \ 6919 (__v16sf)_mm512_setzero_ps(), \ 6920 (__mmask16)(U), (int)(R)); }) 6921 6922 static __inline__ __m512 __DEFAULT_FN_ATTRS 6923 _mm512_scalef_ps (__m512 __A, __m512 __B) 6924 { 6925 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 6926 (__v16sf) __B, 6927 (__v16sf) 6928 _mm512_undefined_ps (), 6929 (__mmask16) -1, 6930 _MM_FROUND_CUR_DIRECTION); 6931 } 6932 6933 static __inline__ __m512 __DEFAULT_FN_ATTRS 6934 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 6935 { 6936 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 6937 (__v16sf) __B, 6938 (__v16sf) __W, 6939 (__mmask16) __U, 6940 _MM_FROUND_CUR_DIRECTION); 6941 } 6942 6943 static __inline__ __m512 __DEFAULT_FN_ATTRS 6944 
_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B) 6945 { 6946 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 6947 (__v16sf) __B, 6948 (__v16sf) 6949 _mm512_setzero_ps (), 6950 (__mmask16) __U, 6951 _MM_FROUND_CUR_DIRECTION); 6952 } 6953 6954 #define _mm_scalef_round_sd(A, B, R) __extension__ ({ \ 6955 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6956 (__v2df)(__m128d)(B), \ 6957 (__v2df)_mm_setzero_pd(), \ 6958 (__mmask8)-1, (int)(R)); }) 6959 6960 static __inline__ __m128d __DEFAULT_FN_ATTRS 6961 _mm_scalef_sd (__m128d __A, __m128d __B) 6962 { 6963 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A, 6964 (__v2df)( __B), (__v2df) _mm_setzero_pd(), 6965 (__mmask8) -1, 6966 _MM_FROUND_CUR_DIRECTION); 6967 } 6968 6969 static __inline__ __m128d __DEFAULT_FN_ATTRS 6970 _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 6971 { 6972 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, 6973 (__v2df) __B, 6974 (__v2df) __W, 6975 (__mmask8) __U, 6976 _MM_FROUND_CUR_DIRECTION); 6977 } 6978 6979 #define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \ 6980 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6981 (__v2df)(__m128d)(B), \ 6982 (__v2df)(__m128d)(W), \ 6983 (__mmask8)(U), (int)(R)); }) 6984 6985 static __inline__ __m128d __DEFAULT_FN_ATTRS 6986 _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) 6987 { 6988 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, 6989 (__v2df) __B, 6990 (__v2df) _mm_setzero_pd (), 6991 (__mmask8) __U, 6992 _MM_FROUND_CUR_DIRECTION); 6993 } 6994 6995 #define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \ 6996 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6997 (__v2df)(__m128d)(B), \ 6998 (__v2df)_mm_setzero_pd(), \ 6999 (__mmask8)(U), (int)(R)); }) 7000 7001 #define _mm_scalef_round_ss(A, B, R) __extension__ ({ \ 7002 
(__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 7003 (__v4sf)(__m128)(B), \ 7004 (__v4sf)_mm_setzero_ps(), \ 7005 (__mmask8)-1, (int)(R)); }) 7006 7007 static __inline__ __m128 __DEFAULT_FN_ATTRS 7008 _mm_scalef_ss (__m128 __A, __m128 __B) 7009 { 7010 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A, 7011 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(), 7012 (__mmask8) -1, 7013 _MM_FROUND_CUR_DIRECTION); 7014 } 7015 7016 static __inline__ __m128 __DEFAULT_FN_ATTRS 7017 _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 7018 { 7019 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, 7020 (__v4sf) __B, 7021 (__v4sf) __W, 7022 (__mmask8) __U, 7023 _MM_FROUND_CUR_DIRECTION); 7024 } 7025 7026 #define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \ 7027 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 7028 (__v4sf)(__m128)(B), \ 7029 (__v4sf)(__m128)(W), \ 7030 (__mmask8)(U), (int)(R)); }) 7031 7032 static __inline__ __m128 __DEFAULT_FN_ATTRS 7033 _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) 7034 { 7035 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, 7036 (__v4sf) __B, 7037 (__v4sf) _mm_setzero_ps (), 7038 (__mmask8) __U, 7039 _MM_FROUND_CUR_DIRECTION); 7040 } 7041 7042 #define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \ 7043 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 7044 (__v4sf)(__m128)(B), \ 7045 (__v4sf)_mm_setzero_ps(), \ 7046 (__mmask8)(U), \ 7047 _MM_FROUND_CUR_DIRECTION); }) 7048 7049 static __inline__ __m512i __DEFAULT_FN_ATTRS 7050 _mm512_srai_epi32(__m512i __A, int __B) 7051 { 7052 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B); 7053 } 7054 7055 static __inline__ __m512i __DEFAULT_FN_ATTRS 7056 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) 7057 { 7058 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ 7059 (__v16si)_mm512_srai_epi32(__A, __B), \ 7060 
(__v16si)__W); 7061 } 7062 7063 static __inline__ __m512i __DEFAULT_FN_ATTRS 7064 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) { 7065 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ 7066 (__v16si)_mm512_srai_epi32(__A, __B), \ 7067 (__v16si)_mm512_setzero_si512()); 7068 } 7069 7070 static __inline__ __m512i __DEFAULT_FN_ATTRS 7071 _mm512_srai_epi64(__m512i __A, int __B) 7072 { 7073 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B); 7074 } 7075 7076 static __inline__ __m512i __DEFAULT_FN_ATTRS 7077 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) 7078 { 7079 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ 7080 (__v8di)_mm512_srai_epi64(__A, __B), \ 7081 (__v8di)__W); 7082 } 7083 7084 static __inline__ __m512i __DEFAULT_FN_ATTRS 7085 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B) 7086 { 7087 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ 7088 (__v8di)_mm512_srai_epi64(__A, __B), \ 7089 (__v8di)_mm512_setzero_si512()); 7090 } 7091 7092 #define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \ 7093 (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \ 7094 (__v16sf)(__m512)(B), (int)(imm), \ 7095 (__v16sf)_mm512_undefined_ps(), \ 7096 (__mmask16)-1); }) 7097 7098 #define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \ 7099 (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \ 7100 (__v16sf)(__m512)(B), (int)(imm), \ 7101 (__v16sf)(__m512)(W), \ 7102 (__mmask16)(U)); }) 7103 7104 #define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \ 7105 (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \ 7106 (__v16sf)(__m512)(B), (int)(imm), \ 7107 (__v16sf)_mm512_setzero_ps(), \ 7108 (__mmask16)(U)); }) 7109 7110 #define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \ 7111 (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \ 7112 (__v8df)(__m512d)(B), (int)(imm), \ 7113 (__v8df)_mm512_undefined_pd(), \ 7114 
(__mmask8)-1); }) 7115 7116 #define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \ 7117 (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \ 7118 (__v8df)(__m512d)(B), (int)(imm), \ 7119 (__v8df)(__m512d)(W), \ 7120 (__mmask8)(U)); }) 7121 7122 #define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \ 7123 (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \ 7124 (__v8df)(__m512d)(B), (int)(imm), \ 7125 (__v8df)_mm512_setzero_pd(), \ 7126 (__mmask8)(U)); }) 7127 7128 #define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \ 7129 (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \ 7130 (__v16si)(__m512i)(B), (int)(imm), \ 7131 (__v16si)_mm512_setzero_si512(), \ 7132 (__mmask16)-1); }) 7133 7134 #define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \ 7135 (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \ 7136 (__v16si)(__m512i)(B), (int)(imm), \ 7137 (__v16si)(__m512i)(W), \ 7138 (__mmask16)(U)); }) 7139 7140 #define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \ 7141 (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \ 7142 (__v16si)(__m512i)(B), (int)(imm), \ 7143 (__v16si)_mm512_setzero_si512(), \ 7144 (__mmask16)(U)); }) 7145 7146 #define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \ 7147 (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \ 7148 (__v8di)(__m512i)(B), (int)(imm), \ 7149 (__v8di)_mm512_setzero_si512(), \ 7150 (__mmask8)-1); }) 7151 7152 #define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \ 7153 (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \ 7154 (__v8di)(__m512i)(B), (int)(imm), \ 7155 (__v8di)(__m512i)(W), \ 7156 (__mmask8)(U)); }) 7157 7158 #define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \ 7159 (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \ 7160 (__v8di)(__m512i)(B), (int)(imm), \ 7161 (__v8di)_mm512_setzero_si512(), \ 7162 (__mmask8)(U)); }) 7163 
/* Shuffle packed doubles: within each 128-bit lane, element 0 is selected
   from A and element 1 from B, steered by successive bits of M. */
#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), \
                                   0 + (((M) >> 0) & 0x1), \
                                   8 + (((M) >> 1) & 0x1), \
                                   2 + (((M) >> 2) & 0x1), \
                                   10 + (((M) >> 3) & 0x1), \
                                   4 + (((M) >> 4) & 0x1), \
                                   12 + (((M) >> 5) & 0x1), \
                                   6 + (((M) >> 6) & 0x1), \
                                   14 + (((M) >> 7) & 0x1)); })

#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)(__m512d)(W)); })

#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)_mm512_setzero_pd()); })

/* Shuffle packed floats per 128-bit lane: the low two results come from A,
   the high two from B, each selected by a 2-bit field of M.
   Fixed: the result is cast to (__m512), not (__m512d) — the shuffle
   produces a 16 x float vector, and the (__m512d) cast gave the macro the
   wrong result type. */
#define _mm512_shuffle_ps(A, B, M) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), \
                                  0 + (((M) >> 0) & 0x3), \
                                  0 + (((M) >> 2) & 0x3), \
                                  16 + (((M) >> 4) & 0x3), \
                                  16 + (((M) >> 6) & 0x3), \
                                  4 + (((M) >> 0) & 0x3), \
                                  4 + (((M) >> 2) & 0x3), \
                                  20 + (((M) >> 4) & 0x3), \
                                  20 + (((M) >> 6) & 0x3), \
                                  8 + (((M) >> 0) & 0x3), \
                                  8 + (((M) >> 2) & 0x3), \
                                  24 + (((M) >> 4) & 0x3), \
                                  24 + (((M) >> 6) & 0x3), \
                                  12 + (((M) >> 0) & 0x3), \
                                  12 + (((M) >> 2) & 0x3), \
                                  28 + (((M) >> 4) & 0x3), \
                                  28 + (((M) >> 6) & 0x3)); })

#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)(__m512)(W)); })

#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)_mm512_setzero_ps()); })

#define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
7217 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 7218 (__v2df)(__m128d)(B), \ 7219 (__v2df)_mm_setzero_pd(), \ 7220 (__mmask8)-1, (int)(R)); }) 7221 7222 static __inline__ __m128d __DEFAULT_FN_ATTRS 7223 _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 7224 { 7225 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, 7226 (__v2df) __B, 7227 (__v2df) __W, 7228 (__mmask8) __U, 7229 _MM_FROUND_CUR_DIRECTION); 7230 } 7231 7232 #define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \ 7233 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 7234 (__v2df)(__m128d)(B), \ 7235 (__v2df)(__m128d)(W), \ 7236 (__mmask8)(U), (int)(R)); }) 7237 7238 static __inline__ __m128d __DEFAULT_FN_ATTRS 7239 _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) 7240 { 7241 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, 7242 (__v2df) __B, 7243 (__v2df) _mm_setzero_pd (), 7244 (__mmask8) __U, 7245 _MM_FROUND_CUR_DIRECTION); 7246 } 7247 7248 #define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \ 7249 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 7250 (__v2df)(__m128d)(B), \ 7251 (__v2df)_mm_setzero_pd(), \ 7252 (__mmask8)(U), (int)(R)); }) 7253 7254 #define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \ 7255 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 7256 (__v4sf)(__m128)(B), \ 7257 (__v4sf)_mm_setzero_ps(), \ 7258 (__mmask8)-1, (int)(R)); }) 7259 7260 static __inline__ __m128 __DEFAULT_FN_ATTRS 7261 _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 7262 { 7263 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, 7264 (__v4sf) __B, 7265 (__v4sf) __W, 7266 (__mmask8) __U, 7267 _MM_FROUND_CUR_DIRECTION); 7268 } 7269 7270 #define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \ 7271 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 7272 (__v4sf)(__m128)(B), \ 7273 (__v4sf)(__m128)(W), 
(__mmask8)(U), \ 7274 (int)(R)); }) 7275 7276 static __inline__ __m128 __DEFAULT_FN_ATTRS 7277 _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) 7278 { 7279 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, 7280 (__v4sf) __B, 7281 (__v4sf) _mm_setzero_ps (), 7282 (__mmask8) __U, 7283 _MM_FROUND_CUR_DIRECTION); 7284 } 7285 7286 #define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \ 7287 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 7288 (__v4sf)(__m128)(B), \ 7289 (__v4sf)_mm_setzero_ps(), \ 7290 (__mmask8)(U), (int)(R)); }) 7291 7292 static __inline__ __m512 __DEFAULT_FN_ATTRS 7293 _mm512_broadcast_f32x4(__m128 __A) 7294 { 7295 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 7296 0, 1, 2, 3, 0, 1, 2, 3, 7297 0, 1, 2, 3, 0, 1, 2, 3); 7298 } 7299 7300 static __inline__ __m512 __DEFAULT_FN_ATTRS 7301 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) 7302 { 7303 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, 7304 (__v16sf)_mm512_broadcast_f32x4(__A), 7305 (__v16sf)__O); 7306 } 7307 7308 static __inline__ __m512 __DEFAULT_FN_ATTRS 7309 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) 7310 { 7311 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, 7312 (__v16sf)_mm512_broadcast_f32x4(__A), 7313 (__v16sf)_mm512_setzero_ps()); 7314 } 7315 7316 static __inline__ __m512d __DEFAULT_FN_ATTRS 7317 _mm512_broadcast_f64x4(__m256d __A) 7318 { 7319 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A, 7320 0, 1, 2, 3, 0, 1, 2, 3); 7321 } 7322 7323 static __inline__ __m512d __DEFAULT_FN_ATTRS 7324 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A) 7325 { 7326 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, 7327 (__v8df)_mm512_broadcast_f64x4(__A), 7328 (__v8df)__O); 7329 } 7330 7331 static __inline__ __m512d __DEFAULT_FN_ATTRS 7332 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A) 7333 { 7334 return 
(__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                     (__v8df)_mm512_broadcast_f64x4(__A),
                                     (__v8df)_mm512_setzero_pd());
}

/* Broadcast the four 32-bit elements of __A into all four 128-bit lanes of
   the 512-bit result. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i32x4(__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3,
                                          0, 1, 2, 3, 0, 1, 2, 3);
}

/* Merge-masked broadcast: result elements whose mask bit is clear are taken
   from __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x4(__A),
                                             (__v16si)__O);
}

/* Zero-masked broadcast: result elements whose mask bit is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x4(__A),
                                             (__v16si)_mm512_setzero_si512());
}

/* Broadcast the four 64-bit elements of __A into both 256-bit halves of the
   512-bit result. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i64x4(__m256i __A)
{
  return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3);
}

/* Merge-masked 64x4 broadcast; unselected elements come from __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_broadcast_i64x4(__A),
                                             (__v8di)__O);
}

/* Zero-masked 64x4 broadcast; unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_broadcast_i64x4(__A),
                                             (__v8di)_mm512_setzero_si512());
}

/* Merge-masked broadcast of the low double of __A to all eight result
   elements; unselected elements come from __O. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512(__M,
                                              (__v8df) _mm512_broadcastsd_pd(__A),
                                              (__v8df) __O);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 7396 { 7397 return (__m512d)__builtin_ia32_selectpd_512(__M, 7398 (__v8df) _mm512_broadcastsd_pd(__A), 7399 (__v8df) _mm512_setzero_pd()); 7400 } 7401 7402 static __inline__ __m512 __DEFAULT_FN_ATTRS 7403 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) 7404 { 7405 return (__m512)__builtin_ia32_selectps_512(__M, 7406 (__v16sf) _mm512_broadcastss_ps(__A), 7407 (__v16sf) __O); 7408 } 7409 7410 static __inline__ __m512 __DEFAULT_FN_ATTRS 7411 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) 7412 { 7413 return (__m512)__builtin_ia32_selectps_512(__M, 7414 (__v16sf) _mm512_broadcastss_ps(__A), 7415 (__v16sf) _mm512_setzero_ps()); 7416 } 7417 7418 static __inline__ __m128i __DEFAULT_FN_ATTRS 7419 _mm512_cvtsepi32_epi8 (__m512i __A) 7420 { 7421 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 7422 (__v16qi) _mm_undefined_si128 (), 7423 (__mmask16) -1); 7424 } 7425 7426 static __inline__ __m128i __DEFAULT_FN_ATTRS 7427 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 7428 { 7429 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 7430 (__v16qi) __O, __M); 7431 } 7432 7433 static __inline__ __m128i __DEFAULT_FN_ATTRS 7434 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A) 7435 { 7436 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 7437 (__v16qi) _mm_setzero_si128 (), 7438 __M); 7439 } 7440 7441 static __inline__ void __DEFAULT_FN_ATTRS 7442 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 7443 { 7444 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 7445 } 7446 7447 static __inline__ __m256i __DEFAULT_FN_ATTRS 7448 _mm512_cvtsepi32_epi16 (__m512i __A) 7449 { 7450 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 7451 (__v16hi) _mm256_undefined_si256 (), 7452 (__mmask16) -1); 7453 } 7454 7455 static __inline__ __m256i __DEFAULT_FN_ATTRS 7456 
_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 7457 { 7458 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 7459 (__v16hi) __O, __M); 7460 } 7461 7462 static __inline__ __m256i __DEFAULT_FN_ATTRS 7463 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A) 7464 { 7465 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 7466 (__v16hi) _mm256_setzero_si256 (), 7467 __M); 7468 } 7469 7470 static __inline__ void __DEFAULT_FN_ATTRS 7471 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) 7472 { 7473 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 7474 } 7475 7476 static __inline__ __m128i __DEFAULT_FN_ATTRS 7477 _mm512_cvtsepi64_epi8 (__m512i __A) 7478 { 7479 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 7480 (__v16qi) _mm_undefined_si128 (), 7481 (__mmask8) -1); 7482 } 7483 7484 static __inline__ __m128i __DEFAULT_FN_ATTRS 7485 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 7486 { 7487 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 7488 (__v16qi) __O, __M); 7489 } 7490 7491 static __inline__ __m128i __DEFAULT_FN_ATTRS 7492 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A) 7493 { 7494 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 7495 (__v16qi) _mm_setzero_si128 (), 7496 __M); 7497 } 7498 7499 static __inline__ void __DEFAULT_FN_ATTRS 7500 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 7501 { 7502 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 7503 } 7504 7505 static __inline__ __m256i __DEFAULT_FN_ATTRS 7506 _mm512_cvtsepi64_epi32 (__m512i __A) 7507 { 7508 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 7509 (__v8si) _mm256_undefined_si256 (), 7510 (__mmask8) -1); 7511 } 7512 7513 static __inline__ __m256i __DEFAULT_FN_ATTRS 7514 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 7515 { 7516 return (__m256i) 
__builtin_ia32_pmovsqd512_mask ((__v8di) __A, 7517 (__v8si) __O, __M); 7518 } 7519 7520 static __inline__ __m256i __DEFAULT_FN_ATTRS 7521 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A) 7522 { 7523 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 7524 (__v8si) _mm256_setzero_si256 (), 7525 __M); 7526 } 7527 7528 static __inline__ void __DEFAULT_FN_ATTRS 7529 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A) 7530 { 7531 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 7532 } 7533 7534 static __inline__ __m128i __DEFAULT_FN_ATTRS 7535 _mm512_cvtsepi64_epi16 (__m512i __A) 7536 { 7537 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 7538 (__v8hi) _mm_undefined_si128 (), 7539 (__mmask8) -1); 7540 } 7541 7542 static __inline__ __m128i __DEFAULT_FN_ATTRS 7543 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 7544 { 7545 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 7546 (__v8hi) __O, __M); 7547 } 7548 7549 static __inline__ __m128i __DEFAULT_FN_ATTRS 7550 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A) 7551 { 7552 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 7553 (__v8hi) _mm_setzero_si128 (), 7554 __M); 7555 } 7556 7557 static __inline__ void __DEFAULT_FN_ATTRS 7558 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A) 7559 { 7560 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); 7561 } 7562 7563 static __inline__ __m128i __DEFAULT_FN_ATTRS 7564 _mm512_cvtusepi32_epi8 (__m512i __A) 7565 { 7566 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 7567 (__v16qi) _mm_undefined_si128 (), 7568 (__mmask16) -1); 7569 } 7570 7571 static __inline__ __m128i __DEFAULT_FN_ATTRS 7572 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 7573 { 7574 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 7575 (__v16qi) __O, 7576 __M); 7577 } 7578 7579 static 
__inline__ __m128i __DEFAULT_FN_ATTRS 7580 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A) 7581 { 7582 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 7583 (__v16qi) _mm_setzero_si128 (), 7584 __M); 7585 } 7586 7587 static __inline__ void __DEFAULT_FN_ATTRS 7588 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 7589 { 7590 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 7591 } 7592 7593 static __inline__ __m256i __DEFAULT_FN_ATTRS 7594 _mm512_cvtusepi32_epi16 (__m512i __A) 7595 { 7596 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 7597 (__v16hi) _mm256_undefined_si256 (), 7598 (__mmask16) -1); 7599 } 7600 7601 static __inline__ __m256i __DEFAULT_FN_ATTRS 7602 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 7603 { 7604 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 7605 (__v16hi) __O, 7606 __M); 7607 } 7608 7609 static __inline__ __m256i __DEFAULT_FN_ATTRS 7610 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A) 7611 { 7612 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 7613 (__v16hi) _mm256_setzero_si256 (), 7614 __M); 7615 } 7616 7617 static __inline__ void __DEFAULT_FN_ATTRS 7618 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) 7619 { 7620 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 7621 } 7622 7623 static __inline__ __m128i __DEFAULT_FN_ATTRS 7624 _mm512_cvtusepi64_epi8 (__m512i __A) 7625 { 7626 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 7627 (__v16qi) _mm_undefined_si128 (), 7628 (__mmask8) -1); 7629 } 7630 7631 static __inline__ __m128i __DEFAULT_FN_ATTRS 7632 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 7633 { 7634 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 7635 (__v16qi) __O, 7636 __M); 7637 } 7638 7639 static __inline__ __m128i __DEFAULT_FN_ATTRS 7640 
_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A) 7641 { 7642 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 7643 (__v16qi) _mm_setzero_si128 (), 7644 __M); 7645 } 7646 7647 static __inline__ void __DEFAULT_FN_ATTRS 7648 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 7649 { 7650 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 7651 } 7652 7653 static __inline__ __m256i __DEFAULT_FN_ATTRS 7654 _mm512_cvtusepi64_epi32 (__m512i __A) 7655 { 7656 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 7657 (__v8si) _mm256_undefined_si256 (), 7658 (__mmask8) -1); 7659 } 7660 7661 static __inline__ __m256i __DEFAULT_FN_ATTRS 7662 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 7663 { 7664 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 7665 (__v8si) __O, __M); 7666 } 7667 7668 static __inline__ __m256i __DEFAULT_FN_ATTRS 7669 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A) 7670 { 7671 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 7672 (__v8si) _mm256_setzero_si256 (), 7673 __M); 7674 } 7675 7676 static __inline__ void __DEFAULT_FN_ATTRS 7677 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) 7678 { 7679 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M); 7680 } 7681 7682 static __inline__ __m128i __DEFAULT_FN_ATTRS 7683 _mm512_cvtusepi64_epi16 (__m512i __A) 7684 { 7685 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 7686 (__v8hi) _mm_undefined_si128 (), 7687 (__mmask8) -1); 7688 } 7689 7690 static __inline__ __m128i __DEFAULT_FN_ATTRS 7691 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 7692 { 7693 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 7694 (__v8hi) __O, __M); 7695 } 7696 7697 static __inline__ __m128i __DEFAULT_FN_ATTRS 7698 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A) 7699 { 7700 return (__m128i) 
__builtin_ia32_pmovusqw512_mask ((__v8di) __A, 7701 (__v8hi) _mm_setzero_si128 (), 7702 __M); 7703 } 7704 7705 static __inline__ void __DEFAULT_FN_ATTRS 7706 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) 7707 { 7708 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M); 7709 } 7710 7711 static __inline__ __m128i __DEFAULT_FN_ATTRS 7712 _mm512_cvtepi32_epi8 (__m512i __A) 7713 { 7714 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 7715 (__v16qi) _mm_undefined_si128 (), 7716 (__mmask16) -1); 7717 } 7718 7719 static __inline__ __m128i __DEFAULT_FN_ATTRS 7720 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 7721 { 7722 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 7723 (__v16qi) __O, __M); 7724 } 7725 7726 static __inline__ __m128i __DEFAULT_FN_ATTRS 7727 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A) 7728 { 7729 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 7730 (__v16qi) _mm_setzero_si128 (), 7731 __M); 7732 } 7733 7734 static __inline__ void __DEFAULT_FN_ATTRS 7735 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 7736 { 7737 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 7738 } 7739 7740 static __inline__ __m256i __DEFAULT_FN_ATTRS 7741 _mm512_cvtepi32_epi16 (__m512i __A) 7742 { 7743 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 7744 (__v16hi) _mm256_undefined_si256 (), 7745 (__mmask16) -1); 7746 } 7747 7748 static __inline__ __m256i __DEFAULT_FN_ATTRS 7749 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 7750 { 7751 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 7752 (__v16hi) __O, __M); 7753 } 7754 7755 static __inline__ __m256i __DEFAULT_FN_ATTRS 7756 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A) 7757 { 7758 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 7759 (__v16hi) _mm256_setzero_si256 (), 7760 __M); 7761 } 7762 
7763 static __inline__ void __DEFAULT_FN_ATTRS 7764 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A) 7765 { 7766 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M); 7767 } 7768 7769 static __inline__ __m128i __DEFAULT_FN_ATTRS 7770 _mm512_cvtepi64_epi8 (__m512i __A) 7771 { 7772 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 7773 (__v16qi) _mm_undefined_si128 (), 7774 (__mmask8) -1); 7775 } 7776 7777 static __inline__ __m128i __DEFAULT_FN_ATTRS 7778 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 7779 { 7780 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 7781 (__v16qi) __O, __M); 7782 } 7783 7784 static __inline__ __m128i __DEFAULT_FN_ATTRS 7785 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A) 7786 { 7787 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 7788 (__v16qi) _mm_setzero_si128 (), 7789 __M); 7790 } 7791 7792 static __inline__ void __DEFAULT_FN_ATTRS 7793 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 7794 { 7795 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 7796 } 7797 7798 static __inline__ __m256i __DEFAULT_FN_ATTRS 7799 _mm512_cvtepi64_epi32 (__m512i __A) 7800 { 7801 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 7802 (__v8si) _mm256_undefined_si256 (), 7803 (__mmask8) -1); 7804 } 7805 7806 static __inline__ __m256i __DEFAULT_FN_ATTRS 7807 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 7808 { 7809 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 7810 (__v8si) __O, __M); 7811 } 7812 7813 static __inline__ __m256i __DEFAULT_FN_ATTRS 7814 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A) 7815 { 7816 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 7817 (__v8si) _mm256_setzero_si256 (), 7818 __M); 7819 } 7820 7821 static __inline__ void __DEFAULT_FN_ATTRS 7822 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i 
__A) 7823 { 7824 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 7825 } 7826 7827 static __inline__ __m128i __DEFAULT_FN_ATTRS 7828 _mm512_cvtepi64_epi16 (__m512i __A) 7829 { 7830 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 7831 (__v8hi) _mm_undefined_si128 (), 7832 (__mmask8) -1); 7833 } 7834 7835 static __inline__ __m128i __DEFAULT_FN_ATTRS 7836 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 7837 { 7838 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 7839 (__v8hi) __O, __M); 7840 } 7841 7842 static __inline__ __m128i __DEFAULT_FN_ATTRS 7843 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A) 7844 { 7845 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 7846 (__v8hi) _mm_setzero_si128 (), 7847 __M); 7848 } 7849 7850 static __inline__ void __DEFAULT_FN_ATTRS 7851 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) 7852 { 7853 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); 7854 } 7855 7856 #define _mm512_extracti32x4_epi32(A, imm) __extension__ ({ \ 7857 (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A), \ 7858 (__v16si)_mm512_undefined_epi32(), \ 7859 0 + ((imm) & 0x3) * 4, \ 7860 1 + ((imm) & 0x3) * 4, \ 7861 2 + ((imm) & 0x3) * 4, \ 7862 3 + ((imm) & 0x3) * 4); }) 7863 7864 #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \ 7865 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 7866 (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \ 7867 (__v4si)(W)); }) 7868 7869 #define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \ 7870 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 7871 (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \ 7872 (__v4si)_mm_setzero_si128()); }) 7873 7874 #define _mm512_extracti64x4_epi64(A, imm) __extension__ ({ \ 7875 (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A), \ 7876 (__v8di)_mm512_undefined_epi32(), \ 7877 ((imm) & 1) ? 4 : 0, \ 7878 ((imm) & 1) ? 
5 : 1, \ 7879 ((imm) & 1) ? 6 : 2, \ 7880 ((imm) & 1) ? 7 : 3); }) 7881 7882 #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \ 7883 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 7884 (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \ 7885 (__v4di)(W)); }) 7886 7887 #define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \ 7888 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 7889 (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \ 7890 (__v4di)_mm256_setzero_si256()); }) 7891 7892 #define _mm512_insertf64x4(A, B, imm) __extension__ ({ \ 7893 (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ 7894 (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \ 7895 ((imm) & 0x1) ? 0 : 8, \ 7896 ((imm) & 0x1) ? 1 : 9, \ 7897 ((imm) & 0x1) ? 2 : 10, \ 7898 ((imm) & 0x1) ? 3 : 11, \ 7899 ((imm) & 0x1) ? 8 : 4, \ 7900 ((imm) & 0x1) ? 9 : 5, \ 7901 ((imm) & 0x1) ? 10 : 6, \ 7902 ((imm) & 0x1) ? 11 : 7); }) 7903 7904 #define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \ 7905 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 7906 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ 7907 (__v8df)(W)); }) 7908 7909 #define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \ 7910 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 7911 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ 7912 (__v8df)_mm512_setzero_pd()); }) 7913 7914 #define _mm512_inserti64x4(A, B, imm) __extension__ ({ \ 7915 (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \ 7916 (__v8di)_mm512_castsi256_si512((__m256i)(B)), \ 7917 ((imm) & 0x1) ? 0 : 8, \ 7918 ((imm) & 0x1) ? 1 : 9, \ 7919 ((imm) & 0x1) ? 2 : 10, \ 7920 ((imm) & 0x1) ? 3 : 11, \ 7921 ((imm) & 0x1) ? 8 : 4, \ 7922 ((imm) & 0x1) ? 9 : 5, \ 7923 ((imm) & 0x1) ? 10 : 6, \ 7924 ((imm) & 0x1) ? 
11 : 7); })

/* Masked forms of VINSERTI64X4: insert B into the 256-bit lane of A selected
   by imm, then blend the result with W under writemask U (mask form) or with
   zero (maskz form). */
#define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                 (__v8di)(W)); })

#define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()); })

/* VINSERTF32X4: replace the 128-bit lane of A selected by imm[1:0] with B.
   B is widened to 512 bits so the shuffle can address its elements as
   indices 16..19; every other lane keeps A's original elements 0..15. */
#define _mm512_insertf32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)_mm512_castps128_ps512((__m128)(B)),\
                                  (((imm) & 0x3) == 0) ? 16 : 0, \
                                  (((imm) & 0x3) == 0) ? 17 : 1, \
                                  (((imm) & 0x3) == 0) ? 18 : 2, \
                                  (((imm) & 0x3) == 0) ? 19 : 3, \
                                  (((imm) & 0x3) == 1) ? 16 : 4, \
                                  (((imm) & 0x3) == 1) ? 17 : 5, \
                                  (((imm) & 0x3) == 1) ? 18 : 6, \
                                  (((imm) & 0x3) == 1) ? 19 : 7, \
                                  (((imm) & 0x3) == 2) ? 16 : 8, \
                                  (((imm) & 0x3) == 2) ? 17 : 9, \
                                  (((imm) & 0x3) == 2) ? 18 : 10, \
                                  (((imm) & 0x3) == 2) ? 19 : 11, \
                                  (((imm) & 0x3) == 3) ? 16 : 12, \
                                  (((imm) & 0x3) == 3) ? 17 : 13, \
                                  (((imm) & 0x3) == 3) ? 18 : 14, \
                                  (((imm) & 0x3) == 3) ? 19 : 15); })

#define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                (__v16sf)(W)); })

#define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                (__v16sf)_mm512_setzero_ps()); })

/* VINSERTI32X4: same lane-replacement pattern as _mm512_insertf32x4, for
   32-bit integer elements. */
#define _mm512_inserti32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                   (__v16si)_mm512_castsi128_si512((__m128i)(B)),\
                                   (((imm) & 0x3) == 0) ? 16 : 0, \
                                   (((imm) & 0x3) == 0) ? 17 : 1, \
                                   (((imm) & 0x3) == 0) ? 18 : 2, \
                                   (((imm) & 0x3) == 0) ? 19 : 3, \
                                   (((imm) & 0x3) == 1) ? 16 : 4, \
                                   (((imm) & 0x3) == 1) ? 17 : 5, \
                                   (((imm) & 0x3) == 1) ? 18 : 6, \
                                   (((imm) & 0x3) == 1) ? 19 : 7, \
                                   (((imm) & 0x3) == 2) ? 16 : 8, \
                                   (((imm) & 0x3) == 2) ? 17 : 9, \
                                   (((imm) & 0x3) == 2) ? 18 : 10, \
                                   (((imm) & 0x3) == 2) ? 19 : 11, \
                                   (((imm) & 0x3) == 3) ? 16 : 12, \
                                   (((imm) & 0x3) == 3) ? 17 : 13, \
                                   (((imm) & 0x3) == 3) ? 18 : 14, \
                                   (((imm) & 0x3) == 3) ? 19 : 15); })

#define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                (__v16si)(W)); })

#define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()); })

/* VGETMANTPD: extract the normalized mantissa of each double element.
   B selects the normalization interval, C the sign control; the builtin
   packs them as (C << 2) | B.  The _round_ forms take an explicit
   rounding/SAE mode R. */
#define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })

/* NOTE(review): this unmasked form passes setzero as the pass-through while
   the _round_ form above passes undefined; harmless since the mask is
   all-ones, but inconsistent. */
#define _mm512_getmant_pd(A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })

/* VGETMANTPS: single-precision variants of the above. */
#define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_getmant_ps(A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

/* VGETEXPPD: extract the biased exponent of each double element as a
   double, with explicit rounding/SAE mode R. */
#define _mm512_getexp_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })

/* Non-round VGETEXPPD forms: current rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_getexp_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) _mm512_undefined_pd (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) _mm512_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* VGETEXPPS with explicit rounding/SAE mode R. */
#define _mm512_getexp_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ \
({ \ 8127 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8128 (__v16sf)_mm512_setzero_ps(), \ 8129 (__mmask16)(U), (int)(R)); }) 8130 8131 static __inline__ __m512 __DEFAULT_FN_ATTRS 8132 _mm512_getexp_ps (__m512 __A) 8133 { 8134 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8135 (__v16sf) _mm512_undefined_ps (), 8136 (__mmask16) -1, 8137 _MM_FROUND_CUR_DIRECTION); 8138 } 8139 8140 static __inline__ __m512 __DEFAULT_FN_ATTRS 8141 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A) 8142 { 8143 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8144 (__v16sf) __W, 8145 (__mmask16) __U, 8146 _MM_FROUND_CUR_DIRECTION); 8147 } 8148 8149 static __inline__ __m512 __DEFAULT_FN_ATTRS 8150 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) 8151 { 8152 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8153 (__v16sf) _mm512_setzero_ps (), 8154 (__mmask16) __U, 8155 _MM_FROUND_CUR_DIRECTION); 8156 } 8157 8158 #define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \ 8159 (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \ 8160 (float const *)(addr), \ 8161 (__v8di)(__m512i)(index), (__mmask8)-1, \ 8162 (int)(scale)); }) 8163 8164 #define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__({\ 8165 (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\ 8166 (float const *)(addr), \ 8167 (__v8di)(__m512i)(index), \ 8168 (__mmask8)(mask), (int)(scale)); }) 8169 8170 #define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\ 8171 (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_ps(), \ 8172 (int const *)(addr), \ 8173 (__v8di)(__m512i)(index), \ 8174 (__mmask8)-1, (int)(scale)); }) 8175 8176 #define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8177 (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \ 8178 (int const *)(addr), \ 8179 (__v8di)(__m512i)(index), \ 8180 (__mmask8)(mask), 
(int)(scale)); }) 8181 8182 #define _mm512_i64gather_pd(index, addr, scale) __extension__ ({\ 8183 (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \ 8184 (double const *)(addr), \ 8185 (__v8di)(__m512i)(index), (__mmask8)-1, \ 8186 (int)(scale)); }) 8187 8188 #define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8189 (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \ 8190 (double const *)(addr), \ 8191 (__v8di)(__m512i)(index), \ 8192 (__mmask8)(mask), (int)(scale)); }) 8193 8194 #define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\ 8195 (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_pd(), \ 8196 (long long const *)(addr), \ 8197 (__v8di)(__m512i)(index), (__mmask8)-1, \ 8198 (int)(scale)); }) 8199 8200 #define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8201 (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \ 8202 (long long const *)(addr), \ 8203 (__v8di)(__m512i)(index), \ 8204 (__mmask8)(mask), (int)(scale)); }) 8205 8206 #define _mm512_i32gather_ps(index, addr, scale) __extension__ ({\ 8207 (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \ 8208 (float const *)(addr), \ 8209 (__v16sf)(__m512)(index), \ 8210 (__mmask16)-1, (int)(scale)); }) 8211 8212 #define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8213 (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \ 8214 (float const *)(addr), \ 8215 (__v16sf)(__m512)(index), \ 8216 (__mmask16)(mask), (int)(scale)); }) 8217 8218 #define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({\ 8219 (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \ 8220 (int const *)(addr), \ 8221 (__v16si)(__m512i)(index), \ 8222 (__mmask16)-1, (int)(scale)); }) 8223 8224 #define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8225 
(__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \ 8226 (int const *)(addr), \ 8227 (__v16si)(__m512i)(index), \ 8228 (__mmask16)(mask), (int)(scale)); }) 8229 8230 #define _mm512_i32gather_pd(index, addr, scale) __extension__ ({\ 8231 (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \ 8232 (double const *)(addr), \ 8233 (__v8si)(__m256i)(index), (__mmask8)-1, \ 8234 (int)(scale)); }) 8235 8236 #define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8237 (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \ 8238 (double const *)(addr), \ 8239 (__v8si)(__m256i)(index), \ 8240 (__mmask8)(mask), (int)(scale)); }) 8241 8242 #define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({\ 8243 (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \ 8244 (long long const *)(addr), \ 8245 (__v8si)(__m256i)(index), (__mmask8)-1, \ 8246 (int)(scale)); }) 8247 8248 #define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8249 (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \ 8250 (long long const *)(addr), \ 8251 (__v8si)(__m256i)(index), \ 8252 (__mmask8)(mask), (int)(scale)); }) 8253 8254 #define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({\ 8255 __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \ 8256 (__v8di)(__m512i)(index), \ 8257 (__v8sf)(__m256)(v1), (int)(scale)); }) 8258 8259 #define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\ 8260 __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \ 8261 (__v8di)(__m512i)(index), \ 8262 (__v8sf)(__m256)(v1), (int)(scale)); }) 8263 8264 #define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({\ 8265 __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \ 8266 (__v8di)(__m512i)(index), \ 8267 (__v8si)(__m256i)(v1), (int)(scale)); }) 8268 8269 #define _mm512_mask_i64scatter_epi32(addr, mask, index, 
v1, scale) __extension__ ({\ 8270 __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \ 8271 (__v8di)(__m512i)(index), \ 8272 (__v8si)(__m256i)(v1), (int)(scale)); }) 8273 8274 #define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({\ 8275 __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \ 8276 (__v8di)(__m512i)(index), \ 8277 (__v8df)(__m512d)(v1), (int)(scale)); }) 8278 8279 #define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\ 8280 __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \ 8281 (__v8di)(__m512i)(index), \ 8282 (__v8df)(__m512d)(v1), (int)(scale)); }) 8283 8284 #define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\ 8285 __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \ 8286 (__v8di)(__m512i)(index), \ 8287 (__v8di)(__m512i)(v1), (int)(scale)); }) 8288 8289 #define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\ 8290 __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \ 8291 (__v8di)(__m512i)(index), \ 8292 (__v8di)(__m512i)(v1), (int)(scale)); }) 8293 8294 #define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({\ 8295 __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \ 8296 (__v16si)(__m512i)(index), \ 8297 (__v16sf)(__m512)(v1), (int)(scale)); }) 8298 8299 #define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({\ 8300 __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \ 8301 (__v16si)(__m512i)(index), \ 8302 (__v16sf)(__m512)(v1), (int)(scale)); }) 8303 8304 #define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\ 8305 __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \ 8306 (__v16si)(__m512i)(index), \ 8307 (__v16si)(__m512i)(v1), (int)(scale)); }) 8308 8309 #define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\ 8310 __builtin_ia32_scattersiv16si((int *)(addr), 
(__mmask16)(mask), \ 8311 (__v16si)(__m512i)(index), \ 8312 (__v16si)(__m512i)(v1), (int)(scale)); }) 8313 8314 #define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({\ 8315 __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \ 8316 (__v8si)(__m256i)(index), \ 8317 (__v8df)(__m512d)(v1), (int)(scale)); }) 8318 8319 #define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\ 8320 __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \ 8321 (__v8si)(__m256i)(index), \ 8322 (__v8df)(__m512d)(v1), (int)(scale)); }) 8323 8324 #define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\ 8325 __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \ 8326 (__v8si)(__m256i)(index), \ 8327 (__v8di)(__m512i)(v1), (int)(scale)); }) 8328 8329 #define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\ 8330 __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \ 8331 (__v8si)(__m256i)(index), \ 8332 (__v8di)(__m512i)(v1), (int)(scale)); }) 8333 8334 static __inline__ __m128 __DEFAULT_FN_ATTRS 8335 _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8336 { 8337 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 8338 (__v4sf) __A, 8339 (__v4sf) __B, 8340 (__mmask8) __U, 8341 _MM_FROUND_CUR_DIRECTION); 8342 } 8343 8344 #define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\ 8345 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 8346 (__v4sf)(__m128)(A), \ 8347 (__v4sf)(__m128)(B), (__mmask8)(U), \ 8348 (int)(R)); }) 8349 8350 static __inline__ __m128 __DEFAULT_FN_ATTRS 8351 _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 8352 { 8353 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A, 8354 (__v4sf) __B, 8355 (__v4sf) __C, 8356 (__mmask8) __U, 8357 _MM_FROUND_CUR_DIRECTION); 8358 } 8359 8360 #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\ 8361 
(__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 8362 (__v4sf)(__m128)(B), \ 8363 (__v4sf)(__m128)(C), (__mmask8)(U), \ 8364 _MM_FROUND_CUR_DIRECTION); }) 8365 8366 static __inline__ __m128 __DEFAULT_FN_ATTRS 8367 _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 8368 { 8369 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, 8370 (__v4sf) __X, 8371 (__v4sf) __Y, 8372 (__mmask8) __U, 8373 _MM_FROUND_CUR_DIRECTION); 8374 } 8375 8376 #define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\ 8377 (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ 8378 (__v4sf)(__m128)(X), \ 8379 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8380 (int)(R)); }) 8381 8382 static __inline__ __m128 __DEFAULT_FN_ATTRS 8383 _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8384 { 8385 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 8386 (__v4sf) __A, 8387 -(__v4sf) __B, 8388 (__mmask8) __U, 8389 _MM_FROUND_CUR_DIRECTION); 8390 } 8391 8392 #define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\ 8393 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 8394 (__v4sf)(__m128)(A), \ 8395 (__v4sf)(__m128)(B), (__mmask8)(U), \ 8396 (int)(R)); }) 8397 8398 static __inline__ __m128 __DEFAULT_FN_ATTRS 8399 _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 8400 { 8401 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A, 8402 (__v4sf) __B, 8403 -(__v4sf) __C, 8404 (__mmask8) __U, 8405 _MM_FROUND_CUR_DIRECTION); 8406 } 8407 8408 #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\ 8409 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 8410 (__v4sf)(__m128)(B), \ 8411 -(__v4sf)(__m128)(C), (__mmask8)(U), \ 8412 (int)(R)); }) 8413 8414 static __inline__ __m128 __DEFAULT_FN_ATTRS 8415 _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 8416 { 8417 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W, 8418 (__v4sf) __X, 8419 
(__v4sf) __Y, 8420 (__mmask8) __U, 8421 _MM_FROUND_CUR_DIRECTION); 8422 } 8423 8424 #define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\ 8425 (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ 8426 (__v4sf)(__m128)(X), \ 8427 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8428 (int)(R)); }) 8429 8430 static __inline__ __m128 __DEFAULT_FN_ATTRS 8431 _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8432 { 8433 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 8434 -(__v4sf) __A, 8435 (__v4sf) __B, 8436 (__mmask8) __U, 8437 _MM_FROUND_CUR_DIRECTION); 8438 } 8439 8440 #define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\ 8441 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 8442 -(__v4sf)(__m128)(A), \ 8443 (__v4sf)(__m128)(B), (__mmask8)(U), \ 8444 (int)(R)); }) 8445 8446 static __inline__ __m128 __DEFAULT_FN_ATTRS 8447 _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 8448 { 8449 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A, 8450 (__v4sf) __B, 8451 (__v4sf) __C, 8452 (__mmask8) __U, 8453 _MM_FROUND_CUR_DIRECTION); 8454 } 8455 8456 #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\ 8457 (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \ 8458 (__v4sf)(__m128)(B), \ 8459 (__v4sf)(__m128)(C), (__mmask8)(U), \ 8460 (int)(R)); }) 8461 8462 static __inline__ __m128 __DEFAULT_FN_ATTRS 8463 _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 8464 { 8465 return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W, 8466 (__v4sf) __X, 8467 (__v4sf) __Y, 8468 (__mmask8) __U, 8469 _MM_FROUND_CUR_DIRECTION); 8470 } 8471 8472 #define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\ 8473 (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \ 8474 (__v4sf)(__m128)(X), \ 8475 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8476 (int)(R)); }) 8477 8478 static __inline__ __m128 __DEFAULT_FN_ATTRS 8479 _mm_mask_fnmsub_ss 
(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8480 { 8481 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 8482 -(__v4sf) __A, 8483 -(__v4sf) __B, 8484 (__mmask8) __U, 8485 _MM_FROUND_CUR_DIRECTION); 8486 } 8487 8488 #define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\ 8489 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 8490 -(__v4sf)(__m128)(A), \ 8491 -(__v4sf)(__m128)(B), (__mmask8)(U), \ 8492 (int)(R)); }) 8493 8494 static __inline__ __m128 __DEFAULT_FN_ATTRS 8495 _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 8496 { 8497 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A, 8498 (__v4sf) __B, 8499 -(__v4sf) __C, 8500 (__mmask8) __U, 8501 _MM_FROUND_CUR_DIRECTION); 8502 } 8503 8504 #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\ 8505 (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \ 8506 (__v4sf)(__m128)(B), \ 8507 -(__v4sf)(__m128)(C), (__mmask8)(U), \ 8508 _MM_FROUND_CUR_DIRECTION); }) 8509 8510 static __inline__ __m128 __DEFAULT_FN_ATTRS 8511 _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 8512 { 8513 return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W, 8514 (__v4sf) __X, 8515 (__v4sf) __Y, 8516 (__mmask8) __U, 8517 _MM_FROUND_CUR_DIRECTION); 8518 } 8519 8520 #define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\ 8521 (__m128)__builtin_ia32_vfnmsubss3_mask3((__v4sf)(__m128)(W), \ 8522 (__v4sf)(__m128)(X), \ 8523 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8524 (int)(R)); }) 8525 8526 static __inline__ __m128d __DEFAULT_FN_ATTRS 8527 _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8528 { 8529 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W, 8530 (__v2df) __A, 8531 (__v2df) __B, 8532 (__mmask8) __U, 8533 _MM_FROUND_CUR_DIRECTION); 8534 } 8535 8536 #define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\ 8537 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 
8538 (__v2df)(__m128d)(A), \ 8539 (__v2df)(__m128d)(B), (__mmask8)(U), \ 8540 (int)(R)); }) 8541 8542 static __inline__ __m128d __DEFAULT_FN_ATTRS 8543 _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8544 { 8545 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A, 8546 (__v2df) __B, 8547 (__v2df) __C, 8548 (__mmask8) __U, 8549 _MM_FROUND_CUR_DIRECTION); 8550 } 8551 8552 #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\ 8553 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8554 (__v2df)(__m128d)(B), \ 8555 (__v2df)(__m128d)(C), (__mmask8)(U), \ 8556 _MM_FROUND_CUR_DIRECTION); }) 8557 8558 static __inline__ __m128d __DEFAULT_FN_ATTRS 8559 _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8560 { 8561 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, 8562 (__v2df) __X, 8563 (__v2df) __Y, 8564 (__mmask8) __U, 8565 _MM_FROUND_CUR_DIRECTION); 8566 } 8567 8568 #define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\ 8569 (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ 8570 (__v2df)(__m128d)(X), \ 8571 (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8572 (int)(R)); }) 8573 8574 static __inline__ __m128d __DEFAULT_FN_ATTRS 8575 _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8576 { 8577 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W, 8578 (__v2df) __A, 8579 -(__v2df) __B, 8580 (__mmask8) __U, 8581 _MM_FROUND_CUR_DIRECTION); 8582 } 8583 8584 #define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\ 8585 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8586 (__v2df)(__m128d)(A), \ 8587 -(__v2df)(__m128d)(B), (__mmask8)(U), \ 8588 (int)(R)); }) 8589 8590 static __inline__ __m128d __DEFAULT_FN_ATTRS 8591 _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8592 { 8593 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A, 8594 (__v2df) __B, 8595 -(__v2df) __C, 8596 
(__mmask8) __U, 8597 _MM_FROUND_CUR_DIRECTION); 8598 } 8599 8600 #define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\ 8601 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8602 (__v2df)(__m128d)(B), \ 8603 -(__v2df)(__m128d)(C), \ 8604 (__mmask8)(U), (int)(R)); }) 8605 8606 static __inline__ __m128d __DEFAULT_FN_ATTRS 8607 _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8608 { 8609 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W, 8610 (__v2df) __X, 8611 (__v2df) __Y, 8612 (__mmask8) __U, 8613 _MM_FROUND_CUR_DIRECTION); 8614 } 8615 8616 #define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\ 8617 (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ 8618 (__v2df)(__m128d)(X), \ 8619 (__v2df)(__m128d)(Y), \ 8620 (__mmask8)(U), (int)(R)); }) 8621 8622 static __inline__ __m128d __DEFAULT_FN_ATTRS 8623 _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8624 { 8625 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W, 8626 -(__v2df) __A, 8627 (__v2df) __B, 8628 (__mmask8) __U, 8629 _MM_FROUND_CUR_DIRECTION); 8630 } 8631 8632 #define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\ 8633 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8634 -(__v2df)(__m128d)(A), \ 8635 (__v2df)(__m128d)(B), (__mmask8)(U), \ 8636 (int)(R)); }) 8637 8638 static __inline__ __m128d __DEFAULT_FN_ATTRS 8639 _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8640 { 8641 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A, 8642 (__v2df) __B, 8643 (__v2df) __C, 8644 (__mmask8) __U, 8645 _MM_FROUND_CUR_DIRECTION); 8646 } 8647 8648 #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\ 8649 (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \ 8650 (__v2df)(__m128d)(B), \ 8651 (__v2df)(__m128d)(C), (__mmask8)(U), \ 8652 (int)(R)); }) 8653 8654 static __inline__ __m128d __DEFAULT_FN_ATTRS 8655 
_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8656 { 8657 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W, 8658 (__v2df) __X, 8659 (__v2df) __Y, 8660 (__mmask8) __U, 8661 _MM_FROUND_CUR_DIRECTION); 8662 } 8663 8664 #define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\ 8665 (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \ 8666 (__v2df)(__m128d)(X), \ 8667 (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8668 (int)(R)); }) 8669 8670 static __inline__ __m128d __DEFAULT_FN_ATTRS 8671 _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8672 { 8673 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W, 8674 -(__v2df) __A, 8675 -(__v2df) __B, 8676 (__mmask8) __U, 8677 _MM_FROUND_CUR_DIRECTION); 8678 } 8679 8680 #define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\ 8681 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8682 -(__v2df)(__m128d)(A), \ 8683 -(__v2df)(__m128d)(B), (__mmask8)(U), \ 8684 (int)(R)); }) 8685 8686 static __inline__ __m128d __DEFAULT_FN_ATTRS 8687 _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8688 { 8689 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A, 8690 (__v2df) __B, 8691 -(__v2df) __C, 8692 (__mmask8) __U, 8693 _MM_FROUND_CUR_DIRECTION); 8694 } 8695 8696 #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\ 8697 (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \ 8698 (__v2df)(__m128d)(B), \ 8699 -(__v2df)(__m128d)(C), \ 8700 (__mmask8)(U), \ 8701 _MM_FROUND_CUR_DIRECTION); }) 8702 8703 static __inline__ __m128d __DEFAULT_FN_ATTRS 8704 _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8705 { 8706 return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W), 8707 (__v2df) __X, 8708 (__v2df) (__Y), 8709 (__mmask8) __U, 8710 _MM_FROUND_CUR_DIRECTION); 8711 } 8712 8713 #define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\ 8714 
  (__m128d)__builtin_ia32_vfnmsubsd3_mask3((__v2df)(__m128d)(W), \
                                           (__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), \
                                           (__mmask8)(U), (int)(R)); })

/* VPERMPD with immediate control: C holds four 2-bit selectors that are
   applied independently within each 256-bit half of X. */
#define _mm512_permutex_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   0 + (((C) >> 0) & 0x3), \
                                   0 + (((C) >> 2) & 0x3), \
                                   0 + (((C) >> 4) & 0x3), \
                                   0 + (((C) >> 6) & 0x3), \
                                   4 + (((C) >> 0) & 0x3), \
                                   4 + (((C) >> 2) & 0x3), \
                                   4 + (((C) >> 4) & 0x3), \
                                   4 + (((C) >> 6) & 0x3)); })

#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)(__m512d)(W)); })

#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)_mm512_setzero_pd()); })

/* VPERMQ with immediate control: same per-256-bit-half selector pattern. */
#define _mm512_permutex_epi64(X, C) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \
                                   (__v8di)_mm512_undefined_epi32(), \
                                   0 + (((C) >> 0) & 0x3), \
                                   0 + (((C) >> 2) & 0x3), \
                                   0 + (((C) >> 4) & 0x3), \
                                   0 + (((C) >> 6) & 0x3), \
                                   4 + (((C) >> 0) & 0x3), \
                                   4 + (((C) >> 2) & 0x3), \
                                   4 + (((C) >> 4) & 0x3), \
                                   4 + (((C) >> 6) & 0x3)); })

#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)(__m512i)(W)); })

#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)_mm512_setzero_si512()); })

/* VPERMPD with a variable index vector: result[i] = Y[X[i]].  Note the
   builtin takes the data vector (Y) first and the index vector (X) second. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
                                                     (__v8di) __X,
                                                     (__v8df) _mm512_undefined_pd (),
                                                     (__mmask8) -1);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
                                                     (__v8di) __X,
                                                     (__v8df) __W,
                                                     (__mmask8) __U);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
                                                     (__v8di) __X,
                                                     (__v8df) _mm512_setzero_pd (),
                                                     (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
                                                     (__v8di) __X,
                                                     (__v8di) _mm512_setzero_si512 (),
                                                     __M);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
                                                     (__v8di) __X,
                                                     (__v8di) _mm512_undefined_epi32 (),
                                                     (__mmask8) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
                               __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
                                                     (__v8di) __X,
                                                     (__v8di) __W,
                                                     __M);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
                                                    (__v16si) __X,
                                                    (__v16sf) _mm512_undefined_ps (),
                                                    (__mmask16) -1);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
                                                    (__v16si) __X,
                                                    (__v16sf) __W,
                                                    (__mmask16) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
                                                    (__v16si) __X,
                                                    (__v16sf) _mm512_setzero_ps (),
                                                    (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
                                                     (__v16si) __X,
                                                     (__v16si) _mm512_setzero_si512 (),
                                                     __M);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
                                                     (__v16si) __X,
                                                     (__v16si) _mm512_undefined_epi32 (),
                                                     (__mmask16) -1);
}

/* Legacy alias kept for source compatibility. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
                               __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
                                                     (__v16si) __X,
                                                     (__v16si) __W,
                                                     __M);
}

/* Legacy alias kept for source compatibility. */
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32

/* 16-bit opmask register logic operations (KANDW/KANDNW/KORW/...). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kand (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kandn (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
}

/* KORTESTW carry flag: 1 iff A | B is all-ones. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestc (__mmask16 __A, __mmask16 __B)
{
  return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
}

/* KORTESTW zero flag: 1 iff A | B is zero. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestz (__mmask16 __A, __mmask16 __B)
{
  return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxnor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
}

/* Non-temporal 64-byte stores/load; __P must be 64-byte aligned. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_si512 (__m512i * __P, __m512i __A)
{
  __builtin_nontemporal_store((__v8di)__A, (__v8di*)__P);
}

/* NOTE(review): Intel documents this intrinsic as taking a const pointer;
   changing the signature here would require the builtin's prototype to
   accept const as well — confirm before adjusting. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_stream_load_si512 (void *__P)
{
  return __builtin_ia32_movntdqa512 ((__v8di *)__P);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_pd (double *__P, __m512d __A)
{
  __builtin_nontemporal_store((__v8df)__A, (__v8df*)__P);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_ps (float *__P, __m512 __A)
{
  __builtin_nontemporal_store((__v16sf)__A, (__v16sf*)__P);
}

/* VCOMPRESSPD: pack the active (mask-selected) elements of A contiguously
   into the low elements of the result; remaining elements come from W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
                                                      (__v8df) __W,
                                                      (__mmask8) __U);
}

static __inline__
__m512d __DEFAULT_FN_ATTRS 8958 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) 8959 { 8960 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, 8961 (__v8df) 8962 _mm512_setzero_pd (), 8963 (__mmask8) __U); 8964 } 8965 8966 static __inline__ __m512i __DEFAULT_FN_ATTRS 8967 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 8968 { 8969 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 8970 (__v8di) __W, 8971 (__mmask8) __U); 8972 } 8973 8974 static __inline__ __m512i __DEFAULT_FN_ATTRS 8975 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) 8976 { 8977 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 8978 (__v8di) 8979 _mm512_setzero_si512 (), 8980 (__mmask8) __U); 8981 } 8982 8983 static __inline__ __m512 __DEFAULT_FN_ATTRS 8984 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) 8985 { 8986 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 8987 (__v16sf) __W, 8988 (__mmask16) __U); 8989 } 8990 8991 static __inline__ __m512 __DEFAULT_FN_ATTRS 8992 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) 8993 { 8994 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 8995 (__v16sf) 8996 _mm512_setzero_ps (), 8997 (__mmask16) __U); 8998 } 8999 9000 static __inline__ __m512i __DEFAULT_FN_ATTRS 9001 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 9002 { 9003 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 9004 (__v16si) __W, 9005 (__mmask16) __U); 9006 } 9007 9008 static __inline__ __m512i __DEFAULT_FN_ATTRS 9009 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) 9010 { 9011 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 9012 (__v16si) 9013 _mm512_setzero_si512 (), 9014 (__mmask16) __U); 9015 } 9016 9017 #define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \ 9018 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 9019 (__v4sf)(__m128)(Y), (int)(P), \ 9020 (__mmask8)-1, (int)(R)); 
}) 9021 9022 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \ 9023 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 9024 (__v4sf)(__m128)(Y), (int)(P), \ 9025 (__mmask8)(M), (int)(R)); }) 9026 9027 #define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \ 9028 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 9029 (__v4sf)(__m128)(Y), (int)(P), \ 9030 (__mmask8)-1, \ 9031 _MM_FROUND_CUR_DIRECTION); }) 9032 9033 #define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \ 9034 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 9035 (__v4sf)(__m128)(Y), (int)(P), \ 9036 (__mmask8)(M), \ 9037 _MM_FROUND_CUR_DIRECTION); }) 9038 9039 #define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \ 9040 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 9041 (__v2df)(__m128d)(Y), (int)(P), \ 9042 (__mmask8)-1, (int)(R)); }) 9043 9044 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \ 9045 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 9046 (__v2df)(__m128d)(Y), (int)(P), \ 9047 (__mmask8)(M), (int)(R)); }) 9048 9049 #define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \ 9050 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 9051 (__v2df)(__m128d)(Y), (int)(P), \ 9052 (__mmask8)-1, \ 9053 _MM_FROUND_CUR_DIRECTION); }) 9054 9055 #define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \ 9056 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 9057 (__v2df)(__m128d)(Y), (int)(P), \ 9058 (__mmask8)(M), \ 9059 _MM_FROUND_CUR_DIRECTION); }) 9060 9061 static __inline__ __m512 __DEFAULT_FN_ATTRS 9062 _mm512_movehdup_ps (__m512 __A) 9063 { 9064 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, 9065 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15); 9066 } 9067 9068 static __inline__ __m512 __DEFAULT_FN_ATTRS 9069 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) 9070 { 9071 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 9072 
(__v16sf)_mm512_movehdup_ps(__A), 9073 (__v16sf)__W); 9074 } 9075 9076 static __inline__ __m512 __DEFAULT_FN_ATTRS 9077 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) 9078 { 9079 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 9080 (__v16sf)_mm512_movehdup_ps(__A), 9081 (__v16sf)_mm512_setzero_ps()); 9082 } 9083 9084 static __inline__ __m512 __DEFAULT_FN_ATTRS 9085 _mm512_moveldup_ps (__m512 __A) 9086 { 9087 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, 9088 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); 9089 } 9090 9091 static __inline__ __m512 __DEFAULT_FN_ATTRS 9092 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) 9093 { 9094 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 9095 (__v16sf)_mm512_moveldup_ps(__A), 9096 (__v16sf)__W); 9097 } 9098 9099 static __inline__ __m512 __DEFAULT_FN_ATTRS 9100 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) 9101 { 9102 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 9103 (__v16sf)_mm512_moveldup_ps(__A), 9104 (__v16sf)_mm512_setzero_ps()); 9105 } 9106 9107 static __inline__ __m128 __DEFAULT_FN_ATTRS 9108 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 9109 { 9110 __m128 res = __A; 9111 res[0] = (__U & 1) ? __B[0] : __W[0]; 9112 return res; 9113 } 9114 9115 static __inline__ __m128 __DEFAULT_FN_ATTRS 9116 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) 9117 { 9118 __m128 res = __A; 9119 res[0] = (__U & 1) ? __B[0] : 0; 9120 return res; 9121 } 9122 9123 static __inline__ __m128d __DEFAULT_FN_ATTRS 9124 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 9125 { 9126 __m128d res = __A; 9127 res[0] = (__U & 1) ? __B[0] : __W[0]; 9128 return res; 9129 } 9130 9131 static __inline__ __m128d __DEFAULT_FN_ATTRS 9132 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) 9133 { 9134 __m128d res = __A; 9135 res[0] = (__U & 1) ? 
__B[0] : 0; 9136 return res; 9137 } 9138 9139 static __inline__ void __DEFAULT_FN_ATTRS 9140 _mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A) 9141 { 9142 __builtin_ia32_storess128_mask ((__v16sf *)__W, 9143 (__v16sf) _mm512_castps128_ps512(__A), 9144 (__mmask16) __U & (__mmask16)1); 9145 } 9146 9147 static __inline__ void __DEFAULT_FN_ATTRS 9148 _mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A) 9149 { 9150 __builtin_ia32_storesd128_mask ((__v8df *)__W, 9151 (__v8df) _mm512_castpd128_pd512(__A), 9152 (__mmask8) __U & 1); 9153 } 9154 9155 static __inline__ __m128 __DEFAULT_FN_ATTRS 9156 _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A) 9157 { 9158 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W, 9159 (__v4sf) {0.0, 0.0, 0.0, 0.0}, 9160 0, 4, 4, 4); 9161 9162 return (__m128) __builtin_shufflevector( 9163 __builtin_ia32_loadss128_mask ((__v16sf *) __A, 9164 (__v16sf) _mm512_castps128_ps512(src), 9165 (__mmask16) __U & 1), 9166 _mm512_undefined_ps(), 0, 1, 2, 3); 9167 } 9168 9169 static __inline__ __m128 __DEFAULT_FN_ATTRS 9170 _mm_maskz_load_ss (__mmask8 __U, const float* __A) 9171 { 9172 return (__m128) __builtin_shufflevector( 9173 __builtin_ia32_loadss128_mask ((__v16sf *) __A, 9174 (__v16sf) _mm512_setzero_ps(), 9175 (__mmask16) __U & 1), 9176 _mm512_undefined_ps(), 0, 1, 2, 3); 9177 } 9178 9179 static __inline__ __m128d __DEFAULT_FN_ATTRS 9180 _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A) 9181 { 9182 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W, 9183 (__v2df) {0.0, 0.0}, 0, 2); 9184 9185 return (__m128d) __builtin_shufflevector( 9186 __builtin_ia32_loadsd128_mask ((__v8df *) __A, 9187 (__v8df) _mm512_castpd128_pd512(src), 9188 (__mmask8) __U & 1), 9189 _mm512_undefined_pd(), 0, 1); 9190 } 9191 9192 static __inline__ __m128d __DEFAULT_FN_ATTRS 9193 _mm_maskz_load_sd (__mmask8 __U, const double* __A) 9194 { 9195 return (__m128d) __builtin_shufflevector( 9196 __builtin_ia32_loadsd128_mask 
((__v8df *) __A, 9197 (__v8df) _mm512_setzero_pd(), 9198 (__mmask8) __U & 1), 9199 _mm512_undefined_pd(), 0, 1); 9200 } 9201 9202 #define _mm512_shuffle_epi32(A, I) __extension__ ({ \ 9203 (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ 9204 (__v16si)_mm512_undefined_epi32(), \ 9205 0 + (((I) >> 0) & 0x3), \ 9206 0 + (((I) >> 2) & 0x3), \ 9207 0 + (((I) >> 4) & 0x3), \ 9208 0 + (((I) >> 6) & 0x3), \ 9209 4 + (((I) >> 0) & 0x3), \ 9210 4 + (((I) >> 2) & 0x3), \ 9211 4 + (((I) >> 4) & 0x3), \ 9212 4 + (((I) >> 6) & 0x3), \ 9213 8 + (((I) >> 0) & 0x3), \ 9214 8 + (((I) >> 2) & 0x3), \ 9215 8 + (((I) >> 4) & 0x3), \ 9216 8 + (((I) >> 6) & 0x3), \ 9217 12 + (((I) >> 0) & 0x3), \ 9218 12 + (((I) >> 2) & 0x3), \ 9219 12 + (((I) >> 4) & 0x3), \ 9220 12 + (((I) >> 6) & 0x3)); }) 9221 9222 #define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \ 9223 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 9224 (__v16si)_mm512_shuffle_epi32((A), (I)), \ 9225 (__v16si)(__m512i)(W)); }) 9226 9227 #define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \ 9228 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 9229 (__v16si)_mm512_shuffle_epi32((A), (I)), \ 9230 (__v16si)_mm512_setzero_si512()); }) 9231 9232 static __inline__ __m512d __DEFAULT_FN_ATTRS 9233 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) 9234 { 9235 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, 9236 (__v8df) __W, 9237 (__mmask8) __U); 9238 } 9239 9240 static __inline__ __m512d __DEFAULT_FN_ATTRS 9241 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) 9242 { 9243 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, 9244 (__v8df) _mm512_setzero_pd (), 9245 (__mmask8) __U); 9246 } 9247 9248 static __inline__ __m512i __DEFAULT_FN_ATTRS 9249 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 9250 { 9251 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, 9252 (__v8di) __W, 9253 (__mmask8) __U); 9254 } 9255 9256 static 
__inline__ __m512i __DEFAULT_FN_ATTRS 9257 _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A) 9258 { 9259 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, 9260 (__v8di) _mm512_setzero_pd (), 9261 (__mmask8) __U); 9262 } 9263 9264 static __inline__ __m512d __DEFAULT_FN_ATTRS 9265 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P) 9266 { 9267 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, 9268 (__v8df) __W, 9269 (__mmask8) __U); 9270 } 9271 9272 static __inline__ __m512d __DEFAULT_FN_ATTRS 9273 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P) 9274 { 9275 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, 9276 (__v8df) _mm512_setzero_pd(), 9277 (__mmask8) __U); 9278 } 9279 9280 static __inline__ __m512i __DEFAULT_FN_ATTRS 9281 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P) 9282 { 9283 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, 9284 (__v8di) __W, 9285 (__mmask8) __U); 9286 } 9287 9288 static __inline__ __m512i __DEFAULT_FN_ATTRS 9289 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) 9290 { 9291 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, 9292 (__v8di) _mm512_setzero_pd(), 9293 (__mmask8) __U); 9294 } 9295 9296 static __inline__ __m512 __DEFAULT_FN_ATTRS 9297 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P) 9298 { 9299 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, 9300 (__v16sf) __W, 9301 (__mmask16) __U); 9302 } 9303 9304 static __inline__ __m512 __DEFAULT_FN_ATTRS 9305 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P) 9306 { 9307 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, 9308 (__v16sf) _mm512_setzero_ps(), 9309 (__mmask16) __U); 9310 } 9311 9312 static __inline__ __m512i __DEFAULT_FN_ATTRS 9313 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P) 9314 { 
9315 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, 9316 (__v16si) __W, 9317 (__mmask16) __U); 9318 } 9319 9320 static __inline__ __m512i __DEFAULT_FN_ATTRS 9321 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P) 9322 { 9323 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, 9324 (__v16si) _mm512_setzero_ps(), 9325 (__mmask16) __U); 9326 } 9327 9328 static __inline__ __m512 __DEFAULT_FN_ATTRS 9329 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) 9330 { 9331 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, 9332 (__v16sf) __W, 9333 (__mmask16) __U); 9334 } 9335 9336 static __inline__ __m512 __DEFAULT_FN_ATTRS 9337 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) 9338 { 9339 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, 9340 (__v16sf) _mm512_setzero_ps(), 9341 (__mmask16) __U); 9342 } 9343 9344 static __inline__ __m512i __DEFAULT_FN_ATTRS 9345 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 9346 { 9347 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, 9348 (__v16si) __W, 9349 (__mmask16) __U); 9350 } 9351 9352 static __inline__ __m512i __DEFAULT_FN_ATTRS 9353 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) 9354 { 9355 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, 9356 (__v16si) _mm512_setzero_ps(), 9357 (__mmask16) __U); 9358 } 9359 9360 #define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \ 9361 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 9362 (__v8df)_mm512_undefined_pd(), \ 9363 (__mmask8)-1, (int)(R)); }) 9364 9365 #define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \ 9366 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 9367 (__v8df)(__m512d)(W), \ 9368 (__mmask8)(U), (int)(R)); }) 9369 9370 #define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \ 9371 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 9372 
(__v8df)_mm512_setzero_pd(), \ 9373 (__mmask8)(U), (int)(R)); }) 9374 9375 static __inline__ __m512d __DEFAULT_FN_ATTRS 9376 _mm512_cvtps_pd (__m256 __A) 9377 { 9378 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 9379 (__v8df) 9380 _mm512_undefined_pd (), 9381 (__mmask8) -1, 9382 _MM_FROUND_CUR_DIRECTION); 9383 } 9384 9385 static __inline__ __m512d __DEFAULT_FN_ATTRS 9386 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) 9387 { 9388 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 9389 (__v8df) __W, 9390 (__mmask8) __U, 9391 _MM_FROUND_CUR_DIRECTION); 9392 } 9393 9394 static __inline__ __m512d __DEFAULT_FN_ATTRS 9395 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) 9396 { 9397 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 9398 (__v8df) 9399 _mm512_setzero_pd (), 9400 (__mmask8) __U, 9401 _MM_FROUND_CUR_DIRECTION); 9402 } 9403 9404 static __inline__ __m512 __DEFAULT_FN_ATTRS 9405 _mm512_cvtpslo_pd (__m512 __A) 9406 { 9407 return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A)); 9408 } 9409 9410 static __inline__ __m512 __DEFAULT_FN_ATTRS 9411 _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A) 9412 { 9413 return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A)); 9414 } 9415 9416 static __inline__ __m512d __DEFAULT_FN_ATTRS 9417 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) 9418 { 9419 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, 9420 (__v8df) __A, 9421 (__v8df) __W); 9422 } 9423 9424 static __inline__ __m512d __DEFAULT_FN_ATTRS 9425 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) 9426 { 9427 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, 9428 (__v8df) __A, 9429 (__v8df) _mm512_setzero_pd ()); 9430 } 9431 9432 static __inline__ __m512 __DEFAULT_FN_ATTRS 9433 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) 9434 { 9435 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, 9436 (__v16sf) __A, 9437 (__v16sf) 
                                               __W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
                                               (__v16sf) __A,
                                               (__v16sf) _mm512_setzero_ps ());
}

/* VCOMPRESS to memory: store only the active elements, contiguously. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
                                          (__mmask8) __U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
                                          (__mmask8) __U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
                                          (__mmask16) __U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
                                          (__mmask16) __U);
}

/* VCVTSD2SS: convert the low DP element of B to SP, upper lanes from A;
   _round_ variants take an explicit rounding control R. */
#define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_undefined_ps(), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), (int)(R)); })

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
{
  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
                                             (__v2df)(__B),
                                             (__v4sf)(__W),
                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
{
  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
                                             (__v2df)(__B),
                                             (__v4sf)_mm_setzero_ps(),
                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}

/* Intel-documented _i/_i64 spellings of the SSE scalar conversions. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif

/* Signed-integer -> scalar FP conversions with explicit rounding R. */
#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R)); })

#define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R)); })
#endif

#define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })

#define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })

#ifdef __x86_64__
#define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R)); })

#define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R)); })
#endif

/* VCVTSS2SD: convert the low SP element of B to DP, upper lane from A. */
#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_undefined_pd(), \
                                              (__mmask8)-1, (int)(R)); })

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), (int)(R)); })

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(U), (int)(R)); })

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
{
  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
                                            (__v4sf)(__B),
                                            (__v2df)(__W),
                                            (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
{
  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
                                            (__v4sf)(__B),
                                            (__v2df)_mm_setzero_pd(),
                                            (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}

/* Unsigned-integer -> scalar FP conversions (VCVTUSI2SD/VCVTUSI2SS). */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtu32_sd (__m128d __A, unsigned __B)
{
  return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
}

#ifdef __x86_64__
#define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                      (unsigned long long)(B), (int)(R)); })

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
{
  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
                                               _MM_FROUND_CUR_DIRECTION);
}
#endif

#define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                     (int)(R)); })

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtu32_ss (__m128 __A, unsigned __B)
{
  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
                                              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                     (unsigned long long)(B), (int)(R)); })

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
{
  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
                                              _MM_FROUND_CUR_DIRECTION);
}
#endif

/* Masked broadcast of a GPR value to all active elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
{
  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
                                                           __M);
}

#ifdef __x86_64__
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
{
  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
                                                           __M);
}
#endif

/* Build a 512-bit vector from scalars.  Arguments are listed from the
   most-significant element down; the initializer stores them in memory
   (little-endian element) order. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
    char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
    char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
    char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
    char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
    char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
    char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
    char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
    char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
    char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
    char __e4, char __e3, char __e2, char __e1, char __e0) {

  return __extension__ (__m512i)(__v64qi)
    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
     __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
     __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
     __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
     __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
    short __e27, short __e26, short __e25, short __e24, short __e23,
    short __e22, short __e21, short __e20, short __e19, short __e18,
    short __e17, short __e16, short __e15, short __e14, short __e13,
    short __e12, short __e11, short __e10, short __e9, short __e8,
    short __e7, short __e6, short __e5, short __e4, short __e3,
    short __e2, short __e1, short __e0) {
  return __extension__ (__m512i)(__v32hi)
    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi32 (int __A, int __B, int __C, int __D,
                  int __E, int __F, int __G, int __H,
                  int __I, int __J, int __K, int __L,
                  int __M, int __N, int __O, int __P)
{
  return __extension__ (__m512i)(__v16si)
    { __P, __O, __N, __M, __L, __K, __J, __I,
      __H, __G, __F, __E, __D, __C, __B, __A };
}

/* setr: arguments already in memory order — forward to set with the
   argument list reversed. */
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,           \
       e8,e9,e10,e11,e12,e13,e14,e15)          \
  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
                   (e5),(e4),(e3),(e2),(e1),(e0))

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi64 (long long __A, long long __B, long long __C,
                  long long __D, long long __E, long long __F,
                  long long __G, long long __H)
{
  return __extension__ (__m512i) (__v8di)
    { __H, __G, __F, __E, __D, __C, __B, __A };
}

#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)           \
  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_set_pd (double __A, double __B, double __C, double __D,
               double __E, double __F, double __G, double __H)
{
  return __extension__ (__m512d)
    { __H, __G, __F, __E, __D, __C, __B, __A };
}

#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)              \
  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_set_ps (float __A, float __B, float __C, float __D,
               float __E, float __F, float __G, float __H,
               float __I, float __J, float __K, float __L,
               float __M, float __N, float __O, float __P)
{
  return __extension__ (__m512)
    { __P, __O, __N, __M, __L, __K, __J, __I,
      __H, __G, __F, __E, __D, __C, __B, __A };
}

#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
                (e4),(e3),(e2),(e1),(e0))

/* Absolute value of packed FP by clearing the sign bit with an AND mask. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_abs_ps(__m512 __A)
{
  return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
{
  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_abs_pd(__m512d __A)
{
  return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
{
  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
}

// Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
// outputs. This class of vector operation forms the basis of many scientific
// computations. In vector-reduction arithmetic, the evaluation of the operator
// is independent of the order of the input elements of V.

// Used bisection method. At each step, we partition the vector with previous
// step in half, and the operation is performed on its two halves.
// This takes log2(n) steps where n is the number of elements in the vector.

// Vec512 - Vector with size 512.
// Operator - Can be one of following: +,*,&,|
// T2 - Can get 'i' for int and 'f' for float.
// T1 - Can get 'i' for int and 'd' for double.
/* Horizontal reduction of eight 64-bit elements by bisection: 512 -> 256 ->
   128 -> scalar.  Expands to the caller's whole function body (note the
   `return` inside); the -1 shuffle indices mark don't-care lanes. */
#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1)         \
  __extension__({                                                      \
    __m256##T1 Vec256 = __builtin_shufflevector(                       \
                            (__v8d##T2)Vec512,                         \
                            (__v8d##T2)Vec512,                         \
                            0, 1, 2, 3)                                \
                        Operator                                       \
                        __builtin_shufflevector(                       \
                            (__v8d##T2)Vec512,                         \
                            (__v8d##T2)Vec512,                         \
                            4, 5, 6, 7);                               \
    __m128##T1 Vec128 = __builtin_shufflevector(                       \
                            (__v4d##T2)Vec256,                         \
                            (__v4d##T2)Vec256,                         \
                            0, 1)                                      \
                        Operator                                       \
                        __builtin_shufflevector(                       \
                            (__v4d##T2)Vec256,                         \
                            (__v4d##T2)Vec256,                         \
                            2, 3);                                     \
    Vec128 = __builtin_shufflevector((__v2d##T2)Vec128,                \
                                     (__v2d##T2)Vec128, 0, -1)         \
             Operator                                                  \
             __builtin_shufflevector((__v2d##T2)Vec128,                \
                                     (__v2d##T2)Vec128, 1, -1);        \
    return Vec128[0];                                                  \
  })

/* The macro above supplies the `return`, so these bodies are complete. */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, +, i, i);
}

static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, *, i, i);
}

static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, &, i, i);
}

static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, |, i, i);
}

static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) {
  _mm512_reduce_operator_64bit(__W, +, f, d);
}

static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) {
  _mm512_reduce_operator_64bit(__W, *, f, d);
}

// Vec512 - Vector with size 512.
// Vec512Neutral - All vector elements set to the identity element.
// Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0}
// Operator - Can be one of following: +,*,&,|
// Mask - Intrinsic Mask
// T2 - Can get 'i' for int and 'f' for float.
// T1 - Can get 'i' for int and 'd' for packed double-precision.
// T3 - Can be Pd for packed double or q for q-word.

/* Masked 64-bit reduction: first replace the inactive elements with the
   operator's identity element, then reduce as in the unmasked case. */
#define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator,   \
                                          Mask, T2, T1, T3)                  \
  __extension__({                                                            \
    Vec512 = __builtin_ia32_select##T3##_512(                                \
                 (__mmask8)Mask,                                             \
                 (__v8d##T2)Vec512,                                          \
                 (__v8d##T2)Vec512Neutral);                                  \
    _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1);                  \
  })

static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), +, __M, i, i, q);
}

static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(1), *, __M, i, i, q);
}

static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
                                    &, __M, i, i, q);
}

static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M,
                                    i, i, q);
}

static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M,
                                    f, d, pd);
}

static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M,
                                    f, d, pd);
}

// Vec512 - Vector with size 512.
// Operator - Can be one of following: +,*,&,|
// T2 - Can get 'i' for int and 'f' for float elements. [__v16s{i|f}]
// T1 - Can get 'i' for int and ' ' (blank) for packed single. [__m512{i|}]
//      (NOTE(review): the original comments had T1/T2 swapped; usage below
//      is e.g. `_mm512_reduce_operator_32bit(__W, +, f, )` for floats.)

// Horizontal reduction of the sixteen 32-bit elements of Vec512 by bisection
// (512 -> 256 -> 128 -> 64 -> 32 bits).  Shuffle index -1 means "don't care".
// NOTE: expands to a GNU statement expression that executes `return`, so it
// must form the entire body of the enclosing intrinsic function.
#define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \
    __m256##T1 Vec256 = \
            (__m256##T1)(__builtin_shufflevector( \
                                    (__v16s##T2)Vec512, \
                                    (__v16s##T2)Vec512, \
                                    0, 1, 2, 3, 4, 5, 6, 7) \
                                Operator \
                         __builtin_shufflevector( \
                                    (__v16s##T2)Vec512, \
                                    (__v16s##T2)Vec512, \
                                    8, 9, 10, 11, 12, 13, 14, 15)); \
    __m128##T1 Vec128 = \
            (__m128##T1)(__builtin_shufflevector( \
                                    (__v8s##T2)Vec256, \
                                    (__v8s##T2)Vec256, \
                                    0, 1, 2, 3) \
                                Operator \
                         __builtin_shufflevector( \
                                    (__v8s##T2)Vec256, \
                                    (__v8s##T2)Vec256, \
                                    4, 5, 6, 7)); \
    Vec128 = (__m128##T1)(__builtin_shufflevector( \
                                    (__v4s##T2)Vec128, \
                                    (__v4s##T2)Vec128, \
                                    0, 1, -1, -1) \
                                Operator \
                          __builtin_shufflevector( \
                                    (__v4s##T2)Vec128, \
                                    (__v4s##T2)Vec128, \
                                    2, 3, -1, -1)); \
    Vec128 = (__m128##T1)(__builtin_shufflevector( \
                                    (__v4s##T2)Vec128, \
                                    (__v4s##T2)Vec128, \
                                    0, -1, -1, -1) \
                                Operator \
                          __builtin_shufflevector( \
                                    (__v4s##T2)Vec128, \
                                    (__v4s##T2)Vec128, \
                                    1, -1, -1, -1)); \
    return Vec128[0]; \
  })

// Sum of all sixteen signed 32-bit elements of __W.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_add_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, +, i, i);
}

// Product of all sixteen signed 32-bit elements of __W.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_mul_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, *, i, i);
}

// Bitwise AND of all sixteen 32-bit elements of __W.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_and_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, &, i, i);
}

// Bitwise OR of all sixteen 32-bit elements of __W.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_or_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, |, i, i);
}

// Sum of all sixteen single-precision elements of __W.
// (T1 is intentionally blank: __m512##T1 -> __m512.)
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_reduce_add_ps(__m512 __W) {
  _mm512_reduce_operator_32bit(__W, +, f, );
}

// Product of all sixteen single-precision elements of __W.
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_reduce_mul_ps(__m512 __W) {
  _mm512_reduce_operator_32bit(__W, *, f, );
}

// Masked variant: elements whose mask bit is clear are first replaced with
// the operator's identity element, then the unmasked reduction is performed.
//
// Vec512 - Vector with size 512.
// Vec512Neutral - All vector elements set to the identity element.
// Identity element: {+,0},{*,1},{&,0xFFFFFFFF},{|,0}
// Operator - Can be one of following: +,*,&,|
// Mask - Intrinsic Mask
// T2 - Can get 'i' for int and 'f' for float elements.
// T1 - Can get 'i' for int and ' ' (blank) for packed single.
// T3 - Can be 'ps' for packed single or 'd' for d-word.
//      [pastes into __builtin_ia32_select{d|ps}_512]
#define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator, \
                                          Mask, T2, T1, T3) \
  __extension__({ \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \
                             (__mmask16)Mask, \
                             (__v16s##T2)Vec512, \
                             (__v16s##T2)Vec512Neutral); \
    _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1); \
  })

// Masked sum: masked-out lanes contribute the additive identity 0.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), +, __M, i, i, d);
}

// Masked product: masked-out lanes contribute the multiplicative identity 1.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(1), *, __M, i, i, d);
}

// Masked AND: masked-out lanes contribute all-ones (the AND identity).
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M,
                                    i, i, d);
}

// Masked OR: masked-out lanes contribute 0 (the OR identity).
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), |, __M, i, i, d);
}

// Masked sum of floats: masked-out lanes contribute +0.0f.
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(0), +, __M, f, , ps);
}

// Masked product of floats: masked-out lanes contribute 1.0f.
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps);
}

// Used bisection method. At each step, we partition the vector with previous
// step in half, and the operation is performed on its two halves.
// This takes log2(n) steps where n is the number of elements in the vector.
// This macro uses only intrinsics from the AVX512F feature.
// Shuffle index -1 means "don't care"; only the low lanes of each step's
// result are meaningful.  NOTE: expands to a GNU statement expression that
// executes `return`, so it must form the entire body of the enclosing
// intrinsic function.

// Vec512 - Vector with size of 512.
// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
//              __mm512_max_epi64
// T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
// T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \
    Vec512 = _mm512_##IntrinName( \
                 (__m512##T1)__builtin_shufflevector( \
                                 (__v8d##T2)Vec512, \
                                 (__v8d##T2)Vec512, \
                                 0, 1, 2, 3, -1, -1, -1, -1), \
                 (__m512##T1)__builtin_shufflevector( \
                                 (__v8d##T2)Vec512, \
                                 (__v8d##T2)Vec512, \
                                 4, 5, 6, 7, -1, -1, -1, -1)); \
    Vec512 = _mm512_##IntrinName( \
                 (__m512##T1)__builtin_shufflevector( \
                                 (__v8d##T2)Vec512, \
                                 (__v8d##T2)Vec512, \
                                 0, 1, -1, -1, -1, -1, -1, -1), \
                 (__m512##T1)__builtin_shufflevector( \
                                 (__v8d##T2)Vec512, \
                                 (__v8d##T2)Vec512, \
                                 2, 3, -1, -1, -1, -1, -1, \
                                 -1)); \
    Vec512 = _mm512_##IntrinName( \
                 (__m512##T1)__builtin_shufflevector( \
                                 (__v8d##T2)Vec512, \
                                 (__v8d##T2)Vec512, \
                                 0, -1, -1, -1, -1, -1, -1, -1), \
                 (__m512##T1)__builtin_shufflevector( \
                                 (__v8d##T2)Vec512, \
                                 (__v8d##T2)Vec512, \
                                 1, -1, -1, -1, -1, -1, -1, -1)) \
                 ; \
    return Vec512[0]; \
  })
// Maximum of all eight signed 64-bit elements of __V.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_reduce_max_epi64(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i);
}

// Maximum of all eight unsigned 64-bit elements of __V.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_reduce_max_epu64(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i);
}

// Maximum of all eight double-precision elements of __V.
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_reduce_max_pd(__m512d __V) {
  _mm512_reduce_maxMin_64bit(__V, max_pd, d, f);
}

// Minimum of all eight signed 64-bit elements of __V.
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epi64
(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, min_epi64, i, i);
}

// Minimum of all eight unsigned 64-bit elements of __V.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_reduce_min_epu64(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i);
}

// Minimum of all eight double-precision elements of __V.
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_reduce_min_pd(__m512d __V) {
  _mm512_reduce_maxMin_64bit(__V, min_pd, d, f);
}

// Masked variant: elements whose mask bit is clear are first replaced with
// the operation's identity element, then the unmasked reduction is performed.
//
// Vec512 - Vector with size 512.
// Vec512Neutral - A 512 length vector with elements set to the identity element
// Identity element: {max_epi,0x8000000000000000}
//                   {max_epu,0x0000000000000000}
//                   {max_pd, 0xFFF0000000000000}   (-infinity)
//                   {min_epi,0x7FFFFFFFFFFFFFFF}
//                   {min_epu,0xFFFFFFFFFFFFFFFF}
//                   {min_pd, 0x7FF0000000000000}   (+infinity)
//
// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
//              __mm512_max_epi64
// T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
// T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
// T3 - Can get 'q' q word and 'pd' for packed double.
//      [__builtin_ia32_select{q|pd}_512]
// Mask - Intrinsic Mask
#define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask) \
  __extension__({ \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \
                             (__mmask8)Mask, \
                             (__v8d##T2)Vec512, \
                             (__v8d##T2)Vec512Neutral); \
    _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2); \
  })

// Masked signed max: masked-out lanes contribute INT64_MIN.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x8000000000000000),
                                  max_epi64, i, i, q, __M);
}

// Masked unsigned max: masked-out lanes contribute 0.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x0000000000000000),
                                  max_epu64, i, i, q, __M);
}

// Masked double max: masked-out lanes contribute -infinity.
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, -_mm512_set1_pd(__builtin_inf()),
                                  max_pd, d, f, pd, __M);
}

// Masked signed min: masked-out lanes contribute INT64_MAX.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),
                                  min_epi64, i, i, q, __M);
}

// Masked unsigned min: masked-out lanes contribute UINT64_MAX.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
                                  min_epu64, i, i, q, __M);
}

// Masked double min: masked-out lanes contribute +infinity.
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()),
                                  min_pd, d, f, pd, __M);
}

// Vec512 - Vector with size 512.
10145 // IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example: 10146 // __mm512_max_epi32 10147 // T1 - Can get 'i' for int and ' ' .[__m512{i|}] 10148 // T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}] 10149 10150 #define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2) __extension__({ \ 10151 Vec512 = _mm512_##IntrinName( \ 10152 (__m512##T1)__builtin_shufflevector( \ 10153 (__v16s##T2)Vec512, \ 10154 (__v16s##T2)Vec512, \ 10155 0, 1, 2, 3, 4, 5, 6, 7, \ 10156 -1, -1, -1, -1, -1, -1, -1, -1), \ 10157 (__m512##T1)__builtin_shufflevector( \ 10158 (__v16s##T2)Vec512, \ 10159 (__v16s##T2)Vec512, \ 10160 8, 9, 10, 11, 12, 13, 14, 15, \ 10161 -1, -1, -1, -1, -1, -1, -1, -1)); \ 10162 Vec512 = _mm512_##IntrinName( \ 10163 (__m512##T1)__builtin_shufflevector( \ 10164 (__v16s##T2)Vec512, \ 10165 (__v16s##T2)Vec512, \ 10166 0, 1, 2, 3, -1, -1, -1, -1, \ 10167 -1, -1, -1, -1, -1, -1, -1, -1), \ 10168 (__m512##T1)__builtin_shufflevector( \ 10169 (__v16s##T2)Vec512, \ 10170 (__v16s##T2)Vec512, \ 10171 4, 5, 6, 7, -1, -1, -1, -1, \ 10172 -1, -1, -1, -1, -1, -1, -1, -1)); \ 10173 Vec512 = _mm512_##IntrinName( \ 10174 (__m512##T1)__builtin_shufflevector( \ 10175 (__v16s##T2)Vec512, \ 10176 (__v16s##T2)Vec512, \ 10177 0, 1, -1, -1, -1, -1, -1, -1, \ 10178 -1, -1, -1, -1, -1, -1, -1, -1), \ 10179 (__m512##T1)__builtin_shufflevector( \ 10180 (__v16s##T2)Vec512, \ 10181 (__v16s##T2)Vec512, \ 10182 2, 3, -1, -1, -1, -1, -1, -1, \ 10183 -1, -1, -1, -1, -1, -1, -1, -1)); \ 10184 Vec512 = _mm512_##IntrinName( \ 10185 (__m512##T1)__builtin_shufflevector( \ 10186 (__v16s##T2)Vec512, \ 10187 (__v16s##T2)Vec512, \ 10188 0, -1, -1, -1, -1, -1, -1, -1, \ 10189 -1, -1, -1, -1, -1, -1, -1, -1), \ 10190 (__m512##T1)__builtin_shufflevector( \ 10191 (__v16s##T2)Vec512, \ 10192 (__v16s##T2)Vec512, \ 10193 1, -1, -1, -1, -1, -1, -1, -1, \ 10194 -1, -1, -1, -1, -1, -1, -1, -1)); \ 10195 return Vec512[0]; \ 10196 }) 10197 10198 static __inline__ int 
__DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i a) { 10199 _mm512_reduce_maxMin_32bit(a, max_epi32, i, i); 10200 } 10201 10202 static __inline__ unsigned int __DEFAULT_FN_ATTRS 10203 _mm512_reduce_max_epu32(__m512i a) { 10204 _mm512_reduce_maxMin_32bit(a, max_epu32, i, i); 10205 } 10206 10207 static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 a) { 10208 _mm512_reduce_maxMin_32bit(a, max_ps, , f); 10209 } 10210 10211 static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i a) { 10212 _mm512_reduce_maxMin_32bit(a, min_epi32, i, i); 10213 } 10214 10215 static __inline__ unsigned int __DEFAULT_FN_ATTRS 10216 _mm512_reduce_min_epu32(__m512i a) { 10217 _mm512_reduce_maxMin_32bit(a, min_epu32, i, i); 10218 } 10219 10220 static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 a) { 10221 _mm512_reduce_maxMin_32bit(a, min_ps, , f); 10222 } 10223 10224 // Vec512 - Vector with size 512. 10225 // Vec512Neutral - A 512 length vector with elements set to the identity element 10226 // Identity element: {max_epi,0x80000000} 10227 // {max_epu,0x00000000} 10228 // {max_ps, 0xFF800000} 10229 // {min_epi,0x7FFFFFFF} 10230 // {min_epu,0xFFFFFFFF} 10231 // {min_ps, 0x7F800000} 10232 // 10233 // IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example: 10234 // __mm512_max_epi32 10235 // T1 - Can get 'i' for int and ' ' .[__m512{i|}] 10236 // T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}] 10237 // T3 - Can get 'q' q word and 'pd' for packed double. 
10238 // [__builtin_ia32_select{q|pd}_512] 10239 // Mask - Intrinsic Mask 10240 10241 #define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \ 10242 T2, T3, Mask) \ 10243 __extension__({ \ 10244 Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \ 10245 (__mmask16)Mask, \ 10246 (__v16s##T2)Vec512, \ 10247 (__v16s##T2)Vec512Neutral); \ 10248 _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2); \ 10249 }) 10250 10251 static __inline__ int __DEFAULT_FN_ATTRS 10252 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) { 10253 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x80000000), max_epi32, 10254 i, i, d, __M); 10255 } 10256 10257 static __inline__ unsigned int __DEFAULT_FN_ATTRS 10258 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) { 10259 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x00000000), max_epu32, 10260 i, i, d, __M); 10261 } 10262 10263 static __inline__ float __DEFAULT_FN_ATTRS 10264 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) { 10265 _mm512_mask_reduce_maxMin_32bit(__V,-_mm512_set1_ps(__builtin_inff()), max_ps, , f, 10266 ps, __M); 10267 } 10268 10269 static __inline__ int __DEFAULT_FN_ATTRS 10270 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) { 10271 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x7FFFFFFF), min_epi32, 10272 i, i, d, __M); 10273 } 10274 10275 static __inline__ unsigned int __DEFAULT_FN_ATTRS 10276 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) { 10277 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0xFFFFFFFF), min_epu32, 10278 i, i, d, __M); 10279 } 10280 10281 static __inline__ float __DEFAULT_FN_ATTRS 10282 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) { 10283 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()), min_ps, , f, 10284 ps, __M); 10285 } 10286 10287 #undef __DEFAULT_FN_ATTRS 10288 10289 #endif // __AVX512FINTRIN_H 10290