1 /*===---- immintrin.h - Intel intrinsics -----------------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24 #ifndef __IMMINTRIN_H 25 #define __IMMINTRIN_H 26 27 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__) 28 #include <mmintrin.h> 29 #endif 30 31 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__) 32 #include <xmmintrin.h> 33 #endif 34 35 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__) 36 #include <emmintrin.h> 37 #endif 38 39 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__) 40 #include <pmmintrin.h> 41 #endif 42 43 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__) 44 #include <tmmintrin.h> 45 #endif 46 47 #if !defined(_MSC_VER) || __has_feature(modules) || \ 48 (defined(__SSE4_2__) || defined(__SSE4_1__)) 49 #include <smmintrin.h> 50 #endif 51 52 #if !defined(_MSC_VER) || __has_feature(modules) || \ 53 (defined(__AES__) || defined(__PCLMUL__)) 54 #include <wmmintrin.h> 55 #endif 56 57 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__) 58 #include <clflushoptintrin.h> 59 #endif 60 61 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__) 62 #include <clwbintrin.h> 63 #endif 64 65 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__) 66 #include <avxintrin.h> 67 #endif 68 69 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__) 70 #include <avx2intrin.h> 71 72 /* The 256-bit versions of functions in f16cintrin.h. 73 Intel documents these as being in immintrin.h, and 74 they depend on typedefs from avxintrin.h. */ 75 76 /// \brief Converts a 256-bit vector of [8 x float] into a 128-bit vector 77 /// containing 16-bit half-precision float values. 78 /// 79 /// \headerfile <x86intrin.h> 80 /// 81 /// \code 82 /// __m128i _mm256_cvtps_ph(__m256 a, const int imm); 83 /// \endcode 84 /// 85 /// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction. 86 /// 87 /// \param a 88 /// A 256-bit vector containing 32-bit single-precision float values to be 89 /// converted to 16-bit half-precision float values. 90 /// \param imm 91 /// An immediate value controlling rounding using bits [2:0]: \n 92 /// 000: Nearest \n 93 /// 001: Down \n 94 /// 010: Up \n 95 /// 011: Truncate \n 96 /// 1XX: Use MXCSR.RC for rounding 97 /// \returns A 128-bit vector containing the converted 16-bit half-precision 98 /// float values. 99 #define _mm256_cvtps_ph(a, imm) __extension__ ({ \ 100 (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)); }) 101 102 /// \brief Converts a 128-bit vector containing 16-bit half-precision float 103 /// values into a 256-bit vector of [8 x float]. 104 /// 105 /// \headerfile <x86intrin.h> 106 /// 107 /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction. 108 /// 109 /// \param __a 110 /// A 128-bit vector containing 16-bit half-precision float values to be 111 /// converted to 32-bit single-precision float values. 112 /// \returns A vector of [8 x float] containing the converted 32-bit 113 /// single-precision float values. 114 static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c"))) 115 _mm256_cvtph_ps(__m128i __a) 116 { 117 return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a); 118 } 119 #endif /* __AVX2__ */ 120 121 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) 122 #include <bmiintrin.h> 123 #endif 124 125 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__) 126 #include <bmi2intrin.h> 127 #endif 128 129 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__) 130 #include <lzcntintrin.h> 131 #endif 132 133 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__) 134 #include <fmaintrin.h> 135 #endif 136 137 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__) 138 #include <avx512fintrin.h> 139 #endif 140 141 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__) 142 #include <avx512vlintrin.h> 143 #endif 144 145 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__) 146 #include <avx512bwintrin.h> 147 #endif 148 149 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__) 150 #include <avx512cdintrin.h> 151 #endif 152 153 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__) 154 #include <avx512vpopcntdqintrin.h> 155 #endif 156 157 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__) 158 #include <avx512dqintrin.h> 159 #endif 160 161 #if !defined(_MSC_VER) || __has_feature(modules) || \ 162 (defined(__AVX512VL__) && defined(__AVX512BW__)) 163 #include <avx512vlbwintrin.h> 164 #endif 165 166 #if !defined(_MSC_VER) || __has_feature(modules) || \ 167 (defined(__AVX512VL__) && defined(__AVX512CD__)) 168 #include <avx512vlcdintrin.h> 169 #endif 170 171 #if !defined(_MSC_VER) || __has_feature(modules) || \ 172 (defined(__AVX512VL__) && defined(__AVX512DQ__)) 173 #include <avx512vldqintrin.h> 174 #endif 175 176 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__) 177 #include <avx512erintrin.h> 178 #endif 179 180 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__) 181 #include <avx512ifmaintrin.h> 182 #endif 183 184 #if !defined(_MSC_VER) || __has_feature(modules) || \ 185 (defined(__AVX512IFMA__) && defined(__AVX512VL__)) 186 #include <avx512ifmavlintrin.h> 187 #endif 188 189 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__) 190 #include <avx512vbmiintrin.h> 191 #endif 192 193 #if !defined(_MSC_VER) || __has_feature(modules) || \ 194 (defined(__AVX512VBMI__) && defined(__AVX512VL__)) 195 #include <avx512vbmivlintrin.h> 196 #endif 197 198 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__) 199 #include <avx512pfintrin.h> 200 #endif 201 202 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__) 203 #include <pkuintrin.h> 204 #endif 205 206 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__) 207 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 208 _rdrand16_step(unsigned short *__p) 209 { 210 return __builtin_ia32_rdrand16_step(__p); 211 } 212 213 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 214 _rdrand32_step(unsigned int *__p) 215 { 216 return __builtin_ia32_rdrand32_step(__p); 217 } 218 219 #ifdef __x86_64__ 220 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) 221 _rdrand64_step(unsigned long long *__p) 222 { 223 return __builtin_ia32_rdrand64_step(__p); 224 } 225 #endif 226 #endif /* __RDRND__ */ 227 228 /* __bit_scan_forward */ 229 static __inline__ int __attribute__((__always_inline__, __nodebug__)) 230 _bit_scan_forward(int __A) { 231 return __builtin_ctz(__A); 232 } 233 234 /* __bit_scan_reverse */ 235 static __inline__ int __attribute__((__always_inline__, __nodebug__)) 236 _bit_scan_reverse(int __A) { 237 return 31 - __builtin_clz(__A); 238 } 239 240 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__) 241 #ifdef __x86_64__ 242 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 243 _readfsbase_u32(void) 244 { 245 return __builtin_ia32_rdfsbase32(); 246 } 247 248 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 249 _readfsbase_u64(void) 250 { 251 return __builtin_ia32_rdfsbase64(); 252 } 253 254 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 255 _readgsbase_u32(void) 256 { 257 return __builtin_ia32_rdgsbase32(); 258 } 259 260 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 261 _readgsbase_u64(void) 262 { 263 return __builtin_ia32_rdgsbase64(); 264 } 265 266 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 267 _writefsbase_u32(unsigned int __V) 268 { 269 return __builtin_ia32_wrfsbase32(__V); 270 } 271 272 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 273 _writefsbase_u64(unsigned long long __V) 274 { 275 return __builtin_ia32_wrfsbase64(__V); 276 } 277 278 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 279 _writegsbase_u32(unsigned int __V) 280 { 281 return __builtin_ia32_wrgsbase32(__V); 282 } 283 284 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 285 _writegsbase_u64(unsigned long long __V) 286 { 287 return __builtin_ia32_wrgsbase64(__V); 288 } 289 290 #endif 291 #endif /* __FSGSBASE__ */ 292 293 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__) 294 #include <rtmintrin.h> 295 #include <xtestintrin.h> 296 #endif 297 298 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__) 299 #include <shaintrin.h> 300 #endif 301 302 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__) 303 #include <fxsrintrin.h> 304 #endif 305 306 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__) 307 #include <xsaveintrin.h> 308 #endif 309 310 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__) 311 #include <xsaveoptintrin.h> 312 #endif 313 314 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__) 315 #include <xsavecintrin.h> 316 #endif 317 318 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__) 319 #include <xsavesintrin.h> 320 #endif 321 322 /* Some intrinsics inside adxintrin.h are available only on processors with ADX, 323 * whereas others are also available at all times. */ 324 #include <adxintrin.h> 325 326 #endif /* __IMMINTRIN_H */ 327