Home | History | Annotate | Download | only in include
      1 /*===---- immintrin.h - Intel intrinsics -----------------------------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 
     24 #ifndef __IMMINTRIN_H
     25 #define __IMMINTRIN_H
     26 
     27 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
     28 #include <mmintrin.h>
     29 #endif
     30 
     31 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
     32 #include <xmmintrin.h>
     33 #endif
     34 
     35 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
     36 #include <emmintrin.h>
     37 #endif
     38 
     39 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
     40 #include <pmmintrin.h>
     41 #endif
     42 
     43 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
     44 #include <tmmintrin.h>
     45 #endif
     46 
     47 #if !defined(_MSC_VER) || __has_feature(modules) || \
     48     (defined(__SSE4_2__) || defined(__SSE4_1__))
     49 #include <smmintrin.h>
     50 #endif
     51 
     52 #if !defined(_MSC_VER) || __has_feature(modules) || \
     53     (defined(__AES__) || defined(__PCLMUL__))
     54 #include <wmmintrin.h>
     55 #endif
     56 
     57 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)
     58 #include <clflushoptintrin.h>
     59 #endif
     60 
     61 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
     62 #include <clwbintrin.h>
     63 #endif
     64 
     65 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
     66 #include <avxintrin.h>
     67 #endif
     68 
     69 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
     70 #include <avx2intrin.h>
     71 
     72 /* The 256-bit versions of functions in f16cintrin.h.
     73    Intel documents these as being in immintrin.h, and
     74    they depend on typedefs from avxintrin.h. */
     75 
     76 /// \brief Converts a 256-bit vector of [8 x float] into a 128-bit vector
     77 ///    containing 16-bit half-precision float values.
     78 ///
     79 /// \headerfile <x86intrin.h>
     80 ///
     81 /// \code
     82 /// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
     83 /// \endcode
     84 ///
     85 /// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
     86 ///
     87 /// \param a
     88 ///    A 256-bit vector containing 32-bit single-precision float values to be
     89 ///    converted to 16-bit half-precision float values.
     90 /// \param imm
     91 ///    An immediate value controlling rounding using bits [2:0]: \n
     92 ///    000: Nearest \n
     93 ///    001: Down \n
     94 ///    010: Up \n
     95 ///    011: Truncate \n
     96 ///    1XX: Use MXCSR.RC for rounding
     97 /// \returns A 128-bit vector containing the converted 16-bit half-precision
     98 ///    float values.
     99 #define _mm256_cvtps_ph(a, imm) __extension__ ({ \
    100  (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)); })
    101 
    102 /// \brief Converts a 128-bit vector containing 16-bit half-precision float
    103 ///    values into a 256-bit vector of [8 x float].
    104 ///
    105 /// \headerfile <x86intrin.h>
    106 ///
    107 /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
    108 ///
    109 /// \param __a
    110 ///    A 128-bit vector containing 16-bit half-precision float values to be
    111 ///    converted to 32-bit single-precision float values.
    112 /// \returns A vector of [8 x float] containing the converted 32-bit
    113 ///    single-precision float values.
    114 static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
    115 _mm256_cvtph_ps(__m128i __a)
    116 {
    117   return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
    118 }
    119 #endif /* __AVX2__ */
    120 
    121 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
    122 #include <bmiintrin.h>
    123 #endif
    124 
    125 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
    126 #include <bmi2intrin.h>
    127 #endif
    128 
    129 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
    130 #include <lzcntintrin.h>
    131 #endif
    132 
    133 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
    134 #include <fmaintrin.h>
    135 #endif
    136 
    137 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
    138 #include <avx512fintrin.h>
    139 #endif
    140 
    141 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
    142 #include <avx512vlintrin.h>
    143 #endif
    144 
    145 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
    146 #include <avx512bwintrin.h>
    147 #endif
    148 
    149 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
    150 #include <avx512cdintrin.h>
    151 #endif
    152 
    153 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
    154 #include <avx512vpopcntdqintrin.h>
    155 #endif
    156 
    157 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
    158 #include <avx512dqintrin.h>
    159 #endif
    160 
    161 #if !defined(_MSC_VER) || __has_feature(modules) || \
    162     (defined(__AVX512VL__) && defined(__AVX512BW__))
    163 #include <avx512vlbwintrin.h>
    164 #endif
    165 
    166 #if !defined(_MSC_VER) || __has_feature(modules) || \
    167     (defined(__AVX512VL__) && defined(__AVX512CD__))
    168 #include <avx512vlcdintrin.h>
    169 #endif
    170 
    171 #if !defined(_MSC_VER) || __has_feature(modules) || \
    172     (defined(__AVX512VL__) && defined(__AVX512DQ__))
    173 #include <avx512vldqintrin.h>
    174 #endif
    175 
    176 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
    177 #include <avx512erintrin.h>
    178 #endif
    179 
    180 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
    181 #include <avx512ifmaintrin.h>
    182 #endif
    183 
    184 #if !defined(_MSC_VER) || __has_feature(modules) || \
    185     (defined(__AVX512IFMA__) && defined(__AVX512VL__))
    186 #include <avx512ifmavlintrin.h>
    187 #endif
    188 
    189 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
    190 #include <avx512vbmiintrin.h>
    191 #endif
    192 
    193 #if !defined(_MSC_VER) || __has_feature(modules) || \
    194     (defined(__AVX512VBMI__) && defined(__AVX512VL__))
    195 #include <avx512vbmivlintrin.h>
    196 #endif
    197 
    198 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
    199 #include <avx512pfintrin.h>
    200 #endif
    201 
    202 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
    203 #include <pkuintrin.h>
    204 #endif
    205 
    206 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
    207 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
    208 _rdrand16_step(unsigned short *__p)
    209 {
    210   return __builtin_ia32_rdrand16_step(__p);
    211 }
    212 
    213 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
    214 _rdrand32_step(unsigned int *__p)
    215 {
    216   return __builtin_ia32_rdrand32_step(__p);
    217 }
    218 
    219 #ifdef __x86_64__
    220 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
    221 _rdrand64_step(unsigned long long *__p)
    222 {
    223   return __builtin_ia32_rdrand64_step(__p);
    224 }
    225 #endif
    226 #endif /* __RDRND__ */
    227 
    228 /* __bit_scan_forward */
    229 static __inline__ int __attribute__((__always_inline__, __nodebug__))
    230 _bit_scan_forward(int __A) {
    231   return __builtin_ctz(__A);
    232 }
    233 
    234 /* __bit_scan_reverse */
    235 static __inline__ int __attribute__((__always_inline__, __nodebug__))
    236 _bit_scan_reverse(int __A) {
    237   return 31 - __builtin_clz(__A);
    238 }
    239 
    240 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
    241 #ifdef __x86_64__
    242 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    243 _readfsbase_u32(void)
    244 {
    245   return __builtin_ia32_rdfsbase32();
    246 }
    247 
    248 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    249 _readfsbase_u64(void)
    250 {
    251   return __builtin_ia32_rdfsbase64();
    252 }
    253 
    254 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    255 _readgsbase_u32(void)
    256 {
    257   return __builtin_ia32_rdgsbase32();
    258 }
    259 
    260 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    261 _readgsbase_u64(void)
    262 {
    263   return __builtin_ia32_rdgsbase64();
    264 }
    265 
    266 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    267 _writefsbase_u32(unsigned int __V)
    268 {
    269   return __builtin_ia32_wrfsbase32(__V);
    270 }
    271 
    272 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    273 _writefsbase_u64(unsigned long long __V)
    274 {
    275   return __builtin_ia32_wrfsbase64(__V);
    276 }
    277 
    278 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    279 _writegsbase_u32(unsigned int __V)
    280 {
    281   return __builtin_ia32_wrgsbase32(__V);
    282 }
    283 
    284 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    285 _writegsbase_u64(unsigned long long __V)
    286 {
    287   return __builtin_ia32_wrgsbase64(__V);
    288 }
    289 
    290 #endif
    291 #endif /* __FSGSBASE__ */
    292 
    293 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
    294 #include <rtmintrin.h>
    295 #include <xtestintrin.h>
    296 #endif
    297 
    298 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)
    299 #include <shaintrin.h>
    300 #endif
    301 
    302 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)
    303 #include <fxsrintrin.h>
    304 #endif
    305 
    306 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__)
    307 #include <xsaveintrin.h>
    308 #endif
    309 
    310 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
    311 #include <xsaveoptintrin.h>
    312 #endif
    313 
    314 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)
    315 #include <xsavecintrin.h>
    316 #endif
    317 
    318 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)
    319 #include <xsavesintrin.h>
    320 #endif
    321 
    322 /* Some intrinsics inside adxintrin.h are available only on processors with ADX,
    323  * whereas others are also available at all times. */
    324 #include <adxintrin.h>
    325 
    326 #endif /* __IMMINTRIN_H */
    327