Home | History | Annotate | Download | only in include
      1 /*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------===
      2  *
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a copy
      5  * of this software and associated documentation files (the "Software"), to deal
      6  * in the Software without restriction, including without limitation the rights
      7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      8  * copies of the Software, and to permit persons to whom the Software is
      9  * furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     20  * THE SOFTWARE.
     21  *
     22  *===-----------------------------------------------------------------------===
     23  */
/* Reject direct inclusion: this header expects __IMMINTRIN_H to be defined
 * already by whoever includes it, and the diagnostic below points users at
 * <immintrin.h> as the supported entry point. */
#ifndef __IMMINTRIN_H
#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
#endif
     27 
     28 #ifndef __VBMIVLINTRIN_H
     29 #define __VBMIVLINTRIN_H
     30 
/* Define the default attributes for the functions in this file.
 *
 * Every intrinsic wrapper below is declared with this macro, which expands
 * to a single __attribute__ list containing:
 *   __always_inline__                    - request inlining at every call site;
 *   __nodebug__                          - compiler attribute; presumably suppresses
 *                                          debug info for the wrapper (see compiler docs);
 *   __target__("avx512vbmi,avx512vl")    - enable both feature sets for the
 *                                          wrapper body.
 * The macro is local to this header: it is #undef'd again before the
 * closing include guard. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl")))
     33 
     34 
     35 static __inline__ __m128i __DEFAULT_FN_ATTRS
     36 _mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U,
     37             __m128i __B)
     38 {
     39   return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A,
     40               (__v16qi) __I
     41               /* idx */ ,
     42               (__v16qi) __B,
     43               (__mmask16)
     44               __U);
     45 }
     46 
     47 static __inline__ __m256i __DEFAULT_FN_ATTRS
     48 _mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I,
     49          __mmask32 __U, __m256i __B)
     50 {
     51   return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A,
     52               (__v32qi) __I
     53               /* idx */ ,
     54               (__v32qi) __B,
     55               (__mmask32)
     56               __U);
     57 }
     58 
     59 static __inline__ __m128i __DEFAULT_FN_ATTRS
     60 _mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B)
     61 {
     62   return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
     63               /* idx */ ,
     64               (__v16qi) __A,
     65               (__v16qi) __B,
     66               (__mmask16) -
     67               1);
     68 }
     69 
     70 static __inline__ __m128i __DEFAULT_FN_ATTRS
     71 _mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I,
     72            __m128i __B)
     73 {
     74   return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
     75               /* idx */ ,
     76               (__v16qi) __A,
     77               (__v16qi) __B,
     78               (__mmask16)
     79               __U);
     80 }
     81 
     82 static __inline__ __m128i __DEFAULT_FN_ATTRS
     83 _mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I,
     84             __m128i __B)
     85 {
     86   return (__m128i) __builtin_ia32_vpermt2varqi128_maskz ((__v16qi) __I
     87                /* idx */ ,
     88                (__v16qi) __A,
     89                (__v16qi) __B,
     90                (__mmask16)
     91                __U);
     92 }
     93 
     94 static __inline__ __m256i __DEFAULT_FN_ATTRS
     95 _mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B)
     96 {
     97   return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
     98               /* idx */ ,
     99               (__v32qi) __A,
    100               (__v32qi) __B,
    101               (__mmask32) -
    102               1);
    103 }
    104 
    105 static __inline__ __m256i __DEFAULT_FN_ATTRS
    106 _mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U,
    107         __m256i __I, __m256i __B)
    108 {
    109   return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
    110               /* idx */ ,
    111               (__v32qi) __A,
    112               (__v32qi) __B,
    113               (__mmask32)
    114               __U);
    115 }
    116 
    117 static __inline__ __m256i __DEFAULT_FN_ATTRS
    118 _mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A,
    119          __m256i __I, __m256i __B)
    120 {
    121   return (__m256i) __builtin_ia32_vpermt2varqi256_maskz ((__v32qi) __I
    122                /* idx */ ,
    123                (__v32qi) __A,
    124                (__v32qi) __B,
    125                (__mmask32)
    126                __U);
    127 }
    128 
    129 static __inline__ __m128i __DEFAULT_FN_ATTRS
    130 _mm_permutexvar_epi8 (__m128i __A, __m128i __B)
    131 {
    132   return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
    133                  (__v16qi) __A,
    134                  (__v16qi) _mm_undefined_si128 (),
    135                  (__mmask16) -1);
    136 }
    137 
    138 static __inline__ __m128i __DEFAULT_FN_ATTRS
    139 _mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
    140 {
    141   return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
    142                  (__v16qi) __A,
    143                  (__v16qi) _mm_setzero_si128 (),
    144                  (__mmask16) __M);
    145 }
    146 
    147 static __inline__ __m128i __DEFAULT_FN_ATTRS
    148 _mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
    149           __m128i __B)
    150 {
    151   return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
    152                  (__v16qi) __A,
    153                  (__v16qi) __W,
    154                  (__mmask16) __M);
    155 }
    156 
    157 static __inline__ __m256i __DEFAULT_FN_ATTRS
    158 _mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
    159 {
    160   return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
    161                  (__v32qi) __A,
    162                  (__v32qi) _mm256_undefined_si256 (),
    163                  (__mmask32) -1);
    164 }
    165 
    166 static __inline__ __m256i __DEFAULT_FN_ATTRS
    167 _mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
    168         __m256i __B)
    169 {
    170   return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
    171                  (__v32qi) __A,
    172                  (__v32qi) _mm256_setzero_si256 (),
    173                  (__mmask32) __M);
    174 }
    175 
    176 static __inline__ __m256i __DEFAULT_FN_ATTRS
    177 _mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
    178              __m256i __B)
    179 {
    180   return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
    181                  (__v32qi) __A,
    182                  (__v32qi) __W,
    183                  (__mmask32) __M);
    184 }
    185 
    186 static __inline__ __m128i __DEFAULT_FN_ATTRS
    187 _mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
    188 {
    189   return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
    190                 (__v16qi) __Y,
    191                 (__v16qi) __W,
    192                 (__mmask16) __M);
    193 }
    194 
    195 static __inline__ __m128i __DEFAULT_FN_ATTRS
    196 _mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
    197 {
    198   return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
    199                 (__v16qi) __Y,
    200                 (__v16qi)
    201                 _mm_setzero_si128 (),
    202                 (__mmask16) __M);
    203 }
    204 
    205 static __inline__ __m128i __DEFAULT_FN_ATTRS
    206 _mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
    207 {
    208   return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
    209                 (__v16qi) __Y,
    210                 (__v16qi)
    211                 _mm_undefined_si128 (),
    212                 (__mmask16) -1);
    213 }
    214 
    215 static __inline__ __m256i __DEFAULT_FN_ATTRS
    216 _mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
    217 {
    218   return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
    219                 (__v32qi) __Y,
    220                 (__v32qi) __W,
    221                 (__mmask32) __M);
    222 }
    223 
    224 static __inline__ __m256i __DEFAULT_FN_ATTRS
    225 _mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
    226 {
    227   return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
    228                 (__v32qi) __Y,
    229                 (__v32qi)
    230                 _mm256_setzero_si256 (),
    231                 (__mmask32) __M);
    232 }
    233 
    234 static __inline__ __m256i __DEFAULT_FN_ATTRS
    235 _mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
    236 {
    237   return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
    238                 (__v32qi) __Y,
    239                 (__v32qi)
    240                 _mm256_undefined_si256 (),
    241                 (__mmask32) -1);
    242 }
    243 
    244 
    245 #undef __DEFAULT_FN_ATTRS
    246 
    247 #endif
    248