/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __VBMIVLINTRIN_H
#define __VBMIVLINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl")))

/* Two-table byte permutes (vpermi2b/vpermt2b): each result byte is selected
   from the concatenation of __A and __B by the corresponding index byte in
   __I. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U,
                             __m128i __B)
{
  return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A,
                                                        (__v16qi) __I /* idx */,
                                                        (__v16qi) __B,
                                                        (__mmask16) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I, __mmask32 __U,
                                __m256i __B)
{
  return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A,
                                                        (__v32qi) __I /* idx */,
                                                        (__v32qi) __B,
                                                        (__mmask32) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I /* idx */,
                                                        (__v16qi) __A,
                                                        (__v16qi) __B,
                                                        (__mmask16) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I,
                            __m128i __B)
{
  return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I /* idx */,
                                                        (__v16qi) __A,
                                                        (__v16qi) __B,
                                                        (__mmask16) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I,
                             __m128i __B)
{
  return (__m128i) __builtin_ia32_vpermt2varqi128_maskz ((__v16qi) __I /* idx */,
                                                         (__v16qi) __A,
                                                         (__v16qi) __B,
                                                         (__mmask16) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I /* idx */,
                                                        (__v32qi) __A,
                                                        (__v32qi) __B,
                                                        (__mmask32) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U, __m256i __I,
                               __m256i __B)
{
  return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I /* idx */,
                                                        (__v32qi) __A,
                                                        (__v32qi) __B,
                                                        (__mmask32) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A, __m256i __I,
                                __m256i __B)
{
  return (__m256i) __builtin_ia32_vpermt2varqi256_maskz ((__v32qi) __I /* idx */,
                                                         (__v32qi) __A,
                                                         (__v32qi) __B,
                                                         (__mmask32) __U);
}

/* Single-table byte permutes (vpermb): each result byte is selected from __B
   by the corresponding index byte in __A. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_permutexvar_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
                                                     (__v16qi) __A,
                                                     (__v16qi) _mm_undefined_si128 (),
                                                     (__mmask16) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
                                                     (__v16qi) __A,
                                                     (__v16qi) _mm_setzero_si128 (),
                                                     (__mmask16) __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
                           __m128i __B)
{
  return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
                                                     (__v16qi) __A,
                                                     (__v16qi) __W,
                                                     (__mmask16) __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
                                                     (__v32qi) __A,
                                                     (__v32qi) _mm256_undefined_si256 (),
                                                     (__mmask32) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
                                                     (__v32qi) __A,
                                                     (__v32qi) _mm256_setzero_si256 (),
                                                     (__mmask32) __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
                              __m256i __B)
{
  return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
                                                     (__v32qi) __A,
                                                     (__v32qi) __W,
                                                     (__mmask32) __M);
}

/* Multishift (vpmultishiftqb): each result byte is an unaligned 8-bit field
   extracted from the corresponding 64-bit lane of __Y, starting at the bit
   offset given by the matching control byte in __X. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X,
                                __m128i __Y)
{
  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
                                                          (__v16qi) __Y,
                                                          (__v16qi) __W,
                                                          (__mmask16) __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
                                                          (__v16qi) __Y,
                                                          (__v16qi) _mm_setzero_si128 (),
                                                          (__mmask16) __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
                                                          (__v16qi) __Y,
                                                          (__v16qi) _mm_undefined_si128 (),
                                                          (__mmask16) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X,
                                   __m256i __Y)
{
  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
                                                          (__v32qi) __Y,
                                                          (__v32qi) __W,
                                                          (__mmask32) __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
                                                          (__v32qi) __Y,
                                                          (__v32qi) _mm256_setzero_si256 (),
                                                          (__mmask32) __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
                                                          (__v32qi) __Y,
                                                          (__v32qi) _mm256_undefined_si256 (),
                                                          (__mmask32) -1);
}

#undef __DEFAULT_FN_ATTRS

#endif
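/* Usage sketch: assuming a target with AVX512VBMI and AVX512VL enabled,
 * _mm_permutexvar_epi8 can reverse the 16 bytes of a vector with a single
 * vpermb; the helper name reverse_bytes_128 below is hypothetical and only
 * illustrates how the index operand is consumed.
 *
 *   static __inline__ __m128i
 *   reverse_bytes_128 (__m128i v)
 *   {
 *     // Index byte i selects source byte 15 - i, so the vector is reversed.
 *     const __m128i idx = _mm_set_epi8 (0, 1, 2, 3, 4, 5, 6, 7,
 *                                       8, 9, 10, 11, 12, 13, 14, 15);
 *     return _mm_permutexvar_epi8 (idx, v);
 *   }
 */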