1 /*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------=== 2 * 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy 5 * of this software and associated documentation files (the "Software"), to deal 6 * in the Software without restriction, including without limitation the rights 7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 * copies of the Software, and to permit persons to whom the Software is 9 * furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 * THE SOFTWARE. 21 * 22 *===-----------------------------------------------------------------------=== 23 */ 24 #ifndef __IMMINTRIN_H 25 #error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead." 26 #endif 27 28 #ifndef __AVX512BWINTRIN_H 29 #define __AVX512BWINTRIN_H 30 31 typedef unsigned int __mmask32; 32 typedef unsigned long long __mmask64; 33 typedef char __v64qi __attribute__ ((__vector_size__ (64))); 34 typedef short __v32hi __attribute__ ((__vector_size__ (64))); 35 36 37 /* Integer compare */ 38 39 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 40 _mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) { 41 return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b, 42 (__mmask64)-1); 43 } 44 45 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 46 _mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 47 return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b, 48 __u); 49 } 50 51 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 52 _mm512_cmpeq_epu8_mask(__m512i __a, __m512i __b) { 53 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0, 54 (__mmask64)-1); 55 } 56 57 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 58 _mm512_mask_cmpeq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 59 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0, 60 __u); 61 } 62 63 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 64 _mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) { 65 return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b, 66 (__mmask32)-1); 67 } 68 69 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 70 _mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 71 return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b, 72 __u); 73 } 74 75 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 76 _mm512_cmpeq_epu16_mask(__m512i __a, __m512i __b) { 77 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0, 78 (__mmask32)-1); 79 } 80 81 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 82 _mm512_mask_cmpeq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 83 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0, 84 __u); 85 } 86 87 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 88 _mm512_cmpge_epi8_mask(__m512i __a, __m512i __b) { 89 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, 90 (__mmask64)-1); 91 } 92 93 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 94 _mm512_mask_cmpge_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 95 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, 96 __u); 97 } 98 99 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 100 _mm512_cmpge_epu8_mask(__m512i __a, __m512i __b) { 101 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, 102 (__mmask64)-1); 103 } 104 105 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 106 _mm512_mask_cmpge_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 107 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, 108 __u); 109 } 110 111 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 112 _mm512_cmpge_epi16_mask(__m512i __a, __m512i __b) { 113 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, 114 (__mmask32)-1); 115 } 116 117 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 118 _mm512_mask_cmpge_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 119 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, 120 __u); 121 } 122 123 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 124 _mm512_cmpge_epu16_mask(__m512i __a, __m512i __b) { 125 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, 126 (__mmask32)-1); 127 } 128 129 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 130 _mm512_mask_cmpge_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 131 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, 132 __u); 133 } 134 135 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 136 _mm512_cmpgt_epi8_mask(__m512i __a, __m512i __b) { 137 return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b, 138 (__mmask64)-1); 139 } 140 141 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 142 _mm512_mask_cmpgt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 143 return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b, 144 __u); 145 } 146 147 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 148 _mm512_cmpgt_epu8_mask(__m512i __a, __m512i __b) { 149 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6, 150 (__mmask64)-1); 151 } 152 153 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 154 _mm512_mask_cmpgt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 155 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6, 156 __u); 157 } 158 159 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 160 _mm512_cmpgt_epi16_mask(__m512i __a, __m512i __b) { 161 return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b, 162 (__mmask32)-1); 163 } 164 165 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 166 _mm512_mask_cmpgt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 167 return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b, 168 __u); 169 } 170 171 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 172 _mm512_cmpgt_epu16_mask(__m512i __a, __m512i __b) { 173 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6, 174 (__mmask32)-1); 175 } 176 177 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 178 _mm512_mask_cmpgt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 179 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6, 180 __u); 181 } 182 183 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 184 _mm512_cmple_epi8_mask(__m512i __a, __m512i __b) { 185 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, 186 (__mmask64)-1); 187 } 188 189 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 190 _mm512_mask_cmple_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 191 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, 192 __u); 193 } 194 195 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 196 _mm512_cmple_epu8_mask(__m512i __a, __m512i __b) { 197 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, 198 (__mmask64)-1); 199 } 200 201 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 202 _mm512_mask_cmple_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 203 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, 204 __u); 205 } 206 207 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 208 _mm512_cmple_epi16_mask(__m512i __a, __m512i __b) { 209 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, 210 (__mmask32)-1); 211 } 212 213 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 214 _mm512_mask_cmple_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 215 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, 216 __u); 217 } 218 219 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 220 _mm512_cmple_epu16_mask(__m512i __a, __m512i __b) { 221 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, 222 (__mmask32)-1); 223 } 224 225 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 226 _mm512_mask_cmple_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 227 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, 228 __u); 229 } 230 231 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 232 _mm512_cmplt_epi8_mask(__m512i __a, __m512i __b) { 233 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, 234 (__mmask64)-1); 235 } 236 237 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 238 _mm512_mask_cmplt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 239 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, 240 __u); 241 } 242 243 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 244 _mm512_cmplt_epu8_mask(__m512i __a, __m512i __b) { 245 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, 246 (__mmask64)-1); 247 } 248 249 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 250 _mm512_mask_cmplt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 251 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, 252 __u); 253 } 254 255 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 256 _mm512_cmplt_epi16_mask(__m512i __a, __m512i __b) { 257 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, 258 (__mmask32)-1); 259 } 260 261 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 262 _mm512_mask_cmplt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 263 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, 264 __u); 265 } 266 267 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 268 _mm512_cmplt_epu16_mask(__m512i __a, __m512i __b) { 269 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, 270 (__mmask32)-1); 271 } 272 273 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 274 _mm512_mask_cmplt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 275 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, 276 __u); 277 } 278 279 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 280 _mm512_cmpneq_epi8_mask(__m512i __a, __m512i __b) { 281 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, 282 (__mmask64)-1); 283 } 284 285 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 286 _mm512_mask_cmpneq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 287 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, 288 __u); 289 } 290 291 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 292 _mm512_cmpneq_epu8_mask(__m512i __a, __m512i __b) { 293 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, 294 (__mmask64)-1); 295 } 296 297 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 298 _mm512_mask_cmpneq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 299 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, 300 __u); 301 } 302 303 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 304 _mm512_cmpneq_epi16_mask(__m512i __a, __m512i __b) { 305 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, 306 (__mmask32)-1); 307 } 308 309 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 310 _mm512_mask_cmpneq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 311 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, 312 __u); 313 } 314 315 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 316 _mm512_cmpneq_epu16_mask(__m512i __a, __m512i __b) { 317 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, 318 (__mmask32)-1); 319 } 320 321 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 322 _mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 323 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, 324 __u); 325 } 326 327 #define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \ 328 (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 329 (__v64qi)(__m512i)(b), \ 330 (p), (__mmask64)-1); }) 331 332 #define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ 333 (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 334 (__v64qi)(__m512i)(b), \ 335 (p), (__mmask64)(m)); }) 336 337 #define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \ 338 (__mmask16)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 339 (__v64qi)(__m512i)(b), \ 340 (p), (__mmask64)-1); }) 341 342 #define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \ 343 (__mmask16)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 344 (__v64qi)(__m512i)(b), \ 345 (p), (__mmask64)(m)); }) 346 347 #define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \ 348 (__mmask16)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 349 (__v32hi)(__m512i)(b), \ 350 (p), (__mmask32)-1); }) 351 352 #define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \ 353 (__mmask16)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 354 (__v32hi)(__m512i)(b), \ 355 (p), (__mmask32)(m)); }) 356 357 #define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \ 358 (__mmask16)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 359 (__v32hi)(__m512i)(b), \ 360 (p), (__mmask32)-1); }) 361 362 #define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \ 363 (__mmask16)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 364 (__v32hi)(__m512i)(b), \ 365 (p), (__mmask32)(m)); }) 366 367 #endif 368