Home | History | Annotate | Download | only in Headers
      1 /*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------===
      2  *
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a copy
      5  * of this software and associated documentation files (the "Software"), to deal
      6  * in the Software without restriction, including without limitation the rights
      7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      8  * copies of the Software, and to permit persons to whom the Software is
      9  * furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     20  * THE SOFTWARE.
     21  *
     22  *===-----------------------------------------------------------------------===
     23  */
     24 #ifndef __IMMINTRIN_H
     25 #error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
     26 #endif
     27 
     28 #ifndef __AVX512BWINTRIN_H
     29 #define __AVX512BWINTRIN_H
     30 
     31 typedef unsigned int __mmask32;
     32 typedef unsigned long long __mmask64;
     33 typedef char __v64qi __attribute__ ((__vector_size__ (64)));
     34 typedef short __v32hi __attribute__ ((__vector_size__ (64)));
     35 
     36 
     37 /* Integer compare */
     38 
     39 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
     40 _mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) {
     41   return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
     42                                                    (__mmask64)-1);
     43 }
     44 
     45 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
     46 _mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
     47   return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
     48                                                    __u);
     49 }
     50 
     51 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
     52 _mm512_cmpeq_epu8_mask(__m512i __a, __m512i __b) {
     53   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
     54                                                  (__mmask64)-1);
     55 }
     56 
     57 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
     58 _mm512_mask_cmpeq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
     59   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
     60                                                  __u);
     61 }
     62 
     63 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
     64 _mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) {
     65   return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
     66                                                    (__mmask32)-1);
     67 }
     68 
     69 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
     70 _mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
     71   return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
     72                                                    __u);
     73 }
     74 
     75 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
     76 _mm512_cmpeq_epu16_mask(__m512i __a, __m512i __b) {
     77   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
     78                                                  (__mmask32)-1);
     79 }
     80 
     81 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
     82 _mm512_mask_cmpeq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
     83   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
     84                                                  __u);
     85 }
     86 
     87 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
     88 _mm512_cmpge_epi8_mask(__m512i __a, __m512i __b) {
     89   return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
     90                                                 (__mmask64)-1);
     91 }
     92 
     93 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
     94 _mm512_mask_cmpge_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
     95   return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
     96                                                 __u);
     97 }
     98 
     99 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
    100 _mm512_cmpge_epu8_mask(__m512i __a, __m512i __b) {
    101   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
    102                                                  (__mmask64)-1);
    103 }
    104 
    105 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
    106 _mm512_mask_cmpge_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
    107   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
    108                                                  __u);
    109 }
    110 
    111 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    112 _mm512_cmpge_epi16_mask(__m512i __a, __m512i __b) {
    113   return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
    114                                                 (__mmask32)-1);
    115 }
    116 
    117 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    118 _mm512_mask_cmpge_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
    119   return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
    120                                                 __u);
    121 }
    122 
    123 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    124 _mm512_cmpge_epu16_mask(__m512i __a, __m512i __b) {
    125   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
    126                                                  (__mmask32)-1);
    127 }
    128 
    129 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    130 _mm512_mask_cmpge_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
    131   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
    132                                                  __u);
    133 }
    134 
    135 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
    136 _mm512_cmpgt_epi8_mask(__m512i __a, __m512i __b) {
    137   return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
    138                                                    (__mmask64)-1);
    139 }
    140 
    141 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
    142 _mm512_mask_cmpgt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
    143   return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
    144                                                    __u);
    145 }
    146 
    147 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
    148 _mm512_cmpgt_epu8_mask(__m512i __a, __m512i __b) {
    149   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
    150                                                  (__mmask64)-1);
    151 }
    152 
    153 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
    154 _mm512_mask_cmpgt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
    155   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
    156                                                  __u);
    157 }
    158 
    159 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    160 _mm512_cmpgt_epi16_mask(__m512i __a, __m512i __b) {
    161   return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
    162                                                    (__mmask32)-1);
    163 }
    164 
    165 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    166 _mm512_mask_cmpgt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
    167   return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
    168                                                    __u);
    169 }
    170 
    171 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    172 _mm512_cmpgt_epu16_mask(__m512i __a, __m512i __b) {
    173   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
    174                                                  (__mmask32)-1);
    175 }
    176 
    177 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    178 _mm512_mask_cmpgt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
    179   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
    180                                                  __u);
    181 }
    182 
    183 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
    184 _mm512_cmple_epi8_mask(__m512i __a, __m512i __b) {
    185   return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
    186                                                 (__mmask64)-1);
    187 }
    188 
    189 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
    190 _mm512_mask_cmple_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
    191   return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
    192                                                 __u);
    193 }
    194 
    195 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
    196 _mm512_cmple_epu8_mask(__m512i __a, __m512i __b) {
    197   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
    198                                                  (__mmask64)-1);
    199 }
    200 
    201 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
    202 _mm512_mask_cmple_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
    203   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
    204                                                  __u);
    205 }
    206 
    207 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    208 _mm512_cmple_epi16_mask(__m512i __a, __m512i __b) {
    209   return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
    210                                                 (__mmask32)-1);
    211 }
    212 
    213 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    214 _mm512_mask_cmple_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
    215   return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
    216                                                 __u);
    217 }
    218 
    219 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    220 _mm512_cmple_epu16_mask(__m512i __a, __m512i __b) {
    221   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
    222                                                  (__mmask32)-1);
    223 }
    224 
    225 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    226 _mm512_mask_cmple_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
    227   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
    228                                                  __u);
    229 }
    230 
    231 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
    232 _mm512_cmplt_epi8_mask(__m512i __a, __m512i __b) {
    233   return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
    234                                                 (__mmask64)-1);
    235 }
    236 
    237 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
    238 _mm512_mask_cmplt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
    239   return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
    240                                                 __u);
    241 }
    242 
    243 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
    244 _mm512_cmplt_epu8_mask(__m512i __a, __m512i __b) {
    245   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
    246                                                  (__mmask64)-1);
    247 }
    248 
    249 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
    250 _mm512_mask_cmplt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
    251   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
    252                                                  __u);
    253 }
    254 
    255 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    256 _mm512_cmplt_epi16_mask(__m512i __a, __m512i __b) {
    257   return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
    258                                                 (__mmask32)-1);
    259 }
    260 
    261 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    262 _mm512_mask_cmplt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
    263   return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
    264                                                 __u);
    265 }
    266 
    267 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    268 _mm512_cmplt_epu16_mask(__m512i __a, __m512i __b) {
    269   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
    270                                                  (__mmask32)-1);
    271 }
    272 
    273 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    274 _mm512_mask_cmplt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
    275   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
    276                                                  __u);
    277 }
    278 
    279 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
    280 _mm512_cmpneq_epi8_mask(__m512i __a, __m512i __b) {
    281   return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
    282                                                 (__mmask64)-1);
    283 }
    284 
    285 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
    286 _mm512_mask_cmpneq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
    287   return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
    288                                                 __u);
    289 }
    290 
    291 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
    292 _mm512_cmpneq_epu8_mask(__m512i __a, __m512i __b) {
    293   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
    294                                                  (__mmask64)-1);
    295 }
    296 
    297 static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
    298 _mm512_mask_cmpneq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
    299   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
    300                                                  __u);
    301 }
    302 
    303 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    304 _mm512_cmpneq_epi16_mask(__m512i __a, __m512i __b) {
    305   return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
    306                                                 (__mmask32)-1);
    307 }
    308 
    309 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    310 _mm512_mask_cmpneq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
    311   return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
    312                                                 __u);
    313 }
    314 
    315 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    316 _mm512_cmpneq_epu16_mask(__m512i __a, __m512i __b) {
    317   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
    318                                                  (__mmask32)-1);
    319 }
    320 
    321 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
    322 _mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
    323   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
    324                                                  __u);
    325 }
    326 
    327 #define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
    328   (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
    329                                          (__v64qi)(__m512i)(b), \
    330                                          (p), (__mmask64)-1); })
    331 
    332 #define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
    333   (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
    334                                          (__v64qi)(__m512i)(b), \
    335                                          (p), (__mmask64)(m)); })
    336 
    337 #define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \
    338   (__mmask16)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
    339                                           (__v64qi)(__m512i)(b), \
    340                                           (p), (__mmask64)-1); })
    341 
    342 #define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
    343   (__mmask16)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
    344                                           (__v64qi)(__m512i)(b), \
    345                                           (p), (__mmask64)(m)); })
    346 
    347 #define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \
    348   (__mmask16)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
    349                                          (__v32hi)(__m512i)(b), \
    350                                          (p), (__mmask32)-1); })
    351 
    352 #define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
    353   (__mmask16)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
    354                                          (__v32hi)(__m512i)(b), \
    355                                          (p), (__mmask32)(m)); })
    356 
    357 #define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \
    358   (__mmask16)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
    359                                           (__v32hi)(__m512i)(b), \
    360                                           (p), (__mmask32)-1); })
    361 
    362 #define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
    363   (__mmask16)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
    364                                           (__v32hi)(__m512i)(b), \
    365                                           (p), (__mmask32)(m)); })
    366 
    367 #endif
    368