Home | History | Annotate | Download | only in clang-include
      1 /*===---- xopintrin.h - XOP intrinsics -------------------------------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 
     24 #ifndef __X86INTRIN_H
     25 #error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
     26 #endif
     27 
     28 #ifndef __XOPINTRIN_H
     29 #define __XOPINTRIN_H
     30 
     31 #ifndef __XOP__
     32 # error "XOP instruction set is not enabled"
     33 #else
     34 
     35 #include <fma4intrin.h>
     36 
     37 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
     38 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
     39 {
     40   return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
     41 }
     42 
     43 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
     44 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
     45 {
     46   return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
     47 }
     48 
     49 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
     50 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
     51 {
     52   return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
     53 }
     54 
     55 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
     56 _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
     57 {
     58   return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
     59 }
     60 
     61 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
     62 _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
     63 {
     64   return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
     65 }
     66 
     67 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
     68 _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
     69 {
     70   return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
     71 }
     72 
     73 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
     74 _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
     75 {
     76   return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
     77 }
     78 
     79 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
     80 _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
     81 {
     82   return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
     83 }
     84 
     85 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
     86 _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
     87 {
     88   return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
     89 }
     90 
     91 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
     92 _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
     93 {
     94   return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
     95 }
     96 
     97 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
     98 _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
     99 {
    100   return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
    101 }
    102 
    103 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    104 _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
    105 {
    106   return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
    107 }
    108 
    109 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    110 _mm_haddw_epi8(__m128i __A)
    111 {
    112   return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
    113 }
    114 
    115 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    116 _mm_haddd_epi8(__m128i __A)
    117 {
    118   return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
    119 }
    120 
    121 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    122 _mm_haddq_epi8(__m128i __A)
    123 {
    124   return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
    125 }
    126 
    127 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    128 _mm_haddd_epi16(__m128i __A)
    129 {
    130   return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
    131 }
    132 
    133 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    134 _mm_haddq_epi16(__m128i __A)
    135 {
    136   return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
    137 }
    138 
    139 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    140 _mm_haddq_epi32(__m128i __A)
    141 {
    142   return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
    143 }
    144 
    145 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    146 _mm_haddw_epu8(__m128i __A)
    147 {
    148   return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
    149 }
    150 
    151 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    152 _mm_haddd_epu8(__m128i __A)
    153 {
    154   return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
    155 }
    156 
    157 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    158 _mm_haddq_epu8(__m128i __A)
    159 {
    160   return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
    161 }
    162 
    163 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    164 _mm_haddd_epu16(__m128i __A)
    165 {
    166   return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
    167 }
    168 
    169 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    170 _mm_haddq_epu16(__m128i __A)
    171 {
    172   return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
    173 }
    174 
    175 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    176 _mm_haddq_epu32(__m128i __A)
    177 {
    178   return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
    179 }
    180 
    181 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    182 _mm_hsubw_epi8(__m128i __A)
    183 {
    184   return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
    185 }
    186 
    187 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    188 _mm_hsubd_epi16(__m128i __A)
    189 {
    190   return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
    191 }
    192 
    193 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    194 _mm_hsubq_epi32(__m128i __A)
    195 {
    196   return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
    197 }
    198 
    199 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    200 _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
    201 {
    202   return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C);
    203 }
    204 
    205 static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
    206 _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
    207 {
    208   return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C);
    209 }
    210 
    211 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    212 _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
    213 {
    214   return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
    215 }
    216 
    217 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    218 _mm_rot_epi8(__m128i __A, __m128i __B)
    219 {
    220   return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
    221 }
    222 
    223 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    224 _mm_rot_epi16(__m128i __A, __m128i __B)
    225 {
    226   return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
    227 }
    228 
    229 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    230 _mm_rot_epi32(__m128i __A, __m128i __B)
    231 {
    232   return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
    233 }
    234 
    235 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    236 _mm_rot_epi64(__m128i __A, __m128i __B)
    237 {
    238   return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
    239 }
    240 
    241 #define _mm_roti_epi8(A, N) __extension__ ({ \
    242   __m128i __A = (A); \
    243   (__m128i)__builtin_ia32_vprotbi((__v16qi)__A, (N)); })
    244 
    245 #define _mm_roti_epi16(A, N) __extension__ ({ \
    246   __m128i __A = (A); \
    247   (__m128i)__builtin_ia32_vprotwi((__v8hi)__A, (N)); })
    248 
    249 #define _mm_roti_epi32(A, N) __extension__ ({ \
    250   __m128i __A = (A); \
    251   (__m128i)__builtin_ia32_vprotdi((__v4si)__A, (N)); })
    252 
    253 #define _mm_roti_epi64(A, N) __extension__ ({ \
    254   __m128i __A = (A); \
    255   (__m128i)__builtin_ia32_vprotqi((__v2di)__A, (N)); })
    256 
    257 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    258 _mm_shl_epi8(__m128i __A, __m128i __B)
    259 {
    260   return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);
    261 }
    262 
    263 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    264 _mm_shl_epi16(__m128i __A, __m128i __B)
    265 {
    266   return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);
    267 }
    268 
    269 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    270 _mm_shl_epi32(__m128i __A, __m128i __B)
    271 {
    272   return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);
    273 }
    274 
    275 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    276 _mm_shl_epi64(__m128i __A, __m128i __B)
    277 {
    278   return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);
    279 }
    280 
    281 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    282 _mm_sha_epi8(__m128i __A, __m128i __B)
    283 {
    284   return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);
    285 }
    286 
    287 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    288 _mm_sha_epi16(__m128i __A, __m128i __B)
    289 {
    290   return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);
    291 }
    292 
    293 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    294 _mm_sha_epi32(__m128i __A, __m128i __B)
    295 {
    296   return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);
    297 }
    298 
    299 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    300 _mm_sha_epi64(__m128i __A, __m128i __B)
    301 {
    302   return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
    303 }
    304 
    305 #define _mm_com_epu8(A, B, N) __extension__ ({ \
    306   __m128i __A = (A); \
    307   __m128i __B = (B); \
    308   (__m128i)__builtin_ia32_vpcomub((__v16qi)__A, (__v16qi)__B, (N)); })
    309 
    310 #define _mm_com_epu16(A, B, N) __extension__ ({ \
    311   __m128i __A = (A); \
    312   __m128i __B = (B); \
    313   (__m128i)__builtin_ia32_vpcomuw((__v8hi)__A, (__v8hi)__B, (N)); })
    314 
    315 #define _mm_com_epu32(A, B, N) __extension__ ({ \
    316   __m128i __A = (A); \
    317   __m128i __B = (B); \
    318   (__m128i)__builtin_ia32_vpcomud((__v4si)__A, (__v4si)__B, (N)); })
    319 
    320 #define _mm_com_epu64(A, B, N) __extension__ ({ \
    321   __m128i __A = (A); \
    322   __m128i __B = (B); \
    323   (__m128i)__builtin_ia32_vpcomuq((__v2di)__A, (__v2di)__B, (N)); })
    324 
    325 #define _mm_com_epi8(A, B, N) __extension__ ({ \
    326   __m128i __A = (A); \
    327   __m128i __B = (B); \
    328   (__m128i)__builtin_ia32_vpcomb((__v16qi)__A, (__v16qi)__B, (N)); })
    329 
    330 #define _mm_com_epi16(A, B, N) __extension__ ({ \
    331   __m128i __A = (A); \
    332   __m128i __B = (B); \
    333   (__m128i)__builtin_ia32_vpcomw((__v8hi)__A, (__v8hi)__B, (N)); })
    334 
    335 #define _mm_com_epi32(A, B, N) __extension__ ({ \
    336   __m128i __A = (A); \
    337   __m128i __B = (B); \
    338   (__m128i)__builtin_ia32_vpcomd((__v4si)__A, (__v4si)__B, (N)); })
    339 
    340 #define _mm_com_epi64(A, B, N) __extension__ ({ \
    341   __m128i __A = (A); \
    342   __m128i __B = (B); \
    343   (__m128i)__builtin_ia32_vpcomq((__v2di)__A, (__v2di)__B, (N)); })
    344 
    345 #define _MM_PCOMCTRL_LT    0
    346 #define _MM_PCOMCTRL_LE    1
    347 #define _MM_PCOMCTRL_GT    2
    348 #define _MM_PCOMCTRL_GE    3
    349 #define _MM_PCOMCTRL_EQ    4
    350 #define _MM_PCOMCTRL_NEQ   5
    351 #define _MM_PCOMCTRL_FALSE 6
    352 #define _MM_PCOMCTRL_TRUE  7
    353 
    354 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    355 _mm_comlt_epu8(__m128i __A, __m128i __B)
    356 {
    357   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);
    358 }
    359 
    360 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    361 _mm_comle_epu8(__m128i __A, __m128i __B)
    362 {
    363   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);
    364 }
    365 
    366 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    367 _mm_comgt_epu8(__m128i __A, __m128i __B)
    368 {
    369   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);
    370 }
    371 
    372 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    373 _mm_comge_epu8(__m128i __A, __m128i __B)
    374 {
    375   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);
    376 }
    377 
    378 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    379 _mm_comeq_epu8(__m128i __A, __m128i __B)
    380 {
    381   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);
    382 }
    383 
    384 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    385 _mm_comneq_epu8(__m128i __A, __m128i __B)
    386 {
    387   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);
    388 }
    389 
    390 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    391 _mm_comfalse_epu8(__m128i __A, __m128i __B)
    392 {
    393   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);
    394 }
    395 
    396 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    397 _mm_comtrue_epu8(__m128i __A, __m128i __B)
    398 {
    399   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);
    400 }
    401 
    402 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    403 _mm_comlt_epu16(__m128i __A, __m128i __B)
    404 {
    405   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);
    406 }
    407 
    408 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    409 _mm_comle_epu16(__m128i __A, __m128i __B)
    410 {
    411   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);
    412 }
    413 
    414 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    415 _mm_comgt_epu16(__m128i __A, __m128i __B)
    416 {
    417   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);
    418 }
    419 
    420 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    421 _mm_comge_epu16(__m128i __A, __m128i __B)
    422 {
    423   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);
    424 }
    425 
    426 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    427 _mm_comeq_epu16(__m128i __A, __m128i __B)
    428 {
    429   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);
    430 }
    431 
    432 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    433 _mm_comneq_epu16(__m128i __A, __m128i __B)
    434 {
    435   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);
    436 }
    437 
    438 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    439 _mm_comfalse_epu16(__m128i __A, __m128i __B)
    440 {
    441   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);
    442 }
    443 
    444 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    445 _mm_comtrue_epu16(__m128i __A, __m128i __B)
    446 {
    447   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);
    448 }
    449 
    450 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    451 _mm_comlt_epu32(__m128i __A, __m128i __B)
    452 {
    453   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);
    454 }
    455 
    456 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    457 _mm_comle_epu32(__m128i __A, __m128i __B)
    458 {
    459   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);
    460 }
    461 
    462 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    463 _mm_comgt_epu32(__m128i __A, __m128i __B)
    464 {
    465   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);
    466 }
    467 
    468 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    469 _mm_comge_epu32(__m128i __A, __m128i __B)
    470 {
    471   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);
    472 }
    473 
    474 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    475 _mm_comeq_epu32(__m128i __A, __m128i __B)
    476 {
    477   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);
    478 }
    479 
    480 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    481 _mm_comneq_epu32(__m128i __A, __m128i __B)
    482 {
    483   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);
    484 }
    485 
    486 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    487 _mm_comfalse_epu32(__m128i __A, __m128i __B)
    488 {
    489   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);
    490 }
    491 
    492 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    493 _mm_comtrue_epu32(__m128i __A, __m128i __B)
    494 {
    495   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);
    496 }
    497 
    498 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    499 _mm_comlt_epu64(__m128i __A, __m128i __B)
    500 {
    501   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);
    502 }
    503 
    504 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    505 _mm_comle_epu64(__m128i __A, __m128i __B)
    506 {
    507   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);
    508 }
    509 
    510 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    511 _mm_comgt_epu64(__m128i __A, __m128i __B)
    512 {
    513   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);
    514 }
    515 
    516 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    517 _mm_comge_epu64(__m128i __A, __m128i __B)
    518 {
    519   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);
    520 }
    521 
    522 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    523 _mm_comeq_epu64(__m128i __A, __m128i __B)
    524 {
    525   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);
    526 }
    527 
    528 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    529 _mm_comneq_epu64(__m128i __A, __m128i __B)
    530 {
    531   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);
    532 }
    533 
    534 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    535 _mm_comfalse_epu64(__m128i __A, __m128i __B)
    536 {
    537   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);
    538 }
    539 
    540 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    541 _mm_comtrue_epu64(__m128i __A, __m128i __B)
    542 {
    543   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);
    544 }
    545 
    546 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    547 _mm_comlt_epi8(__m128i __A, __m128i __B)
    548 {
    549   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);
    550 }
    551 
    552 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    553 _mm_comle_epi8(__m128i __A, __m128i __B)
    554 {
    555   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);
    556 }
    557 
    558 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    559 _mm_comgt_epi8(__m128i __A, __m128i __B)
    560 {
    561   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);
    562 }
    563 
    564 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    565 _mm_comge_epi8(__m128i __A, __m128i __B)
    566 {
    567   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);
    568 }
    569 
    570 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    571 _mm_comeq_epi8(__m128i __A, __m128i __B)
    572 {
    573   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);
    574 }
    575 
    576 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    577 _mm_comneq_epi8(__m128i __A, __m128i __B)
    578 {
    579   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);
    580 }
    581 
    582 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    583 _mm_comfalse_epi8(__m128i __A, __m128i __B)
    584 {
    585   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);
    586 }
    587 
    588 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    589 _mm_comtrue_epi8(__m128i __A, __m128i __B)
    590 {
    591   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);
    592 }
    593 
    594 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    595 _mm_comlt_epi16(__m128i __A, __m128i __B)
    596 {
    597   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);
    598 }
    599 
    600 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    601 _mm_comle_epi16(__m128i __A, __m128i __B)
    602 {
    603   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);
    604 }
    605 
    606 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    607 _mm_comgt_epi16(__m128i __A, __m128i __B)
    608 {
    609   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);
    610 }
    611 
    612 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    613 _mm_comge_epi16(__m128i __A, __m128i __B)
    614 {
    615   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);
    616 }
    617 
    618 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    619 _mm_comeq_epi16(__m128i __A, __m128i __B)
    620 {
    621   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);
    622 }
    623 
    624 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    625 _mm_comneq_epi16(__m128i __A, __m128i __B)
    626 {
    627   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);
    628 }
    629 
    630 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    631 _mm_comfalse_epi16(__m128i __A, __m128i __B)
    632 {
    633   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);
    634 }
    635 
    636 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    637 _mm_comtrue_epi16(__m128i __A, __m128i __B)
    638 {
    639   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);
    640 }
    641 
    642 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    643 _mm_comlt_epi32(__m128i __A, __m128i __B)
    644 {
    645   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);
    646 }
    647 
    648 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    649 _mm_comle_epi32(__m128i __A, __m128i __B)
    650 {
    651   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);
    652 }
    653 
    654 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    655 _mm_comgt_epi32(__m128i __A, __m128i __B)
    656 {
    657   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);
    658 }
    659 
    660 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    661 _mm_comge_epi32(__m128i __A, __m128i __B)
    662 {
    663   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);
    664 }
    665 
    666 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    667 _mm_comeq_epi32(__m128i __A, __m128i __B)
    668 {
    669   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);
    670 }
    671 
    672 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    673 _mm_comneq_epi32(__m128i __A, __m128i __B)
    674 {
    675   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);
    676 }
    677 
    678 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    679 _mm_comfalse_epi32(__m128i __A, __m128i __B)
    680 {
    681   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);
    682 }
    683 
    684 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    685 _mm_comtrue_epi32(__m128i __A, __m128i __B)
    686 {
    687   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);
    688 }
    689 
    690 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    691 _mm_comlt_epi64(__m128i __A, __m128i __B)
    692 {
    693   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);
    694 }
    695 
    696 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    697 _mm_comle_epi64(__m128i __A, __m128i __B)
    698 {
    699   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);
    700 }
    701 
    702 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    703 _mm_comgt_epi64(__m128i __A, __m128i __B)
    704 {
    705   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);
    706 }
    707 
    708 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    709 _mm_comge_epi64(__m128i __A, __m128i __B)
    710 {
    711   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);
    712 }
    713 
    714 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    715 _mm_comeq_epi64(__m128i __A, __m128i __B)
    716 {
    717   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);
    718 }
    719 
    720 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    721 _mm_comneq_epi64(__m128i __A, __m128i __B)
    722 {
    723   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);
    724 }
    725 
    726 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    727 _mm_comfalse_epi64(__m128i __A, __m128i __B)
    728 {
    729   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);
    730 }
    731 
    732 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    733 _mm_comtrue_epi64(__m128i __A, __m128i __B)
    734 {
    735   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
    736 }
    737 
    738 #define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \
    739   __m128d __X = (X); \
    740   __m128d __Y = (Y); \
    741   __m128i __C = (C); \
    742   (__m128d)__builtin_ia32_vpermil2pd((__v2df)__X, (__v2df)__Y, \
    743                                      (__v2di)__C, (I)); })
    744 
    745 #define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \
    746   __m256d __X = (X); \
    747   __m256d __Y = (Y); \
    748   __m256i __C = (C); \
    749   (__m256d)__builtin_ia32_vpermil2pd256((__v4df)__X, (__v4df)__Y, \
    750                                         (__v4di)__C, (I)); })
    751 
    752 #define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \
    753   __m128 __X = (X); \
    754   __m128 __Y = (Y); \
    755   __m128i __C = (C); \
    756   (__m128)__builtin_ia32_vpermil2ps((__v4sf)__X, (__v4sf)__Y, \
    757                                     (__v4si)__C, (I)); })
    758 
    759 #define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \
    760   __m256 __X = (X); \
    761   __m256 __Y = (Y); \
    762   __m256i __C = (C); \
    763   (__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y, \
    764                                        (__v8si)__C, (I)); })
    765 
    766 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
    767 _mm_frcz_ss(__m128 __A)
    768 {
    769   return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);
    770 }
    771 
    772 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
    773 _mm_frcz_sd(__m128d __A)
    774 {
    775   return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);
    776 }
    777 
    778 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
    779 _mm_frcz_ps(__m128 __A)
    780 {
    781   return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);
    782 }
    783 
    784 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
    785 _mm_frcz_pd(__m128d __A)
    786 {
    787   return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
    788 }
    789 
    790 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
    791 _mm256_frcz_ps(__m256 __A)
    792 {
    793   return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
    794 }
    795 
    796 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
    797 _mm256_frcz_pd(__m256d __A)
    798 {
    799   return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
    800 }
    801 
    802 #endif /* __XOP__ */
    803 
    804 #endif /* __XOPINTRIN_H */
    805