Home | History | Annotate | Download | only in include
      1 /* Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
      2 
      3    This file is part of GCC.
      4 
      5    GCC is free software; you can redistribute it and/or modify
      6    it under the terms of the GNU General Public License as published by
      7    the Free Software Foundation; either version 3, or (at your option)
      8    any later version.
      9 
     10    GCC is distributed in the hope that it will be useful,
     11    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13    GNU General Public License for more details.
     14 
     15    Under Section 7 of GPL version 3, you are granted additional
     16    permissions described in the GCC Runtime Library Exception, version
     17    3.1, as published by the Free Software Foundation.
     18 
     19    You should have received a copy of the GNU General Public License and
     20    a copy of the GCC Runtime Library Exception along with this program;
     21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     22    <http://www.gnu.org/licenses/>.  */
     23 
     24 #ifndef _X86INTRIN_H_INCLUDED
     25 # error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
     26 #endif
     27 
     28 #ifndef _XOPMMINTRIN_H_INCLUDED
     29 #define _XOPMMINTRIN_H_INCLUDED
     30 
     31 #ifndef __XOP__
     32 # error "XOP instruction set not enabled"
     33 #else
     34 
     35 #include <fma4intrin.h>
     36 
     37 /* Integer multiply/add instructions. */
     38 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     39 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
     40 {
     41   return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C);
     42 }
     43 
     44 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     45 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
     46 {
     47   return (__m128i) __builtin_ia32_vpmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
     48 }
     49 
     50 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     51 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
     52 {
     53   return  (__m128i) __builtin_ia32_vpmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
     54 }
     55 
     56 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     57 _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
     58 {
     59   return  (__m128i) __builtin_ia32_vpmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
     60 }
     61 
     62 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     63 _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
     64 {
     65   return  (__m128i) __builtin_ia32_vpmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
     66 }
     67 
     68 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     69 _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
     70 {
     71   return  (__m128i) __builtin_ia32_vpmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
     72 }
     73 
     74 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     75 _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
     76 {
     77   return  (__m128i) __builtin_ia32_vpmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
     78 }
     79 
     80 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     81 _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
     82 {
     83   return  (__m128i) __builtin_ia32_vpmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
     84 }
     85 
     86 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     87 _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
     88 {
     89   return  (__m128i) __builtin_ia32_vpmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
     90 }
     91 
     92 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     93 _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
     94 {
     95   return  (__m128i) __builtin_ia32_vpmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
     96 }
     97 
     98 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     99 _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
    100 {
    101   return  (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
    102 }
    103 
    104 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    105 _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
    106 {
    107   return  (__m128i) __builtin_ia32_vpmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
    108 }
    109 
    110 /* Packed Integer Horizontal Add and Subtract */
    111 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    112 _mm_haddw_epi8(__m128i __A)
    113 {
    114   return  (__m128i) __builtin_ia32_vphaddbw ((__v16qi)__A);
    115 }
    116 
    117 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    118 _mm_haddd_epi8(__m128i __A)
    119 {
    120   return  (__m128i) __builtin_ia32_vphaddbd ((__v16qi)__A);
    121 }
    122 
    123 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    124 _mm_haddq_epi8(__m128i __A)
    125 {
    126   return  (__m128i) __builtin_ia32_vphaddbq ((__v16qi)__A);
    127 }
    128 
    129 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    130 _mm_haddd_epi16(__m128i __A)
    131 {
    132   return  (__m128i) __builtin_ia32_vphaddwd ((__v8hi)__A);
    133 }
    134 
    135 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    136 _mm_haddq_epi16(__m128i __A)
    137 {
    138   return  (__m128i) __builtin_ia32_vphaddwq ((__v8hi)__A);
    139 }
    140 
    141 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    142 _mm_haddq_epi32(__m128i __A)
    143 {
    144   return  (__m128i) __builtin_ia32_vphadddq ((__v4si)__A);
    145 }
    146 
    147 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    148 _mm_haddw_epu8(__m128i __A)
    149 {
    150   return  (__m128i) __builtin_ia32_vphaddubw ((__v16qi)__A);
    151 }
    152 
    153 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    154 _mm_haddd_epu8(__m128i __A)
    155 {
    156   return  (__m128i) __builtin_ia32_vphaddubd ((__v16qi)__A);
    157 }
    158 
    159 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    160 _mm_haddq_epu8(__m128i __A)
    161 {
    162   return  (__m128i) __builtin_ia32_vphaddubq ((__v16qi)__A);
    163 }
    164 
    165 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    166 _mm_haddd_epu16(__m128i __A)
    167 {
    168   return  (__m128i) __builtin_ia32_vphadduwd ((__v8hi)__A);
    169 }
    170 
    171 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    172 _mm_haddq_epu16(__m128i __A)
    173 {
    174   return  (__m128i) __builtin_ia32_vphadduwq ((__v8hi)__A);
    175 }
    176 
    177 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    178 _mm_haddq_epu32(__m128i __A)
    179 {
    180   return  (__m128i) __builtin_ia32_vphaddudq ((__v4si)__A);
    181 }
    182 
    183 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    184 _mm_hsubw_epi8(__m128i __A)
    185 {
    186   return  (__m128i) __builtin_ia32_vphsubbw ((__v16qi)__A);
    187 }
    188 
    189 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    190 _mm_hsubd_epi16(__m128i __A)
    191 {
    192   return  (__m128i) __builtin_ia32_vphsubwd ((__v8hi)__A);
    193 }
    194 
    195 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    196 _mm_hsubq_epi32(__m128i __A)
    197 {
    198   return  (__m128i) __builtin_ia32_vphsubdq ((__v4si)__A);
    199 }
    200 
    201 /* Vector conditional move and permute */
    202 
    203 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    204 _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
    205 {
    206   return  (__m128i) __builtin_ia32_vpcmov (__A, __B, __C);
    207 }
    208 
    209 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    210 _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
    211 {
    212   return  (__m128i) __builtin_ia32_vpperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
    213 }
    214 
    215 /* Packed Integer Rotates and Shifts
    216    Rotates - Non-Immediate form */
    217 
    218 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    219 _mm_rot_epi8(__m128i __A,  __m128i __B)
    220 {
    221   return  (__m128i) __builtin_ia32_vprotb ((__v16qi)__A, (__v16qi)__B);
    222 }
    223 
    224 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    225 _mm_rot_epi16(__m128i __A,  __m128i __B)
    226 {
    227   return  (__m128i) __builtin_ia32_vprotw ((__v8hi)__A, (__v8hi)__B);
    228 }
    229 
    230 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    231 _mm_rot_epi32(__m128i __A,  __m128i __B)
    232 {
    233   return  (__m128i) __builtin_ia32_vprotd ((__v4si)__A, (__v4si)__B);
    234 }
    235 
    236 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    237 _mm_rot_epi64(__m128i __A,  __m128i __B)
    238 {
    239   return (__m128i)  __builtin_ia32_vprotq ((__v2di)__A, (__v2di)__B);
    240 }
    241 
    242 /* Rotates - Immediate form */
    243 
    244 #ifdef __OPTIMIZE__
    245 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    246 _mm_roti_epi8(__m128i __A, const int __B)
    247 {
    248   return  (__m128i) __builtin_ia32_vprotbi ((__v16qi)__A, __B);
    249 }
    250 
    251 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    252 _mm_roti_epi16(__m128i __A, const int __B)
    253 {
    254   return  (__m128i) __builtin_ia32_vprotwi ((__v8hi)__A, __B);
    255 }
    256 
    257 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    258 _mm_roti_epi32(__m128i __A, const int __B)
    259 {
    260   return  (__m128i) __builtin_ia32_vprotdi ((__v4si)__A, __B);
    261 }
    262 
    263 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    264 _mm_roti_epi64(__m128i __A, const int __B)
    265 {
    266   return  (__m128i) __builtin_ia32_vprotqi ((__v2di)__A, __B);
    267 }
    268 #else
    269 #define _mm_roti_epi8(A, N) \
    270   ((__m128i) __builtin_ia32_vprotbi ((__v16qi)(__m128i)(A), (int)(N)))
    271 #define _mm_roti_epi16(A, N) \
    272   ((__m128i) __builtin_ia32_vprotwi ((__v8hi)(__m128i)(A), (int)(N)))
    273 #define _mm_roti_epi32(A, N) \
    274   ((__m128i) __builtin_ia32_vprotdi ((__v4si)(__m128i)(A), (int)(N)))
    275 #define _mm_roti_epi64(A, N) \
    276   ((__m128i) __builtin_ia32_vprotqi ((__v2di)(__m128i)(A), (int)(N)))
    277 #endif
    278 
    279 /* Shifts */
    280 
    281 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    282 _mm_shl_epi8(__m128i __A,  __m128i __B)
    283 {
    284   return  (__m128i) __builtin_ia32_vpshlb ((__v16qi)__A, (__v16qi)__B);
    285 }
    286 
    287 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    288 _mm_shl_epi16(__m128i __A,  __m128i __B)
    289 {
    290   return  (__m128i) __builtin_ia32_vpshlw ((__v8hi)__A, (__v8hi)__B);
    291 }
    292 
    293 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    294 _mm_shl_epi32(__m128i __A,  __m128i __B)
    295 {
    296   return  (__m128i) __builtin_ia32_vpshld ((__v4si)__A, (__v4si)__B);
    297 }
    298 
    299 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    300 _mm_shl_epi64(__m128i __A,  __m128i __B)
    301 {
    302   return  (__m128i) __builtin_ia32_vpshlq ((__v2di)__A, (__v2di)__B);
    303 }
    304 
    305 
    306 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    307 _mm_sha_epi8(__m128i __A,  __m128i __B)
    308 {
    309   return  (__m128i) __builtin_ia32_vpshab ((__v16qi)__A, (__v16qi)__B);
    310 }
    311 
    312 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    313 _mm_sha_epi16(__m128i __A,  __m128i __B)
    314 {
    315   return  (__m128i) __builtin_ia32_vpshaw ((__v8hi)__A, (__v8hi)__B);
    316 }
    317 
    318 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    319 _mm_sha_epi32(__m128i __A,  __m128i __B)
    320 {
    321   return  (__m128i) __builtin_ia32_vpshad ((__v4si)__A, (__v4si)__B);
    322 }
    323 
    324 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    325 _mm_sha_epi64(__m128i __A,  __m128i __B)
    326 {
    327   return  (__m128i) __builtin_ia32_vpshaq ((__v2di)__A, (__v2di)__B);
    328 }
    329 
    330 /* Compare and Predicate Generation
    331    pcom (integer, unsigned bytes) */
    332 
    333 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    334 _mm_comlt_epu8(__m128i __A, __m128i __B)
    335 {
    336   return (__m128i) __builtin_ia32_vpcomltub ((__v16qi)__A, (__v16qi)__B);
    337 }
    338 
    339 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    340 _mm_comle_epu8(__m128i __A, __m128i __B)
    341 {
    342   return (__m128i) __builtin_ia32_vpcomleub ((__v16qi)__A, (__v16qi)__B);
    343 }
    344 
    345 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    346 _mm_comgt_epu8(__m128i __A, __m128i __B)
    347 {
    348   return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi)__A, (__v16qi)__B);
    349 }
    350 
    351 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    352 _mm_comge_epu8(__m128i __A, __m128i __B)
    353 {
    354   return (__m128i) __builtin_ia32_vpcomgeub ((__v16qi)__A, (__v16qi)__B);
    355 }
    356 
    357 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    358 _mm_comeq_epu8(__m128i __A, __m128i __B)
    359 {
    360   return (__m128i) __builtin_ia32_vpcomequb ((__v16qi)__A, (__v16qi)__B);
    361 }
    362 
    363 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    364 _mm_comneq_epu8(__m128i __A, __m128i __B)
    365 {
    366   return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi)__A, (__v16qi)__B);
    367 }
    368 
    369 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    370 _mm_comfalse_epu8(__m128i __A, __m128i __B)
    371 {
    372   return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi)__A, (__v16qi)__B);
    373 }
    374 
    375 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    376 _mm_comtrue_epu8(__m128i __A, __m128i __B)
    377 {
    378   return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi)__A, (__v16qi)__B);
    379 }
    380 
    381 /* pcom (integer, unsigned words) */
    382 
    383 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    384 _mm_comlt_epu16(__m128i __A, __m128i __B)
    385 {
    386   return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi)__A, (__v8hi)__B);
    387 }
    388 
    389 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    390 _mm_comle_epu16(__m128i __A, __m128i __B)
    391 {
    392   return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi)__A, (__v8hi)__B);
    393 }
    394 
    395 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    396 _mm_comgt_epu16(__m128i __A, __m128i __B)
    397 {
    398   return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi)__A, (__v8hi)__B);
    399 }
    400 
    401 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    402 _mm_comge_epu16(__m128i __A, __m128i __B)
    403 {
    404   return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi)__A, (__v8hi)__B);
    405 }
    406 
    407 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    408 _mm_comeq_epu16(__m128i __A, __m128i __B)
    409 {
    410   return (__m128i) __builtin_ia32_vpcomequw ((__v8hi)__A, (__v8hi)__B);
    411 }
    412 
    413 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    414 _mm_comneq_epu16(__m128i __A, __m128i __B)
    415 {
    416   return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi)__A, (__v8hi)__B);
    417 }
    418 
    419 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    420 _mm_comfalse_epu16(__m128i __A, __m128i __B)
    421 {
    422   return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi)__A, (__v8hi)__B);
    423 }
    424 
    425 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    426 _mm_comtrue_epu16(__m128i __A, __m128i __B)
    427 {
    428   return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi)__A, (__v8hi)__B);
    429 }
    430 
    431 /* pcom (integer, unsigned double words) */
    432 
    433 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    434 _mm_comlt_epu32(__m128i __A, __m128i __B)
    435 {
    436   return (__m128i) __builtin_ia32_vpcomltud ((__v4si)__A, (__v4si)__B);
    437 }
    438 
    439 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    440 _mm_comle_epu32(__m128i __A, __m128i __B)
    441 {
    442   return (__m128i) __builtin_ia32_vpcomleud ((__v4si)__A, (__v4si)__B);
    443 }
    444 
    445 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    446 _mm_comgt_epu32(__m128i __A, __m128i __B)
    447 {
    448   return (__m128i) __builtin_ia32_vpcomgtud ((__v4si)__A, (__v4si)__B);
    449 }
    450 
    451 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    452 _mm_comge_epu32(__m128i __A, __m128i __B)
    453 {
    454   return (__m128i) __builtin_ia32_vpcomgeud ((__v4si)__A, (__v4si)__B);
    455 }
    456 
    457 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    458 _mm_comeq_epu32(__m128i __A, __m128i __B)
    459 {
    460   return (__m128i) __builtin_ia32_vpcomequd ((__v4si)__A, (__v4si)__B);
    461 }
    462 
    463 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    464 _mm_comneq_epu32(__m128i __A, __m128i __B)
    465 {
    466   return (__m128i) __builtin_ia32_vpcomnequd ((__v4si)__A, (__v4si)__B);
    467 }
    468 
    469 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    470 _mm_comfalse_epu32(__m128i __A, __m128i __B)
    471 {
    472   return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si)__A, (__v4si)__B);
    473 }
    474 
    475 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    476 _mm_comtrue_epu32(__m128i __A, __m128i __B)
    477 {
    478   return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si)__A, (__v4si)__B);
    479 }
    480 
    481 /* pcom (integer, unsigned quad words) */
    482 
    483 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    484 _mm_comlt_epu64(__m128i __A, __m128i __B)
    485 {
    486   return (__m128i) __builtin_ia32_vpcomltuq ((__v2di)__A, (__v2di)__B);
    487 }
    488 
    489 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    490 _mm_comle_epu64(__m128i __A, __m128i __B)
    491 {
    492   return (__m128i) __builtin_ia32_vpcomleuq ((__v2di)__A, (__v2di)__B);
    493 }
    494 
    495 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    496 _mm_comgt_epu64(__m128i __A, __m128i __B)
    497 {
    498   return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di)__A, (__v2di)__B);
    499 }
    500 
    501 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    502 _mm_comge_epu64(__m128i __A, __m128i __B)
    503 {
    504   return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di)__A, (__v2di)__B);
    505 }
    506 
    507 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    508 _mm_comeq_epu64(__m128i __A, __m128i __B)
    509 {
    510   return (__m128i) __builtin_ia32_vpcomequq ((__v2di)__A, (__v2di)__B);
    511 }
    512 
    513 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    514 _mm_comneq_epu64(__m128i __A, __m128i __B)
    515 {
    516   return (__m128i) __builtin_ia32_vpcomnequq ((__v2di)__A, (__v2di)__B);
    517 }
    518 
    519 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    520 _mm_comfalse_epu64(__m128i __A, __m128i __B)
    521 {
    522   return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di)__A, (__v2di)__B);
    523 }
    524 
    525 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    526 _mm_comtrue_epu64(__m128i __A, __m128i __B)
    527 {
    528   return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di)__A, (__v2di)__B);
    529 }
    530 
    531 /*pcom (integer, signed bytes) */
    532 
    533 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    534 _mm_comlt_epi8(__m128i __A, __m128i __B)
    535 {
    536   return (__m128i) __builtin_ia32_vpcomltb ((__v16qi)__A, (__v16qi)__B);
    537 }
    538 
    539 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    540 _mm_comle_epi8(__m128i __A, __m128i __B)
    541 {
    542   return (__m128i) __builtin_ia32_vpcomleb ((__v16qi)__A, (__v16qi)__B);
    543 }
    544 
    545 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    546 _mm_comgt_epi8(__m128i __A, __m128i __B)
    547 {
    548   return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi)__A, (__v16qi)__B);
    549 }
    550 
    551 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    552 _mm_comge_epi8(__m128i __A, __m128i __B)
    553 {
    554   return (__m128i) __builtin_ia32_vpcomgeb ((__v16qi)__A, (__v16qi)__B);
    555 }
    556 
    557 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    558 _mm_comeq_epi8(__m128i __A, __m128i __B)
    559 {
    560   return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi)__A, (__v16qi)__B);
    561 }
    562 
    563 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    564 _mm_comneq_epi8(__m128i __A, __m128i __B)
    565 {
    566   return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi)__A, (__v16qi)__B);
    567 }
    568 
    569 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    570 _mm_comfalse_epi8(__m128i __A, __m128i __B)
    571 {
    572   return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi)__A, (__v16qi)__B);
    573 }
    574 
    575 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    576 _mm_comtrue_epi8(__m128i __A, __m128i __B)
    577 {
    578   return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi)__A, (__v16qi)__B);
    579 }
    580 
    581 /*pcom (integer, signed words) */
    582 
    583 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    584 _mm_comlt_epi16(__m128i __A, __m128i __B)
    585 {
    586   return (__m128i) __builtin_ia32_vpcomltw ((__v8hi)__A, (__v8hi)__B);
    587 }
    588 
    589 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    590 _mm_comle_epi16(__m128i __A, __m128i __B)
    591 {
    592   return (__m128i) __builtin_ia32_vpcomlew ((__v8hi)__A, (__v8hi)__B);
    593 }
    594 
    595 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    596 _mm_comgt_epi16(__m128i __A, __m128i __B)
    597 {
    598   return (__m128i) __builtin_ia32_vpcomgtw ((__v8hi)__A, (__v8hi)__B);
    599 }
    600 
    601 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    602 _mm_comge_epi16(__m128i __A, __m128i __B)
    603 {
    604   return (__m128i) __builtin_ia32_vpcomgew ((__v8hi)__A, (__v8hi)__B);
    605 }
    606 
    607 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    608 _mm_comeq_epi16(__m128i __A, __m128i __B)
    609 {
    610   return (__m128i) __builtin_ia32_vpcomeqw ((__v8hi)__A, (__v8hi)__B);
    611 }
    612 
    613 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    614 _mm_comneq_epi16(__m128i __A, __m128i __B)
    615 {
    616   return (__m128i) __builtin_ia32_vpcomneqw ((__v8hi)__A, (__v8hi)__B);
    617 }
    618 
    619 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    620 _mm_comfalse_epi16(__m128i __A, __m128i __B)
    621 {
    622   return (__m128i) __builtin_ia32_vpcomfalsew ((__v8hi)__A, (__v8hi)__B);
    623 }
    624 
    625 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    626 _mm_comtrue_epi16(__m128i __A, __m128i __B)
    627 {
    628   return (__m128i) __builtin_ia32_vpcomtruew ((__v8hi)__A, (__v8hi)__B);
    629 }
    630 
    631 /*pcom (integer, signed double words) */
    632 
    633 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    634 _mm_comlt_epi32(__m128i __A, __m128i __B)
    635 {
    636   return (__m128i) __builtin_ia32_vpcomltd ((__v4si)__A, (__v4si)__B);
    637 }
    638 
    639 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    640 _mm_comle_epi32(__m128i __A, __m128i __B)
    641 {
    642   return (__m128i) __builtin_ia32_vpcomled ((__v4si)__A, (__v4si)__B);
    643 }
    644 
    645 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    646 _mm_comgt_epi32(__m128i __A, __m128i __B)
    647 {
    648   return (__m128i) __builtin_ia32_vpcomgtd ((__v4si)__A, (__v4si)__B);
    649 }
    650 
    651 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    652 _mm_comge_epi32(__m128i __A, __m128i __B)
    653 {
    654   return (__m128i) __builtin_ia32_vpcomged ((__v4si)__A, (__v4si)__B);
    655 }
    656 
    657 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    658 _mm_comeq_epi32(__m128i __A, __m128i __B)
    659 {
    660   return (__m128i) __builtin_ia32_vpcomeqd ((__v4si)__A, (__v4si)__B);
    661 }
    662 
    663 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    664 _mm_comneq_epi32(__m128i __A, __m128i __B)
    665 {
    666   return (__m128i) __builtin_ia32_vpcomneqd ((__v4si)__A, (__v4si)__B);
    667 }
    668 
    669 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    670 _mm_comfalse_epi32(__m128i __A, __m128i __B)
    671 {
    672   return (__m128i) __builtin_ia32_vpcomfalsed ((__v4si)__A, (__v4si)__B);
    673 }
    674 
    675 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    676 _mm_comtrue_epi32(__m128i __A, __m128i __B)
    677 {
    678   return (__m128i) __builtin_ia32_vpcomtrued ((__v4si)__A, (__v4si)__B);
    679 }
    680 
    681 /*pcom (integer, signed quad words) */
    682 
    683 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    684 _mm_comlt_epi64(__m128i __A, __m128i __B)
    685 {
    686   return (__m128i) __builtin_ia32_vpcomltq ((__v2di)__A, (__v2di)__B);
    687 }
    688 
    689 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    690 _mm_comle_epi64(__m128i __A, __m128i __B)
    691 {
    692   return (__m128i) __builtin_ia32_vpcomleq ((__v2di)__A, (__v2di)__B);
    693 }
    694 
    695 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    696 _mm_comgt_epi64(__m128i __A, __m128i __B)
    697 {
    698   return (__m128i) __builtin_ia32_vpcomgtq ((__v2di)__A, (__v2di)__B);
    699 }
    700 
    701 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    702 _mm_comge_epi64(__m128i __A, __m128i __B)
    703 {
    704   return (__m128i) __builtin_ia32_vpcomgeq ((__v2di)__A, (__v2di)__B);
    705 }
    706 
    707 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    708 _mm_comeq_epi64(__m128i __A, __m128i __B)
    709 {
    710   return (__m128i) __builtin_ia32_vpcomeqq ((__v2di)__A, (__v2di)__B);
    711 }
    712 
    713 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    714 _mm_comneq_epi64(__m128i __A, __m128i __B)
    715 {
    716   return (__m128i) __builtin_ia32_vpcomneqq ((__v2di)__A, (__v2di)__B);
    717 }
    718 
    719 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    720 _mm_comfalse_epi64(__m128i __A, __m128i __B)
    721 {
    722   return (__m128i) __builtin_ia32_vpcomfalseq ((__v2di)__A, (__v2di)__B);
    723 }
    724 
    725 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    726 _mm_comtrue_epi64(__m128i __A, __m128i __B)
    727 {
    728   return (__m128i) __builtin_ia32_vpcomtrueq ((__v2di)__A, (__v2di)__B);
    729 }
    730 
    731 /* FRCZ */
    732 
    733 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    734 _mm_frcz_ps (__m128 __A)
    735 {
    736   return (__m128) __builtin_ia32_vfrczps ((__v4sf)__A);
    737 }
    738 
    739 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    740 _mm_frcz_pd (__m128d __A)
    741 {
    742   return (__m128d) __builtin_ia32_vfrczpd ((__v2df)__A);
    743 }
    744 
    745 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    746 _mm_frcz_ss (__m128 __A, __m128 __B)
    747 {
    748   return (__m128) __builtin_ia32_vfrczss ((__v4sf)__A, (__v4sf)__B);
    749 }
    750 
    751 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    752 _mm_frcz_sd (__m128d __A, __m128d __B)
    753 {
    754   return (__m128d) __builtin_ia32_vfrczsd ((__v2df)__A, (__v2df)__B);
    755 }
    756 
    757 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    758 _mm256_frcz_ps (__m256 __A)
    759 {
    760   return (__m256) __builtin_ia32_vfrczps256 ((__v8sf)__A);
    761 }
    762 
    763 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    764 _mm256_frcz_pd (__m256d __A)
    765 {
    766   return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A);
    767 }
    768 
    769 /* PERMIL2 */
    770 
    771 #ifdef __OPTIMIZE__
    772 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    773 _mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I)
    774 {
    775   return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X,
    776 					      (__v2df)__Y,
    777 					      (__v2di)__C,
    778 					      __I);
    779 }
    780 
    781 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    782 _mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I)
    783 {
    784   return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X,
    785 						 (__v4df)__Y,
    786 						 (__v4di)__C,
    787 						 __I);
    788 }
    789 
    790 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    791 _mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I)
    792 {
    793   return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X,
    794 					     (__v4sf)__Y,
    795 					     (__v4si)__C,
    796 					     __I);
    797 }
    798 
    799 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    800 _mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
    801 {
    802   return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X,
    803 						(__v8sf)__Y,
    804 						(__v8si)__C,
    805 						__I);
    806 }
    807 #else
    808 #define _mm_permute2_pd(X, Y, C, I)					\
    809   ((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X),		\
    810 					(__v2df)(__m128d)(Y),		\
    811 					(__v2di)(__m128d)(C),		\
    812 					(int)(I)))
    813 
    814 #define _mm256_permute2_pd(X, Y, C, I)					\
    815   ((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X),	\
    816 					   (__v4df)(__m256d)(Y),	\
    817 					   (__v4di)(__m256d)(C),	\
    818 					   (int)(I)))
    819 
    820 #define _mm_permute2_ps(X, Y, C, I)					\
    821   ((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X),		\
    822 				       (__v4sf)(__m128)(Y),		\
    823 				       (__v4si)(__m128)(C),		\
    824 				       (int)(I)))
    825 
    826 #define _mm256_permute2_ps(X, Y, C, I)					\
    827   ((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X),		\
    828 					  (__v8sf)(__m256)(Y),  	\
    829 					  (__v8si)(__m256)(C),		\
    830  					  (int)(I)))
    831 #endif /* __OPTIMIZE__ */
    832 
    833 #endif /* __XOP__ */
    834 
    835 #endif /* _XOPMMINTRIN_H_INCLUDED */
    836