Home | History | Annotate | Download | only in include
      1 /* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
      2 
      3    This file is part of GCC.
      4 
      5    GCC is free software; you can redistribute it and/or modify
      6    it under the terms of the GNU General Public License as published by
      7    the Free Software Foundation; either version 3, or (at your option)
      8    any later version.
      9 
     10    GCC is distributed in the hope that it will be useful,
     11    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13    GNU General Public License for more details.
     14 
     15    Under Section 7 of GPL version 3, you are granted additional
     16    permissions described in the GCC Runtime Library Exception, version
     17    3.1, as published by the Free Software Foundation.
     18 
     19    You should have received a copy of the GNU General Public License and
     20    a copy of the GCC Runtime Library Exception along with this program;
     21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     22    <http://www.gnu.org/licenses/>.  */
     23 
     24 #ifndef _BMMINTRIN_H_INCLUDED
     25 #define _BMMINTRIN_H_INCLUDED
     26 
     27 #ifndef __SSE5__
     28 # error "SSE5 instruction set not enabled"
     29 #else
     30 
     31 /* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files.  */
     32 #include <ammintrin.h>
     33 #include <mmintrin-common.h>
     34 
     35 /* Floating point multiply/add type instructions */
     36 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     37 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
     38 {
     39   return (__m128) __builtin_ia32_fmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
     40 }
     41 
     42 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     43 _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
     44 {
     45   return (__m128d) __builtin_ia32_fmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
     46 }
     47 
     48 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     49 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
     50 {
     51   return  (__m128) __builtin_ia32_fmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
     52 }
     53 
     54 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     55 _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
     56 {
     57   return (__m128d) __builtin_ia32_fmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
     58 }
     59 
     60 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     61 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
     62 {
     63   return (__m128) __builtin_ia32_fmsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
     64 }
     65 
     66 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     67 _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
     68 {
     69   return (__m128d) __builtin_ia32_fmsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
     70 }
     71 
     72 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     73 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
     74 {
     75   return (__m128) __builtin_ia32_fmsubss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
     76 }
     77 
     78 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     79 _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
     80 {
     81   return (__m128d) __builtin_ia32_fmsubsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
     82 }
     83 
     84 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     85 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
     86 {
     87   return (__m128) __builtin_ia32_fnmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
     88 }
     89 
     90 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     91 _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
     92 {
     93   return (__m128d) __builtin_ia32_fnmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
     94 }
     95 
     96 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     97 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
     98 {
     99   return (__m128) __builtin_ia32_fnmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
    100 }
    101 
    102 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    103 _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
    104 {
    105   return (__m128d) __builtin_ia32_fnmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
    106 }
    107 
    108 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    109 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
    110 {
    111   return (__m128) __builtin_ia32_fnmsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
    112 }
    113 
    114 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    115 _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
    116 {
    117   return (__m128d) __builtin_ia32_fnmsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
    118 }
    119 
    120 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    121 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
    122 {
    123   return (__m128) __builtin_ia32_fnmsubss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
    124 }
    125 
    126 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    127 _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
    128 {
    129   return (__m128d) __builtin_ia32_fnmsubsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
    130 }
    131 
/* Integer multiply/add instructions. */
    133 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    134 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
    135 {
    136   return (__m128i) __builtin_ia32_pmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C);
    137 }
    138 
    139 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    140 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
    141 {
    142   return (__m128i) __builtin_ia32_pmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
    143 }
    144 
    145 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    146 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
    147 {
    148   return  (__m128i) __builtin_ia32_pmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
    149 }
    150 
    151 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    152 _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
    153 {
    154   return  (__m128i) __builtin_ia32_pmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
    155 }
    156 
    157 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    158 _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
    159 {
    160   return  (__m128i) __builtin_ia32_pmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
    161 }
    162 
    163 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    164 _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
    165 {
    166   return  (__m128i) __builtin_ia32_pmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
    167 }
    168 
    169 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    170 _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
    171 {
    172   return  (__m128i) __builtin_ia32_pmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
    173 }
    174 
    175 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    176 _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
    177 {
    178   return  (__m128i) __builtin_ia32_pmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
    179 }
    180 
    181 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    182 _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
    183 {
    184   return  (__m128i) __builtin_ia32_pmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
    185 }
    186 
    187 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    188 _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
    189 {
    190   return  (__m128i) __builtin_ia32_pmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
    191 }
    192 
    193 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    194 _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
    195 {
    196   return  (__m128i) __builtin_ia32_pmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
    197 }
    198 
    199 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    200 _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
    201 {
    202   return  (__m128i) __builtin_ia32_pmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
    203 }
    204 
    205 /* Packed Integer Horizontal Add and Subtract */
    206 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    207 _mm_haddw_epi8(__m128i __A)
    208 {
    209   return  (__m128i) __builtin_ia32_phaddbw ((__v16qi)__A);
    210 }
    211 
    212 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    213 _mm_haddd_epi8(__m128i __A)
    214 {
    215   return  (__m128i) __builtin_ia32_phaddbd ((__v16qi)__A);
    216 }
    217 
    218 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    219 _mm_haddq_epi8(__m128i __A)
    220 {
    221   return  (__m128i) __builtin_ia32_phaddbq ((__v16qi)__A);
    222 }
    223 
    224 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    225 _mm_haddd_epi16(__m128i __A)
    226 {
    227   return  (__m128i) __builtin_ia32_phaddwd ((__v8hi)__A);
    228 }
    229 
    230 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    231 _mm_haddq_epi16(__m128i __A)
    232 {
    233   return  (__m128i) __builtin_ia32_phaddwq ((__v8hi)__A);
    234 }
    235 
    236 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    237 _mm_haddq_epi32(__m128i __A)
    238 {
    239   return  (__m128i) __builtin_ia32_phadddq ((__v4si)__A);
    240 }
    241 
    242 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    243 _mm_haddw_epu8(__m128i __A)
    244 {
    245   return  (__m128i) __builtin_ia32_phaddubw ((__v16qi)__A);
    246 }
    247 
    248 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    249 _mm_haddd_epu8(__m128i __A)
    250 {
    251   return  (__m128i) __builtin_ia32_phaddubd ((__v16qi)__A);
    252 }
    253 
    254 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    255 _mm_haddq_epu8(__m128i __A)
    256 {
    257   return  (__m128i) __builtin_ia32_phaddubq ((__v16qi)__A);
    258 }
    259 
    260 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    261 _mm_haddd_epu16(__m128i __A)
    262 {
    263   return  (__m128i) __builtin_ia32_phadduwd ((__v8hi)__A);
    264 }
    265 
    266 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    267 _mm_haddq_epu16(__m128i __A)
    268 {
    269   return  (__m128i) __builtin_ia32_phadduwq ((__v8hi)__A);
    270 }
    271 
    272 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    273 _mm_haddq_epu32(__m128i __A)
    274 {
    275   return  (__m128i) __builtin_ia32_phaddudq ((__v4si)__A);
    276 }
    277 
    278 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    279 _mm_hsubw_epi8(__m128i __A)
    280 {
    281   return  (__m128i) __builtin_ia32_phsubbw ((__v16qi)__A);
    282 }
    283 
    284 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    285 _mm_hsubd_epi16(__m128i __A)
    286 {
    287   return  (__m128i) __builtin_ia32_phsubwd ((__v8hi)__A);
    288 }
    289 
    290 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    291 _mm_hsubq_epi32(__m128i __A)
    292 {
    293   return  (__m128i) __builtin_ia32_phsubdq ((__v4si)__A);
    294 }
    295 
    296 /* Vector conditional move and permute */
    297 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    298 _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
    299 {
    300   return  (__m128i) __builtin_ia32_pcmov (__A, __B, __C);
    301 }
    302 
    303 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    304 _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
    305 {
    306   return  (__m128i) __builtin_ia32_pperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
    307 }
    308 
    309 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    310 _mm_perm_ps(__m128 __A, __m128 __B, __m128i __C)
    311 {
    312   return  (__m128) __builtin_ia32_permps ((__m128)__A, (__m128)__B, (__v16qi)__C);
    313 }
    314 
    315 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    316 _mm_perm_pd(__m128d __A, __m128d __B, __m128i __C)
    317 {
    318   return  (__m128d) __builtin_ia32_permpd ((__m128d)__A, (__m128d)__B, (__v16qi)__C);
    319 }
    320 
    321 /* Packed Integer Rotates and Shifts */
    322 
    323 /* Rotates - Non-Immediate form */
    324 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    325 _mm_rot_epi8(__m128i __A,  __m128i __B)
    326 {
    327   return  (__m128i) __builtin_ia32_protb ((__v16qi)__A, (__v16qi)__B);
    328 }
    329 
    330 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    331 _mm_rot_epi16(__m128i __A,  __m128i __B)
    332 {
    333   return  (__m128i) __builtin_ia32_protw ((__v8hi)__A, (__v8hi)__B);
    334 }
    335 
    336 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    337 _mm_rot_epi32(__m128i __A,  __m128i __B)
    338 {
    339   return  (__m128i) __builtin_ia32_protd ((__v4si)__A, (__v4si)__B);
    340 }
    341 
    342 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    343 _mm_rot_epi64(__m128i __A,  __m128i __B)
    344 {
    345   return (__m128i)  __builtin_ia32_protq ((__v2di)__A, (__v2di)__B);
    346 }
    347 
    348 
    349 /* Rotates - Immediate form */
    350 #ifdef __OPTIMIZE__
    351 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    352 _mm_roti_epi8(__m128i __A, const int __B)
    353 {
    354   return  (__m128i) __builtin_ia32_protbi ((__v16qi)__A, __B);
    355 }
    356 
    357 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    358 _mm_roti_epi16(__m128i __A, const int __B)
    359 {
    360   return  (__m128i) __builtin_ia32_protwi ((__v8hi)__A, __B);
    361 }
    362 
    363 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    364 _mm_roti_epi32(__m128i __A, const int __B)
    365 {
    366   return  (__m128i) __builtin_ia32_protdi ((__v4si)__A, __B);
    367 }
    368 
    369 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    370 _mm_roti_epi64(__m128i __A, const int __B)
    371 {
    372   return  (__m128i) __builtin_ia32_protqi ((__v2di)__A, __B);
    373 }
    374 #else
    375 #define _mm_roti_epi8(A, N) \
    376   ((__m128i) __builtin_ia32_protbi ((__v16qi)(__m128i)(A), (int)(N)))
    377 #define _mm_roti_epi16(A, N) \
    378   ((__m128i) __builtin_ia32_protwi ((__v8hi)(__m128i)(A), (int)(N)))
    379 #define _mm_roti_epi32(A, N) \
    380   ((__m128i) __builtin_ia32_protdi ((__v4si)(__m128i)(A), (int)(N)))
    381 #define _mm_roti_epi64(A, N) \
    382   ((__m128i) __builtin_ia32_protqi ((__v2di)(__m128i)(A), (int)(N)))
    383 #endif
    384 
    385 /* pshl */
    386 
    387 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    388 _mm_shl_epi8(__m128i __A,  __m128i __B)
    389 {
    390   return  (__m128i) __builtin_ia32_pshlb ((__v16qi)__A, (__v16qi)__B);
    391 }
    392 
    393 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    394 _mm_shl_epi16(__m128i __A,  __m128i __B)
    395 {
    396   return  (__m128i) __builtin_ia32_pshlw ((__v8hi)__A, (__v8hi)__B);
    397 }
    398 
    399 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    400 _mm_shl_epi32(__m128i __A,  __m128i __B)
    401 {
    402   return  (__m128i) __builtin_ia32_pshld ((__v4si)__A, (__v4si)__B);
    403 }
    404 
    405 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    406 _mm_shl_epi64(__m128i __A,  __m128i __B)
    407 {
    408   return  (__m128i) __builtin_ia32_pshlq ((__v2di)__A, (__v2di)__B);
    409 }
    410 
    411 /* psha */
    412 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    413 _mm_sha_epi8(__m128i __A,  __m128i __B)
    414 {
    415   return  (__m128i) __builtin_ia32_pshab ((__v16qi)__A, (__v16qi)__B);
    416 }
    417 
    418 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    419 _mm_sha_epi16(__m128i __A,  __m128i __B)
    420 {
    421   return  (__m128i) __builtin_ia32_pshaw ((__v8hi)__A, (__v8hi)__B);
    422 }
    423 
    424 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    425 _mm_sha_epi32(__m128i __A,  __m128i __B)
    426 {
    427   return  (__m128i) __builtin_ia32_pshad ((__v4si)__A, (__v4si)__B);
    428 }
    429 
    430 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    431 _mm_sha_epi64(__m128i __A,  __m128i __B)
    432 {
    433   return  (__m128i) __builtin_ia32_pshaq ((__v2di)__A, (__v2di)__B);
    434 }
    435 
    436 /* Compare and Predicate Generation */
    437 
    438 /* com (floating point, packed single) */
    439 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    440 _mm_comeq_ps(__m128 __A, __m128 __B)
    441 {
    442   return  (__m128) __builtin_ia32_comeqps ((__v4sf)__A, (__v4sf)__B);
    443 }
    444 
    445 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    446 _mm_comlt_ps(__m128 __A, __m128 __B)
    447 {
    448   return  (__m128) __builtin_ia32_comltps ((__v4sf)__A, (__v4sf)__B);
    449 }
    450 
    451 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    452 _mm_comle_ps(__m128 __A, __m128 __B)
    453 {
    454   return (__m128) __builtin_ia32_comleps ((__v4sf)__A, (__v4sf)__B);
    455 }
    456 
    457 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    458 _mm_comunord_ps(__m128 __A, __m128 __B)
    459 {
    460   return (__m128) __builtin_ia32_comunordps ((__v4sf)__A, (__v4sf)__B);
    461 }
    462 
    463 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    464 _mm_comneq_ps(__m128 __A, __m128 __B)
    465 {
    466   return (__m128) __builtin_ia32_comuneqps ((__v4sf)__A, (__v4sf)__B);
    467 }
    468 
    469 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    470 _mm_comnlt_ps(__m128 __A, __m128 __B)
    471 {
    472   return (__m128) __builtin_ia32_comunltps ((__v4sf)__A, (__v4sf)__B);
    473 }
    474 
    475 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    476 _mm_comnle_ps(__m128 __A, __m128 __B)
    477 {
    478   return (__m128)  __builtin_ia32_comunleps ((__v4sf)__A, (__v4sf)__B);
    479 }
    480 
    481 
    482 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    483 _mm_comord_ps(__m128 __A, __m128 __B)
    484 {
    485   return (__m128) __builtin_ia32_comordps ((__v4sf)__A, (__v4sf)__B);
    486 }
    487 
    488 
    489 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    490 _mm_comueq_ps(__m128 __A, __m128 __B)
    491 {
    492   return (__m128) __builtin_ia32_comueqps ((__v4sf)__A, (__v4sf)__B);
    493 }
    494 
    495 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    496 _mm_comnge_ps(__m128 __A, __m128 __B)
    497 {
    498   return (__m128) __builtin_ia32_comungeps ((__v4sf)__A, (__v4sf)__B);
    499 }
    500 
    501 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    502 _mm_comngt_ps(__m128 __A, __m128 __B)
    503 {
    504   return (__m128) __builtin_ia32_comungtps ((__v4sf)__A, (__v4sf)__B);
    505 }
    506 
    507 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    508 _mm_comfalse_ps(__m128 __A, __m128 __B)
    509 {
    510   return (__m128) __builtin_ia32_comfalseps ((__v4sf)__A, (__v4sf)__B);
    511 }
    512 
    513 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    514 _mm_comoneq_ps(__m128 __A, __m128 __B)
    515 {
    516   return (__m128) __builtin_ia32_comneqps ((__v4sf)__A, (__v4sf)__B);
    517 }
    518 
    519 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    520 _mm_comge_ps(__m128 __A, __m128 __B)
    521 {
    522   return (__m128) __builtin_ia32_comgeps ((__v4sf)__A, (__v4sf)__B);
    523 }
    524 
    525 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    526 _mm_comgt_ps(__m128 __A, __m128 __B)
    527 {
    528   return (__m128) __builtin_ia32_comgtps ((__v4sf)__A, (__v4sf)__B);
    529 }
    530 
    531 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    532 _mm_comtrue_ps(__m128 __A, __m128 __B)
    533 {
    534   return (__m128) __builtin_ia32_comtrueps ((__v4sf)__A, (__v4sf)__B);
    535 }
    536 
    537 /* com (floating point, packed double) */
    538 
    539 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    540 _mm_comeq_pd(__m128d __A, __m128d __B)
    541 {
    542   return (__m128d) __builtin_ia32_comeqpd ((__v2df)__A, (__v2df)__B);
    543 }
    544 
    545 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    546 _mm_comlt_pd(__m128d __A, __m128d __B)
    547 {
    548   return (__m128d) __builtin_ia32_comltpd ((__v2df)__A, (__v2df)__B);
    549 }
    550 
    551 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    552 _mm_comle_pd(__m128d __A, __m128d __B)
    553 {
    554   return (__m128d) __builtin_ia32_comlepd ((__v2df)__A, (__v2df)__B);
    555 }
    556 
    557 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    558 _mm_comunord_pd(__m128d __A, __m128d __B)
    559 {
    560   return (__m128d) __builtin_ia32_comunordpd ((__v2df)__A, (__v2df)__B);
    561 }
    562 
    563 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    564 _mm_comneq_pd(__m128d __A, __m128d __B)
    565 {
    566   return (__m128d) __builtin_ia32_comuneqpd ((__v2df)__A, (__v2df)__B);
    567 }
    568 
    569 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    570 _mm_comnlt_pd(__m128d __A, __m128d __B)
    571 {
    572   return (__m128d) __builtin_ia32_comunltpd ((__v2df)__A, (__v2df)__B);
    573 }
    574 
    575 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    576 _mm_comnle_pd(__m128d __A, __m128d __B)
    577 {
    578   return (__m128d) __builtin_ia32_comunlepd ((__v2df)__A, (__v2df)__B);
    579 }
    580 
    581 
    582 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    583 _mm_comord_pd(__m128d __A, __m128d __B)
    584 {
    585   return (__m128d) __builtin_ia32_comordpd ((__v2df)__A, (__v2df)__B);
    586 }
    587 
    588 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    589 _mm_comueq_pd(__m128d __A, __m128d __B)
    590 {
    591   return (__m128d) __builtin_ia32_comueqpd ((__v2df)__A, (__v2df)__B);
    592 }
    593 
    594 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    595 _mm_comnge_pd(__m128d __A, __m128d __B)
    596 {
    597   return (__m128d) __builtin_ia32_comungepd ((__v2df)__A, (__v2df)__B);
    598 }
    599 
    600 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    601 _mm_comngt_pd(__m128d __A, __m128d __B)
    602 {
    603   return (__m128d) __builtin_ia32_comungtpd ((__v2df)__A, (__v2df)__B);
    604 }
    605 
    606 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    607 _mm_comfalse_pd(__m128d __A, __m128d __B)
    608 {
    609   return (__m128d) __builtin_ia32_comfalsepd ((__v2df)__A, (__v2df)__B);
    610 }
    611 
    612 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    613 _mm_comoneq_pd(__m128d __A, __m128d __B)
    614 {
    615   return (__m128d) __builtin_ia32_comneqpd ((__v2df)__A, (__v2df)__B);
    616 }
    617 
    618 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    619 _mm_comge_pd(__m128d __A, __m128d __B)
    620 {
    621   return (__m128d) __builtin_ia32_comgepd ((__v2df)__A, (__v2df)__B);
    622 }
    623 
    624 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    625 _mm_comgt_pd(__m128d __A, __m128d __B)
    626 {
    627   return (__m128d) __builtin_ia32_comgtpd ((__v2df)__A, (__v2df)__B);
    628 }
    629 
    630 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    631 _mm_comtrue_pd(__m128d __A, __m128d __B)
    632 {
    633   return (__m128d) __builtin_ia32_comtruepd ((__v2df)__A, (__v2df)__B);
    634 }
    635 
    636 /* com (floating point, scalar single) */
    637 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    638 _mm_comeq_ss(__m128 __A, __m128 __B)
    639 {
    640   return (__m128)  __builtin_ia32_comeqss ((__v4sf)__A, (__v4sf)__B);
    641 }
    642 
    643 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    644 _mm_comlt_ss(__m128 __A, __m128 __B)
    645 {
    646   return (__m128) __builtin_ia32_comltss ((__v4sf)__A, (__v4sf)__B);
    647 }
    648 
    649 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    650 _mm_comle_ss(__m128 __A, __m128 __B)
    651 {
    652   return (__m128) __builtin_ia32_comless ((__v4sf)__A, (__v4sf)__B);
    653 }
    654 
    655 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    656 _mm_comunord_ss(__m128 __A, __m128 __B)
    657 {
    658   return (__m128) __builtin_ia32_comunordss ((__v4sf)__A, (__v4sf)__B);
    659 }
    660 
    661 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    662 _mm_comneq_ss(__m128 __A, __m128 __B)
    663 {
    664   return (__m128) __builtin_ia32_comuneqss ((__v4sf)__A, (__v4sf)__B);
    665 }
    666 
    667 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    668 _mm_comnlt_ss(__m128 __A, __m128 __B)
    669 {
    670   return (__m128) __builtin_ia32_comunltss ((__v4sf)__A, (__v4sf)__B);
    671 }
    672 
    673 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    674 _mm_comnle_ss(__m128 __A, __m128 __B)
    675 {
    676   return (__m128) __builtin_ia32_comunless ((__v4sf)__A, (__v4sf)__B);
    677 }
    678 
    679 
    680 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    681 _mm_comord_ss(__m128 __A, __m128 __B)
    682 {
    683   return (__m128) __builtin_ia32_comordss ((__v4sf)__A, (__v4sf)__B);
    684 }
    685 
    686 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    687 _mm_comueq_ss(__m128 __A, __m128 __B)
    688 {
    689   return (__m128) __builtin_ia32_comueqss ((__v4sf)__A, (__v4sf)__B);
    690 }
    691 
    692 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    693 _mm_comnge_ss(__m128 __A, __m128 __B)
    694 {
    695   return (__m128) __builtin_ia32_comungess ((__v4sf)__A, (__v4sf)__B);
    696 }
    697 
    698 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    699 _mm_comngt_ss(__m128 __A, __m128 __B)
    700 {
    701   return (__m128) __builtin_ia32_comungtss ((__v4sf)__A, (__v4sf)__B);
    702 }
    703 
    704 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    705 _mm_comfalse_ss(__m128 __A, __m128 __B)
    706 {
    707   return (__m128) __builtin_ia32_comfalsess ((__v4sf)__A, (__v4sf)__B);
    708 }
    709 
    710 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    711 _mm_comoneq_ss(__m128 __A, __m128 __B)
    712 {
    713   return (__m128) __builtin_ia32_comneqss ((__v4sf)__A, (__v4sf)__B);
    714 }
    715 
    716 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    717 _mm_comge_ss(__m128 __A, __m128 __B)
    718 {
    719   return (__m128) __builtin_ia32_comgess ((__v4sf)__A, (__v4sf)__B);
    720 }
    721 
    722 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    723 _mm_comgt_ss(__m128 __A, __m128 __B)
    724 {
    725   return (__m128) __builtin_ia32_comgtss ((__v4sf)__A, (__v4sf)__B);
    726 }
    727 
    728 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    729 _mm_comtrue_ss(__m128 __A, __m128 __B)
    730 {
    731   return (__m128) __builtin_ia32_comtruess ((__v4sf)__A, (__v4sf)__B);
    732 }
    733 
    734 /* com (floating point, scalar double) */
    735 
    736 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    737 _mm_comeq_sd(__m128d __A, __m128d __B)
    738 {
    739   return (__m128d) __builtin_ia32_comeqsd ((__v2df)__A, (__v2df)__B);
    740 }
    741 
    742 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    743 _mm_comlt_sd(__m128d __A, __m128d __B)
    744 {
    745   return (__m128d) __builtin_ia32_comltsd ((__v2df)__A, (__v2df)__B);
    746 }
    747 
    748 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    749 _mm_comle_sd(__m128d __A, __m128d __B)
    750 {
    751   return (__m128d) __builtin_ia32_comlesd ((__v2df)__A, (__v2df)__B);
    752 }
    753 
    754 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    755 _mm_comunord_sd(__m128d __A, __m128d __B)
    756 {
    757   return (__m128d) __builtin_ia32_comunordsd ((__v2df)__A, (__v2df)__B);
    758 }
    759 
    760 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    761 _mm_comneq_sd(__m128d __A, __m128d __B)
    762 {
    763   return (__m128d) __builtin_ia32_comuneqsd ((__v2df)__A, (__v2df)__B);
    764 }
    765 
    766 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    767 _mm_comnlt_sd(__m128d __A, __m128d __B)
    768 {
    769   return (__m128d) __builtin_ia32_comunltsd ((__v2df)__A, (__v2df)__B);
    770 }
    771 
    772 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    773 _mm_comnle_sd(__m128d __A, __m128d __B)
    774 {
    775   return (__m128d) __builtin_ia32_comunlesd ((__v2df)__A, (__v2df)__B);
    776 }
    777 
    778 
    779 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    780 _mm_comord_sd(__m128d __A, __m128d __B)
    781 {
    782   return (__m128d) __builtin_ia32_comordsd ((__v2df)__A, (__v2df)__B);
    783 }
    784 
    785 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    786 _mm_comueq_sd(__m128d __A, __m128d __B)
    787 {
    788   return (__m128d) __builtin_ia32_comueqsd ((__v2df)__A, (__v2df)__B);
    789 }
    790 
    791 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    792 _mm_comnge_sd(__m128d __A, __m128d __B)
    793 {
    794   return (__m128d) __builtin_ia32_comungesd ((__v2df)__A, (__v2df)__B);
    795 }
    796 
    797 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    798 _mm_comngt_sd(__m128d __A, __m128d __B)
    799 {
    800   return (__m128d) __builtin_ia32_comungtsd ((__v2df)__A, (__v2df)__B);
    801 }
    802 
    803 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    804 _mm_comfalse_sd(__m128d __A, __m128d __B)
    805 {
    806   return (__m128d) __builtin_ia32_comfalsesd ((__v2df)__A, (__v2df)__B);
    807 }
    808 
    809 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    810 _mm_comoneq_sd(__m128d __A, __m128d __B)
    811 {
    812   return (__m128d) __builtin_ia32_comneqsd ((__v2df)__A, (__v2df)__B);
    813 }
    814 
    815 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    816 _mm_comge_sd(__m128d __A, __m128d __B)
    817 {
    818   return (__m128d) __builtin_ia32_comgesd ((__v2df)__A, (__v2df)__B);
    819 }
    820 
    821 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    822 _mm_comgt_sd(__m128d __A, __m128d __B)
    823 {
    824   return (__m128d) __builtin_ia32_comgtsd ((__v2df)__A, (__v2df)__B);
    825 }
    826 
    827 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    828 _mm_comtrue_sd(__m128d __A, __m128d __B)
    829 {
    830   return (__m128d) __builtin_ia32_comtruesd ((__v2df)__A, (__v2df)__B);
    831 }
    832 
    833 
/* pcom (integer, unsigned bytes) */
    835 
    836 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    837 _mm_comlt_epu8(__m128i __A, __m128i __B)
    838 {
    839   return (__m128i) __builtin_ia32_pcomltub ((__v16qi)__A, (__v16qi)__B);
    840 }
    841 
    842 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    843 _mm_comle_epu8(__m128i __A, __m128i __B)
    844 {
    845   return (__m128i) __builtin_ia32_pcomleub ((__v16qi)__A, (__v16qi)__B);
    846 }
    847 
    848 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    849 _mm_comgt_epu8(__m128i __A, __m128i __B)
    850 {
    851   return (__m128i) __builtin_ia32_pcomgtub ((__v16qi)__A, (__v16qi)__B);
    852 }
    853 
    854 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    855 _mm_comge_epu8(__m128i __A, __m128i __B)
    856 {
    857   return (__m128i) __builtin_ia32_pcomgeub ((__v16qi)__A, (__v16qi)__B);
    858 }
    859 
    860 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    861 _mm_comeq_epu8(__m128i __A, __m128i __B)
    862 {
    863   return (__m128i) __builtin_ia32_pcomequb ((__v16qi)__A, (__v16qi)__B);
    864 }
    865 
    866 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    867 _mm_comneq_epu8(__m128i __A, __m128i __B)
    868 {
    869   return (__m128i) __builtin_ia32_pcomnequb ((__v16qi)__A, (__v16qi)__B);
    870 }
    871 
    872 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    873 _mm_comfalse_epu8(__m128i __A, __m128i __B)
    874 {
    875   return (__m128i) __builtin_ia32_pcomfalseub ((__v16qi)__A, (__v16qi)__B);
    876 }
    877 
    878 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    879 _mm_comtrue_epu8(__m128i __A, __m128i __B)
    880 {
    881   return (__m128i) __builtin_ia32_pcomtrueub ((__v16qi)__A, (__v16qi)__B);
    882 }
    883 
/* pcom (integer, unsigned words) */
    885 
    886 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    887 _mm_comlt_epu16(__m128i __A, __m128i __B)
    888 {
    889   return (__m128i) __builtin_ia32_pcomltuw ((__v8hi)__A, (__v8hi)__B);
    890 }
    891 
    892 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    893 _mm_comle_epu16(__m128i __A, __m128i __B)
    894 {
    895   return (__m128i) __builtin_ia32_pcomleuw ((__v8hi)__A, (__v8hi)__B);
    896 }
    897 
    898 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    899 _mm_comgt_epu16(__m128i __A, __m128i __B)
    900 {
    901   return (__m128i) __builtin_ia32_pcomgtuw ((__v8hi)__A, (__v8hi)__B);
    902 }
    903 
    904 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    905 _mm_comge_epu16(__m128i __A, __m128i __B)
    906 {
    907   return (__m128i) __builtin_ia32_pcomgeuw ((__v8hi)__A, (__v8hi)__B);
    908 }
    909 
    910 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    911 _mm_comeq_epu16(__m128i __A, __m128i __B)
    912 {
    913   return (__m128i) __builtin_ia32_pcomequw ((__v8hi)__A, (__v8hi)__B);
    914 }
    915 
    916 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    917 _mm_comneq_epu16(__m128i __A, __m128i __B)
    918 {
    919   return (__m128i) __builtin_ia32_pcomnequw ((__v8hi)__A, (__v8hi)__B);
    920 }
    921 
    922 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    923 _mm_comfalse_epu16(__m128i __A, __m128i __B)
    924 {
    925   return (__m128i) __builtin_ia32_pcomfalseuw ((__v8hi)__A, (__v8hi)__B);
    926 }
    927 
    928 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    929 _mm_comtrue_epu16(__m128i __A, __m128i __B)
    930 {
    931   return (__m128i) __builtin_ia32_pcomtrueuw ((__v8hi)__A, (__v8hi)__B);
    932 }
    933 
/* pcom (integer, unsigned double words) */
    935 
    936 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    937 _mm_comlt_epu32(__m128i __A, __m128i __B)
    938 {
    939   return (__m128i) __builtin_ia32_pcomltud ((__v4si)__A, (__v4si)__B);
    940 }
    941 
    942 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    943 _mm_comle_epu32(__m128i __A, __m128i __B)
    944 {
    945   return (__m128i) __builtin_ia32_pcomleud ((__v4si)__A, (__v4si)__B);
    946 }
    947 
    948 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    949 _mm_comgt_epu32(__m128i __A, __m128i __B)
    950 {
    951   return (__m128i) __builtin_ia32_pcomgtud ((__v4si)__A, (__v4si)__B);
    952 }
    953 
    954 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    955 _mm_comge_epu32(__m128i __A, __m128i __B)
    956 {
    957   return (__m128i) __builtin_ia32_pcomgeud ((__v4si)__A, (__v4si)__B);
    958 }
    959 
    960 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    961 _mm_comeq_epu32(__m128i __A, __m128i __B)
    962 {
    963   return (__m128i) __builtin_ia32_pcomequd ((__v4si)__A, (__v4si)__B);
    964 }
    965 
    966 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    967 _mm_comneq_epu32(__m128i __A, __m128i __B)
    968 {
    969   return (__m128i) __builtin_ia32_pcomnequd ((__v4si)__A, (__v4si)__B);
    970 }
    971 
    972 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    973 _mm_comfalse_epu32(__m128i __A, __m128i __B)
    974 {
    975   return (__m128i) __builtin_ia32_pcomfalseud ((__v4si)__A, (__v4si)__B);
    976 }
    977 
    978 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    979 _mm_comtrue_epu32(__m128i __A, __m128i __B)
    980 {
    981   return (__m128i) __builtin_ia32_pcomtrueud ((__v4si)__A, (__v4si)__B);
    982 }
    983 
/* pcom (integer, unsigned quad words) */
    985 
    986 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    987 _mm_comlt_epu64(__m128i __A, __m128i __B)
    988 {
    989   return (__m128i) __builtin_ia32_pcomltuq ((__v2di)__A, (__v2di)__B);
    990 }
    991 
    992 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    993 _mm_comle_epu64(__m128i __A, __m128i __B)
    994 {
    995   return (__m128i) __builtin_ia32_pcomleuq ((__v2di)__A, (__v2di)__B);
    996 }
    997 
    998 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    999 _mm_comgt_epu64(__m128i __A, __m128i __B)
   1000 {
   1001   return (__m128i) __builtin_ia32_pcomgtuq ((__v2di)__A, (__v2di)__B);
   1002 }
   1003 
   1004 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1005 _mm_comge_epu64(__m128i __A, __m128i __B)
   1006 {
   1007   return (__m128i) __builtin_ia32_pcomgeuq ((__v2di)__A, (__v2di)__B);
   1008 }
   1009 
   1010 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1011 _mm_comeq_epu64(__m128i __A, __m128i __B)
   1012 {
   1013   return (__m128i) __builtin_ia32_pcomequq ((__v2di)__A, (__v2di)__B);
   1014 }
   1015 
   1016 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1017 _mm_comneq_epu64(__m128i __A, __m128i __B)
   1018 {
   1019   return (__m128i) __builtin_ia32_pcomnequq ((__v2di)__A, (__v2di)__B);
   1020 }
   1021 
   1022 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1023 _mm_comfalse_epu64(__m128i __A, __m128i __B)
   1024 {
   1025   return (__m128i) __builtin_ia32_pcomfalseuq ((__v2di)__A, (__v2di)__B);
   1026 }
   1027 
   1028 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1029 _mm_comtrue_epu64(__m128i __A, __m128i __B)
   1030 {
   1031   return (__m128i) __builtin_ia32_pcomtrueuq ((__v2di)__A, (__v2di)__B);
   1032 }
   1033 
   1034 /*pcom (integer, signed bytes) */
   1035 
   1036 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1037 _mm_comlt_epi8(__m128i __A, __m128i __B)
   1038 {
   1039   return (__m128i) __builtin_ia32_pcomltb ((__v16qi)__A, (__v16qi)__B);
   1040 }
   1041 
   1042 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1043 _mm_comle_epi8(__m128i __A, __m128i __B)
   1044 {
   1045   return (__m128i) __builtin_ia32_pcomleb ((__v16qi)__A, (__v16qi)__B);
   1046 }
   1047 
   1048 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1049 _mm_comgt_epi8(__m128i __A, __m128i __B)
   1050 {
   1051   return (__m128i) __builtin_ia32_pcomgtb ((__v16qi)__A, (__v16qi)__B);
   1052 }
   1053 
   1054 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1055 _mm_comge_epi8(__m128i __A, __m128i __B)
   1056 {
   1057   return (__m128i) __builtin_ia32_pcomgeb ((__v16qi)__A, (__v16qi)__B);
   1058 }
   1059 
   1060 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1061 _mm_comeq_epi8(__m128i __A, __m128i __B)
   1062 {
   1063   return (__m128i) __builtin_ia32_pcomeqb ((__v16qi)__A, (__v16qi)__B);
   1064 }
   1065 
   1066 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1067 _mm_comneq_epi8(__m128i __A, __m128i __B)
   1068 {
   1069   return (__m128i) __builtin_ia32_pcomneqb ((__v16qi)__A, (__v16qi)__B);
   1070 }
   1071 
   1072 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1073 _mm_comfalse_epi8(__m128i __A, __m128i __B)
   1074 {
   1075   return (__m128i) __builtin_ia32_pcomfalseb ((__v16qi)__A, (__v16qi)__B);
   1076 }
   1077 
   1078 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1079 _mm_comtrue_epi8(__m128i __A, __m128i __B)
   1080 {
   1081   return (__m128i) __builtin_ia32_pcomtrueb ((__v16qi)__A, (__v16qi)__B);
   1082 }
   1083 
   1084 /*pcom (integer, signed words) */
   1085 
   1086 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1087 _mm_comlt_epi16(__m128i __A, __m128i __B)
   1088 {
   1089   return (__m128i) __builtin_ia32_pcomltw ((__v8hi)__A, (__v8hi)__B);
   1090 }
   1091 
   1092 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1093 _mm_comle_epi16(__m128i __A, __m128i __B)
   1094 {
   1095   return (__m128i) __builtin_ia32_pcomlew ((__v8hi)__A, (__v8hi)__B);
   1096 }
   1097 
   1098 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1099 _mm_comgt_epi16(__m128i __A, __m128i __B)
   1100 {
   1101   return (__m128i) __builtin_ia32_pcomgtw ((__v8hi)__A, (__v8hi)__B);
   1102 }
   1103 
   1104 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1105 _mm_comge_epi16(__m128i __A, __m128i __B)
   1106 {
   1107   return (__m128i) __builtin_ia32_pcomgew ((__v8hi)__A, (__v8hi)__B);
   1108 }
   1109 
   1110 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1111 _mm_comeq_epi16(__m128i __A, __m128i __B)
   1112 {
   1113   return (__m128i) __builtin_ia32_pcomeqw ((__v8hi)__A, (__v8hi)__B);
   1114 }
   1115 
   1116 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1117 _mm_comneq_epi16(__m128i __A, __m128i __B)
   1118 {
   1119   return (__m128i) __builtin_ia32_pcomneqw ((__v8hi)__A, (__v8hi)__B);
   1120 }
   1121 
   1122 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1123 _mm_comfalse_epi16(__m128i __A, __m128i __B)
   1124 {
   1125   return (__m128i) __builtin_ia32_pcomfalsew ((__v8hi)__A, (__v8hi)__B);
   1126 }
   1127 
   1128 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1129 _mm_comtrue_epi16(__m128i __A, __m128i __B)
   1130 {
   1131   return (__m128i) __builtin_ia32_pcomtruew ((__v8hi)__A, (__v8hi)__B);
   1132 }
   1133 
   1134 /*pcom (integer, signed double words) */
   1135 
   1136 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1137 _mm_comlt_epi32(__m128i __A, __m128i __B)
   1138 {
   1139   return (__m128i) __builtin_ia32_pcomltd ((__v4si)__A, (__v4si)__B);
   1140 }
   1141 
   1142 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1143 _mm_comle_epi32(__m128i __A, __m128i __B)
   1144 {
   1145   return (__m128i) __builtin_ia32_pcomled ((__v4si)__A, (__v4si)__B);
   1146 }
   1147 
   1148 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1149 _mm_comgt_epi32(__m128i __A, __m128i __B)
   1150 {
   1151   return (__m128i) __builtin_ia32_pcomgtd ((__v4si)__A, (__v4si)__B);
   1152 }
   1153 
   1154 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1155 _mm_comge_epi32(__m128i __A, __m128i __B)
   1156 {
   1157   return (__m128i) __builtin_ia32_pcomged ((__v4si)__A, (__v4si)__B);
   1158 }
   1159 
   1160 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1161 _mm_comeq_epi32(__m128i __A, __m128i __B)
   1162 {
   1163   return (__m128i) __builtin_ia32_pcomeqd ((__v4si)__A, (__v4si)__B);
   1164 }
   1165 
   1166 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1167 _mm_comneq_epi32(__m128i __A, __m128i __B)
   1168 {
   1169   return (__m128i) __builtin_ia32_pcomneqd ((__v4si)__A, (__v4si)__B);
   1170 }
   1171 
   1172 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1173 _mm_comfalse_epi32(__m128i __A, __m128i __B)
   1174 {
   1175   return (__m128i) __builtin_ia32_pcomfalsed ((__v4si)__A, (__v4si)__B);
   1176 }
   1177 
   1178 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1179 _mm_comtrue_epi32(__m128i __A, __m128i __B)
   1180 {
   1181   return (__m128i) __builtin_ia32_pcomtrued ((__v4si)__A, (__v4si)__B);
   1182 }
   1183 
   1184 /*pcom (integer, signed quad words) */
   1185 
   1186 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1187 _mm_comlt_epi64(__m128i __A, __m128i __B)
   1188 {
   1189   return (__m128i) __builtin_ia32_pcomltq ((__v2di)__A, (__v2di)__B);
   1190 }
   1191 
   1192 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1193 _mm_comle_epi64(__m128i __A, __m128i __B)
   1194 {
   1195   return (__m128i) __builtin_ia32_pcomleq ((__v2di)__A, (__v2di)__B);
   1196 }
   1197 
   1198 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1199 _mm_comgt_epi64(__m128i __A, __m128i __B)
   1200 {
   1201   return (__m128i) __builtin_ia32_pcomgtq ((__v2di)__A, (__v2di)__B);
   1202 }
   1203 
   1204 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1205 _mm_comge_epi64(__m128i __A, __m128i __B)
   1206 {
   1207   return (__m128i) __builtin_ia32_pcomgeq ((__v2di)__A, (__v2di)__B);
   1208 }
   1209 
   1210 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1211 _mm_comeq_epi64(__m128i __A, __m128i __B)
   1212 {
   1213   return (__m128i) __builtin_ia32_pcomeqq ((__v2di)__A, (__v2di)__B);
   1214 }
   1215 
   1216 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1217 _mm_comneq_epi64(__m128i __A, __m128i __B)
   1218 {
   1219   return (__m128i) __builtin_ia32_pcomneqq ((__v2di)__A, (__v2di)__B);
   1220 }
   1221 
   1222 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1223 _mm_comfalse_epi64(__m128i __A, __m128i __B)
   1224 {
   1225   return (__m128i) __builtin_ia32_pcomfalseq ((__v2di)__A, (__v2di)__B);
   1226 }
   1227 
   1228 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1229 _mm_comtrue_epi64(__m128i __A, __m128i __B)
   1230 {
   1231   return (__m128i) __builtin_ia32_pcomtrueq ((__v2di)__A, (__v2di)__B);
   1232 }
   1233 
   1234 /* FRCZ */
   1235 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1236 _mm_frcz_ps (__m128 __A)
   1237 {
   1238   return (__m128) __builtin_ia32_frczps ((__v4sf)__A);
   1239 }
   1240 
   1241 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1242 _mm_frcz_pd (__m128d __A)
   1243 {
   1244   return (__m128d) __builtin_ia32_frczpd ((__v2df)__A);
   1245 }
   1246 
   1247 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1248 _mm_frcz_ss (__m128 __A, __m128 __B)
   1249 {
   1250   return (__m128) __builtin_ia32_frczss ((__v4sf)__A, (__v4sf)__B);
   1251 }
   1252 
   1253 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1254 _mm_frcz_sd (__m128d __A, __m128d __B)
   1255 {
   1256   return (__m128d) __builtin_ia32_frczsd ((__v2df)__A, (__v2df)__B);
   1257 }
   1258 
   1259 #endif /* __SSE5__ */
   1260 
   1261 #endif /* _BMMINTRIN_H_INCLUDED */
   1262