1 /* Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 GCC is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24 #ifndef _X86INTRIN_H_INCLUDED 25 # error "Never use <xopintrin.h> directly; include <x86intrin.h> instead." 26 #endif 27 28 #ifndef _XOPMMINTRIN_H_INCLUDED 29 #define _XOPMMINTRIN_H_INCLUDED 30 31 #ifndef __XOP__ 32 # error "XOP instruction set not enabled" 33 #else 34 35 #include <fma4intrin.h> 36 37 /* Integer multiply/add intructions. */ 38 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 39 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) 40 { 41 return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C); 42 } 43 44 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 45 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) 46 { 47 return (__m128i) __builtin_ia32_vpmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); 48 } 49 50 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 51 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) 52 { 53 return (__m128i) __builtin_ia32_vpmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 54 } 55 56 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 57 _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) 58 { 59 return (__m128i) __builtin_ia32_vpmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 60 } 61 62 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 63 _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) 64 { 65 return (__m128i) __builtin_ia32_vpmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C); 66 } 67 68 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 69 _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) 70 { 71 return (__m128i) __builtin_ia32_vpmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C); 72 } 73 74 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 75 _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) 76 { 77 return (__m128i) __builtin_ia32_vpmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C); 78 } 79 80 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 81 _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) 82 { 83 return (__m128i) __builtin_ia32_vpmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C); 84 } 85 86 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 87 _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) 88 { 89 return (__m128i) __builtin_ia32_vpmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C); 90 } 91 92 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 93 _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) 94 { 95 return (__m128i) __builtin_ia32_vpmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C); 96 } 97 98 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 99 _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) 100 { 101 return (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C); 102 } 103 104 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 105 _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) 106 { 107 return (__m128i) __builtin_ia32_vpmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C); 108 } 109 110 /* Packed Integer Horizontal Add and Subtract */ 111 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 112 _mm_haddw_epi8(__m128i __A) 113 { 114 return (__m128i) __builtin_ia32_vphaddbw ((__v16qi)__A); 115 } 116 117 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 118 _mm_haddd_epi8(__m128i __A) 119 { 120 return (__m128i) __builtin_ia32_vphaddbd ((__v16qi)__A); 121 } 122 123 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 124 _mm_haddq_epi8(__m128i __A) 125 { 126 return (__m128i) __builtin_ia32_vphaddbq ((__v16qi)__A); 127 } 128 129 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 130 _mm_haddd_epi16(__m128i __A) 131 { 132 return (__m128i) __builtin_ia32_vphaddwd ((__v8hi)__A); 133 } 134 135 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 136 _mm_haddq_epi16(__m128i __A) 137 { 138 return (__m128i) __builtin_ia32_vphaddwq ((__v8hi)__A); 139 } 140 141 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 142 _mm_haddq_epi32(__m128i __A) 143 { 144 return (__m128i) __builtin_ia32_vphadddq ((__v4si)__A); 145 } 146 147 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 148 _mm_haddw_epu8(__m128i __A) 149 { 150 return (__m128i) __builtin_ia32_vphaddubw ((__v16qi)__A); 151 } 152 153 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 154 _mm_haddd_epu8(__m128i __A) 155 { 156 return (__m128i) __builtin_ia32_vphaddubd ((__v16qi)__A); 157 } 158 159 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 160 _mm_haddq_epu8(__m128i __A) 161 { 162 return (__m128i) __builtin_ia32_vphaddubq ((__v16qi)__A); 163 } 164 165 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 166 _mm_haddd_epu16(__m128i __A) 167 { 168 return (__m128i) __builtin_ia32_vphadduwd ((__v8hi)__A); 169 } 170 171 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 172 _mm_haddq_epu16(__m128i __A) 173 { 174 return (__m128i) __builtin_ia32_vphadduwq ((__v8hi)__A); 175 } 176 177 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 178 _mm_haddq_epu32(__m128i __A) 179 { 180 return (__m128i) __builtin_ia32_vphaddudq ((__v4si)__A); 181 } 182 183 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 184 _mm_hsubw_epi8(__m128i __A) 185 { 186 return (__m128i) __builtin_ia32_vphsubbw ((__v16qi)__A); 187 } 188 189 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 190 _mm_hsubd_epi16(__m128i __A) 191 { 192 return (__m128i) __builtin_ia32_vphsubwd ((__v8hi)__A); 193 } 194 195 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 196 _mm_hsubq_epi32(__m128i __A) 197 { 198 return (__m128i) __builtin_ia32_vphsubdq ((__v4si)__A); 199 } 200 201 /* Vector conditional move and permute */ 202 203 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 204 _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) 205 { 206 return (__m128i) __builtin_ia32_vpcmov (__A, __B, __C); 207 } 208 209 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 210 _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) 211 { 212 return (__m128i) __builtin_ia32_vpperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); 213 } 214 215 /* Packed Integer Rotates and Shifts 216 Rotates - Non-Immediate form */ 217 218 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 219 _mm_rot_epi8(__m128i __A, __m128i __B) 220 { 221 return (__m128i) __builtin_ia32_vprotb ((__v16qi)__A, (__v16qi)__B); 222 } 223 224 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 225 _mm_rot_epi16(__m128i __A, __m128i __B) 226 { 227 return (__m128i) __builtin_ia32_vprotw ((__v8hi)__A, (__v8hi)__B); 228 } 229 230 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 231 _mm_rot_epi32(__m128i __A, __m128i __B) 232 { 233 return (__m128i) __builtin_ia32_vprotd ((__v4si)__A, (__v4si)__B); 234 } 235 236 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 237 _mm_rot_epi64(__m128i __A, __m128i __B) 238 { 239 return (__m128i) __builtin_ia32_vprotq ((__v2di)__A, (__v2di)__B); 240 } 241 242 /* Rotates - Immediate form */ 243 244 #ifdef __OPTIMIZE__ 245 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 246 _mm_roti_epi8(__m128i __A, const int __B) 247 { 248 return (__m128i) __builtin_ia32_vprotbi ((__v16qi)__A, __B); 249 } 250 251 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 252 _mm_roti_epi16(__m128i __A, const int __B) 253 { 254 return (__m128i) __builtin_ia32_vprotwi ((__v8hi)__A, __B); 255 } 256 257 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 258 _mm_roti_epi32(__m128i __A, const int __B) 259 { 260 return (__m128i) __builtin_ia32_vprotdi ((__v4si)__A, __B); 261 } 262 263 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 264 _mm_roti_epi64(__m128i __A, const int __B) 265 { 266 return (__m128i) __builtin_ia32_vprotqi ((__v2di)__A, __B); 267 } 268 #else 269 #define _mm_roti_epi8(A, N) \ 270 ((__m128i) __builtin_ia32_vprotbi ((__v16qi)(__m128i)(A), (int)(N))) 271 #define _mm_roti_epi16(A, N) \ 272 ((__m128i) __builtin_ia32_vprotwi ((__v8hi)(__m128i)(A), (int)(N))) 273 #define _mm_roti_epi32(A, N) \ 274 ((__m128i) __builtin_ia32_vprotdi ((__v4si)(__m128i)(A), (int)(N))) 275 #define _mm_roti_epi64(A, N) \ 276 ((__m128i) __builtin_ia32_vprotqi ((__v2di)(__m128i)(A), (int)(N))) 277 #endif 278 279 /* Shifts */ 280 281 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 282 _mm_shl_epi8(__m128i __A, __m128i __B) 283 { 284 return (__m128i) __builtin_ia32_vpshlb ((__v16qi)__A, (__v16qi)__B); 285 } 286 287 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 288 _mm_shl_epi16(__m128i __A, __m128i __B) 289 { 290 return (__m128i) __builtin_ia32_vpshlw ((__v8hi)__A, (__v8hi)__B); 291 } 292 293 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 294 _mm_shl_epi32(__m128i __A, __m128i __B) 295 { 296 return (__m128i) __builtin_ia32_vpshld ((__v4si)__A, (__v4si)__B); 297 } 298 299 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 300 _mm_shl_epi64(__m128i __A, __m128i __B) 301 { 302 return (__m128i) __builtin_ia32_vpshlq ((__v2di)__A, (__v2di)__B); 303 } 304 305 306 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 307 _mm_sha_epi8(__m128i __A, __m128i __B) 308 { 309 return (__m128i) __builtin_ia32_vpshab ((__v16qi)__A, (__v16qi)__B); 310 } 311 312 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 313 _mm_sha_epi16(__m128i __A, __m128i __B) 314 { 315 return (__m128i) __builtin_ia32_vpshaw ((__v8hi)__A, (__v8hi)__B); 316 } 317 318 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 319 _mm_sha_epi32(__m128i __A, __m128i __B) 320 { 321 return (__m128i) __builtin_ia32_vpshad ((__v4si)__A, (__v4si)__B); 322 } 323 324 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 325 _mm_sha_epi64(__m128i __A, __m128i __B) 326 { 327 return (__m128i) __builtin_ia32_vpshaq ((__v2di)__A, (__v2di)__B); 328 } 329 330 /* Compare and Predicate Generation 331 pcom (integer, unsinged bytes) */ 332 333 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 334 _mm_comlt_epu8(__m128i __A, __m128i __B) 335 { 336 return (__m128i) __builtin_ia32_vpcomltub ((__v16qi)__A, (__v16qi)__B); 337 } 338 339 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 340 _mm_comle_epu8(__m128i __A, __m128i __B) 341 { 342 return (__m128i) __builtin_ia32_vpcomleub ((__v16qi)__A, (__v16qi)__B); 343 } 344 345 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 346 _mm_comgt_epu8(__m128i __A, __m128i __B) 347 { 348 return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi)__A, (__v16qi)__B); 349 } 350 351 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 352 _mm_comge_epu8(__m128i __A, __m128i __B) 353 { 354 return (__m128i) __builtin_ia32_vpcomgeub ((__v16qi)__A, (__v16qi)__B); 355 } 356 357 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 358 _mm_comeq_epu8(__m128i __A, __m128i __B) 359 { 360 return (__m128i) __builtin_ia32_vpcomequb ((__v16qi)__A, (__v16qi)__B); 361 } 362 363 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 364 _mm_comneq_epu8(__m128i __A, __m128i __B) 365 { 366 return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi)__A, (__v16qi)__B); 367 } 368 369 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 370 _mm_comfalse_epu8(__m128i __A, __m128i __B) 371 { 372 return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi)__A, (__v16qi)__B); 373 } 374 375 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 376 _mm_comtrue_epu8(__m128i __A, __m128i __B) 377 { 378 return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi)__A, (__v16qi)__B); 379 } 380 381 /*pcom (integer, unsinged words) */ 382 383 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 384 _mm_comlt_epu16(__m128i __A, __m128i __B) 385 { 386 return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi)__A, (__v8hi)__B); 387 } 388 389 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 390 _mm_comle_epu16(__m128i __A, __m128i __B) 391 { 392 return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi)__A, (__v8hi)__B); 393 } 394 395 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 396 _mm_comgt_epu16(__m128i __A, __m128i __B) 397 { 398 return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi)__A, (__v8hi)__B); 399 } 400 401 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 402 _mm_comge_epu16(__m128i __A, __m128i __B) 403 { 404 return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi)__A, (__v8hi)__B); 405 } 406 407 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 408 _mm_comeq_epu16(__m128i __A, __m128i __B) 409 { 410 return (__m128i) __builtin_ia32_vpcomequw ((__v8hi)__A, (__v8hi)__B); 411 } 412 413 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 414 _mm_comneq_epu16(__m128i __A, __m128i __B) 415 { 416 return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi)__A, (__v8hi)__B); 417 } 418 419 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 420 _mm_comfalse_epu16(__m128i __A, __m128i __B) 421 { 422 return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi)__A, (__v8hi)__B); 423 } 424 425 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 426 _mm_comtrue_epu16(__m128i __A, __m128i __B) 427 { 428 return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi)__A, (__v8hi)__B); 429 } 430 431 /*pcom (integer, unsinged double words) */ 432 433 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 434 _mm_comlt_epu32(__m128i __A, __m128i __B) 435 { 436 return (__m128i) __builtin_ia32_vpcomltud ((__v4si)__A, (__v4si)__B); 437 } 438 439 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 440 _mm_comle_epu32(__m128i __A, __m128i __B) 441 { 442 return (__m128i) __builtin_ia32_vpcomleud ((__v4si)__A, (__v4si)__B); 443 } 444 445 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 446 _mm_comgt_epu32(__m128i __A, __m128i __B) 447 { 448 return (__m128i) __builtin_ia32_vpcomgtud ((__v4si)__A, (__v4si)__B); 449 } 450 451 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 452 _mm_comge_epu32(__m128i __A, __m128i __B) 453 { 454 return (__m128i) __builtin_ia32_vpcomgeud ((__v4si)__A, (__v4si)__B); 455 } 456 457 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 458 _mm_comeq_epu32(__m128i __A, __m128i __B) 459 { 460 return (__m128i) __builtin_ia32_vpcomequd ((__v4si)__A, (__v4si)__B); 461 } 462 463 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 464 _mm_comneq_epu32(__m128i __A, __m128i __B) 465 { 466 return (__m128i) __builtin_ia32_vpcomnequd ((__v4si)__A, (__v4si)__B); 467 } 468 469 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 470 _mm_comfalse_epu32(__m128i __A, __m128i __B) 471 { 472 return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si)__A, (__v4si)__B); 473 } 474 475 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 476 _mm_comtrue_epu32(__m128i __A, __m128i __B) 477 { 478 return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si)__A, (__v4si)__B); 479 } 480 481 /*pcom (integer, unsinged quad words) */ 482 483 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 484 _mm_comlt_epu64(__m128i __A, __m128i __B) 485 { 486 return (__m128i) __builtin_ia32_vpcomltuq ((__v2di)__A, (__v2di)__B); 487 } 488 489 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 490 _mm_comle_epu64(__m128i __A, __m128i __B) 491 { 492 return (__m128i) __builtin_ia32_vpcomleuq ((__v2di)__A, (__v2di)__B); 493 } 494 495 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 496 _mm_comgt_epu64(__m128i __A, __m128i __B) 497 { 498 return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di)__A, (__v2di)__B); 499 } 500 501 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 502 _mm_comge_epu64(__m128i __A, __m128i __B) 503 { 504 return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di)__A, (__v2di)__B); 505 } 506 507 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 508 _mm_comeq_epu64(__m128i __A, __m128i __B) 509 { 510 return (__m128i) __builtin_ia32_vpcomequq ((__v2di)__A, (__v2di)__B); 511 } 512 513 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 514 _mm_comneq_epu64(__m128i __A, __m128i __B) 515 { 516 return (__m128i) __builtin_ia32_vpcomnequq ((__v2di)__A, (__v2di)__B); 517 } 518 519 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 520 _mm_comfalse_epu64(__m128i __A, __m128i __B) 521 { 522 return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di)__A, (__v2di)__B); 523 } 524 525 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 526 _mm_comtrue_epu64(__m128i __A, __m128i __B) 527 { 528 return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di)__A, (__v2di)__B); 529 } 530 531 /*pcom (integer, signed bytes) */ 532 533 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 534 _mm_comlt_epi8(__m128i __A, __m128i __B) 535 { 536 return (__m128i) __builtin_ia32_vpcomltb ((__v16qi)__A, (__v16qi)__B); 537 } 538 539 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 540 _mm_comle_epi8(__m128i __A, __m128i __B) 541 { 542 return (__m128i) __builtin_ia32_vpcomleb ((__v16qi)__A, (__v16qi)__B); 543 } 544 545 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 546 _mm_comgt_epi8(__m128i __A, __m128i __B) 547 { 548 return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi)__A, (__v16qi)__B); 549 } 550 551 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 552 _mm_comge_epi8(__m128i __A, __m128i __B) 553 { 554 return (__m128i) __builtin_ia32_vpcomgeb ((__v16qi)__A, (__v16qi)__B); 555 } 556 557 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 558 _mm_comeq_epi8(__m128i __A, __m128i __B) 559 { 560 return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi)__A, (__v16qi)__B); 561 } 562 563 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 564 _mm_comneq_epi8(__m128i __A, __m128i __B) 565 { 566 return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi)__A, (__v16qi)__B); 567 } 568 569 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 570 _mm_comfalse_epi8(__m128i __A, __m128i __B) 571 { 572 return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi)__A, (__v16qi)__B); 573 } 574 575 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 576 _mm_comtrue_epi8(__m128i __A, __m128i __B) 577 { 578 return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi)__A, (__v16qi)__B); 579 } 580 581 /*pcom (integer, signed words) */ 582 583 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 584 _mm_comlt_epi16(__m128i __A, __m128i __B) 585 { 586 return (__m128i) __builtin_ia32_vpcomltw ((__v8hi)__A, (__v8hi)__B); 587 } 588 589 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 590 _mm_comle_epi16(__m128i __A, __m128i __B) 591 { 592 return (__m128i) __builtin_ia32_vpcomlew ((__v8hi)__A, (__v8hi)__B); 593 } 594 595 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 596 _mm_comgt_epi16(__m128i __A, __m128i __B) 597 { 598 return (__m128i) __builtin_ia32_vpcomgtw ((__v8hi)__A, (__v8hi)__B); 599 } 600 601 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 602 _mm_comge_epi16(__m128i __A, __m128i __B) 603 { 604 return (__m128i) __builtin_ia32_vpcomgew ((__v8hi)__A, (__v8hi)__B); 605 } 606 607 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 608 _mm_comeq_epi16(__m128i __A, __m128i __B) 609 { 610 return (__m128i) __builtin_ia32_vpcomeqw ((__v8hi)__A, (__v8hi)__B); 611 } 612 613 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 614 _mm_comneq_epi16(__m128i __A, __m128i __B) 615 { 616 return (__m128i) __builtin_ia32_vpcomneqw ((__v8hi)__A, (__v8hi)__B); 617 } 618 619 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 620 _mm_comfalse_epi16(__m128i __A, __m128i __B) 621 { 622 return (__m128i) __builtin_ia32_vpcomfalsew ((__v8hi)__A, (__v8hi)__B); 623 } 624 625 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 626 _mm_comtrue_epi16(__m128i __A, __m128i __B) 627 { 628 return (__m128i) __builtin_ia32_vpcomtruew ((__v8hi)__A, (__v8hi)__B); 629 } 630 631 /*pcom (integer, signed double words) */ 632 633 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 634 _mm_comlt_epi32(__m128i __A, __m128i __B) 635 { 636 return (__m128i) __builtin_ia32_vpcomltd ((__v4si)__A, (__v4si)__B); 637 } 638 639 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 640 _mm_comle_epi32(__m128i __A, __m128i __B) 641 { 642 return (__m128i) __builtin_ia32_vpcomled ((__v4si)__A, (__v4si)__B); 643 } 644 645 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 646 _mm_comgt_epi32(__m128i __A, __m128i __B) 647 { 648 return (__m128i) __builtin_ia32_vpcomgtd ((__v4si)__A, (__v4si)__B); 649 } 650 651 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 652 _mm_comge_epi32(__m128i __A, __m128i __B) 653 { 654 return (__m128i) __builtin_ia32_vpcomged ((__v4si)__A, (__v4si)__B); 655 } 656 657 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 658 _mm_comeq_epi32(__m128i __A, __m128i __B) 659 { 660 return (__m128i) __builtin_ia32_vpcomeqd ((__v4si)__A, (__v4si)__B); 661 } 662 663 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 664 _mm_comneq_epi32(__m128i __A, __m128i __B) 665 { 666 return (__m128i) __builtin_ia32_vpcomneqd ((__v4si)__A, (__v4si)__B); 667 } 668 669 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 670 _mm_comfalse_epi32(__m128i __A, __m128i __B) 671 { 672 return (__m128i) __builtin_ia32_vpcomfalsed ((__v4si)__A, (__v4si)__B); 673 } 674 675 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 676 _mm_comtrue_epi32(__m128i __A, __m128i __B) 677 { 678 return (__m128i) __builtin_ia32_vpcomtrued ((__v4si)__A, (__v4si)__B); 679 } 680 681 /*pcom (integer, signed quad words) */ 682 683 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 684 _mm_comlt_epi64(__m128i __A, __m128i __B) 685 { 686 return (__m128i) __builtin_ia32_vpcomltq ((__v2di)__A, (__v2di)__B); 687 } 688 689 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 690 _mm_comle_epi64(__m128i __A, __m128i __B) 691 { 692 return (__m128i) __builtin_ia32_vpcomleq ((__v2di)__A, (__v2di)__B); 693 } 694 695 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 696 _mm_comgt_epi64(__m128i __A, __m128i __B) 697 { 698 return (__m128i) __builtin_ia32_vpcomgtq ((__v2di)__A, (__v2di)__B); 699 } 700 701 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 702 _mm_comge_epi64(__m128i __A, __m128i __B) 703 { 704 return (__m128i) __builtin_ia32_vpcomgeq ((__v2di)__A, (__v2di)__B); 705 } 706 707 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 708 _mm_comeq_epi64(__m128i __A, __m128i __B) 709 { 710 return (__m128i) __builtin_ia32_vpcomeqq ((__v2di)__A, (__v2di)__B); 711 } 712 713 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 714 _mm_comneq_epi64(__m128i __A, __m128i __B) 715 { 716 return (__m128i) __builtin_ia32_vpcomneqq ((__v2di)__A, (__v2di)__B); 717 } 718 719 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 720 _mm_comfalse_epi64(__m128i __A, __m128i __B) 721 { 722 return (__m128i) __builtin_ia32_vpcomfalseq ((__v2di)__A, (__v2di)__B); 723 } 724 725 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 726 _mm_comtrue_epi64(__m128i __A, __m128i __B) 727 { 728 return (__m128i) __builtin_ia32_vpcomtrueq ((__v2di)__A, (__v2di)__B); 729 } 730 731 /* FRCZ */ 732 733 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 734 _mm_frcz_ps (__m128 __A) 735 { 736 return (__m128) __builtin_ia32_vfrczps ((__v4sf)__A); 737 } 738 739 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 740 _mm_frcz_pd (__m128d __A) 741 { 742 return (__m128d) __builtin_ia32_vfrczpd ((__v2df)__A); 743 } 744 745 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 746 _mm_frcz_ss (__m128 __A, __m128 __B) 747 { 748 return (__m128) __builtin_ia32_vfrczss ((__v4sf)__A, (__v4sf)__B); 749 } 750 751 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 752 _mm_frcz_sd (__m128d __A, __m128d __B) 753 { 754 return (__m128d) __builtin_ia32_vfrczsd ((__v2df)__A, (__v2df)__B); 755 } 756 757 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 758 _mm256_frcz_ps (__m256 __A) 759 { 760 return (__m256) __builtin_ia32_vfrczps256 ((__v8sf)__A); 761 } 762 763 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 764 _mm256_frcz_pd (__m256d __A) 765 { 766 return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A); 767 } 768 769 /* PERMIL2 */ 770 771 #ifdef __OPTIMIZE__ 772 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 773 _mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I) 774 { 775 return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X, 776 (__v2df)__Y, 777 (__v2di)__C, 778 __I); 779 } 780 781 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 782 _mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I) 783 { 784 return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X, 785 (__v4df)__Y, 786 (__v4di)__C, 787 __I); 788 } 789 790 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 791 _mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I) 792 { 793 return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X, 794 (__v4sf)__Y, 795 (__v4si)__C, 796 __I); 797 } 798 799 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 800 _mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I) 801 { 802 return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X, 803 (__v8sf)__Y, 804 (__v8si)__C, 805 __I); 806 } 807 #else 808 #define _mm_permute2_pd(X, Y, C, I) \ 809 ((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X), \ 810 (__v2df)(__m128d)(Y), \ 811 (__v2di)(__m128d)(C), \ 812 (int)(I))) 813 814 #define _mm256_permute2_pd(X, Y, C, I) \ 815 ((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X), \ 816 (__v4df)(__m256d)(Y), \ 817 (__v4di)(__m256d)(C), \ 818 (int)(I))) 819 820 #define _mm_permute2_ps(X, Y, C, I) \ 821 ((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X), \ 822 (__v4sf)(__m128)(Y), \ 823 (__v4si)(__m128)(C), \ 824 (int)(I))) 825 826 #define _mm256_permute2_ps(X, Y, C, I) \ 827 ((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X), \ 828 (__v8sf)(__m256)(Y), \ 829 (__v8si)(__m256)(C), \ 830 (int)(I))) 831 #endif /* __OPTIMIZE__ */ 832 833 #endif /* __XOP__ */ 834 835 #endif /* _XOPMMINTRIN_H_INCLUDED */ 836