1 /*===---- xopintrin.h - XOP intrinsics -------------------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24 #ifndef __X86INTRIN_H 25 #error "Never use <xopintrin.h> directly; include <x86intrin.h> instead." 26 #endif 27 28 #ifndef __XOPINTRIN_H 29 #define __XOPINTRIN_H 30 31 #ifndef __XOP__ 32 # error "XOP instruction set is not enabled" 33 #else 34 35 #include <fma4intrin.h> 36 37 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 38 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) 39 { 40 return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); 41 } 42 43 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 44 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) 45 { 46 return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); 47 } 48 49 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 50 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) 51 { 52 return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 53 } 54 55 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 56 _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) 57 { 58 return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 59 } 60 61 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 62 _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) 63 { 64 return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C); 65 } 66 67 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 68 _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) 69 { 70 return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C); 71 } 72 73 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 74 _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) 75 { 76 return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C); 77 } 78 79 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 80 _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) 81 { 82 return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C); 83 } 84 85 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 86 _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) 87 { 88 return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); 89 } 90 91 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 92 _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) 93 { 94 return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); 95 } 96 97 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 98 _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) 99 { 100 return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 101 } 102 103 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 104 _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) 105 { 106 return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 107 } 108 109 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 110 _mm_haddw_epi8(__m128i __A) 111 { 112 return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A); 113 } 114 115 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 116 _mm_haddd_epi8(__m128i __A) 117 { 118 return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A); 119 } 120 121 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 122 _mm_haddq_epi8(__m128i __A) 123 { 124 return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A); 125 } 126 127 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 128 _mm_haddd_epi16(__m128i __A) 129 { 130 return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A); 131 } 132 133 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 134 _mm_haddq_epi16(__m128i __A) 135 { 136 return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A); 137 } 138 139 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 140 _mm_haddq_epi32(__m128i __A) 141 { 142 return (__m128i)__builtin_ia32_vphadddq((__v4si)__A); 143 } 144 145 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 146 _mm_haddw_epu8(__m128i __A) 147 { 148 return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A); 149 } 150 151 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 152 _mm_haddd_epu8(__m128i __A) 153 { 154 return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A); 155 } 156 157 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 158 _mm_haddq_epu8(__m128i __A) 159 { 160 return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A); 161 } 162 163 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 164 _mm_haddd_epu16(__m128i __A) 165 { 166 return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A); 167 } 168 169 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 170 _mm_haddq_epu16(__m128i __A) 171 { 172 return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A); 173 } 174 175 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 176 _mm_haddq_epu32(__m128i __A) 177 { 178 return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A); 179 } 180 181 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 182 _mm_hsubw_epi8(__m128i __A) 183 { 184 return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A); 185 } 186 187 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 188 _mm_hsubd_epi16(__m128i __A) 189 { 190 return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A); 191 } 192 193 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 194 _mm_hsubq_epi32(__m128i __A) 195 { 196 return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A); 197 } 198 199 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 200 _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) 201 { 202 return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C); 203 } 204 205 static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 206 _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) 207 { 208 return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C); 209 } 210 211 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 212 _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) 213 { 214 return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); 215 } 216 217 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 218 _mm_rot_epi8(__m128i __A, __m128i __B) 219 { 220 return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B); 221 } 222 223 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 224 _mm_rot_epi16(__m128i __A, __m128i __B) 225 { 226 return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B); 227 } 228 229 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 230 _mm_rot_epi32(__m128i __A, __m128i __B) 231 { 232 return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B); 233 } 234 235 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 236 _mm_rot_epi64(__m128i __A, __m128i __B) 237 { 238 return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B); 239 } 240 241 #define _mm_roti_epi8(A, N) __extension__ ({ \ 242 __m128i __A = (A); \ 243 (__m128i)__builtin_ia32_vprotbi((__v16qi)__A, (N)); }) 244 245 #define _mm_roti_epi16(A, N) __extension__ ({ \ 246 __m128i __A = (A); \ 247 (__m128i)__builtin_ia32_vprotwi((__v8hi)__A, (N)); }) 248 249 #define _mm_roti_epi32(A, N) __extension__ ({ \ 250 __m128i __A = (A); \ 251 (__m128i)__builtin_ia32_vprotdi((__v4si)__A, (N)); }) 252 253 #define _mm_roti_epi64(A, N) __extension__ ({ \ 254 __m128i __A = (A); \ 255 (__m128i)__builtin_ia32_vprotqi((__v2di)__A, (N)); }) 256 257 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 258 _mm_shl_epi8(__m128i __A, __m128i __B) 259 { 260 return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B); 261 } 262 263 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 264 _mm_shl_epi16(__m128i __A, __m128i __B) 265 { 266 return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B); 267 } 268 269 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 270 _mm_shl_epi32(__m128i __A, __m128i __B) 271 { 272 return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B); 273 } 274 275 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 276 _mm_shl_epi64(__m128i __A, __m128i __B) 277 { 278 return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B); 279 } 280 281 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 282 _mm_sha_epi8(__m128i __A, __m128i __B) 283 { 284 return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B); 285 } 286 287 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 288 _mm_sha_epi16(__m128i __A, __m128i __B) 289 { 290 return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B); 291 } 292 293 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 294 _mm_sha_epi32(__m128i __A, __m128i __B) 295 { 296 return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B); 297 } 298 299 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 300 _mm_sha_epi64(__m128i __A, __m128i __B) 301 { 302 return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B); 303 } 304 305 #define _mm_com_epu8(A, B, N) __extension__ ({ \ 306 __m128i __A = (A); \ 307 __m128i __B = (B); \ 308 (__m128i)__builtin_ia32_vpcomub((__v16qi)__A, (__v16qi)__B, (N)); }) 309 310 #define _mm_com_epu16(A, B, N) __extension__ ({ \ 311 __m128i __A = (A); \ 312 __m128i __B = (B); \ 313 (__m128i)__builtin_ia32_vpcomuw((__v8hi)__A, (__v8hi)__B, (N)); }) 314 315 #define _mm_com_epu32(A, B, N) __extension__ ({ \ 316 __m128i __A = (A); \ 317 __m128i __B = (B); \ 318 (__m128i)__builtin_ia32_vpcomud((__v4si)__A, (__v4si)__B, (N)); }) 319 320 #define _mm_com_epu64(A, B, N) __extension__ ({ \ 321 __m128i __A = (A); \ 322 __m128i __B = (B); \ 323 (__m128i)__builtin_ia32_vpcomuq((__v2di)__A, (__v2di)__B, (N)); }) 324 325 #define _mm_com_epi8(A, B, N) __extension__ ({ \ 326 __m128i __A = (A); \ 327 __m128i __B = (B); \ 328 (__m128i)__builtin_ia32_vpcomb((__v16qi)__A, (__v16qi)__B, (N)); }) 329 330 #define _mm_com_epi16(A, B, N) __extension__ ({ \ 331 __m128i __A = (A); \ 332 __m128i __B = (B); \ 333 (__m128i)__builtin_ia32_vpcomw((__v8hi)__A, (__v8hi)__B, (N)); }) 334 335 #define _mm_com_epi32(A, B, N) __extension__ ({ \ 336 __m128i __A = (A); \ 337 __m128i __B = (B); \ 338 (__m128i)__builtin_ia32_vpcomd((__v4si)__A, (__v4si)__B, (N)); }) 339 340 #define _mm_com_epi64(A, B, N) __extension__ ({ \ 341 __m128i __A = (A); \ 342 __m128i __B = (B); \ 343 (__m128i)__builtin_ia32_vpcomq((__v2di)__A, (__v2di)__B, (N)); }) 344 345 #define _MM_PCOMCTRL_LT 0 346 #define _MM_PCOMCTRL_LE 1 347 #define _MM_PCOMCTRL_GT 2 348 #define _MM_PCOMCTRL_GE 3 349 #define _MM_PCOMCTRL_EQ 4 350 #define _MM_PCOMCTRL_NEQ 5 351 #define _MM_PCOMCTRL_FALSE 6 352 #define _MM_PCOMCTRL_TRUE 7 353 354 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 355 _mm_comlt_epu8(__m128i __A, __m128i __B) 356 { 357 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT); 358 } 359 360 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 361 _mm_comle_epu8(__m128i __A, __m128i __B) 362 { 363 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE); 364 } 365 366 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 367 _mm_comgt_epu8(__m128i __A, __m128i __B) 368 { 369 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT); 370 } 371 372 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 373 _mm_comge_epu8(__m128i __A, __m128i __B) 374 { 375 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE); 376 } 377 378 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 379 _mm_comeq_epu8(__m128i __A, __m128i __B) 380 { 381 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ); 382 } 383 384 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 385 _mm_comneq_epu8(__m128i __A, __m128i __B) 386 { 387 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ); 388 } 389 390 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 391 _mm_comfalse_epu8(__m128i __A, __m128i __B) 392 { 393 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE); 394 } 395 396 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 397 _mm_comtrue_epu8(__m128i __A, __m128i __B) 398 { 399 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE); 400 } 401 402 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 403 _mm_comlt_epu16(__m128i __A, __m128i __B) 404 { 405 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT); 406 } 407 408 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 409 _mm_comle_epu16(__m128i __A, __m128i __B) 410 { 411 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE); 412 } 413 414 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 415 _mm_comgt_epu16(__m128i __A, __m128i __B) 416 { 417 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT); 418 } 419 420 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 421 _mm_comge_epu16(__m128i __A, __m128i __B) 422 { 423 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE); 424 } 425 426 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 427 _mm_comeq_epu16(__m128i __A, __m128i __B) 428 { 429 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ); 430 } 431 432 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 433 _mm_comneq_epu16(__m128i __A, __m128i __B) 434 { 435 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ); 436 } 437 438 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 439 _mm_comfalse_epu16(__m128i __A, __m128i __B) 440 { 441 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE); 442 } 443 444 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 445 _mm_comtrue_epu16(__m128i __A, __m128i __B) 446 { 447 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE); 448 } 449 450 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 451 _mm_comlt_epu32(__m128i __A, __m128i __B) 452 { 453 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT); 454 } 455 456 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 457 _mm_comle_epu32(__m128i __A, __m128i __B) 458 { 459 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE); 460 } 461 462 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 463 _mm_comgt_epu32(__m128i __A, __m128i __B) 464 { 465 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT); 466 } 467 468 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 469 _mm_comge_epu32(__m128i __A, __m128i __B) 470 { 471 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE); 472 } 473 474 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 475 _mm_comeq_epu32(__m128i __A, __m128i __B) 476 { 477 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ); 478 } 479 480 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 481 _mm_comneq_epu32(__m128i __A, __m128i __B) 482 { 483 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ); 484 } 485 486 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 487 _mm_comfalse_epu32(__m128i __A, __m128i __B) 488 { 489 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE); 490 } 491 492 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 493 _mm_comtrue_epu32(__m128i __A, __m128i __B) 494 { 495 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE); 496 } 497 498 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 499 _mm_comlt_epu64(__m128i __A, __m128i __B) 500 { 501 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT); 502 } 503 504 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 505 _mm_comle_epu64(__m128i __A, __m128i __B) 506 { 507 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE); 508 } 509 510 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 511 _mm_comgt_epu64(__m128i __A, __m128i __B) 512 { 513 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT); 514 } 515 516 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 517 _mm_comge_epu64(__m128i __A, __m128i __B) 518 { 519 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE); 520 } 521 522 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 523 _mm_comeq_epu64(__m128i __A, __m128i __B) 524 { 525 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ); 526 } 527 528 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 529 _mm_comneq_epu64(__m128i __A, __m128i __B) 530 { 531 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ); 532 } 533 534 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 535 _mm_comfalse_epu64(__m128i __A, __m128i __B) 536 { 537 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE); 538 } 539 540 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 541 _mm_comtrue_epu64(__m128i __A, __m128i __B) 542 { 543 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE); 544 } 545 546 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 547 _mm_comlt_epi8(__m128i __A, __m128i __B) 548 { 549 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT); 550 } 551 552 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 553 _mm_comle_epi8(__m128i __A, __m128i __B) 554 { 555 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE); 556 } 557 558 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 559 _mm_comgt_epi8(__m128i __A, __m128i __B) 560 { 561 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT); 562 } 563 564 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 565 _mm_comge_epi8(__m128i __A, __m128i __B) 566 { 567 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE); 568 } 569 570 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 571 _mm_comeq_epi8(__m128i __A, __m128i __B) 572 { 573 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ); 574 } 575 576 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 577 _mm_comneq_epi8(__m128i __A, __m128i __B) 578 { 579 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ); 580 } 581 582 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 583 _mm_comfalse_epi8(__m128i __A, __m128i __B) 584 { 585 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE); 586 } 587 588 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 589 _mm_comtrue_epi8(__m128i __A, __m128i __B) 590 { 591 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE); 592 } 593 594 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 595 _mm_comlt_epi16(__m128i __A, __m128i __B) 596 { 597 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT); 598 } 599 600 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 601 _mm_comle_epi16(__m128i __A, __m128i __B) 602 { 603 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE); 604 } 605 606 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 607 _mm_comgt_epi16(__m128i __A, __m128i __B) 608 { 609 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT); 610 } 611 612 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 613 _mm_comge_epi16(__m128i __A, __m128i __B) 614 { 615 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE); 616 } 617 618 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 619 _mm_comeq_epi16(__m128i __A, __m128i __B) 620 { 621 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ); 622 } 623 624 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 625 _mm_comneq_epi16(__m128i __A, __m128i __B) 626 { 627 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ); 628 } 629 630 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 631 _mm_comfalse_epi16(__m128i __A, __m128i __B) 632 { 633 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE); 634 } 635 636 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 637 _mm_comtrue_epi16(__m128i __A, __m128i __B) 638 { 639 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE); 640 } 641 642 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 643 _mm_comlt_epi32(__m128i __A, __m128i __B) 644 { 645 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT); 646 } 647 648 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 649 _mm_comle_epi32(__m128i __A, __m128i __B) 650 { 651 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE); 652 } 653 654 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 655 _mm_comgt_epi32(__m128i __A, __m128i __B) 656 { 657 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT); 658 } 659 660 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 661 _mm_comge_epi32(__m128i __A, __m128i __B) 662 { 663 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE); 664 } 665 666 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 667 _mm_comeq_epi32(__m128i __A, __m128i __B) 668 { 669 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ); 670 } 671 672 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 673 _mm_comneq_epi32(__m128i __A, __m128i __B) 674 { 675 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ); 676 } 677 678 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 679 _mm_comfalse_epi32(__m128i __A, __m128i __B) 680 { 681 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE); 682 } 683 684 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 685 _mm_comtrue_epi32(__m128i __A, __m128i __B) 686 { 687 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE); 688 } 689 690 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 691 _mm_comlt_epi64(__m128i __A, __m128i __B) 692 { 693 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT); 694 } 695 696 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 697 _mm_comle_epi64(__m128i __A, __m128i __B) 698 { 699 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE); 700 } 701 702 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 703 _mm_comgt_epi64(__m128i __A, __m128i __B) 704 { 705 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT); 706 } 707 708 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 709 _mm_comge_epi64(__m128i __A, __m128i __B) 710 { 711 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE); 712 } 713 714 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 715 _mm_comeq_epi64(__m128i __A, __m128i __B) 716 { 717 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ); 718 } 719 720 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 721 _mm_comneq_epi64(__m128i __A, __m128i __B) 722 { 723 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ); 724 } 725 726 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 727 _mm_comfalse_epi64(__m128i __A, __m128i __B) 728 { 729 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE); 730 } 731 732 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 733 _mm_comtrue_epi64(__m128i __A, __m128i __B) 734 { 735 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE); 736 } 737 738 #define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \ 739 __m128d __X = (X); \ 740 __m128d __Y = (Y); \ 741 __m128i __C = (C); \ 742 (__m128d)__builtin_ia32_vpermil2pd((__v2df)__X, (__v2df)__Y, \ 743 (__v2di)__C, (I)); }) 744 745 #define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \ 746 __m256d __X = (X); \ 747 __m256d __Y = (Y); \ 748 __m256i __C = (C); \ 749 (__m256d)__builtin_ia32_vpermil2pd256((__v4df)__X, (__v4df)__Y, \ 750 (__v4di)__C, (I)); }) 751 752 #define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \ 753 __m128 __X = (X); \ 754 __m128 __Y = (Y); \ 755 __m128i __C = (C); \ 756 (__m128)__builtin_ia32_vpermil2ps((__v4sf)__X, (__v4sf)__Y, \ 757 (__v4si)__C, (I)); }) 758 759 #define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \ 760 __m256 __X = (X); \ 761 __m256 __Y = (Y); \ 762 __m256i __C = (C); \ 763 (__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y, \ 764 (__v8si)__C, (I)); }) 765 766 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 767 _mm_frcz_ss(__m128 __A) 768 { 769 return (__m128)__builtin_ia32_vfrczss((__v4sf)__A); 770 } 771 772 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 773 _mm_frcz_sd(__m128d __A) 774 { 775 return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A); 776 } 777 778 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 779 _mm_frcz_ps(__m128 __A) 780 { 781 return (__m128)__builtin_ia32_vfrczps((__v4sf)__A); 782 } 783 784 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 785 _mm_frcz_pd(__m128d __A) 786 { 787 return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A); 788 } 789 790 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 791 _mm256_frcz_ps(__m256 __A) 792 { 793 return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A); 794 } 795 796 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 797 _mm256_frcz_pd(__m256d __A) 798 { 799 return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A); 800 } 801 802 #endif /* __XOP__ */ 803 804 #endif /* __XOPINTRIN_H */ 805