1 /*===---- mmintrin.h - MMX intrinsics --------------------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24 #ifndef __MMINTRIN_H 25 #define __MMINTRIN_H 26 27 typedef long long __m64 __attribute__((__vector_size__(8))); 28 29 typedef int __v2si __attribute__((__vector_size__(8))); 30 typedef short __v4hi __attribute__((__vector_size__(8))); 31 typedef char __v8qi __attribute__((__vector_size__(8))); 32 33 /* Define the default attributes for the functions in this file. */ 34 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"))) 35 36 static __inline__ void __DEFAULT_FN_ATTRS 37 _mm_empty(void) 38 { 39 __builtin_ia32_emms(); 40 } 41 42 static __inline__ __m64 __DEFAULT_FN_ATTRS 43 _mm_cvtsi32_si64(int __i) 44 { 45 return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); 46 } 47 48 static __inline__ int __DEFAULT_FN_ATTRS 49 _mm_cvtsi64_si32(__m64 __m) 50 { 51 return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); 52 } 53 54 static __inline__ __m64 __DEFAULT_FN_ATTRS 55 _mm_cvtsi64_m64(long long __i) 56 { 57 return (__m64)__i; 58 } 59 60 static __inline__ long long __DEFAULT_FN_ATTRS 61 _mm_cvtm64_si64(__m64 __m) 62 { 63 return (long long)__m; 64 } 65 66 static __inline__ __m64 __DEFAULT_FN_ATTRS 67 _mm_packs_pi16(__m64 __m1, __m64 __m2) 68 { 69 return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); 70 } 71 72 static __inline__ __m64 __DEFAULT_FN_ATTRS 73 _mm_packs_pi32(__m64 __m1, __m64 __m2) 74 { 75 return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); 76 } 77 78 static __inline__ __m64 __DEFAULT_FN_ATTRS 79 _mm_packs_pu16(__m64 __m1, __m64 __m2) 80 { 81 return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); 82 } 83 84 static __inline__ __m64 __DEFAULT_FN_ATTRS 85 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) 86 { 87 return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); 88 } 89 90 static __inline__ __m64 __DEFAULT_FN_ATTRS 91 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) 92 { 93 return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); 94 } 95 96 static __inline__ __m64 __DEFAULT_FN_ATTRS 97 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) 98 { 99 return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); 100 } 101 102 static __inline__ __m64 __DEFAULT_FN_ATTRS 103 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) 104 { 105 return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); 106 } 107 108 static __inline__ __m64 __DEFAULT_FN_ATTRS 109 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) 110 { 111 return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); 112 } 113 114 static __inline__ __m64 __DEFAULT_FN_ATTRS 115 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) 116 { 117 return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); 118 } 119 120 static __inline__ __m64 __DEFAULT_FN_ATTRS 121 _mm_add_pi8(__m64 __m1, __m64 __m2) 122 { 123 return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); 124 } 125 126 static __inline__ __m64 __DEFAULT_FN_ATTRS 127 _mm_add_pi16(__m64 __m1, __m64 __m2) 128 { 129 return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); 130 } 131 132 static __inline__ __m64 __DEFAULT_FN_ATTRS 133 _mm_add_pi32(__m64 __m1, __m64 __m2) 134 { 135 return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); 136 } 137 138 static __inline__ __m64 __DEFAULT_FN_ATTRS 139 _mm_adds_pi8(__m64 __m1, __m64 __m2) 140 { 141 return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); 142 } 143 144 static __inline__ __m64 __DEFAULT_FN_ATTRS 145 _mm_adds_pi16(__m64 __m1, __m64 __m2) 146 { 147 return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); 148 } 149 150 static __inline__ __m64 __DEFAULT_FN_ATTRS 151 _mm_adds_pu8(__m64 __m1, __m64 __m2) 152 { 153 return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); 154 } 155 156 static __inline__ __m64 __DEFAULT_FN_ATTRS 157 _mm_adds_pu16(__m64 __m1, __m64 __m2) 158 { 159 return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); 160 } 161 162 static __inline__ __m64 __DEFAULT_FN_ATTRS 163 _mm_sub_pi8(__m64 __m1, __m64 __m2) 164 { 165 return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); 166 } 167 168 static __inline__ __m64 __DEFAULT_FN_ATTRS 169 _mm_sub_pi16(__m64 __m1, __m64 __m2) 170 { 171 return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); 172 } 173 174 static __inline__ __m64 __DEFAULT_FN_ATTRS 175 _mm_sub_pi32(__m64 __m1, __m64 __m2) 176 { 177 return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); 178 } 179 180 static __inline__ __m64 __DEFAULT_FN_ATTRS 181 _mm_subs_pi8(__m64 __m1, __m64 __m2) 182 { 183 return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); 184 } 185 186 static __inline__ __m64 __DEFAULT_FN_ATTRS 187 _mm_subs_pi16(__m64 __m1, __m64 __m2) 188 { 189 return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); 190 } 191 192 static __inline__ __m64 __DEFAULT_FN_ATTRS 193 _mm_subs_pu8(__m64 __m1, __m64 __m2) 194 { 195 return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); 196 } 197 198 static __inline__ __m64 __DEFAULT_FN_ATTRS 199 _mm_subs_pu16(__m64 __m1, __m64 __m2) 200 { 201 return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); 202 } 203 204 static __inline__ __m64 __DEFAULT_FN_ATTRS 205 _mm_madd_pi16(__m64 __m1, __m64 __m2) 206 { 207 return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); 208 } 209 210 static __inline__ __m64 __DEFAULT_FN_ATTRS 211 _mm_mulhi_pi16(__m64 __m1, __m64 __m2) 212 { 213 return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); 214 } 215 216 static __inline__ __m64 __DEFAULT_FN_ATTRS 217 _mm_mullo_pi16(__m64 __m1, __m64 __m2) 218 { 219 return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); 220 } 221 222 static __inline__ __m64 __DEFAULT_FN_ATTRS 223 _mm_sll_pi16(__m64 __m, __m64 __count) 224 { 225 return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); 226 } 227 228 static __inline__ __m64 __DEFAULT_FN_ATTRS 229 _mm_slli_pi16(__m64 __m, int __count) 230 { 231 return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); 232 } 233 234 static __inline__ __m64 __DEFAULT_FN_ATTRS 235 _mm_sll_pi32(__m64 __m, __m64 __count) 236 { 237 return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); 238 } 239 240 static __inline__ __m64 __DEFAULT_FN_ATTRS 241 _mm_slli_pi32(__m64 __m, int __count) 242 { 243 return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); 244 } 245 246 static __inline__ __m64 __DEFAULT_FN_ATTRS 247 _mm_sll_si64(__m64 __m, __m64 __count) 248 { 249 return (__m64)__builtin_ia32_psllq(__m, __count); 250 } 251 252 static __inline__ __m64 __DEFAULT_FN_ATTRS 253 _mm_slli_si64(__m64 __m, int __count) 254 { 255 return (__m64)__builtin_ia32_psllqi(__m, __count); 256 } 257 258 static __inline__ __m64 __DEFAULT_FN_ATTRS 259 _mm_sra_pi16(__m64 __m, __m64 __count) 260 { 261 return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); 262 } 263 264 static __inline__ __m64 __DEFAULT_FN_ATTRS 265 _mm_srai_pi16(__m64 __m, int __count) 266 { 267 return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); 268 } 269 270 static __inline__ __m64 __DEFAULT_FN_ATTRS 271 _mm_sra_pi32(__m64 __m, __m64 __count) 272 { 273 return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); 274 } 275 276 static __inline__ __m64 __DEFAULT_FN_ATTRS 277 _mm_srai_pi32(__m64 __m, int __count) 278 { 279 return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); 280 } 281 282 static __inline__ __m64 __DEFAULT_FN_ATTRS 283 _mm_srl_pi16(__m64 __m, __m64 __count) 284 { 285 return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); 286 } 287 288 static __inline__ __m64 __DEFAULT_FN_ATTRS 289 _mm_srli_pi16(__m64 __m, int __count) 290 { 291 return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); 292 } 293 294 static __inline__ __m64 __DEFAULT_FN_ATTRS 295 _mm_srl_pi32(__m64 __m, __m64 __count) 296 { 297 return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); 298 } 299 300 static __inline__ __m64 __DEFAULT_FN_ATTRS 301 _mm_srli_pi32(__m64 __m, int __count) 302 { 303 return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); 304 } 305 306 static __inline__ __m64 __DEFAULT_FN_ATTRS 307 _mm_srl_si64(__m64 __m, __m64 __count) 308 { 309 return (__m64)__builtin_ia32_psrlq(__m, __count); 310 } 311 312 static __inline__ __m64 __DEFAULT_FN_ATTRS 313 _mm_srli_si64(__m64 __m, int __count) 314 { 315 return (__m64)__builtin_ia32_psrlqi(__m, __count); 316 } 317 318 static __inline__ __m64 __DEFAULT_FN_ATTRS 319 _mm_and_si64(__m64 __m1, __m64 __m2) 320 { 321 return __builtin_ia32_pand(__m1, __m2); 322 } 323 324 static __inline__ __m64 __DEFAULT_FN_ATTRS 325 _mm_andnot_si64(__m64 __m1, __m64 __m2) 326 { 327 return __builtin_ia32_pandn(__m1, __m2); 328 } 329 330 static __inline__ __m64 __DEFAULT_FN_ATTRS 331 _mm_or_si64(__m64 __m1, __m64 __m2) 332 { 333 return __builtin_ia32_por(__m1, __m2); 334 } 335 336 static __inline__ __m64 __DEFAULT_FN_ATTRS 337 _mm_xor_si64(__m64 __m1, __m64 __m2) 338 { 339 return __builtin_ia32_pxor(__m1, __m2); 340 } 341 342 static __inline__ __m64 __DEFAULT_FN_ATTRS 343 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) 344 { 345 return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); 346 } 347 348 static __inline__ __m64 __DEFAULT_FN_ATTRS 349 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) 350 { 351 return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); 352 } 353 354 static __inline__ __m64 __DEFAULT_FN_ATTRS 355 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) 356 { 357 return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); 358 } 359 360 static __inline__ __m64 __DEFAULT_FN_ATTRS 361 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) 362 { 363 return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); 364 } 365 366 static __inline__ __m64 __DEFAULT_FN_ATTRS 367 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) 368 { 369 return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); 370 } 371 372 static __inline__ __m64 __DEFAULT_FN_ATTRS 373 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) 374 { 375 return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); 376 } 377 378 static __inline__ __m64 __DEFAULT_FN_ATTRS 379 _mm_setzero_si64(void) 380 { 381 return (__m64){ 0LL }; 382 } 383 384 static __inline__ __m64 __DEFAULT_FN_ATTRS 385 _mm_set_pi32(int __i1, int __i0) 386 { 387 return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); 388 } 389 390 static __inline__ __m64 __DEFAULT_FN_ATTRS 391 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) 392 { 393 return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); 394 } 395 396 static __inline__ __m64 __DEFAULT_FN_ATTRS 397 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, 398 char __b1, char __b0) 399 { 400 return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, 401 __b4, __b5, __b6, __b7); 402 } 403 404 static __inline__ __m64 __DEFAULT_FN_ATTRS 405 _mm_set1_pi32(int __i) 406 { 407 return _mm_set_pi32(__i, __i); 408 } 409 410 static __inline__ __m64 __DEFAULT_FN_ATTRS 411 _mm_set1_pi16(short __w) 412 { 413 return _mm_set_pi16(__w, __w, __w, __w); 414 } 415 416 static __inline__ __m64 __DEFAULT_FN_ATTRS 417 _mm_set1_pi8(char __b) 418 { 419 return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); 420 } 421 422 static __inline__ __m64 __DEFAULT_FN_ATTRS 423 _mm_setr_pi32(int __i0, int __i1) 424 { 425 return _mm_set_pi32(__i1, __i0); 426 } 427 428 static __inline__ __m64 __DEFAULT_FN_ATTRS 429 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) 430 { 431 return _mm_set_pi16(__w3, __w2, __w1, __w0); 432 } 433 434 static __inline__ __m64 __DEFAULT_FN_ATTRS 435 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, 436 char __b6, char __b7) 437 { 438 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 439 } 440 441 #undef __DEFAULT_FN_ATTRS 442 443 /* Aliases for compatibility. */ 444 #define _m_empty _mm_empty 445 #define _m_from_int _mm_cvtsi32_si64 446 #define _m_from_int64 _mm_cvtsi64_m64 447 #define _m_to_int _mm_cvtsi64_si32 448 #define _m_to_int64 _mm_cvtm64_si64 449 #define _m_packsswb _mm_packs_pi16 450 #define _m_packssdw _mm_packs_pi32 451 #define _m_packuswb _mm_packs_pu16 452 #define _m_punpckhbw _mm_unpackhi_pi8 453 #define _m_punpckhwd _mm_unpackhi_pi16 454 #define _m_punpckhdq _mm_unpackhi_pi32 455 #define _m_punpcklbw _mm_unpacklo_pi8 456 #define _m_punpcklwd _mm_unpacklo_pi16 457 #define _m_punpckldq _mm_unpacklo_pi32 458 #define _m_paddb _mm_add_pi8 459 #define _m_paddw _mm_add_pi16 460 #define _m_paddd _mm_add_pi32 461 #define _m_paddsb _mm_adds_pi8 462 #define _m_paddsw _mm_adds_pi16 463 #define _m_paddusb _mm_adds_pu8 464 #define _m_paddusw _mm_adds_pu16 465 #define _m_psubb _mm_sub_pi8 466 #define _m_psubw _mm_sub_pi16 467 #define _m_psubd _mm_sub_pi32 468 #define _m_psubsb _mm_subs_pi8 469 #define _m_psubsw _mm_subs_pi16 470 #define _m_psubusb _mm_subs_pu8 471 #define _m_psubusw _mm_subs_pu16 472 #define _m_pmaddwd _mm_madd_pi16 473 #define _m_pmulhw _mm_mulhi_pi16 474 #define _m_pmullw _mm_mullo_pi16 475 #define _m_psllw _mm_sll_pi16 476 #define _m_psllwi _mm_slli_pi16 477 #define _m_pslld _mm_sll_pi32 478 #define _m_pslldi _mm_slli_pi32 479 #define _m_psllq _mm_sll_si64 480 #define _m_psllqi _mm_slli_si64 481 #define _m_psraw _mm_sra_pi16 482 #define _m_psrawi _mm_srai_pi16 483 #define _m_psrad _mm_sra_pi32 484 #define _m_psradi _mm_srai_pi32 485 #define _m_psrlw _mm_srl_pi16 486 #define _m_psrlwi _mm_srli_pi16 487 #define _m_psrld _mm_srl_pi32 488 #define _m_psrldi _mm_srli_pi32 489 #define _m_psrlq _mm_srl_si64 490 #define _m_psrlqi _mm_srli_si64 491 #define _m_pand _mm_and_si64 492 #define _m_pandn _mm_andnot_si64 493 #define _m_por _mm_or_si64 494 #define _m_pxor _mm_xor_si64 495 #define _m_pcmpeqb _mm_cmpeq_pi8 496 #define _m_pcmpeqw _mm_cmpeq_pi16 497 #define _m_pcmpeqd _mm_cmpeq_pi32 498 #define _m_pcmpgtb _mm_cmpgt_pi8 499 #define _m_pcmpgtw _mm_cmpgt_pi16 500 #define _m_pcmpgtd _mm_cmpgt_pi32 501 502 #endif /* __MMINTRIN_H */ 503 504