1 /*===---- mmintrin.h - MMX intrinsics --------------------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24 #ifndef __MMINTRIN_H 25 #define __MMINTRIN_H 26 27 #ifndef __MMX__ 28 #error "MMX instruction set not enabled" 29 #else 30 31 typedef long long __m64 __attribute__((__vector_size__(8))); 32 33 typedef int __v2si __attribute__((__vector_size__(8))); 34 typedef short __v4hi __attribute__((__vector_size__(8))); 35 typedef char __v8qi __attribute__((__vector_size__(8))); 36 37 static __inline__ void __attribute__((__always_inline__, __nodebug__)) 38 _mm_empty(void) 39 { 40 __builtin_ia32_emms(); 41 } 42 43 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 44 _mm_cvtsi32_si64(int __i) 45 { 46 return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); 47 } 48 49 static __inline__ int __attribute__((__always_inline__, __nodebug__)) 50 _mm_cvtsi64_si32(__m64 __m) 51 { 52 return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); 53 } 54 55 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 56 _mm_cvtsi64_m64(long long __i) 57 { 58 return (__m64)__i; 59 } 60 61 static __inline__ long long __attribute__((__always_inline__, __nodebug__)) 62 _mm_cvtm64_si64(__m64 __m) 63 { 64 return (long long)__m; 65 } 66 67 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 68 _mm_packs_pi16(__m64 __m1, __m64 __m2) 69 { 70 return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); 71 } 72 73 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 74 _mm_packs_pi32(__m64 __m1, __m64 __m2) 75 { 76 return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); 77 } 78 79 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 80 _mm_packs_pu16(__m64 __m1, __m64 __m2) 81 { 82 return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); 83 } 84 85 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 86 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) 87 { 88 return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); 89 } 90 91 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 92 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) 93 { 94 return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); 95 } 96 97 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 98 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) 99 { 100 return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); 101 } 102 103 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 104 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) 105 { 106 return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); 107 } 108 109 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 110 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) 111 { 112 return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); 113 } 114 115 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 116 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) 117 { 118 return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); 119 } 120 121 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 122 _mm_add_pi8(__m64 __m1, __m64 __m2) 123 { 124 return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); 125 } 126 127 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 128 _mm_add_pi16(__m64 __m1, __m64 __m2) 129 { 130 return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); 131 } 132 133 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 134 _mm_add_pi32(__m64 __m1, __m64 __m2) 135 { 136 return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); 137 } 138 139 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 140 _mm_adds_pi8(__m64 __m1, __m64 __m2) 141 { 142 return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); 143 } 144 145 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 146 _mm_adds_pi16(__m64 __m1, __m64 __m2) 147 { 148 return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); 149 } 150 151 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 152 _mm_adds_pu8(__m64 __m1, __m64 __m2) 153 { 154 return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); 155 } 156 157 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 158 _mm_adds_pu16(__m64 __m1, __m64 __m2) 159 { 160 return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); 161 } 162 163 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 164 _mm_sub_pi8(__m64 __m1, __m64 __m2) 165 { 166 return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); 167 } 168 169 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 170 _mm_sub_pi16(__m64 __m1, __m64 __m2) 171 { 172 return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); 173 } 174 175 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 176 _mm_sub_pi32(__m64 __m1, __m64 __m2) 177 { 178 return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); 179 } 180 181 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 182 _mm_subs_pi8(__m64 __m1, __m64 __m2) 183 { 184 return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); 185 } 186 187 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 188 _mm_subs_pi16(__m64 __m1, __m64 __m2) 189 { 190 return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); 191 } 192 193 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 194 _mm_subs_pu8(__m64 __m1, __m64 __m2) 195 { 196 return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); 197 } 198 199 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 200 _mm_subs_pu16(__m64 __m1, __m64 __m2) 201 { 202 return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); 203 } 204 205 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 206 _mm_madd_pi16(__m64 __m1, __m64 __m2) 207 { 208 return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); 209 } 210 211 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 212 _mm_mulhi_pi16(__m64 __m1, __m64 __m2) 213 { 214 return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); 215 } 216 217 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 218 _mm_mullo_pi16(__m64 __m1, __m64 __m2) 219 { 220 return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); 221 } 222 223 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 224 _mm_sll_pi16(__m64 __m, __m64 __count) 225 { 226 return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); 227 } 228 229 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 230 _mm_slli_pi16(__m64 __m, int __count) 231 { 232 return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); 233 } 234 235 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 236 _mm_sll_pi32(__m64 __m, __m64 __count) 237 { 238 return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); 239 } 240 241 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 242 _mm_slli_pi32(__m64 __m, int __count) 243 { 244 return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); 245 } 246 247 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 248 _mm_sll_si64(__m64 __m, __m64 __count) 249 { 250 return (__m64)__builtin_ia32_psllq(__m, __count); 251 } 252 253 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 254 _mm_slli_si64(__m64 __m, int __count) 255 { 256 return (__m64)__builtin_ia32_psllqi(__m, __count); 257 } 258 259 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 260 _mm_sra_pi16(__m64 __m, __m64 __count) 261 { 262 return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); 263 } 264 265 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 266 _mm_srai_pi16(__m64 __m, int __count) 267 { 268 return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); 269 } 270 271 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 272 _mm_sra_pi32(__m64 __m, __m64 __count) 273 { 274 return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); 275 } 276 277 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 278 _mm_srai_pi32(__m64 __m, int __count) 279 { 280 return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); 281 } 282 283 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 284 _mm_srl_pi16(__m64 __m, __m64 __count) 285 { 286 return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); 287 } 288 289 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 290 _mm_srli_pi16(__m64 __m, int __count) 291 { 292 return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); 293 } 294 295 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 296 _mm_srl_pi32(__m64 __m, __m64 __count) 297 { 298 return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); 299 } 300 301 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 302 _mm_srli_pi32(__m64 __m, int __count) 303 { 304 return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); 305 } 306 307 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 308 _mm_srl_si64(__m64 __m, __m64 __count) 309 { 310 return (__m64)__builtin_ia32_psrlq(__m, __count); 311 } 312 313 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 314 _mm_srli_si64(__m64 __m, int __count) 315 { 316 return (__m64)__builtin_ia32_psrlqi(__m, __count); 317 } 318 319 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 320 _mm_and_si64(__m64 __m1, __m64 __m2) 321 { 322 return __builtin_ia32_pand(__m1, __m2); 323 } 324 325 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 326 _mm_andnot_si64(__m64 __m1, __m64 __m2) 327 { 328 return __builtin_ia32_pandn(__m1, __m2); 329 } 330 331 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 332 _mm_or_si64(__m64 __m1, __m64 __m2) 333 { 334 return __builtin_ia32_por(__m1, __m2); 335 } 336 337 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 338 _mm_xor_si64(__m64 __m1, __m64 __m2) 339 { 340 return __builtin_ia32_pxor(__m1, __m2); 341 } 342 343 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 344 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) 345 { 346 return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); 347 } 348 349 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 350 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) 351 { 352 return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); 353 } 354 355 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 356 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) 357 { 358 return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); 359 } 360 361 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 362 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) 363 { 364 return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); 365 } 366 367 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 368 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) 369 { 370 return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); 371 } 372 373 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 374 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) 375 { 376 return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); 377 } 378 379 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 380 _mm_setzero_si64(void) 381 { 382 return (__m64){ 0LL }; 383 } 384 385 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 386 _mm_set_pi32(int __i1, int __i0) 387 { 388 return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); 389 } 390 391 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 392 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) 393 { 394 return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); 395 } 396 397 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 398 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, 399 char __b1, char __b0) 400 { 401 return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, 402 __b4, __b5, __b6, __b7); 403 } 404 405 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 406 _mm_set1_pi32(int __i) 407 { 408 return _mm_set_pi32(__i, __i); 409 } 410 411 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 412 _mm_set1_pi16(short __w) 413 { 414 return _mm_set_pi16(__w, __w, __w, __w); 415 } 416 417 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 418 _mm_set1_pi8(char __b) 419 { 420 return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); 421 } 422 423 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 424 _mm_setr_pi32(int __i0, int __i1) 425 { 426 return _mm_set_pi32(__i1, __i0); 427 } 428 429 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 430 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) 431 { 432 return _mm_set_pi16(__w3, __w2, __w1, __w0); 433 } 434 435 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 436 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, 437 char __b6, char __b7) 438 { 439 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 440 } 441 442 443 /* Aliases for compatibility. */ 444 #define _m_empty _mm_empty 445 #define _m_from_int _mm_cvtsi32_si64 446 #define _m_to_int _mm_cvtsi64_si32 447 #define _m_packsswb _mm_packs_pi16 448 #define _m_packssdw _mm_packs_pi32 449 #define _m_packuswb _mm_packs_pu16 450 #define _m_punpckhbw _mm_unpackhi_pi8 451 #define _m_punpckhwd _mm_unpackhi_pi16 452 #define _m_punpckhdq _mm_unpackhi_pi32 453 #define _m_punpcklbw _mm_unpacklo_pi8 454 #define _m_punpcklwd _mm_unpacklo_pi16 455 #define _m_punpckldq _mm_unpacklo_pi32 456 #define _m_paddb _mm_add_pi8 457 #define _m_paddw _mm_add_pi16 458 #define _m_paddd _mm_add_pi32 459 #define _m_paddsb _mm_adds_pi8 460 #define _m_paddsw _mm_adds_pi16 461 #define _m_paddusb _mm_adds_pu8 462 #define _m_paddusw _mm_adds_pu16 463 #define _m_psubb _mm_sub_pi8 464 #define _m_psubw _mm_sub_pi16 465 #define _m_psubd _mm_sub_pi32 466 #define _m_psubsb _mm_subs_pi8 467 #define _m_psubsw _mm_subs_pi16 468 #define _m_psubusb _mm_subs_pu8 469 #define _m_psubusw _mm_subs_pu16 470 #define _m_pmaddwd _mm_madd_pi16 471 #define _m_pmulhw _mm_mulhi_pi16 472 #define _m_pmullw _mm_mullo_pi16 473 #define _m_psllw _mm_sll_pi16 474 #define _m_psllwi _mm_slli_pi16 475 #define _m_pslld _mm_sll_pi32 476 #define _m_pslldi _mm_slli_pi32 477 #define _m_psllq _mm_sll_si64 478 #define _m_psllqi _mm_slli_si64 479 #define _m_psraw _mm_sra_pi16 480 #define _m_psrawi _mm_srai_pi16 481 #define _m_psrad _mm_sra_pi32 482 #define _m_psradi _mm_srai_pi32 483 #define _m_psrlw _mm_srl_pi16 484 #define _m_psrlwi _mm_srli_pi16 485 #define _m_psrld _mm_srl_pi32 486 #define _m_psrldi _mm_srli_pi32 487 #define _m_psrlq _mm_srl_si64 488 #define _m_psrlqi _mm_srli_si64 489 #define _m_pand _mm_and_si64 490 #define _m_pandn _mm_andnot_si64 491 #define _m_por _mm_or_si64 492 #define _m_pxor _mm_xor_si64 493 #define _m_pcmpeqb _mm_cmpeq_pi8 494 #define _m_pcmpeqw _mm_cmpeq_pi16 495 #define _m_pcmpeqd _mm_cmpeq_pi32 496 #define _m_pcmpgtb _mm_cmpgt_pi8 497 #define _m_pcmpgtw _mm_cmpgt_pi16 498 #define _m_pcmpgtd _mm_cmpgt_pi32 499 500 #endif /* __MMX__ */ 501 502 #endif /* __MMINTRIN_H */ 503 504