Home | History | Annotate | Download | only in clang-include
      1 /*===---- mmintrin.h - MMX intrinsics --------------------------------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 
     24 #ifndef __MMINTRIN_H
     25 #define __MMINTRIN_H
     26 
     27 #ifndef __MMX__
     28 #error "MMX instruction set not enabled"
     29 #else
     30 
     31 typedef long long __m64 __attribute__((__vector_size__(8)));
     32 
     33 typedef int __v2si __attribute__((__vector_size__(8)));
     34 typedef short __v4hi __attribute__((__vector_size__(8)));
     35 typedef char __v8qi __attribute__((__vector_size__(8)));
     36 
     37 static __inline__ void __attribute__((__always_inline__, __nodebug__))
     38 _mm_empty(void)
     39 {
     40     __builtin_ia32_emms();
     41 }
     42 
     43 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     44 _mm_cvtsi32_si64(int __i)
     45 {
     46     return (__m64)(__v2si){__i, 0};
     47 }
     48 
     49 static __inline__ int __attribute__((__always_inline__, __nodebug__))
     50 _mm_cvtsi64_si32(__m64 __m)
     51 {
     52     __v2si __mmx_var2 = (__v2si)__m;
     53     return __mmx_var2[0];
     54 }
     55 
     56 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     57 _mm_cvtsi64_m64(long long __i)
     58 {
     59     return (__m64)__i;
     60 }
     61 
     62 static __inline__ long long __attribute__((__always_inline__, __nodebug__))
     63 _mm_cvtm64_si64(__m64 __m)
     64 {
     65     return (long long)__m;
     66 }
     67 
     68 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     69 _mm_packs_pi16(__m64 __m1, __m64 __m2)
     70 {
     71     return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
     72 }
     73 
     74 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     75 _mm_packs_pi32(__m64 __m1, __m64 __m2)
     76 {
     77     return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
     78 }
     79 
     80 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     81 _mm_packs_pu16(__m64 __m1, __m64 __m2)
     82 {
     83     return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
     84 }
     85 
     86 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     87 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
     88 {
     89     return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 4, 8+4, 5,
     90                                           8+5, 6, 8+6, 7, 8+7);
     91 }
     92 
     93 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     94 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
     95 {
     96     return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 2, 4+2, 3,
     97                                           4+3);
     98 }
     99 
    100 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    101 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
    102 {
    103     return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 2+1);
    104 }
    105 
    106 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    107 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
    108 {
    109     return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 0, 8+0, 1,
    110                                           8+1, 2, 8+2, 3, 8+3);
    111 }
    112 
    113 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    114 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
    115 {
    116     return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 0, 4+0, 1,
    117                                           4+1);
    118 }
    119 
    120 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    121 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
    122 {
    123     return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2+0);
    124 }
    125 
    126 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    127 _mm_add_pi8(__m64 __m1, __m64 __m2)
    128 {
    129     return (__m64)((__v8qi)__m1 + (__v8qi)__m2);
    130 }
    131 
    132 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    133 _mm_add_pi16(__m64 __m1, __m64 __m2)
    134 {
    135     return (__m64)((__v4hi)__m1 + (__v4hi)__m2);
    136 }
    137 
    138 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    139 _mm_add_pi32(__m64 __m1, __m64 __m2)
    140 {
    141     return (__m64)((__v2si)__m1 + (__v2si)__m2);
    142 }
    143 
    144 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    145 _mm_adds_pi8(__m64 __m1, __m64 __m2)
    146 {
    147     return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
    148 }
    149 
    150 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    151 _mm_adds_pi16(__m64 __m1, __m64 __m2)
    152 {
    153     return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
    154 }
    155 
    156 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    157 _mm_adds_pu8(__m64 __m1, __m64 __m2)
    158 {
    159     return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
    160 }
    161 
    162 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    163 _mm_adds_pu16(__m64 __m1, __m64 __m2)
    164 {
    165     return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
    166 }
    167 
    168 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    169 _mm_sub_pi8(__m64 __m1, __m64 __m2)
    170 {
    171     return (__m64)((__v8qi)__m1 - (__v8qi)__m2);
    172 }
    173 
    174 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    175 _mm_sub_pi16(__m64 __m1, __m64 __m2)
    176 {
    177     return (__m64)((__v4hi)__m1 - (__v4hi)__m2);
    178 }
    179 
    180 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    181 _mm_sub_pi32(__m64 __m1, __m64 __m2)
    182 {
    183     return (__m64)((__v2si)__m1 - (__v2si)__m2);
    184 }
    185 
    186 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    187 _mm_subs_pi8(__m64 __m1, __m64 __m2)
    188 {
    189     return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
    190 }
    191 
    192 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    193 _mm_subs_pi16(__m64 __m1, __m64 __m2)
    194 {
    195     return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
    196 }
    197 
    198 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    199 _mm_subs_pu8(__m64 __m1, __m64 __m2)
    200 {
    201     return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
    202 }
    203 
    204 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    205 _mm_subs_pu16(__m64 __m1, __m64 __m2)
    206 {
    207     return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
    208 }
    209 
    210 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    211 _mm_madd_pi16(__m64 __m1, __m64 __m2)
    212 {
    213     return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
    214 }
    215 
    216 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    217 _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
    218 {
    219     return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
    220 }
    221 
    222 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    223 _mm_mullo_pi16(__m64 __m1, __m64 __m2)
    224 {
    225     return (__m64)((__v4hi)__m1 * (__v4hi)__m2);
    226 }
    227 
    228 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    229 _mm_sll_pi16(__m64 __m, __m64 __count)
    230 {
    231     return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
    232 }
    233 
    234 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    235 _mm_slli_pi16(__m64 __m, int __count)
    236 {
    237     return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
    238 }
    239 
    240 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    241 _mm_sll_pi32(__m64 __m, __m64 __count)
    242 {
    243     return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
    244 }
    245 
    246 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    247 _mm_slli_pi32(__m64 __m, int __count)
    248 {
    249     return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
    250 }
    251 
    252 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    253 _mm_sll_si64(__m64 __m, __m64 __count)
    254 {
    255     return __builtin_ia32_psllq(__m, __count);
    256 }
    257 
    258 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    259 _mm_slli_si64(__m64 __m, int __count)
    260 {
    261     return __builtin_ia32_psllqi(__m, __count);
    262 }
    263 
    264 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    265 _mm_sra_pi16(__m64 __m, __m64 __count)
    266 {
    267     return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
    268 }
    269 
    270 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    271 _mm_srai_pi16(__m64 __m, int __count)
    272 {
    273     return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
    274 }
    275 
    276 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    277 _mm_sra_pi32(__m64 __m, __m64 __count)
    278 {
    279     return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
    280 }
    281 
    282 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    283 _mm_srai_pi32(__m64 __m, int __count)
    284 {
    285     return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
    286 }
    287 
    288 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    289 _mm_srl_pi16(__m64 __m, __m64 __count)
    290 {
    291     return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
    292 }
    293 
    294 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    295 _mm_srli_pi16(__m64 __m, int __count)
    296 {
    297     return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
    298 }
    299 
    300 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    301 _mm_srl_pi32(__m64 __m, __m64 __count)
    302 {
    303     return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
    304 }
    305 
    306 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    307 _mm_srli_pi32(__m64 __m, int __count)
    308 {
    309     return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
    310 }
    311 
    312 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    313 _mm_srl_si64(__m64 __m, __m64 __count)
    314 {
    315     return (__m64)__builtin_ia32_psrlq(__m, __count);
    316 }
    317 
    318 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    319 _mm_srli_si64(__m64 __m, int __count)
    320 {
    321     return __builtin_ia32_psrlqi(__m, __count);
    322 }
    323 
    324 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    325 _mm_and_si64(__m64 __m1, __m64 __m2)
    326 {
    327     return __m1 & __m2;
    328 }
    329 
    330 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    331 _mm_andnot_si64(__m64 __m1, __m64 __m2)
    332 {
    333     return ~__m1 & __m2;
    334 }
    335 
    336 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    337 _mm_or_si64(__m64 __m1, __m64 __m2)
    338 {
    339     return __m1 | __m2;
    340 }
    341 
    342 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    343 _mm_xor_si64(__m64 __m1, __m64 __m2)
    344 {
    345     return __m1 ^ __m2;
    346 }
    347 
    348 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    349 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
    350 {
    351     return (__m64)((__v8qi)__m1 == (__v8qi)__m2);
    352 }
    353 
    354 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    355 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
    356 {
    357     return (__m64)((__v4hi)__m1 == (__v4hi)__m2);
    358 }
    359 
    360 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    361 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
    362 {
    363     return (__m64)((__v2si)__m1 == (__v2si)__m2);
    364 }
    365 
    366 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    367 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
    368 {
    369     return (__m64)((__v8qi)__m1 > (__v8qi)__m2);
    370 }
    371 
    372 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    373 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
    374 {
    375     return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
    376 }
    377 
    378 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    379 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
    380 {
    381     return (__m64)((__v2si)__m1 > (__v2si)__m2);
    382 }
    383 
    384 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    385 _mm_setzero_si64(void)
    386 {
    387     return (__m64){ 0LL };
    388 }
    389 
    390 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    391 _mm_set_pi32(int __i1, int __i0)
    392 {
    393     return (__m64)(__v2si){ __i0, __i1 };
    394 }
    395 
    396 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    397 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
    398 {
    399     return (__m64)(__v4hi){ __s0, __s1, __s2, __s3 };
    400 }
    401 
    402 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    403 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
    404             char __b1, char __b0)
    405 {
    406     return (__m64)(__v8qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7 };
    407 }
    408 
    409 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    410 _mm_set1_pi32(int __i)
    411 {
    412     return (__m64)(__v2si){ __i, __i };
    413 }
    414 
    415 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    416 _mm_set1_pi16(short __s)
    417 {
    418     return (__m64)(__v4hi){ __s, __s, __s, __s };
    419 }
    420 
    421 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    422 _mm_set1_pi8(char __b)
    423 {
    424     return (__m64)(__v8qi){ __b, __b, __b, __b, __b, __b, __b, __b };
    425 }
    426 
    427 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    428 _mm_setr_pi32(int __i1, int __i0)
    429 {
    430     return (__m64)(__v2si){ __i1, __i0 };
    431 }
    432 
    433 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    434 _mm_setr_pi16(short __s3, short __s2, short __s1, short __s0)
    435 {
    436     return (__m64)(__v4hi){ __s3, __s2, __s1, __s0 };
    437 }
    438 
    439 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    440 _mm_setr_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
    441              char __b1, char __b0)
    442 {
    443     return (__m64)(__v8qi){ __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0 };
    444 }
    445 
    446 
    447 /* Aliases for compatibility. */
    448 #define _m_empty _mm_empty
    449 #define _m_from_int _mm_cvtsi32_si64
    450 #define _m_to_int _mm_cvtsi64_si32
    451 #define _m_packsswb _mm_packs_pi16
    452 #define _m_packssdw _mm_packs_pi32
    453 #define _m_packuswb _mm_packs_pu16
    454 #define _m_punpckhbw _mm_unpackhi_pi8
    455 #define _m_punpckhwd _mm_unpackhi_pi16
    456 #define _m_punpckhdq _mm_unpackhi_pi32
    457 #define _m_punpcklbw _mm_unpacklo_pi8
    458 #define _m_punpcklwd _mm_unpacklo_pi16
    459 #define _m_punpckldq _mm_unpacklo_pi32
    460 #define _m_paddb _mm_add_pi8
    461 #define _m_paddw _mm_add_pi16
    462 #define _m_paddd _mm_add_pi32
    463 #define _m_paddsb _mm_adds_pi8
    464 #define _m_paddsw _mm_adds_pi16
    465 #define _m_paddusb _mm_adds_pu8
    466 #define _m_paddusw _mm_adds_pu16
    467 #define _m_psubb _mm_sub_pi8
    468 #define _m_psubw _mm_sub_pi16
    469 #define _m_psubd _mm_sub_pi32
    470 #define _m_psubsb _mm_subs_pi8
    471 #define _m_psubsw _mm_subs_pi16
    472 #define _m_psubusb _mm_subs_pu8
    473 #define _m_psubusw _mm_subs_pu16
    474 #define _m_pmaddwd _mm_madd_pi16
    475 #define _m_pmulhw _mm_mulhi_pi16
    476 #define _m_pmullw _mm_mullo_pi16
    477 #define _m_psllw _mm_sll_pi16
    478 #define _m_psllwi _mm_slli_pi16
    479 #define _m_pslld _mm_sll_pi32
    480 #define _m_pslldi _mm_slli_pi32
    481 #define _m_psllq _mm_sll_si64
    482 #define _m_psllqi _mm_slli_si64
    483 #define _m_psraw _mm_sra_pi16
    484 #define _m_psrawi _mm_srai_pi16
    485 #define _m_psrad _mm_sra_pi32
    486 #define _m_psradi _mm_srai_pi32
    487 #define _m_psrlw _mm_srl_pi16
    488 #define _m_psrlwi _mm_srli_pi16
    489 #define _m_psrld _mm_srl_pi32
    490 #define _m_psrldi _mm_srli_pi32
    491 #define _m_psrlq _mm_srl_si64
    492 #define _m_psrlqi _mm_srli_si64
    493 #define _m_pand _mm_and_si64
    494 #define _m_pandn _mm_andnot_si64
    495 #define _m_por _mm_or_si64
    496 #define _m_pxor _mm_xor_si64
    497 #define _m_pcmpeqb _mm_cmpeq_pi8
    498 #define _m_pcmpeqw _mm_cmpeq_pi16
    499 #define _m_pcmpeqd _mm_cmpeq_pi32
    500 #define _m_pcmpgtb _mm_cmpgt_pi8
    501 #define _m_pcmpgtw _mm_cmpgt_pi16
    502 #define _m_pcmpgtd _mm_cmpgt_pi32
    503 
    504 #endif /* __MMX__ */
    505 
    506 #endif /* __MMINTRIN_H */
    507 
    508