Home | History | Annotate | Download | only in include
      1 /*===---- mmintrin.h - MMX intrinsics --------------------------------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 
     24 #ifndef __MMINTRIN_H
     25 #define __MMINTRIN_H
     26 
/* 8-byte vector of long long elements; the public vector type used in the
 * signatures of the intrinsics below. */
typedef long long __m64 __attribute__((__vector_size__(8)));

/* Internal 8-byte vector views with int, short and char elements.  The
 * intrinsics cast their __m64 operands to one of these before calling a
 * builtin. */
typedef int __v2si __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));

/* Define the default attributes for the functions in this file: always
 * inlined, no debug info, and compiled with the "mmx" target feature.  The
 * macro is #undef'd once the last intrinsic has been defined. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
     35 
     36 static __inline__ void __DEFAULT_FN_ATTRS
     37 _mm_empty(void)
     38 {
     39     __builtin_ia32_emms();
     40 }
     41 
     42 static __inline__ __m64 __DEFAULT_FN_ATTRS
     43 _mm_cvtsi32_si64(int __i)
     44 {
     45     return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
     46 }
     47 
     48 static __inline__ int __DEFAULT_FN_ATTRS
     49 _mm_cvtsi64_si32(__m64 __m)
     50 {
     51     return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
     52 }
     53 
     54 static __inline__ __m64 __DEFAULT_FN_ATTRS
     55 _mm_cvtsi64_m64(long long __i)
     56 {
     57     return (__m64)__i;
     58 }
     59 
     60 static __inline__ long long __DEFAULT_FN_ATTRS
     61 _mm_cvtm64_si64(__m64 __m)
     62 {
     63     return (long long)__m;
     64 }
     65 
     66 static __inline__ __m64 __DEFAULT_FN_ATTRS
     67 _mm_packs_pi16(__m64 __m1, __m64 __m2)
     68 {
     69     return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
     70 }
     71 
     72 static __inline__ __m64 __DEFAULT_FN_ATTRS
     73 _mm_packs_pi32(__m64 __m1, __m64 __m2)
     74 {
     75     return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
     76 }
     77 
     78 static __inline__ __m64 __DEFAULT_FN_ATTRS
     79 _mm_packs_pu16(__m64 __m1, __m64 __m2)
     80 {
     81     return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
     82 }
     83 
     84 static __inline__ __m64 __DEFAULT_FN_ATTRS
     85 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
     86 {
     87     return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
     88 }
     89 
     90 static __inline__ __m64 __DEFAULT_FN_ATTRS
     91 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
     92 {
     93     return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
     94 }
     95 
     96 static __inline__ __m64 __DEFAULT_FN_ATTRS
     97 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
     98 {
     99     return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
    100 }
    101 
    102 static __inline__ __m64 __DEFAULT_FN_ATTRS
    103 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
    104 {
    105     return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
    106 }
    107 
    108 static __inline__ __m64 __DEFAULT_FN_ATTRS
    109 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
    110 {
    111     return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
    112 }
    113 
    114 static __inline__ __m64 __DEFAULT_FN_ATTRS
    115 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
    116 {
    117     return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
    118 }
    119 
    120 static __inline__ __m64 __DEFAULT_FN_ATTRS
    121 _mm_add_pi8(__m64 __m1, __m64 __m2)
    122 {
    123     return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
    124 }
    125 
    126 static __inline__ __m64 __DEFAULT_FN_ATTRS
    127 _mm_add_pi16(__m64 __m1, __m64 __m2)
    128 {
    129     return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
    130 }
    131 
    132 static __inline__ __m64 __DEFAULT_FN_ATTRS
    133 _mm_add_pi32(__m64 __m1, __m64 __m2)
    134 {
    135     return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
    136 }
    137 
    138 static __inline__ __m64 __DEFAULT_FN_ATTRS
    139 _mm_adds_pi8(__m64 __m1, __m64 __m2)
    140 {
    141     return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
    142 }
    143 
    144 static __inline__ __m64 __DEFAULT_FN_ATTRS
    145 _mm_adds_pi16(__m64 __m1, __m64 __m2)
    146 {
    147     return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
    148 }
    149 
    150 static __inline__ __m64 __DEFAULT_FN_ATTRS
    151 _mm_adds_pu8(__m64 __m1, __m64 __m2)
    152 {
    153     return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
    154 }
    155 
    156 static __inline__ __m64 __DEFAULT_FN_ATTRS
    157 _mm_adds_pu16(__m64 __m1, __m64 __m2)
    158 {
    159     return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
    160 }
    161 
    162 static __inline__ __m64 __DEFAULT_FN_ATTRS
    163 _mm_sub_pi8(__m64 __m1, __m64 __m2)
    164 {
    165     return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
    166 }
    167 
    168 static __inline__ __m64 __DEFAULT_FN_ATTRS
    169 _mm_sub_pi16(__m64 __m1, __m64 __m2)
    170 {
    171     return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
    172 }
    173 
    174 static __inline__ __m64 __DEFAULT_FN_ATTRS
    175 _mm_sub_pi32(__m64 __m1, __m64 __m2)
    176 {
    177     return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
    178 }
    179 
    180 static __inline__ __m64 __DEFAULT_FN_ATTRS
    181 _mm_subs_pi8(__m64 __m1, __m64 __m2)
    182 {
    183     return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
    184 }
    185 
    186 static __inline__ __m64 __DEFAULT_FN_ATTRS
    187 _mm_subs_pi16(__m64 __m1, __m64 __m2)
    188 {
    189     return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
    190 }
    191 
    192 static __inline__ __m64 __DEFAULT_FN_ATTRS
    193 _mm_subs_pu8(__m64 __m1, __m64 __m2)
    194 {
    195     return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
    196 }
    197 
    198 static __inline__ __m64 __DEFAULT_FN_ATTRS
    199 _mm_subs_pu16(__m64 __m1, __m64 __m2)
    200 {
    201     return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
    202 }
    203 
    204 static __inline__ __m64 __DEFAULT_FN_ATTRS
    205 _mm_madd_pi16(__m64 __m1, __m64 __m2)
    206 {
    207     return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
    208 }
    209 
    210 static __inline__ __m64 __DEFAULT_FN_ATTRS
    211 _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
    212 {
    213     return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
    214 }
    215 
    216 static __inline__ __m64 __DEFAULT_FN_ATTRS
    217 _mm_mullo_pi16(__m64 __m1, __m64 __m2)
    218 {
    219     return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
    220 }
    221 
    222 static __inline__ __m64 __DEFAULT_FN_ATTRS
    223 _mm_sll_pi16(__m64 __m, __m64 __count)
    224 {
    225     return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
    226 }
    227 
    228 static __inline__ __m64 __DEFAULT_FN_ATTRS
    229 _mm_slli_pi16(__m64 __m, int __count)
    230 {
    231     return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
    232 }
    233 
    234 static __inline__ __m64 __DEFAULT_FN_ATTRS
    235 _mm_sll_pi32(__m64 __m, __m64 __count)
    236 {
    237     return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
    238 }
    239 
    240 static __inline__ __m64 __DEFAULT_FN_ATTRS
    241 _mm_slli_pi32(__m64 __m, int __count)
    242 {
    243     return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
    244 }
    245 
    246 static __inline__ __m64 __DEFAULT_FN_ATTRS
    247 _mm_sll_si64(__m64 __m, __m64 __count)
    248 {
    249     return (__m64)__builtin_ia32_psllq(__m, __count);
    250 }
    251 
    252 static __inline__ __m64 __DEFAULT_FN_ATTRS
    253 _mm_slli_si64(__m64 __m, int __count)
    254 {
    255     return (__m64)__builtin_ia32_psllqi(__m, __count);
    256 }
    257 
    258 static __inline__ __m64 __DEFAULT_FN_ATTRS
    259 _mm_sra_pi16(__m64 __m, __m64 __count)
    260 {
    261     return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
    262 }
    263 
    264 static __inline__ __m64 __DEFAULT_FN_ATTRS
    265 _mm_srai_pi16(__m64 __m, int __count)
    266 {
    267     return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
    268 }
    269 
    270 static __inline__ __m64 __DEFAULT_FN_ATTRS
    271 _mm_sra_pi32(__m64 __m, __m64 __count)
    272 {
    273     return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
    274 }
    275 
    276 static __inline__ __m64 __DEFAULT_FN_ATTRS
    277 _mm_srai_pi32(__m64 __m, int __count)
    278 {
    279     return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
    280 }
    281 
    282 static __inline__ __m64 __DEFAULT_FN_ATTRS
    283 _mm_srl_pi16(__m64 __m, __m64 __count)
    284 {
    285     return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
    286 }
    287 
    288 static __inline__ __m64 __DEFAULT_FN_ATTRS
    289 _mm_srli_pi16(__m64 __m, int __count)
    290 {
    291     return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
    292 }
    293 
    294 static __inline__ __m64 __DEFAULT_FN_ATTRS
    295 _mm_srl_pi32(__m64 __m, __m64 __count)
    296 {
    297     return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
    298 }
    299 
    300 static __inline__ __m64 __DEFAULT_FN_ATTRS
    301 _mm_srli_pi32(__m64 __m, int __count)
    302 {
    303     return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
    304 }
    305 
    306 static __inline__ __m64 __DEFAULT_FN_ATTRS
    307 _mm_srl_si64(__m64 __m, __m64 __count)
    308 {
    309     return (__m64)__builtin_ia32_psrlq(__m, __count);
    310 }
    311 
    312 static __inline__ __m64 __DEFAULT_FN_ATTRS
    313 _mm_srli_si64(__m64 __m, int __count)
    314 {
    315     return (__m64)__builtin_ia32_psrlqi(__m, __count);
    316 }
    317 
    318 static __inline__ __m64 __DEFAULT_FN_ATTRS
    319 _mm_and_si64(__m64 __m1, __m64 __m2)
    320 {
    321     return __builtin_ia32_pand(__m1, __m2);
    322 }
    323 
    324 static __inline__ __m64 __DEFAULT_FN_ATTRS
    325 _mm_andnot_si64(__m64 __m1, __m64 __m2)
    326 {
    327     return __builtin_ia32_pandn(__m1, __m2);
    328 }
    329 
    330 static __inline__ __m64 __DEFAULT_FN_ATTRS
    331 _mm_or_si64(__m64 __m1, __m64 __m2)
    332 {
    333     return __builtin_ia32_por(__m1, __m2);
    334 }
    335 
    336 static __inline__ __m64 __DEFAULT_FN_ATTRS
    337 _mm_xor_si64(__m64 __m1, __m64 __m2)
    338 {
    339     return __builtin_ia32_pxor(__m1, __m2);
    340 }
    341 
    342 static __inline__ __m64 __DEFAULT_FN_ATTRS
    343 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
    344 {
    345     return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
    346 }
    347 
    348 static __inline__ __m64 __DEFAULT_FN_ATTRS
    349 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
    350 {
    351     return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
    352 }
    353 
    354 static __inline__ __m64 __DEFAULT_FN_ATTRS
    355 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
    356 {
    357     return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
    358 }
    359 
    360 static __inline__ __m64 __DEFAULT_FN_ATTRS
    361 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
    362 {
    363     return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
    364 }
    365 
    366 static __inline__ __m64 __DEFAULT_FN_ATTRS
    367 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
    368 {
    369     return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
    370 }
    371 
    372 static __inline__ __m64 __DEFAULT_FN_ATTRS
    373 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
    374 {
    375     return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
    376 }
    377 
    378 static __inline__ __m64 __DEFAULT_FN_ATTRS
    379 _mm_setzero_si64(void)
    380 {
    381     return (__m64){ 0LL };
    382 }
    383 
    384 static __inline__ __m64 __DEFAULT_FN_ATTRS
    385 _mm_set_pi32(int __i1, int __i0)
    386 {
    387     return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
    388 }
    389 
    390 static __inline__ __m64 __DEFAULT_FN_ATTRS
    391 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
    392 {
    393     return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
    394 }
    395 
    396 static __inline__ __m64 __DEFAULT_FN_ATTRS
    397 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
    398             char __b1, char __b0)
    399 {
    400     return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
    401                                                __b4, __b5, __b6, __b7);
    402 }
    403 
    404 static __inline__ __m64 __DEFAULT_FN_ATTRS
    405 _mm_set1_pi32(int __i)
    406 {
    407     return _mm_set_pi32(__i, __i);
    408 }
    409 
    410 static __inline__ __m64 __DEFAULT_FN_ATTRS
    411 _mm_set1_pi16(short __w)
    412 {
    413     return _mm_set_pi16(__w, __w, __w, __w);
    414 }
    415 
    416 static __inline__ __m64 __DEFAULT_FN_ATTRS
    417 _mm_set1_pi8(char __b)
    418 {
    419     return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
    420 }
    421 
    422 static __inline__ __m64 __DEFAULT_FN_ATTRS
    423 _mm_setr_pi32(int __i0, int __i1)
    424 {
    425     return _mm_set_pi32(__i1, __i0);
    426 }
    427 
    428 static __inline__ __m64 __DEFAULT_FN_ATTRS
    429 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
    430 {
    431     return _mm_set_pi16(__w3, __w2, __w1, __w0);
    432 }
    433 
    434 static __inline__ __m64 __DEFAULT_FN_ATTRS
    435 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
    436              char __b6, char __b7)
    437 {
    438     return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
    439 }
    440 
    441 #undef __DEFAULT_FN_ATTRS
    442 
/* Aliases for compatibility: each older _m_* name expands to the
 * corresponding _mm_* intrinsic defined in this header. */
    444 #define _m_empty _mm_empty
    445 #define _m_from_int _mm_cvtsi32_si64
    446 #define _m_from_int64 _mm_cvtsi64_m64
    447 #define _m_to_int _mm_cvtsi64_si32
    448 #define _m_to_int64 _mm_cvtm64_si64
    449 #define _m_packsswb _mm_packs_pi16
    450 #define _m_packssdw _mm_packs_pi32
    451 #define _m_packuswb _mm_packs_pu16
    452 #define _m_punpckhbw _mm_unpackhi_pi8
    453 #define _m_punpckhwd _mm_unpackhi_pi16
    454 #define _m_punpckhdq _mm_unpackhi_pi32
    455 #define _m_punpcklbw _mm_unpacklo_pi8
    456 #define _m_punpcklwd _mm_unpacklo_pi16
    457 #define _m_punpckldq _mm_unpacklo_pi32
    458 #define _m_paddb _mm_add_pi8
    459 #define _m_paddw _mm_add_pi16
    460 #define _m_paddd _mm_add_pi32
    461 #define _m_paddsb _mm_adds_pi8
    462 #define _m_paddsw _mm_adds_pi16
    463 #define _m_paddusb _mm_adds_pu8
    464 #define _m_paddusw _mm_adds_pu16
    465 #define _m_psubb _mm_sub_pi8
    466 #define _m_psubw _mm_sub_pi16
    467 #define _m_psubd _mm_sub_pi32
    468 #define _m_psubsb _mm_subs_pi8
    469 #define _m_psubsw _mm_subs_pi16
    470 #define _m_psubusb _mm_subs_pu8
    471 #define _m_psubusw _mm_subs_pu16
    472 #define _m_pmaddwd _mm_madd_pi16
    473 #define _m_pmulhw _mm_mulhi_pi16
    474 #define _m_pmullw _mm_mullo_pi16
    475 #define _m_psllw _mm_sll_pi16
    476 #define _m_psllwi _mm_slli_pi16
    477 #define _m_pslld _mm_sll_pi32
    478 #define _m_pslldi _mm_slli_pi32
    479 #define _m_psllq _mm_sll_si64
    480 #define _m_psllqi _mm_slli_si64
    481 #define _m_psraw _mm_sra_pi16
    482 #define _m_psrawi _mm_srai_pi16
    483 #define _m_psrad _mm_sra_pi32
    484 #define _m_psradi _mm_srai_pi32
    485 #define _m_psrlw _mm_srl_pi16
    486 #define _m_psrlwi _mm_srli_pi16
    487 #define _m_psrld _mm_srl_pi32
    488 #define _m_psrldi _mm_srli_pi32
    489 #define _m_psrlq _mm_srl_si64
    490 #define _m_psrlqi _mm_srli_si64
    491 #define _m_pand _mm_and_si64
    492 #define _m_pandn _mm_andnot_si64
    493 #define _m_por _mm_or_si64
    494 #define _m_pxor _mm_xor_si64
    495 #define _m_pcmpeqb _mm_cmpeq_pi8
    496 #define _m_pcmpeqw _mm_cmpeq_pi16
    497 #define _m_pcmpeqd _mm_cmpeq_pi32
    498 #define _m_pcmpgtb _mm_cmpgt_pi8
    499 #define _m_pcmpgtw _mm_cmpgt_pi16
    500 #define _m_pcmpgtd _mm_cmpgt_pi32
    501 
    502 #endif /* __MMINTRIN_H */
    503 
    504