Home | History | Annotate | Download | only in include
      1 /*===---- mmintrin.h - MMX intrinsics --------------------------------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 
     24 #ifndef __MMINTRIN_H
     25 #define __MMINTRIN_H
     26 
     27 #ifndef __MMX__
     28 #error "MMX instruction set not enabled"
     29 #else
     30 
     31 typedef long long __m64 __attribute__((__vector_size__(8)));
     32 
     33 typedef int __v2si __attribute__((__vector_size__(8)));
     34 typedef short __v4hi __attribute__((__vector_size__(8)));
     35 typedef char __v8qi __attribute__((__vector_size__(8)));
     36 
     37 static __inline__ void __attribute__((__always_inline__, __nodebug__))
     38 _mm_empty(void)
     39 {
     40     __builtin_ia32_emms();
     41 }
     42 
     43 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     44 _mm_cvtsi32_si64(int __i)
     45 {
     46     return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
     47 }
     48 
     49 static __inline__ int __attribute__((__always_inline__, __nodebug__))
     50 _mm_cvtsi64_si32(__m64 __m)
     51 {
     52     return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
     53 }
     54 
     55 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     56 _mm_cvtsi64_m64(long long __i)
     57 {
     58     return (__m64)__i;
     59 }
     60 
     61 static __inline__ long long __attribute__((__always_inline__, __nodebug__))
     62 _mm_cvtm64_si64(__m64 __m)
     63 {
     64     return (long long)__m;
     65 }
     66 
     67 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     68 _mm_packs_pi16(__m64 __m1, __m64 __m2)
     69 {
     70     return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
     71 }
     72 
     73 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     74 _mm_packs_pi32(__m64 __m1, __m64 __m2)
     75 {
     76     return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
     77 }
     78 
     79 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     80 _mm_packs_pu16(__m64 __m1, __m64 __m2)
     81 {
     82     return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
     83 }
     84 
     85 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     86 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
     87 {
     88     return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
     89 }
     90 
     91 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     92 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
     93 {
     94     return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
     95 }
     96 
     97 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     98 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
     99 {
    100     return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
    101 }
    102 
    103 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    104 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
    105 {
    106     return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
    107 }
    108 
    109 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    110 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
    111 {
    112     return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
    113 }
    114 
    115 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    116 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
    117 {
    118     return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
    119 }
    120 
    121 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    122 _mm_add_pi8(__m64 __m1, __m64 __m2)
    123 {
    124     return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
    125 }
    126 
    127 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    128 _mm_add_pi16(__m64 __m1, __m64 __m2)
    129 {
    130     return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
    131 }
    132 
    133 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    134 _mm_add_pi32(__m64 __m1, __m64 __m2)
    135 {
    136     return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
    137 }
    138 
    139 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    140 _mm_adds_pi8(__m64 __m1, __m64 __m2)
    141 {
    142     return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
    143 }
    144 
    145 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    146 _mm_adds_pi16(__m64 __m1, __m64 __m2)
    147 {
    148     return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
    149 }
    150 
    151 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    152 _mm_adds_pu8(__m64 __m1, __m64 __m2)
    153 {
    154     return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
    155 }
    156 
    157 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    158 _mm_adds_pu16(__m64 __m1, __m64 __m2)
    159 {
    160     return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
    161 }
    162 
    163 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    164 _mm_sub_pi8(__m64 __m1, __m64 __m2)
    165 {
    166     return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
    167 }
    168 
    169 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    170 _mm_sub_pi16(__m64 __m1, __m64 __m2)
    171 {
    172     return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
    173 }
    174 
    175 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    176 _mm_sub_pi32(__m64 __m1, __m64 __m2)
    177 {
    178     return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
    179 }
    180 
    181 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    182 _mm_subs_pi8(__m64 __m1, __m64 __m2)
    183 {
    184     return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
    185 }
    186 
    187 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    188 _mm_subs_pi16(__m64 __m1, __m64 __m2)
    189 {
    190     return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
    191 }
    192 
    193 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    194 _mm_subs_pu8(__m64 __m1, __m64 __m2)
    195 {
    196     return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
    197 }
    198 
    199 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    200 _mm_subs_pu16(__m64 __m1, __m64 __m2)
    201 {
    202     return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
    203 }
    204 
    205 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    206 _mm_madd_pi16(__m64 __m1, __m64 __m2)
    207 {
    208     return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
    209 }
    210 
    211 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    212 _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
    213 {
    214     return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
    215 }
    216 
    217 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    218 _mm_mullo_pi16(__m64 __m1, __m64 __m2)
    219 {
    220     return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
    221 }
    222 
    223 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    224 _mm_sll_pi16(__m64 __m, __m64 __count)
    225 {
    226     return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
    227 }
    228 
    229 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    230 _mm_slli_pi16(__m64 __m, int __count)
    231 {
    232     return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
    233 }
    234 
    235 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    236 _mm_sll_pi32(__m64 __m, __m64 __count)
    237 {
    238     return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
    239 }
    240 
    241 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    242 _mm_slli_pi32(__m64 __m, int __count)
    243 {
    244     return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
    245 }
    246 
    247 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    248 _mm_sll_si64(__m64 __m, __m64 __count)
    249 {
    250     return (__m64)__builtin_ia32_psllq(__m, __count);
    251 }
    252 
    253 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    254 _mm_slli_si64(__m64 __m, int __count)
    255 {
    256     return (__m64)__builtin_ia32_psllqi(__m, __count);
    257 }
    258 
    259 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    260 _mm_sra_pi16(__m64 __m, __m64 __count)
    261 {
    262     return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
    263 }
    264 
    265 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    266 _mm_srai_pi16(__m64 __m, int __count)
    267 {
    268     return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
    269 }
    270 
    271 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    272 _mm_sra_pi32(__m64 __m, __m64 __count)
    273 {
    274     return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
    275 }
    276 
    277 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    278 _mm_srai_pi32(__m64 __m, int __count)
    279 {
    280     return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
    281 }
    282 
    283 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    284 _mm_srl_pi16(__m64 __m, __m64 __count)
    285 {
    286     return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
    287 }
    288 
    289 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    290 _mm_srli_pi16(__m64 __m, int __count)
    291 {
    292     return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
    293 }
    294 
    295 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    296 _mm_srl_pi32(__m64 __m, __m64 __count)
    297 {
    298     return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
    299 }
    300 
    301 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    302 _mm_srli_pi32(__m64 __m, int __count)
    303 {
    304     return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
    305 }
    306 
    307 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    308 _mm_srl_si64(__m64 __m, __m64 __count)
    309 {
    310     return (__m64)__builtin_ia32_psrlq(__m, __count);
    311 }
    312 
    313 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    314 _mm_srli_si64(__m64 __m, int __count)
    315 {
    316     return (__m64)__builtin_ia32_psrlqi(__m, __count);
    317 }
    318 
    319 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    320 _mm_and_si64(__m64 __m1, __m64 __m2)
    321 {
    322     return __builtin_ia32_pand(__m1, __m2);
    323 }
    324 
    325 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    326 _mm_andnot_si64(__m64 __m1, __m64 __m2)
    327 {
    328     return __builtin_ia32_pandn(__m1, __m2);
    329 }
    330 
    331 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    332 _mm_or_si64(__m64 __m1, __m64 __m2)
    333 {
    334     return __builtin_ia32_por(__m1, __m2);
    335 }
    336 
    337 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    338 _mm_xor_si64(__m64 __m1, __m64 __m2)
    339 {
    340     return __builtin_ia32_pxor(__m1, __m2);
    341 }
    342 
    343 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    344 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
    345 {
    346     return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
    347 }
    348 
    349 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    350 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
    351 {
    352     return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
    353 }
    354 
    355 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    356 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
    357 {
    358     return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
    359 }
    360 
    361 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    362 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
    363 {
    364     return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
    365 }
    366 
    367 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    368 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
    369 {
    370     return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
    371 }
    372 
    373 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    374 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
    375 {
    376     return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
    377 }
    378 
    379 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    380 _mm_setzero_si64(void)
    381 {
    382     return (__m64){ 0LL };
    383 }
    384 
    385 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    386 _mm_set_pi32(int __i1, int __i0)
    387 {
    388     return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
    389 }
    390 
    391 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    392 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
    393 {
    394     return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
    395 }
    396 
    397 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    398 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
    399             char __b1, char __b0)
    400 {
    401     return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
    402                                                __b4, __b5, __b6, __b7);
    403 }
    404 
    405 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    406 _mm_set1_pi32(int __i)
    407 {
    408     return _mm_set_pi32(__i, __i);
    409 }
    410 
    411 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    412 _mm_set1_pi16(short __w)
    413 {
    414     return _mm_set_pi16(__w, __w, __w, __w);
    415 }
    416 
    417 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    418 _mm_set1_pi8(char __b)
    419 {
    420     return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
    421 }
    422 
    423 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    424 _mm_setr_pi32(int __i0, int __i1)
    425 {
    426     return _mm_set_pi32(__i1, __i0);
    427 }
    428 
    429 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    430 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
    431 {
    432     return _mm_set_pi16(__w3, __w2, __w1, __w0);
    433 }
    434 
    435 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    436 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
    437              char __b6, char __b7)
    438 {
    439     return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
    440 }
    441 
    442 
    443 /* Aliases for compatibility. */
    444 #define _m_empty _mm_empty
    445 #define _m_from_int _mm_cvtsi32_si64
    446 #define _m_to_int _mm_cvtsi64_si32
    447 #define _m_packsswb _mm_packs_pi16
    448 #define _m_packssdw _mm_packs_pi32
    449 #define _m_packuswb _mm_packs_pu16
    450 #define _m_punpckhbw _mm_unpackhi_pi8
    451 #define _m_punpckhwd _mm_unpackhi_pi16
    452 #define _m_punpckhdq _mm_unpackhi_pi32
    453 #define _m_punpcklbw _mm_unpacklo_pi8
    454 #define _m_punpcklwd _mm_unpacklo_pi16
    455 #define _m_punpckldq _mm_unpacklo_pi32
    456 #define _m_paddb _mm_add_pi8
    457 #define _m_paddw _mm_add_pi16
    458 #define _m_paddd _mm_add_pi32
    459 #define _m_paddsb _mm_adds_pi8
    460 #define _m_paddsw _mm_adds_pi16
    461 #define _m_paddusb _mm_adds_pu8
    462 #define _m_paddusw _mm_adds_pu16
    463 #define _m_psubb _mm_sub_pi8
    464 #define _m_psubw _mm_sub_pi16
    465 #define _m_psubd _mm_sub_pi32
    466 #define _m_psubsb _mm_subs_pi8
    467 #define _m_psubsw _mm_subs_pi16
    468 #define _m_psubusb _mm_subs_pu8
    469 #define _m_psubusw _mm_subs_pu16
    470 #define _m_pmaddwd _mm_madd_pi16
    471 #define _m_pmulhw _mm_mulhi_pi16
    472 #define _m_pmullw _mm_mullo_pi16
    473 #define _m_psllw _mm_sll_pi16
    474 #define _m_psllwi _mm_slli_pi16
    475 #define _m_pslld _mm_sll_pi32
    476 #define _m_pslldi _mm_slli_pi32
    477 #define _m_psllq _mm_sll_si64
    478 #define _m_psllqi _mm_slli_si64
    479 #define _m_psraw _mm_sra_pi16
    480 #define _m_psrawi _mm_srai_pi16
    481 #define _m_psrad _mm_sra_pi32
    482 #define _m_psradi _mm_srai_pi32
    483 #define _m_psrlw _mm_srl_pi16
    484 #define _m_psrlwi _mm_srli_pi16
    485 #define _m_psrld _mm_srl_pi32
    486 #define _m_psrldi _mm_srli_pi32
    487 #define _m_psrlq _mm_srl_si64
    488 #define _m_psrlqi _mm_srli_si64
    489 #define _m_pand _mm_and_si64
    490 #define _m_pandn _mm_andnot_si64
    491 #define _m_por _mm_or_si64
    492 #define _m_pxor _mm_xor_si64
    493 #define _m_pcmpeqb _mm_cmpeq_pi8
    494 #define _m_pcmpeqw _mm_cmpeq_pi16
    495 #define _m_pcmpeqd _mm_cmpeq_pi32
    496 #define _m_pcmpgtb _mm_cmpgt_pi8
    497 #define _m_pcmpgtw _mm_cmpgt_pi16
    498 #define _m_pcmpgtd _mm_cmpgt_pi32
    499 
    500 #endif /* __MMX__ */
    501 
    502 #endif /* __MMINTRIN_H */
    503 
    504