Home | History | Annotate | Download | only in include
      1 /*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 
     24 #ifndef __TMMINTRIN_H
     25 #define __TMMINTRIN_H
     26 
     27 #ifndef __SSSE3__
     28 #error "SSSE3 instruction set not enabled"
     29 #else
     30 
     31 #include <pmmintrin.h>
     32 
     33 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     34 _mm_abs_pi8(__m64 a)
     35 {
     36     return (__m64)__builtin_ia32_pabsb((__v8qi)a);
     37 }
     38 
     39 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
     40 _mm_abs_epi8(__m128i a)
     41 {
     42     return (__m128i)__builtin_ia32_pabsb128((__v16qi)a);
     43 }
     44 
     45 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     46 _mm_abs_pi16(__m64 a)
     47 {
     48     return (__m64)__builtin_ia32_pabsw((__v4hi)a);
     49 }
     50 
     51 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
     52 _mm_abs_epi16(__m128i a)
     53 {
     54     return (__m128i)__builtin_ia32_pabsw128((__v8hi)a);
     55 }
     56 
     57 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     58 _mm_abs_pi32(__m64 a)
     59 {
     60     return (__m64)__builtin_ia32_pabsd((__v2si)a);
     61 }
     62 
     63 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
     64 _mm_abs_epi32(__m128i a)
     65 {
     66     return (__m128i)__builtin_ia32_pabsd128((__v4si)a);
     67 }
     68 
     /*
      * _mm_alignr_epi8(a, b, n): casts a and b to __m128i, reinterprets them as
      * __v16qi and forwards them together with n to __builtin_ia32_palignr128.
      * Intel documents this intrinsic (PALIGNR) as concatenating a (upper half)
      * and b (lower half), shifting the 32-byte value right by n bytes and
      * returning the low 16 bytes.
      *
      * This stays a macro so that n reaches the builtin as written by the caller.
      * It is a single, fully parenthesised expression with no local
      * declarations: each argument is evaluated exactly once, a caller argument
      * that happens to be spelled __a or __b cannot be captured, and the macro
      * does not depend on statement expressions.
      */
     #define _mm_alignr_epi8(a, b, n) \
       ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                           (__v16qi)(__m128i)(b), (n)))
     73 
     /*
      * _mm_alignr_pi8(a, b, n): casts a and b to __m64, reinterprets them as
      * __v8qi and forwards them together with n to __builtin_ia32_palignr.
      * Intel documents this intrinsic (PALIGNR) as concatenating a (upper half)
      * and b (lower half), shifting the 16-byte value right by n bytes and
      * returning the low 8 bytes.
      *
      * This stays a macro so that n reaches the builtin as written by the caller.
      * It is a single, fully parenthesised expression with no local
      * declarations: each argument is evaluated exactly once, a caller argument
      * that happens to be spelled __a or __b cannot be captured, and the macro
      * does not depend on statement expressions.
      */
     #define _mm_alignr_pi8(a, b, n) \
       ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), \
                                      (__v8qi)(__m64)(b), (n)))
     78 
     79 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
     80 _mm_hadd_epi16(__m128i a, __m128i b)
     81 {
     82     return (__m128i)__builtin_ia32_phaddw128((__v8hi)a, (__v8hi)b);
     83 }
     84 
     85 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
     86 _mm_hadd_epi32(__m128i a, __m128i b)
     87 {
     88     return (__m128i)__builtin_ia32_phaddd128((__v4si)a, (__v4si)b);
     89 }
     90 
     91 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     92 _mm_hadd_pi16(__m64 a, __m64 b)
     93 {
     94     return (__m64)__builtin_ia32_phaddw((__v4hi)a, (__v4hi)b);
     95 }
     96 
     97 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
     98 _mm_hadd_pi32(__m64 a, __m64 b)
     99 {
    100     return (__m64)__builtin_ia32_phaddd((__v2si)a, (__v2si)b);
    101 }
    102 
    103 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    104 _mm_hadds_epi16(__m128i a, __m128i b)
    105 {
    106     return (__m128i)__builtin_ia32_phaddsw128((__v8hi)a, (__v8hi)b);
    107 }
    108 
    109 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    110 _mm_hadds_pi16(__m64 a, __m64 b)
    111 {
    112     return (__m64)__builtin_ia32_phaddsw((__v4hi)a, (__v4hi)b);
    113 }
    114 
    115 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    116 _mm_hsub_epi16(__m128i a, __m128i b)
    117 {
    118     return (__m128i)__builtin_ia32_phsubw128((__v8hi)a, (__v8hi)b);
    119 }
    120 
    121 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    122 _mm_hsub_epi32(__m128i a, __m128i b)
    123 {
    124     return (__m128i)__builtin_ia32_phsubd128((__v4si)a, (__v4si)b);
    125 }
    126 
    127 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    128 _mm_hsub_pi16(__m64 a, __m64 b)
    129 {
    130     return (__m64)__builtin_ia32_phsubw((__v4hi)a, (__v4hi)b);
    131 }
    132 
    133 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    134 _mm_hsub_pi32(__m64 a, __m64 b)
    135 {
    136     return (__m64)__builtin_ia32_phsubd((__v2si)a, (__v2si)b);
    137 }
    138 
    139 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    140 _mm_hsubs_epi16(__m128i a, __m128i b)
    141 {
    142     return (__m128i)__builtin_ia32_phsubsw128((__v8hi)a, (__v8hi)b);
    143 }
    144 
    145 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    146 _mm_hsubs_pi16(__m64 a, __m64 b)
    147 {
    148     return (__m64)__builtin_ia32_phsubsw((__v4hi)a, (__v4hi)b);
    149 }
    150 
    151 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    152 _mm_maddubs_epi16(__m128i a, __m128i b)
    153 {
    154     return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)a, (__v16qi)b);
    155 }
    156 
    157 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    158 _mm_maddubs_pi16(__m64 a, __m64 b)
    159 {
    160     return (__m64)__builtin_ia32_pmaddubsw((__v8qi)a, (__v8qi)b);
    161 }
    162 
    163 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    164 _mm_mulhrs_epi16(__m128i a, __m128i b)
    165 {
    166     return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)a, (__v8hi)b);
    167 }
    168 
    169 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    170 _mm_mulhrs_pi16(__m64 a, __m64 b)
    171 {
    172     return (__m64)__builtin_ia32_pmulhrsw((__v4hi)a, (__v4hi)b);
    173 }
    174 
    175 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    176 _mm_shuffle_epi8(__m128i a, __m128i b)
    177 {
    178     return (__m128i)__builtin_ia32_pshufb128((__v16qi)a, (__v16qi)b);
    179 }
    180 
    181 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    182 _mm_shuffle_pi8(__m64 a, __m64 b)
    183 {
    184     return (__m64)__builtin_ia32_pshufb((__v8qi)a, (__v8qi)b);
    185 }
    186 
    187 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    188 _mm_sign_epi8(__m128i a, __m128i b)
    189 {
    190     return (__m128i)__builtin_ia32_psignb128((__v16qi)a, (__v16qi)b);
    191 }
    192 
    193 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    194 _mm_sign_epi16(__m128i a, __m128i b)
    195 {
    196     return (__m128i)__builtin_ia32_psignw128((__v8hi)a, (__v8hi)b);
    197 }
    198 
    199 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
    200 _mm_sign_epi32(__m128i a, __m128i b)
    201 {
    202     return (__m128i)__builtin_ia32_psignd128((__v4si)a, (__v4si)b);
    203 }
    204 
    205 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    206 _mm_sign_pi8(__m64 a, __m64 b)
    207 {
    208     return (__m64)__builtin_ia32_psignb((__v8qi)a, (__v8qi)b);
    209 }
    210 
    211 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    212 _mm_sign_pi16(__m64 a, __m64 b)
    213 {
    214     return (__m64)__builtin_ia32_psignw((__v4hi)a, (__v4hi)b);
    215 }
    216 
    217 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
    218 _mm_sign_pi32(__m64 a, __m64 b)
    219 {
    220     return (__m64)__builtin_ia32_psignd((__v2si)a, (__v2si)b);
    221 }
    222 
    223 #endif /* __SSSE3__ */
    224 
    225 #endif /* __TMMINTRIN_H */
    226