/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __TMMINTRIN_H
#define __TMMINTRIN_H

#ifndef __SSSE3__
#error "SSSE3 instruction set not enabled"
#else

#include <pmmintrin.h>

static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_abs_pi8(__m64 a)
{
    return (__m64)__builtin_ia32_pabsb((__v8qi)a);
}

static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_abs_epi8(__m128i a)
{
    return (__m128i)__builtin_ia32_pabsb128((__v16qi)a);
}

static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_abs_pi16(__m64 a)
{
    return (__m64)__builtin_ia32_pabsw((__v4hi)a);
}

static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_abs_epi16(__m128i a)
{
    return (__m128i)__builtin_ia32_pabsw128((__v8hi)a);
}

static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_abs_pi32(__m64 a)
{
    return (__m64)__builtin_ia32_pabsd((__v2si)a);
}

static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_abs_epi32(__m128i a)
{
    return (__m128i)__builtin_ia32_pabsd128((__v4si)a);
}

#define _mm_alignr_epi8(a, b, n) __extension__ ({ \
  __m128i __a = (a); \
  __m128i __b = (b); \
  (__m128i)__builtin_ia32_palignr128((__v16qi)__a, (__v16qi)__b, (n)); })

#define _mm_alignr_pi8(a, b, n) __extension__ ({ \
  __m64 __a = (a); \
  __m64 __b = (b); \
  (__m64)__builtin_ia32_palignr((__v8qi)__a, (__v8qi)__b, (n)); })
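
/* A brief usage sketch, kept in a comment for reference: _mm_abs_epi8 yields
 * the per-byte magnitude of a signed vector, and _mm_alignr_epi8 concatenates
 * two vectors and extracts a byte-shifted 16-byte window.  The variable names
 * and values below are illustrative only.
 *
 *     __m128i v   = _mm_set1_epi8(-5);
 *     __m128i mag = _mm_abs_epi8(v);            // every byte becomes 5
 *
 *     // With hypothetical vectors hi and lo, the result is bytes 4..15 of
 *     // lo followed by bytes 0..3 of hi.
 *     __m128i win = _mm_alignr_epi8(hi, lo, 4);
 */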
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hadd_epi16(__m128i a, __m128i b)
{
    return (__m128i)__builtin_ia32_phaddw128((__v8hi)a, (__v8hi)b);
}

static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hadd_epi32(__m128i a, __m128i b)
{
    return (__m128i)__builtin_ia32_phaddd128((__v4si)a, (__v4si)b);
}

static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hadd_pi16(__m64 a, __m64 b)
{
    return (__m64)__builtin_ia32_phaddw((__v4hi)a, (__v4hi)b);
}

static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hadd_pi32(__m64 a, __m64 b)
{
    return (__m64)__builtin_ia32_phaddd((__v2si)a, (__v2si)b);
}

static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hadds_epi16(__m128i a, __m128i b)
{
    return (__m128i)__builtin_ia32_phaddsw128((__v8hi)a, (__v8hi)b);
}

static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hadds_pi16(__m64 a, __m64 b)
{
    return (__m64)__builtin_ia32_phaddsw((__v4hi)a, (__v4hi)b);
}

static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hsub_epi16(__m128i a, __m128i b)
{
    return (__m128i)__builtin_ia32_phsubw128((__v8hi)a, (__v8hi)b);
}

static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hsub_epi32(__m128i a, __m128i b)
{
    return (__m128i)__builtin_ia32_phsubd128((__v4si)a, (__v4si)b);
}

static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hsub_pi16(__m64 a, __m64 b)
{
    return (__m64)__builtin_ia32_phsubw((__v4hi)a, (__v4hi)b);
}

static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hsub_pi32(__m64 a, __m64 b)
{
    return (__m64)__builtin_ia32_phsubd((__v2si)a, (__v2si)b);
}

static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hsubs_epi16(__m128i a, __m128i b)
{
    return (__m128i)__builtin_ia32_phsubsw128((__v8hi)a, (__v8hi)b);
}

static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hsubs_pi16(__m64 a, __m64 b)
{
    return (__m64)__builtin_ia32_phsubsw((__v4hi)a, (__v4hi)b);
}

static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maddubs_epi16(__m128i a, __m128i b)
{
    return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)a, (__v16qi)b);
}

static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_maddubs_pi16(__m64 a, __m64 b)
{
    return (__m64)__builtin_ia32_pmaddubsw((__v8qi)a, (__v8qi)b);
}

static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_mulhrs_epi16(__m128i a, __m128i b)
{
    return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)a, (__v8hi)b);
}

static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_mulhrs_pi16(__m64 a, __m64 b)
{
    return (__m64)__builtin_ia32_pmulhrsw((__v4hi)a, (__v4hi)b);
}

static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_shuffle_epi8(__m128i a, __m128i b)
{
    return (__m128i)__builtin_ia32_pshufb128((__v16qi)a, (__v16qi)b);
}

static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_shuffle_pi8(__m64 a, __m64 b)
{
    return (__m64)__builtin_ia32_pshufb((__v8qi)a, (__v8qi)b);
}
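
/* A brief usage sketch, kept in a comment for reference: _mm_maddubs_epi16
 * multiplies the unsigned bytes of its first operand by the corresponding
 * signed bytes of its second operand and adds adjacent products into
 * saturated signed 16-bit sums; _mm_shuffle_epi8 selects bytes of its first
 * operand using the low four bits of each control byte, writing zero wherever
 * the control byte's high bit is set.  The values below are illustrative only.
 *
 *     __m128i pix = _mm_set1_epi8(10);            // treated as unsigned
 *     __m128i wgt = _mm_set1_epi8(-3);            // treated as signed
 *     __m128i acc = _mm_maddubs_epi16(pix, wgt);  // each word: 10*-3 + 10*-3 = -60
 *
 *     __m128i ctl = _mm_setzero_si128();          // every control byte is 0
 *     __m128i bc  = _mm_shuffle_epi8(pix, ctl);   // broadcasts byte 0 of pix
 */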
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sign_epi8(__m128i a, __m128i b)
{
    return (__m128i)__builtin_ia32_psignb128((__v16qi)a, (__v16qi)b);
}

static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sign_epi16(__m128i a, __m128i b)
{
    return (__m128i)__builtin_ia32_psignw128((__v8hi)a, (__v8hi)b);
}

static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sign_epi32(__m128i a, __m128i b)
{
    return (__m128i)__builtin_ia32_psignd128((__v4si)a, (__v4si)b);
}

static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sign_pi8(__m64 a, __m64 b)
{
    return (__m64)__builtin_ia32_psignb((__v8qi)a, (__v8qi)b);
}

static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sign_pi16(__m64 a, __m64 b)
{
    return (__m64)__builtin_ia32_psignw((__v4hi)a, (__v4hi)b);
}

static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sign_pi32(__m64 a, __m64 b)
{
    return (__m64)__builtin_ia32_psignd((__v2si)a, (__v2si)b);
}

#endif /* __SSSE3__ */

#endif /* __TMMINTRIN_H */