1 /*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24 #ifndef __TMMINTRIN_H 25 #define __TMMINTRIN_H 26 27 #ifndef __SSSE3__ 28 #error "SSSE3 instruction set not enabled" 29 #else 30 31 #include <pmmintrin.h> 32 33 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 34 _mm_abs_pi8(__m64 a) 35 { 36 return (__m64)__builtin_ia32_pabsb((__v8qi)a); 37 } 38 39 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 40 _mm_abs_epi8(__m128i a) 41 { 42 return (__m128i)__builtin_ia32_pabsb128((__v16qi)a); 43 } 44 45 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 46 _mm_abs_pi16(__m64 a) 47 { 48 return (__m64)__builtin_ia32_pabsw((__v4hi)a); 49 } 50 51 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 52 _mm_abs_epi16(__m128i a) 53 { 54 return (__m128i)__builtin_ia32_pabsw128((__v8hi)a); 55 } 56 57 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 58 _mm_abs_pi32(__m64 a) 59 { 60 return (__m64)__builtin_ia32_pabsd((__v2si)a); 61 } 62 63 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 64 _mm_abs_epi32(__m128i a) 65 { 66 return (__m128i)__builtin_ia32_pabsd128((__v4si)a); 67 } 68 69 #define _mm_alignr_epi8(a, b, n) (__builtin_ia32_palignr128((a), (b), (n))) 70 #define _mm_alignr_pi8(a, b, n) (__builtin_ia32_palignr((a), (b), (n))) 71 72 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 73 _mm_hadd_epi16(__m128i a, __m128i b) 74 { 75 return (__m128i)__builtin_ia32_phaddw128((__v8hi)a, (__v8hi)b); 76 } 77 78 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 79 _mm_hadd_epi32(__m128i a, __m128i b) 80 { 81 return (__m128i)__builtin_ia32_phaddd128((__v4si)a, (__v4si)b); 82 } 83 84 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 85 _mm_hadd_pi16(__m64 a, __m64 b) 86 { 87 return (__m64)__builtin_ia32_phaddw((__v4hi)a, (__v4hi)b); 88 } 89 90 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 91 _mm_hadd_pi32(__m64 a, __m64 b) 92 { 93 return (__m64)__builtin_ia32_phaddd((__v2si)a, (__v2si)b); 94 } 95 96 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 97 _mm_hadds_epi16(__m128i a, __m128i b) 98 { 99 return (__m128i)__builtin_ia32_phaddsw128((__v8hi)a, (__v8hi)b); 100 } 101 102 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 103 _mm_hadds_pi16(__m64 a, __m64 b) 104 { 105 return (__m64)__builtin_ia32_phaddsw((__v4hi)a, (__v4hi)b); 106 } 107 108 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 109 _mm_hsub_epi16(__m128i a, __m128i b) 110 { 111 return (__m128i)__builtin_ia32_phsubw128((__v8hi)a, (__v8hi)b); 112 } 113 114 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 115 _mm_hsub_epi32(__m128i a, __m128i b) 116 { 117 return (__m128i)__builtin_ia32_phsubd128((__v4si)a, (__v4si)b); 118 } 119 120 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 121 _mm_hsub_pi16(__m64 a, __m64 b) 122 { 123 return (__m64)__builtin_ia32_phsubw((__v4hi)a, (__v4hi)b); 124 } 125 126 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 127 _mm_hsub_pi32(__m64 a, __m64 b) 128 { 129 return (__m64)__builtin_ia32_phsubd((__v2si)a, (__v2si)b); 130 } 131 132 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 133 _mm_hsubs_epi16(__m128i a, __m128i b) 134 { 135 return (__m128i)__builtin_ia32_phsubsw128((__v8hi)a, (__v8hi)b); 136 } 137 138 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 139 _mm_hsubs_pi16(__m64 a, __m64 b) 140 { 141 return (__m64)__builtin_ia32_phsubsw((__v4hi)a, (__v4hi)b); 142 } 143 144 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 145 _mm_maddubs_epi16(__m128i a, __m128i b) 146 { 147 return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)a, (__v16qi)b); 148 } 149 150 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 151 _mm_maddubs_pi16(__m64 a, __m64 b) 152 { 153 return (__m64)__builtin_ia32_pmaddubsw((__v8qi)a, (__v8qi)b); 154 } 155 156 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 157 _mm_mulhrs_epi16(__m128i a, __m128i b) 158 { 159 return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)a, (__v8hi)b); 160 } 161 162 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 163 _mm_mulhrs_pi16(__m64 a, __m64 b) 164 { 165 return (__m64)__builtin_ia32_pmulhrsw((__v4hi)a, (__v4hi)b); 166 } 167 168 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 169 _mm_shuffle_epi8(__m128i a, __m128i b) 170 { 171 return (__m128i)__builtin_ia32_pshufb128((__v16qi)a, (__v16qi)b); 172 } 173 174 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 175 _mm_shuffle_pi8(__m64 a, __m64 b) 176 { 177 return (__m64)__builtin_ia32_pshufb((__v8qi)a, (__v8qi)b); 178 } 179 180 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 181 _mm_sign_epi8(__m128i a, __m128i b) 182 { 183 return (__m128i)__builtin_ia32_psignb128((__v16qi)a, (__v16qi)b); 184 } 185 186 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 187 _mm_sign_epi16(__m128i a, __m128i b) 188 { 189 return (__m128i)__builtin_ia32_psignw128((__v8hi)a, (__v8hi)b); 190 } 191 192 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 193 _mm_sign_epi32(__m128i a, __m128i b) 194 { 195 return (__m128i)__builtin_ia32_psignd128((__v4si)a, (__v4si)b); 196 } 197 198 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 199 _mm_sign_pi8(__m64 a, __m64 b) 200 { 201 return (__m64)__builtin_ia32_psignb((__v8qi)a, (__v8qi)b); 202 } 203 204 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 205 _mm_sign_pi16(__m64 a, __m64 b) 206 { 207 return (__m64)__builtin_ia32_psignw((__v4hi)a, (__v4hi)b); 208 } 209 210 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 211 _mm_sign_pi32(__m64 a, __m64 b) 212 { 213 return (__m64)__builtin_ia32_psignd((__v2si)a, (__v2si)b); 214 } 215 216 #endif /* __SSSE3__ */ 217 218 #endif /* __TMMINTRIN_H */ 219