/*
 * Copyright 2015 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Author: Oded Gabbay <oded.gabbay (at) redhat.com>
 */

/**
 * @file
 * POWER8 intrinsics portability header.
 *
 * Provides SSE2-style helpers (a 128-bit integer vector type plus set,
 * unpack, arithmetic, shift, compare, load/store and pack operations)
 * implemented with AltiVec/VSX built-ins.
 *
 * This header includes nothing itself: the including file must already
 * provide the AltiVec built-ins and the ubyte/ushort/uint/uint32_t types.
 */

#ifndef U_PWR8_H_
#define U_PWR8_H_

/*
 * Everything below is only compiled for little-endian POWER8 targets.
 * The inner "#else" (big-endian) branches are therefore currently compiled
 * out by this guard; they are kept unchanged.
 */
#if defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)

#define VECTOR_ALIGN_16 __attribute__ ((__aligned__ (16)))

/** Generic 128-bit integer vector, stored as sixteen unsigned bytes. */
typedef VECTOR_ALIGN_16 vector unsigned char __m128i;

/**
 * Overlay giving vector, byte, halfword and word views of the same
 * 128 bits, used to reinterpret a vector or to access single elements.
 */
typedef VECTOR_ALIGN_16 union m128i {
   __m128i m128i;
   vector signed int m128si;
   vector unsigned int m128ui;
   ubyte ub[16];
   ushort us[8];
   int i[4];
   uint ui[4];
} __m128i_union;

/**
 * Build a vector from four 32-bit integers.
 *
 * On little-endian builds i0 is written to word 0 and i3 to word 3; the
 * big-endian branch stores them in the reverse word order.
 */
static inline __m128i
vec_set_epi32 (int i3, int i2, int i1, int i0)
{
   __m128i_union vdst;

#ifdef PIPE_ARCH_LITTLE_ENDIAN
   vdst.i[0] = i0;
   vdst.i[1] = i1;
   vdst.i[2] = i2;
   vdst.i[3] = i3;
#else
   vdst.i[3] = i0;
   vdst.i[2] = i1;
   vdst.i[1] = i2;
   vdst.i[0] = i3;
#endif

   return (__m128i) vdst.m128si;
}

/** Same as vec_set_epi32() but with the arguments in reversed order. */
static inline __m128i
vec_setr_epi32 (int i0, int i1, int i2, int i3)
{
   return vec_set_epi32 (i3, i2, i1, i0);
}

/**
 * Interleave 32-bit words of two vectors.
 *
 * With the little-endian mask the result is built from bytes 0-3 of
 * "even", bytes 0-3 of "odd", bytes 4-7 of "even" and bytes 4-7 of "odd"
 * (vec_perm indices 16-31 select from the second operand).
 */
static inline __m128i
vec_unpacklo_epi32 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      { 0,  1,  2,  3, 16, 17, 18, 19,  4,  5,  6,  7, 20, 21, 22, 23};
#else
      {24, 25, 26, 27,  8,  9, 10, 11, 28, 29, 30, 31, 12, 13, 14, 15};
#endif

   return vec_perm (even, odd, perm_mask);
}

/**
 * Interleave 32-bit words of two vectors.
 *
 * With the little-endian mask the result is built from bytes 8-11 of
 * "even", bytes 8-11 of "odd", bytes 12-15 of "even" and bytes 12-15 of
 * "odd".
 */
static inline __m128i
vec_unpackhi_epi32 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      { 8,  9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31};
#else
      {16, 17, 18, 19,  0,  1,  2,  3, 20, 21, 22, 23,  4,  5,  6,  7};
#endif

   return vec_perm (even, odd, perm_mask);
}

/**
 * Combine one 64-bit half of each vector.
 *
 * With the little-endian mask the result is bytes 0-7 of "even" followed
 * by bytes 0-7 of "odd".
 */
static inline __m128i
vec_unpacklo_epi64 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      { 0,  1,  2,  3,  4,  5,  6,  7, 16, 17, 18, 19, 20, 21, 22, 23};
#else
      {24, 25, 26, 27, 28, 29, 30, 31,  8,  9, 10, 11, 12, 13, 14, 15};
#endif

   return vec_perm (even, odd, perm_mask);
}

/**
 * Combine one 64-bit half of each vector.
 *
 * With the little-endian mask the result is bytes 8-15 of "even" followed
 * by bytes 8-15 of "odd".
 */
static inline __m128i
vec_unpackhi_epi64 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      { 8,  9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
#else
      {16, 17, 18, 19, 20, 21, 22, 23,  0,  1,  2,  3,  4,  5,  6,  7};
#endif

   return vec_perm (even, odd, perm_mask);
}

/** Element-wise addition of the four 32-bit integers in a and b. */
static inline __m128i
vec_add_epi32 (__m128i a, __m128i b)
{
   return (__m128i) vec_add ((vector signed int) a, (vector signed int) b);
}

/** Element-wise subtraction (a - b) of the four 32-bit integers. */
static inline __m128i
vec_sub_epi32 (__m128i a, __m128i b)
{
   return (__m128i) vec_sub ((vector signed int) a, (vector signed int) b);
}

/**
 * Element-wise 32-bit multiply issued directly as the vmuluwm instruction.
 *
 * Call this function ONLY on POWER8 and newer platforms
 */
static inline __m128i
vec_mullo_epi32 (__m128i a, __m128i b)
{
   __m128i v;

   __asm__(
           "vmuluwm %0, %1, %2   \n"
           : "=v" (v)
           : "v" (a), "v" (b)
           );

   return v;
}

/**
 * Bitwise "b AND NOT a".
 *
 * Note the operand swap: vec_andc() complements its second argument.
 */
static inline __m128i
vec_andnot_si128 (__m128i a, __m128i b)
{
   return vec_andc (b, a);
}

/**
 * Transpose a 4x4 matrix of 32-bit words.
 *
 * a, b, c and d are the input rows; on return o holds word 0 of each
 * input (a0 b0 c0 d0), p word 1, q word 2 and r word 3.
 */
static inline void
transpose4_epi32(const __m128i * restrict a,
                 const __m128i * restrict b,
                 const __m128i * restrict c,
                 const __m128i * restrict d,
                 __m128i * restrict o,
                 __m128i * restrict p,
                 __m128i * restrict q,
                 __m128i * restrict r)
{
   /* First pass: interleave words pairwise (a with b, c with d). */
   __m128i t0 = vec_unpacklo_epi32(*a, *b);
   __m128i t1 = vec_unpacklo_epi32(*c, *d);
   __m128i t2 = vec_unpackhi_epi32(*a, *b);
   __m128i t3 = vec_unpackhi_epi32(*c, *d);

   /* Second pass: join the matching 64-bit halves of the pairs. */
   *o = vec_unpacklo_epi64(t0, t1);
   *p = vec_unpackhi_epi64(t0, t1);
   *q = vec_unpacklo_epi64(t2, t3);
   *r = vec_unpackhi_epi64(t2, t3);
}

/**
 * Shift each 32-bit element left by count bits, shifting in zeros.
 *
 * A count of 32 or more yields an all-zero vector; a count of 0 returns
 * vsrc unchanged.
 */
static inline __m128i
vec_slli_epi32 (__m128i vsrc, unsigned int count)
{
   __m128i_union vec_count;

   if (count >= 32)
      return (__m128i) vec_splats (0);
   else if (count == 0)
      return vsrc;

   /* In VMX, all shift count fields must contain the same value */
   vec_count.m128si = (vector signed int) vec_splats (count);
   return (__m128i) vec_sl ((vector signed int) vsrc, vec_count.m128ui);
}

/**
 * Logical right shift of each 32-bit element by count bits.
 *
 * A count of 32 or more yields an all-zero vector; a count of 0 returns
 * vsrc unchanged.
 */
static inline __m128i
vec_srli_epi32 (__m128i vsrc, unsigned int count)
{
   __m128i_union vec_count;

   if (count >= 32)
      return (__m128i) vec_splats (0);
   else if (count == 0)
      return vsrc;

   /* In VMX, all shift count fields must contain the same value */
   vec_count.m128si = (vector signed int) vec_splats (count);
   return (__m128i) vec_sr ((vector signed int) vsrc, vec_count.m128ui);
}

/**
 * Arithmetic right shift of each 32-bit element by count bits.
 *
 * A count of 0 returns vsrc unchanged. A count of 32 or more fills every
 * element with copies of its sign bit (0 for non-negative elements, -1 for
 * negative ones), mirroring SSE2 _mm_srai_epi32 semantics.
 */
static inline __m128i
vec_srai_epi32 (__m128i vsrc, unsigned int count)
{
   __m128i_union vec_count;

   if (count == 0)
      return vsrc;

   /* An arithmetic shift by 32 or more leaves only sign bits, which is
    * exactly what a shift by 31 produces.  The clamp must be explicit
    * because the hardware shift only honours the low 5 bits of the count.
    */
   if (count > 31)
      count = 31;

   /* In VMX, all shift count fields must contain the same value */
   vec_count.m128si = (vector signed int) vec_splats (count);
   return (__m128i) vec_sra ((vector signed int) vsrc, vec_count.m128ui);
}

/**
 * Element-wise 32-bit equality compare; the result of vec_cmpeq() is
 * returned reinterpreted as __m128i.
 */
static inline __m128i
vec_cmpeq_epi32 (__m128i a, __m128i b)
{
   return (__m128i) vec_cmpeq ((vector signed int) a, (vector signed int) b);
}

/**
 * Load 128 bits from src.
 *
 * The little-endian branch is a plain vector dereference; the big-endian
 * branch assembles the value from two vec_ld() loads permuted with the
 * vec_lvsl() mask, i.e. the classic unaligned-load sequence.
 */
static inline __m128i
vec_loadu_si128 (const uint32_t *src)
{
   __m128i_union vsrc;

#ifdef PIPE_ARCH_LITTLE_ENDIAN

   vsrc.m128ui = *((const vector unsigned int *) src);

#else

   __m128i vmask, tmp1, tmp2;

   vmask = vec_lvsl(0, src);

   tmp1 = (__m128i) vec_ld (0, src);
   tmp2 = (__m128i) vec_ld (15, src);
   vsrc.m128ui = (vector unsigned int) vec_perm (tmp1, tmp2, vmask);

#endif

   return vsrc.m128i;
}

/**
 * Load 128 bits from src through a plain vector dereference.
 * NOTE(review): presumably src must be 16-byte aligned -- confirm with callers.
 */
static inline __m128i
vec_load_si128 (const uint32_t *src)
{
   __m128i_union vsrc;

   vsrc.m128ui = *((const vector unsigned int *) src);

   return vsrc.m128i;
}

/** Store the 128 bits of vdata to dest using vec_st(). */
static inline void
vec_store_si128 (uint32_t *dest, __m128i vdata)
{
   vec_st ((vector unsigned int) vdata, 0, dest);
}

/**
 * Build a 16-bit mask from vsrc.
 *
 * vec_vgbbd() regroups the bits of each doubleword; bytes 15 and 7
 * (little-endian) of its result become the high and low 8 bits of the
 * return value.  Presumably these are the most significant bits of the
 * sixteen source bytes, as in SSE2 _mm_movemask_epi8 -- confirm.
 *
 * Call this function ONLY on POWER8 and newer platforms
 */
static inline int
vec_movemask_epi8 (__m128i vsrc)
{
   __m128i_union vtemp;
   int result;

   vtemp.m128i = vec_vgbbd(vsrc);

#ifdef PIPE_ARCH_LITTLE_ENDIAN
   result = vtemp.ub[15] << 8 | vtemp.ub[7];
#else
   result = vtemp.ub[0] << 8 | vtemp.ub[8];
#endif

   return result;
}

/**
 * Pack the signed 16-bit elements of a and b into 8-bit elements with
 * vec_packs(); the operands are swapped on big-endian builds.
 */
static inline __m128i
vec_packs_epi16 (__m128i a, __m128i b)
{
#ifdef PIPE_ARCH_LITTLE_ENDIAN
   return (__m128i) vec_packs ((vector signed short) a,
                               (vector signed short) b);
#else
   return (__m128i) vec_packs ((vector signed short) b,
                               (vector signed short) a);
#endif
}

/**
 * Pack the signed 32-bit elements of a and b into 16-bit elements with
 * vec_packs(); the operands are swapped on big-endian builds.
 */
static inline __m128i
vec_packs_epi32 (__m128i a, __m128i b)
{
#ifdef PIPE_ARCH_LITTLE_ENDIAN
   return (__m128i) vec_packs ((vector signed int) a, (vector signed int) b);
#else
   return (__m128i) vec_packs ((vector signed int) b, (vector signed int) a);
#endif
}

#endif /* _ARCH_PWR8 && PIPE_ARCH_LITTLE_ENDIAN */

#endif /* U_PWR8_H_ */