/*
 * Copyright 2015 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Author: Oded Gabbay <oded.gabbay@redhat.com>
 */

/**
 * @file
 * POWER8 intrinsics portability header.
 *
 * Provides VMX/VSX implementations of a subset of the SSE2 integer
 * intrinsics for little-endian POWER8 and newer processors.
 */

#ifndef U_PWR8_H_
#define U_PWR8_H_

#if defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)

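/*
 * Note: because the guard above requires PIPE_ARCH_LITTLE_ENDIAN, the
 * big-endian #else paths below are never compiled; they are kept for
 * reference only.
 */
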
#define VECTOR_ALIGN_16 __attribute__ ((__aligned__ (16)))

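/* SSE2's __m128i type, expressed as a 16-byte-aligned AltiVec vector. */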
typedef VECTOR_ALIGN_16 vector unsigned char __m128i;

typedef VECTOR_ALIGN_16 union m128i {
   __m128i m128i;
   vector signed int m128si;
   vector unsigned int m128ui;
   ubyte ub[16];
   ushort us[8];
   int i[4];
   uint ui[4];
} __m128i_union;

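/*
 * A minimal usage sketch of __m128i_union (lane order shown is little
 * endian; ubyte/ushort/uint are gallium typedefs, so this header is
 * expected to be included after p_compiler.h):
 *
 *    __m128i_union u;
 *    u.i[0] = 1; u.i[1] = 2; u.i[2] = 3; u.i[3] = 4;
 *    __m128i v = u.m128i;   -- the 32-bit lanes of v are {1, 2, 3, 4}
 */
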
static inline __m128i
vec_set_epi32 (int i3, int i2, int i1, int i0)
{
   __m128i_union vdst;

#ifdef PIPE_ARCH_LITTLE_ENDIAN
   vdst.i[0] = i0;
   vdst.i[1] = i1;
   vdst.i[2] = i2;
   vdst.i[3] = i3;
#else
   vdst.i[3] = i0;
   vdst.i[2] = i1;
   vdst.i[1] = i2;
   vdst.i[0] = i3;
#endif

   return (__m128i) vdst.m128si;
}

static inline __m128i
vec_setr_epi32 (int i0, int i1, int i2, int i3)
{
   return vec_set_epi32 (i3, i2, i1, i0);
}

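/*
 * A minimal sketch (hypothetical helper, not part of the original
 * header) showing the _mm_set_epi32-style lane order: both calls below
 * build a vector whose 32-bit lanes are {0, 1, 2, 3}.
 */
static inline void
u_pwr8_set_example (void)
{
   __m128i a = vec_set_epi32 (3, 2, 1, 0);   /* highest lane first */
   __m128i b = vec_setr_epi32 (0, 1, 2, 3);  /* lowest lane first */
   (void) a; (void) b;
}
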
static inline __m128i
vec_unpacklo_epi32 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      { 0,  1,  2,  3, 16, 17, 18, 19,  4,  5,  6,  7, 20, 21, 22, 23};
#else
      {24, 25, 26, 27,  8,  9, 10, 11, 28, 29, 30, 31, 12, 13, 14, 15};
#endif

   return vec_perm (even, odd, perm_mask);
}

static inline __m128i
vec_unpackhi_epi32 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      { 8,  9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31};
#else
      {16, 17, 18, 19,  0,  1,  2,  3, 20, 21, 22, 23,  4,  5,  6,  7};
#endif

   return vec_perm (even, odd, perm_mask);
}

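/*
 * Semantics sketch: with even = {e0,e1,e2,e3} and odd = {o0,o1,o2,o3},
 * vec_unpacklo_epi32 returns {e0,o0,e1,o1} and vec_unpackhi_epi32
 * returns {e2,o2,e3,o3}, matching _mm_unpacklo/hi_epi32.
 */
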
static inline __m128i
vec_unpacklo_epi64 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      { 0,  1,  2,  3,  4,  5,  6,  7, 16, 17, 18, 19, 20, 21, 22, 23};
#else
      {24, 25, 26, 27, 28, 29, 30, 31,  8,  9, 10, 11, 12, 13, 14, 15};
#endif

   return vec_perm (even, odd, perm_mask);
}

static inline __m128i
vec_unpackhi_epi64 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      { 8,  9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
#else
      {16, 17, 18, 19, 20, 21, 22, 23,  0,  1,  2,  3,  4,  5,  6,  7};
#endif

   return vec_perm (even, odd, perm_mask);
}

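/*
 * 64-bit analogs: vec_unpacklo_epi64 returns the low 64-bit halves
 * {e0_64, o0_64} and vec_unpackhi_epi64 the high halves {e1_64, o1_64},
 * matching _mm_unpacklo/hi_epi64.
 */
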
static inline __m128i
vec_add_epi32 (__m128i a, __m128i b)
{
   return (__m128i) vec_add ((vector signed int) a, (vector signed int) b);
}

static inline __m128i
vec_sub_epi32 (__m128i a, __m128i b)
{
   return (__m128i) vec_sub ((vector signed int) a, (vector signed int) b);
}

/* Call this function ONLY on POWER8 and newer platforms */
static inline __m128i
vec_mullo_epi32 (__m128i a, __m128i b)
{
   __m128i v;

   __asm__(
           "vmuluwm %0, %1, %2   \n"
           : "=v" (v)
           : "v" (a), "v" (b)
           );

   return v;
}

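/*
 * vmuluwm ("vector multiply unsigned word modulo", new in POWER8)
 * keeps the low 32 bits of each 32x32-bit product, which is the
 * _mm_mullo_epi32 behaviour for both signed and unsigned inputs.
 */
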
static inline __m128i
vec_andnot_si128 (__m128i a, __m128i b)
{
   return vec_andc (b, a);
}

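/*
 * As with _mm_andnot_si128, the result above is (~a) & b; the operands
 * are swapped because vec_andc (x, y) computes x & ~y.
 */
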
static inline void
transpose4_epi32(const __m128i * restrict a,
                 const __m128i * restrict b,
                 const __m128i * restrict c,
                 const __m128i * restrict d,
                 __m128i * restrict o,
                 __m128i * restrict p,
                 __m128i * restrict q,
                 __m128i * restrict r)
{
   __m128i t0 = vec_unpacklo_epi32(*a, *b);
   __m128i t1 = vec_unpacklo_epi32(*c, *d);
   __m128i t2 = vec_unpackhi_epi32(*a, *b);
   __m128i t3 = vec_unpackhi_epi32(*c, *d);

   *o = vec_unpacklo_epi64(t0, t1);
   *p = vec_unpackhi_epi64(t0, t1);
   *q = vec_unpacklo_epi64(t2, t3);
   *r = vec_unpackhi_epi64(t2, t3);
}

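/*
 * A minimal sketch (hypothetical helper, not part of the original
 * header): transposing rows {a0..a3}..{d0..d3} yields the columns,
 * e.g. o = {a0,b0,c0,d0} and r = {a3,b3,c3,d3}.
 */
static inline void
u_pwr8_transpose_example (__m128i m[4])
{
   __m128i o, p, q, r;

   transpose4_epi32 (&m[0], &m[1], &m[2], &m[3], &o, &p, &q, &r);

   m[0] = o; m[1] = p; m[2] = q; m[3] = r;
}
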
static inline __m128i
vec_slli_epi32 (__m128i vsrc, unsigned int count)
{
   __m128i_union vec_count;

   if (count >= 32)
      return (__m128i) vec_splats (0);
   else if (count == 0)
      return vsrc;

   /* In VMX, all shift count fields must contain the same value */
   vec_count.m128si = (vector signed int) vec_splats (count);
   return (__m128i) vec_sl ((vector signed int) vsrc, vec_count.m128ui);
}

static inline __m128i
vec_srli_epi32 (__m128i vsrc, unsigned int count)
{
   __m128i_union vec_count;

   if (count >= 32)
      return (__m128i) vec_splats (0);
   else if (count == 0)
      return vsrc;

   /* In VMX, all shift count fields must contain the same value */
   vec_count.m128si = (vector signed int) vec_splats (count);
   return (__m128i) vec_sr ((vector signed int) vsrc, vec_count.m128ui);
}

static inline __m128i
vec_srai_epi32 (__m128i vsrc, unsigned int count)
{
   __m128i_union vec_count;

   if (count >= 32)
      return (__m128i) vec_splats (0);
   else if (count == 0)
      return vsrc;

   /* In VMX, all shift count fields must contain the same value */
   vec_count.m128si = (vector signed int) vec_splats (count);
   return (__m128i) vec_sra ((vector signed int) vsrc, vec_count.m128ui);
}

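/*
 * Shift-count handling sketch: a count of 0 is a no-op, counts of 32
 * or more clear the vector, and otherwise the scalar count is splatted
 * across all lanes because vec_sl/vec_sr/vec_sra take a per-lane shift
 * amount.  E.g. vec_srai_epi32 on {-8,-8,-8,-8} with count 2 gives
 * {-2,-2,-2,-2}.  Note that returning 0 for counts >= 32 is a slight
 * departure from _mm_srai_epi32, which would replicate the sign bit.
 */
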
static inline __m128i
vec_cmpeq_epi32 (__m128i a, __m128i b)
{
   return (__m128i) vec_cmpeq ((vector signed int) a, (vector signed int) b);
}

static inline __m128i
vec_loadu_si128 (const uint32_t* src)
{
   __m128i_union vsrc;

#ifdef PIPE_ARCH_LITTLE_ENDIAN

   vsrc.m128ui = *((vector unsigned int *) src);

#else

   __m128i vmask, tmp1, tmp2;

   vmask = vec_lvsl(0, src);

   tmp1 = (__m128i) vec_ld (0, src);
   tmp2 = (__m128i) vec_ld (15, src);
   vsrc.m128ui = (vector unsigned int) vec_perm (tmp1, tmp2, vmask);

#endif

   return vsrc.m128i;
}

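/*
 * vec_loadu_si128 tolerates unaligned pointers: on little-endian VSX a
 * plain dereference suffices, while the (unreachable) big-endian path
 * shows the classic lvsl + two vec_ld + vec_perm idiom.  The
 * vec_load_si128/vec_store_si128 pair below is meant for 16-byte-aligned
 * addresses, like _mm_load_si128/_mm_store_si128; in particular vec_st
 * truncates the effective address to a 16-byte boundary.
 */
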
static inline __m128i
vec_load_si128 (const uint32_t* src)
{
   __m128i_union vsrc;

   vsrc.m128ui = *((vector unsigned int *) src);

   return vsrc.m128i;
}

static inline void
vec_store_si128 (uint32_t* dest, __m128i vdata)
{
   vec_st ((vector unsigned int) vdata, 0, dest);
}

/* Call this function ONLY on POWER8 and newer platforms */
static inline int
vec_movemask_epi8 (__m128i vsrc)
{
   __m128i_union vtemp;
   int result;

   vtemp.m128i = vec_vgbbd(vsrc);

#ifdef PIPE_ARCH_LITTLE_ENDIAN
   result = vtemp.ub[15] << 8 | vtemp.ub[7];
#else
   result = vtemp.ub[0] << 8 | vtemp.ub[8];
#endif

   return result;
}

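/*
 * A sketch of the result: like _mm_movemask_epi8, this yields a 16-bit
 * mask whose bit i is the most significant bit of byte i of vsrc (an
 * all-0xFF vector gives 0xFFFF, an all-zero vector gives 0).  vec_vgbbd
 * bit-transposes each 8-byte half, so the two boundary bytes read out
 * above each collect the MSBs of one half.
 */
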
static inline __m128i
vec_packs_epi16 (__m128i a, __m128i b)
{
#ifdef PIPE_ARCH_LITTLE_ENDIAN
   return (__m128i) vec_packs ((vector signed short) a,
                               (vector signed short) b);
#else
   return (__m128i) vec_packs ((vector signed short) b,
                               (vector signed short) a);
#endif
}

static inline __m128i
vec_packs_epi32 (__m128i a, __m128i b)
{
#ifdef PIPE_ARCH_LITTLE_ENDIAN
   return (__m128i) vec_packs ((vector signed int) a, (vector signed int) b);
#else
   return (__m128i) vec_packs ((vector signed int) b, (vector signed int) a);
#endif
}

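/*
 * In both pack variants the operands are swapped on big endian so
 * that, as with _mm_packs_epi16/_mm_packs_epi32, the saturated 'a'
 * elements land in the low half of the result and the 'b' elements in
 * the high half.
 */
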
#endif /* _ARCH_PWR8 && PIPE_ARCH_LITTLE_ENDIAN */

#endif /* U_PWR8_H_ */