/* Valgrind VEX: priv/host_generic_simd128.c */
      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                            host_generic_simd128.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2010-2010 OpenWorks GbR
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 /* Generic helper functions for doing 128-bit SIMD arithmetic in cases
     32    where the instruction selectors cannot generate code in-line.
     33    These are purely back-end entities and cannot be seen/referenced
     34    from IR. */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "host_generic_simd128.h"
     38 
     39 
     40 /* Primitive helpers always take args of the real type (signed vs
     41    unsigned) but return an unsigned result, so there's no conversion
     42    weirdness when stuffing results back in the V128 union fields,
     43    which are all unsigned. */
     44 
     45 static inline UInt mul32 ( Int xx, Int yy )
     46 {
     47    Int t = ((Int)xx) * ((Int)yy);
     48    return toUInt(t);
     49 }
     50 
     51 static inline UInt max32S ( Int xx, Int yy )
     52 {
     53    return toUInt((xx > yy) ? xx : yy);
     54 }
     55 
     56 static inline UInt min32S ( Int xx, Int yy )
     57 {
     58    return toUInt((xx < yy) ? xx : yy);
     59 }
     60 
     61 static inline UInt max32U ( UInt xx, UInt yy )
     62 {
     63    return toUInt((xx > yy) ? xx : yy);
     64 }
     65 
     66 static inline UInt min32U ( UInt xx, UInt yy )
     67 {
     68    return toUInt((xx < yy) ? xx : yy);
     69 }
     70 
     71 static inline UShort max16U ( UShort xx, UShort yy )
     72 {
     73    return toUShort((xx > yy) ? xx : yy);
     74 }
     75 
     76 static inline UShort min16U ( UShort xx, UShort yy )
     77 {
     78    return toUShort((xx < yy) ? xx : yy);
     79 }
     80 
     81 static inline UChar max8S ( Char xx, Char yy )
     82 {
     83    return toUChar((xx > yy) ? xx : yy);
     84 }
     85 
     86 static inline UChar min8S ( Char xx, Char yy )
     87 {
     88    return toUChar((xx < yy) ? xx : yy);
     89 }
     90 
     91 static inline ULong cmpGT64S ( Long xx, Long yy )
     92 {
     93    return (((Long)xx) > ((Long)yy))
     94              ? 0xFFFFFFFFFFFFFFFFULL : 0ULL;
     95 }
     96 
     97 static inline ULong sar64 ( ULong v, UInt n )
     98 {
     99    return ((Long)v) >> n;
    100 }
    101 
    102 static inline UChar sar8 ( UChar v, UInt n )
    103 {
    104    return toUChar(((Char)v) >> n);
    105 }
    106 
    107 void h_generic_calc_Mul32x4 ( /*OUT*/V128* res,
    108                               V128* argL, V128* argR )
    109 {
    110    res->w32[0] = mul32(argL->w32[0], argR->w32[0]);
    111    res->w32[1] = mul32(argL->w32[1], argR->w32[1]);
    112    res->w32[2] = mul32(argL->w32[2], argR->w32[2]);
    113    res->w32[3] = mul32(argL->w32[3], argR->w32[3]);
    114 }
    115 
    116 void h_generic_calc_Max32Sx4 ( /*OUT*/V128* res,
    117                                V128* argL, V128* argR )
    118 {
    119    res->w32[0] = max32S(argL->w32[0], argR->w32[0]);
    120    res->w32[1] = max32S(argL->w32[1], argR->w32[1]);
    121    res->w32[2] = max32S(argL->w32[2], argR->w32[2]);
    122    res->w32[3] = max32S(argL->w32[3], argR->w32[3]);
    123 }
    124 
    125 void h_generic_calc_Min32Sx4 ( /*OUT*/V128* res,
    126                                V128* argL, V128* argR )
    127 {
    128    res->w32[0] = min32S(argL->w32[0], argR->w32[0]);
    129    res->w32[1] = min32S(argL->w32[1], argR->w32[1]);
    130    res->w32[2] = min32S(argL->w32[2], argR->w32[2]);
    131    res->w32[3] = min32S(argL->w32[3], argR->w32[3]);
    132 }
    133 
    134 void h_generic_calc_Max32Ux4 ( /*OUT*/V128* res,
    135                                V128* argL, V128* argR )
    136 {
    137    res->w32[0] = max32U(argL->w32[0], argR->w32[0]);
    138    res->w32[1] = max32U(argL->w32[1], argR->w32[1]);
    139    res->w32[2] = max32U(argL->w32[2], argR->w32[2]);
    140    res->w32[3] = max32U(argL->w32[3], argR->w32[3]);
    141 }
    142 
    143 void h_generic_calc_Min32Ux4 ( /*OUT*/V128* res,
    144                                V128* argL, V128* argR )
    145 {
    146    res->w32[0] = min32U(argL->w32[0], argR->w32[0]);
    147    res->w32[1] = min32U(argL->w32[1], argR->w32[1]);
    148    res->w32[2] = min32U(argL->w32[2], argR->w32[2]);
    149    res->w32[3] = min32U(argL->w32[3], argR->w32[3]);
    150 }
    151 
    152 void h_generic_calc_Max16Ux8 ( /*OUT*/V128* res,
    153                                V128* argL, V128* argR )
    154 {
    155    res->w16[0] = max16U(argL->w16[0], argR->w16[0]);
    156    res->w16[1] = max16U(argL->w16[1], argR->w16[1]);
    157    res->w16[2] = max16U(argL->w16[2], argR->w16[2]);
    158    res->w16[3] = max16U(argL->w16[3], argR->w16[3]);
    159    res->w16[4] = max16U(argL->w16[4], argR->w16[4]);
    160    res->w16[5] = max16U(argL->w16[5], argR->w16[5]);
    161    res->w16[6] = max16U(argL->w16[6], argR->w16[6]);
    162    res->w16[7] = max16U(argL->w16[7], argR->w16[7]);
    163 }
    164 
    165 void h_generic_calc_Min16Ux8 ( /*OUT*/V128* res,
    166                                V128* argL, V128* argR )
    167 {
    168    res->w16[0] = min16U(argL->w16[0], argR->w16[0]);
    169    res->w16[1] = min16U(argL->w16[1], argR->w16[1]);
    170    res->w16[2] = min16U(argL->w16[2], argR->w16[2]);
    171    res->w16[3] = min16U(argL->w16[3], argR->w16[3]);
    172    res->w16[4] = min16U(argL->w16[4], argR->w16[4]);
    173    res->w16[5] = min16U(argL->w16[5], argR->w16[5]);
    174    res->w16[6] = min16U(argL->w16[6], argR->w16[6]);
    175    res->w16[7] = min16U(argL->w16[7], argR->w16[7]);
    176 }
    177 
    178 void h_generic_calc_Max8Sx16 ( /*OUT*/V128* res,
    179                                V128* argL, V128* argR )
    180 {
    181    res->w8[ 0] = max8S(argL->w8[ 0], argR->w8[ 0]);
    182    res->w8[ 1] = max8S(argL->w8[ 1], argR->w8[ 1]);
    183    res->w8[ 2] = max8S(argL->w8[ 2], argR->w8[ 2]);
    184    res->w8[ 3] = max8S(argL->w8[ 3], argR->w8[ 3]);
    185    res->w8[ 4] = max8S(argL->w8[ 4], argR->w8[ 4]);
    186    res->w8[ 5] = max8S(argL->w8[ 5], argR->w8[ 5]);
    187    res->w8[ 6] = max8S(argL->w8[ 6], argR->w8[ 6]);
    188    res->w8[ 7] = max8S(argL->w8[ 7], argR->w8[ 7]);
    189    res->w8[ 8] = max8S(argL->w8[ 8], argR->w8[ 8]);
    190    res->w8[ 9] = max8S(argL->w8[ 9], argR->w8[ 9]);
    191    res->w8[10] = max8S(argL->w8[10], argR->w8[10]);
    192    res->w8[11] = max8S(argL->w8[11], argR->w8[11]);
    193    res->w8[12] = max8S(argL->w8[12], argR->w8[12]);
    194    res->w8[13] = max8S(argL->w8[13], argR->w8[13]);
    195    res->w8[14] = max8S(argL->w8[14], argR->w8[14]);
    196    res->w8[15] = max8S(argL->w8[15], argR->w8[15]);
    197 }
    198 
    199 void h_generic_calc_Min8Sx16 ( /*OUT*/V128* res,
    200                                V128* argL, V128* argR )
    201 {
    202    res->w8[ 0] = min8S(argL->w8[ 0], argR->w8[ 0]);
    203    res->w8[ 1] = min8S(argL->w8[ 1], argR->w8[ 1]);
    204    res->w8[ 2] = min8S(argL->w8[ 2], argR->w8[ 2]);
    205    res->w8[ 3] = min8S(argL->w8[ 3], argR->w8[ 3]);
    206    res->w8[ 4] = min8S(argL->w8[ 4], argR->w8[ 4]);
    207    res->w8[ 5] = min8S(argL->w8[ 5], argR->w8[ 5]);
    208    res->w8[ 6] = min8S(argL->w8[ 6], argR->w8[ 6]);
    209    res->w8[ 7] = min8S(argL->w8[ 7], argR->w8[ 7]);
    210    res->w8[ 8] = min8S(argL->w8[ 8], argR->w8[ 8]);
    211    res->w8[ 9] = min8S(argL->w8[ 9], argR->w8[ 9]);
    212    res->w8[10] = min8S(argL->w8[10], argR->w8[10]);
    213    res->w8[11] = min8S(argL->w8[11], argR->w8[11]);
    214    res->w8[12] = min8S(argL->w8[12], argR->w8[12]);
    215    res->w8[13] = min8S(argL->w8[13], argR->w8[13]);
    216    res->w8[14] = min8S(argL->w8[14], argR->w8[14]);
    217    res->w8[15] = min8S(argL->w8[15], argR->w8[15]);
    218 }
    219 
    220 void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res,
    221                                  V128* argL, V128* argR )
    222 {
    223    res->w64[0] = cmpGT64S(argL->w64[0], argR->w64[0]);
    224    res->w64[1] = cmpGT64S(argL->w64[1], argR->w64[1]);
    225 }
    226 
    227 /* ------------ Shifting ------------ */
    228 /* Note that because these primops are undefined if the shift amount
    229    equals or exceeds the lane width, the shift amount is masked so
    230    that the scalar shifts are always in range.  In fact, given the
    231    semantics of these primops (Sar64x2, etc) it is an error if in
    232    fact we are ever given an out-of-range shift amount.
    233 */
    234 void h_generic_calc_SarN64x2 ( /*OUT*/V128* res,
    235                                V128* argL, UInt nn)
    236 {
    237    /* vassert(nn < 64); */
    238    nn &= 63;
    239    res->w64[0] = sar64(argL->w64[0], nn);
    240    res->w64[1] = sar64(argL->w64[1], nn);
    241 }
    242 
    243 void h_generic_calc_SarN8x16 ( /*OUT*/V128* res,
    244                               V128* argL, UInt nn)
    245 {
    246    /* vassert(nn < 8); */
    247    nn &= 7;
    248    res->w8[ 0] = sar8(argL->w8[ 0], nn);
    249    res->w8[ 1] = sar8(argL->w8[ 1], nn);
    250    res->w8[ 2] = sar8(argL->w8[ 2], nn);
    251    res->w8[ 3] = sar8(argL->w8[ 3], nn);
    252    res->w8[ 4] = sar8(argL->w8[ 4], nn);
    253    res->w8[ 5] = sar8(argL->w8[ 5], nn);
    254    res->w8[ 6] = sar8(argL->w8[ 6], nn);
    255    res->w8[ 7] = sar8(argL->w8[ 7], nn);
    256    res->w8[ 8] = sar8(argL->w8[ 8], nn);
    257    res->w8[ 9] = sar8(argL->w8[ 9], nn);
    258    res->w8[10] = sar8(argL->w8[10], nn);
    259    res->w8[11] = sar8(argL->w8[11], nn);
    260    res->w8[12] = sar8(argL->w8[12], nn);
    261    res->w8[13] = sar8(argL->w8[13], nn);
    262    res->w8[14] = sar8(argL->w8[14], nn);
    263    res->w8[15] = sar8(argL->w8[15], nn);
    264 }
    265 
    266 /*---------------------------------------------------------------*/
    267 /*--- end                              host_generic_simd128.c ---*/
    268 /*---------------------------------------------------------------*/
    269