Home | History | Annotate | Download | only in Tensor
      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog (at) gmail.com>
      5 //
      6 // This Source Code Form is subject to the terms of the Mozilla
      7 // Public License v. 2.0. If a copy of the MPL was not distributed
      8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
      9 
     10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
     11 #define EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
     12 
     13 namespace Eigen {
     14 namespace internal {
     15 
     16 
     17 template <uint64_t n>
     18 struct static_val {
     19   static const uint64_t value = n;
     20   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator uint64_t() const { return n; }
     21 
     22   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val() { }
     23 
     24   template <typename T>
     25   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val(const T& v) {
     26     eigen_assert(v == n);
     27   }
     28 };
     29 
     30 
     31 template <typename HIGH = uint64_t, typename LOW = uint64_t>
     32 struct TensorUInt128
     33 {
     34   HIGH high;
     35   LOW low;
     36 
     37   template<typename OTHER_HIGH, typename OTHER_LOW>
     38   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
     39   TensorUInt128(const TensorUInt128<OTHER_HIGH, OTHER_LOW>& other) : high(other.high), low(other.low) {
     40     EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), YOU_MADE_A_PROGRAMMING_MISTAKE);
     41     EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), YOU_MADE_A_PROGRAMMING_MISTAKE);
     42   }
     43 
     44   template<typename OTHER_HIGH, typename OTHER_LOW>
     45   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
     46   TensorUInt128& operator = (const TensorUInt128<OTHER_HIGH, OTHER_LOW>& other) {
     47     EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), YOU_MADE_A_PROGRAMMING_MISTAKE);
     48     EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), YOU_MADE_A_PROGRAMMING_MISTAKE);
     49     high = other.high;
     50     low = other.low;
     51     return *this;
     52   }
     53 
     54   template<typename T>
     55   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
     56   explicit TensorUInt128(const T& x) : high(0), low(x) {
     57     eigen_assert((static_cast<typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type>(x) <= NumTraits<uint64_t>::highest()));
     58     eigen_assert(x >= 0);
     59   }
     60 
     61   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
     62   TensorUInt128(HIGH y, LOW x) : high(y), low(x) { }
     63 
     64   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator LOW() const {
     65     return low;
     66   }
     67   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LOW lower() const {
     68     return low;
     69   }
     70   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HIGH upper() const {
     71     return high;
     72   }
     73 };
     74 
     75 
     76 template <typename HL, typename LL, typename HR, typename LR>
     77 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
     78 bool operator == (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
     79 {
     80   return (lhs.high == rhs.high) & (lhs.low == rhs.low);
     81 }
     82 
     83 template <typename HL, typename LL, typename HR, typename LR>
     84 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
     85 bool operator != (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
     86 {
     87   return (lhs.high != rhs.high) | (lhs.low != rhs.low);
     88 }
     89 
     90 template <typename HL, typename LL, typename HR, typename LR>
     91 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
     92 bool operator >= (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
     93 {
     94   if (lhs.high != rhs.high) {
     95     return lhs.high > rhs.high;
     96   }
     97   return lhs.low >= rhs.low;
     98 }
     99 
    100 template <typename HL, typename LL, typename HR, typename LR>
    101 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
    102 bool operator < (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
    103 {
    104   if (lhs.high != rhs.high) {
    105     return lhs.high < rhs.high;
    106   }
    107   return lhs.low < rhs.low;
    108 }
    109 
    110 template <typename HL, typename LL, typename HR, typename LR>
    111 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
    112 TensorUInt128<uint64_t, uint64_t> operator + (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
    113 {
    114   TensorUInt128<uint64_t, uint64_t> result(lhs.high + rhs.high, lhs.low + rhs.low);
    115   if (result.low < rhs.low) {
    116     result.high += 1;
    117   }
    118   return result;
    119 }
    120 
    121 template <typename HL, typename LL, typename HR, typename LR>
    122 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
    123 TensorUInt128<uint64_t, uint64_t> operator - (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
    124 {
    125   TensorUInt128<uint64_t, uint64_t> result(lhs.high - rhs.high, lhs.low - rhs.low);
    126   if (result.low > lhs.low) {
    127     result.high -= 1;
    128   }
    129   return result;
    130 }
    131 
    132 
    133 template <typename HL, typename LL, typename HR, typename LR>
    134 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    135 TensorUInt128<uint64_t, uint64_t> operator * (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
    136 {
    137   // Split each 128-bit integer into 4 32-bit integers, and then do the
    138   // multiplications by hand as follow:
    139   //   lhs      a  b  c  d
    140   //   rhs      e  f  g  h
    141   //           -----------
    142   //           ah bh ch dh
    143   //           bg cg dg
    144   //           cf df
    145   //           de
    146   // The result is stored in 2 64bit integers, high and low.
    147 
    148   const uint64_t LOW = 0x00000000FFFFFFFFLL;
    149   const uint64_t HIGH = 0xFFFFFFFF00000000LL;
    150 
    151   uint64_t d = lhs.low & LOW;
    152   uint64_t c = (lhs.low & HIGH) >> 32LL;
    153   uint64_t b = lhs.high & LOW;
    154   uint64_t a = (lhs.high & HIGH) >> 32LL;
    155 
    156   uint64_t h = rhs.low & LOW;
    157   uint64_t g = (rhs.low & HIGH) >> 32LL;
    158   uint64_t f = rhs.high & LOW;
    159   uint64_t e = (rhs.high & HIGH) >> 32LL;
    160 
    161   // Compute the low 32 bits of low
    162   uint64_t acc = d * h;
    163   uint64_t low = acc & LOW;
    164   //  Compute the high 32 bits of low. Add a carry every time we wrap around
    165   acc >>= 32LL;
    166   uint64_t carry = 0;
    167   uint64_t acc2 = acc + c * h;
    168   if (acc2 < acc) {
    169     carry++;
    170   }
    171   acc = acc2 + d * g;
    172   if (acc < acc2) {
    173     carry++;
    174   }
    175   low |= (acc << 32LL);
    176 
    177   // Carry forward the high bits of acc to initiate the computation of the
    178   // low 32 bits of high
    179   acc2 = (acc >> 32LL) | (carry << 32LL);
    180   carry = 0;
    181 
    182   acc = acc2 + b * h;
    183   if (acc < acc2) {
    184     carry++;
    185   }
    186   acc2 = acc + c * g;
    187   if (acc2 < acc) {
    188     carry++;
    189   }
    190   acc = acc2 + d * f;
    191   if (acc < acc2) {
    192     carry++;
    193   }
    194   uint64_t high = acc & LOW;
    195 
    196   // Start to compute the high 32 bits of high.
    197   acc2 = (acc >> 32LL) | (carry << 32LL);
    198 
    199   acc = acc2 + a * h;
    200   acc2 = acc + b * g;
    201   acc = acc2 + c * f;
    202   acc2 = acc + d * e;
    203   high |= (acc2 << 32LL);
    204 
    205   return TensorUInt128<uint64_t, uint64_t>(high, low);
    206 }
    207 
    208 template <typename HL, typename LL, typename HR, typename LR>
    209 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    210 TensorUInt128<uint64_t, uint64_t> operator / (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
    211 {
    212   if (rhs == TensorUInt128<static_val<0>, static_val<1> >(1)) {
    213     return TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low);
    214   } else if (lhs < rhs) {
    215     return TensorUInt128<uint64_t, uint64_t>(0);
    216   } else {
    217     // calculate the biggest power of 2 times rhs that's less than or equal to lhs
    218     TensorUInt128<uint64_t, uint64_t> power2(1);
    219     TensorUInt128<uint64_t, uint64_t> d(rhs);
    220     TensorUInt128<uint64_t, uint64_t> tmp(lhs - d);
    221     while (lhs >= d) {
    222       tmp = tmp - d;
    223       d = d + d;
    224       power2 = power2 + power2;
    225     }
    226 
    227     tmp = TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low);
    228     TensorUInt128<uint64_t, uint64_t> result(0);
    229     while (power2 != TensorUInt128<static_val<0>, static_val<0> >(0)) {
    230       if (tmp >= d) {
    231         tmp = tmp - d;
    232         result = result + power2;
    233       }
    234       // Shift right
    235       power2 = TensorUInt128<uint64_t, uint64_t>(power2.high >> 1, (power2.low >> 1) | (power2.high << 63));
    236       d = TensorUInt128<uint64_t, uint64_t>(d.high >> 1, (d.low >> 1) | (d.high << 63));
    237     }
    238 
    239     return result;
    240   }
    241 }
    242 
    243 
    244 }  // namespace internal
    245 }  // namespace Eigen
    246 
    247 
    248 #endif  // EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
    249