1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog (at) gmail.com> 5 // 6 // This Source Code Form is subject to the terms of the Mozilla 7 // Public License v. 2.0. If a copy of the MPL was not distributed 8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H 11 #define EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H 12 13 namespace Eigen { 14 namespace internal { 15 16 namespace { 17 18 EIGEN_DEVICE_FUNC uint64_t get_random_seed() { 19 #ifdef __CUDA_ARCH__ 20 // We don't support 3d kernels since we currently only use 1 and 21 // 2d kernels. 22 assert(threadIdx.z == 0); 23 return clock64() + 24 blockIdx.x * blockDim.x + threadIdx.x + 25 gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y); 26 27 #elif defined _WIN32 28 // Use the current time as a baseline. 29 SYSTEMTIME st; 30 GetSystemTime(&st); 31 int time = st.wSecond + 1000 * st.wMilliseconds; 32 // Mix in a random number to make sure that we get different seeds if 33 // we try to generate seeds faster than the clock resolution. 34 // We need 2 random values since the generator only generate 16 bits at 35 // a time (https://msdn.microsoft.com/en-us/library/398ax69y.aspx) 36 int rnd1 = ::rand(); 37 int rnd2 = ::rand(); 38 uint64_t rnd = (rnd1 | rnd2 << 16) ^ time; 39 return rnd; 40 41 #elif defined __APPLE__ 42 // Same approach as for win32, except that the random number generator 43 // is better (// https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man3/random.3.html#//apple_ref/doc/man/3/random). 44 uint64_t rnd = ::random() ^ mach_absolute_time(); 45 return rnd; 46 47 #else 48 // Augment the current time with pseudo random number generation 49 // to ensure that we get different seeds if we try to generate seeds 50 // faster than the clock resolution. 51 timespec ts; 52 clock_gettime(CLOCK_REALTIME, &ts); 53 uint64_t rnd = ::random() ^ ts.tv_nsec; 54 return rnd; 55 #endif 56 } 57 58 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state) { 59 // TODO: Unify with the implementation in the non blocking thread pool. 60 uint64_t current = *state; 61 // Update the internal state 62 *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; 63 // Generate the random output (using the PCG-XSH-RS scheme) 64 return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61))); 65 } 66 67 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) { 68 seed = seed ? seed : get_random_seed(); 69 return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; 70 } 71 72 } // namespace 73 74 75 template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 76 T RandomToTypeUniform(uint64_t* state) { 77 unsigned rnd = PCG_XSH_RS_generator(state); 78 return static_cast<T>(rnd); 79 } 80 81 82 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 83 Eigen::half RandomToTypeUniform<Eigen::half>(uint64_t* state) { 84 Eigen::half result; 85 // Generate 10 random bits for the mantissa 86 unsigned rnd = PCG_XSH_RS_generator(state); 87 result.x = static_cast<uint16_t>(rnd & 0x3ffu); 88 // Set the exponent 89 result.x |= (static_cast<uint16_t>(15) << 10); 90 // Return the final result 91 return result - Eigen::half(1.0f); 92 } 93 94 95 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 96 float RandomToTypeUniform<float>(uint64_t* state) { 97 typedef union { 98 uint32_t raw; 99 float fp; 100 } internal; 101 internal result; 102 // Generate 23 random bits for the mantissa mantissa 103 const unsigned rnd = PCG_XSH_RS_generator(state); 104 result.raw = rnd & 0x7fffffu; 105 // Set the exponent 106 result.raw |= (static_cast<uint32_t>(127) << 23); 107 // Return the final result 108 return result.fp - 1.0f; 109 } 110 111 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 112 double RandomToTypeUniform<double>(uint64_t* state) { 113 typedef union { 114 uint64_t raw; 115 double dp; 116 } internal; 117 internal result; 118 result.raw = 0; 119 // Generate 52 random bits for the mantissa 120 // First generate the upper 20 bits 121 unsigned rnd1 = PCG_XSH_RS_generator(state) & 0xfffffu; 122 // The generate the lower 32 bits 123 unsigned rnd2 = PCG_XSH_RS_generator(state); 124 result.raw = (static_cast<uint64_t>(rnd1) << 32) | rnd2; 125 // Set the exponent 126 result.raw |= (static_cast<uint64_t>(1023) << 52); 127 // Return the final result 128 return result.dp - 1.0; 129 } 130 131 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 132 std::complex<float> RandomToTypeUniform<std::complex<float> >(uint64_t* state) { 133 return std::complex<float>(RandomToTypeUniform<float>(state), 134 RandomToTypeUniform<float>(state)); 135 } 136 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 137 std::complex<double> RandomToTypeUniform<std::complex<double> >(uint64_t* state) { 138 return std::complex<double>(RandomToTypeUniform<double>(state), 139 RandomToTypeUniform<double>(state)); 140 } 141 142 template <typename T> class UniformRandomGenerator { 143 public: 144 static const bool PacketAccess = true; 145 146 // Uses the given "seed" if non-zero, otherwise uses a random seed. 147 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( 148 uint64_t seed = 0) { 149 m_state = PCG_XSH_RS_state(seed); 150 } 151 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( 152 const UniformRandomGenerator& other) { 153 m_state = other.m_state; 154 } 155 156 template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 157 T operator()(Index i) const { 158 uint64_t local_state = m_state + i; 159 T result = RandomToTypeUniform<T>(&local_state); 160 m_state = local_state; 161 return result; 162 } 163 164 template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 165 Packet packetOp(Index i) const { 166 const int packetSize = internal::unpacket_traits<Packet>::size; 167 EIGEN_ALIGN_MAX T values[packetSize]; 168 uint64_t local_state = m_state + i; 169 for (int j = 0; j < packetSize; ++j) { 170 values[j] = RandomToTypeUniform<T>(&local_state); 171 } 172 m_state = local_state; 173 return internal::pload<Packet>(values); 174 } 175 176 private: 177 mutable uint64_t m_state; 178 }; 179 180 template <typename Scalar> 181 struct functor_traits<UniformRandomGenerator<Scalar> > { 182 enum { 183 // Rough estimate for floating point, multiplied by ceil(sizeof(T) / sizeof(float)). 184 Cost = 12 * NumTraits<Scalar>::AddCost * 185 ((sizeof(Scalar) + sizeof(float) - 1) / sizeof(float)), 186 PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess 187 }; 188 }; 189 190 191 192 template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 193 T RandomToTypeNormal(uint64_t* state) { 194 // Use the ratio of uniform method to generate numbers following a normal 195 // distribution. See for example Numerical Recipes chapter 7.3.9 for the 196 // details. 197 T u, v, q; 198 do { 199 u = RandomToTypeUniform<T>(state); 200 v = T(1.7156) * (RandomToTypeUniform<T>(state) - T(0.5)); 201 const T x = u - T(0.449871); 202 const T y = numext::abs(v) + T(0.386595); 203 q = x*x + y * (T(0.196)*y - T(0.25472)*x); 204 } while (q > T(0.27597) && 205 (q > T(0.27846) || v*v > T(-4) * numext::log(u) * u*u)); 206 207 return v/u; 208 } 209 210 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 211 std::complex<float> RandomToTypeNormal<std::complex<float> >(uint64_t* state) { 212 return std::complex<float>(RandomToTypeNormal<float>(state), 213 RandomToTypeNormal<float>(state)); 214 } 215 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 216 std::complex<double> RandomToTypeNormal<std::complex<double> >(uint64_t* state) { 217 return std::complex<double>(RandomToTypeNormal<double>(state), 218 RandomToTypeNormal<double>(state)); 219 } 220 221 222 template <typename T> class NormalRandomGenerator { 223 public: 224 static const bool PacketAccess = true; 225 226 // Uses the given "seed" if non-zero, otherwise uses a random seed. 227 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) { 228 m_state = PCG_XSH_RS_state(seed); 229 } 230 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator( 231 const NormalRandomGenerator& other) { 232 m_state = other.m_state; 233 } 234 235 template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 236 T operator()(Index i) const { 237 uint64_t local_state = m_state + i; 238 T result = RandomToTypeNormal<T>(&local_state); 239 m_state = local_state; 240 return result; 241 } 242 243 template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 244 Packet packetOp(Index i) const { 245 const int packetSize = internal::unpacket_traits<Packet>::size; 246 EIGEN_ALIGN_MAX T values[packetSize]; 247 uint64_t local_state = m_state + i; 248 for (int j = 0; j < packetSize; ++j) { 249 values[j] = RandomToTypeNormal<T>(&local_state); 250 } 251 m_state = local_state; 252 return internal::pload<Packet>(values); 253 } 254 255 private: 256 mutable uint64_t m_state; 257 }; 258 259 260 template <typename Scalar> 261 struct functor_traits<NormalRandomGenerator<Scalar> > { 262 enum { 263 // On average, we need to generate about 3 random numbers 264 // 15 mul, 8 add, 1.5 logs 265 Cost = 3 * functor_traits<UniformRandomGenerator<Scalar> >::Cost + 266 15 * NumTraits<Scalar>::AddCost + 8 * NumTraits<Scalar>::AddCost + 267 3 * functor_traits<scalar_log_op<Scalar> >::Cost / 2, 268 PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess 269 }; 270 }; 271 272 273 } // end namespace internal 274 } // end namespace Eigen 275 276 #endif // EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H 277