1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_ 17 #define TENSORFLOW_CORE_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_ 18 19 #define _USE_MATH_DEFINES 20 #include <math.h> 21 #include <cmath> 22 #undef _USE_MATH_DEFINES 23 24 #include <string.h> 25 #include <algorithm> 26 #include <type_traits> 27 28 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 29 #include "tensorflow/core/lib/bfloat16/bfloat16.h" 30 #include "tensorflow/core/lib/random/philox_random.h" 31 32 namespace tensorflow { 33 namespace random { 34 35 // Helper function to convert a 16-bit integer to a half between [0..1). 36 PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x); 37 // Helper function to convert a 16-bit integer to a bfloat16 between [0..1). 38 PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x); 39 // Helper function to convert a 32-bit integer to a float between [0..1). 40 PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x); 41 // Helper function to convert two 32-bit integers to a double between [0..1). 42 PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1); 43 44 // Computes a + b. Requires that the result is representable in the destination 45 // type and that b is not maximal (i.e. b + 1 is not 0). Notably, the addend b 46 // need *not* be representable in that type. (The condition on b excludes the 47 // extremal case INT_MIN + UINT_MAX = INT_MAX, which this function cannot 48 // compute.) 49 template <typename Int> 50 PHILOX_DEVICE_INLINE Int SignedAdd(Int a, 51 typename std::make_unsigned<Int>::type b) { 52 // Implementation note: both b_div_2 and b - b_div_2 are positive and 53 // representatble as Int. 54 auto b_div_2 = b >> 1; 55 return a + static_cast<Int>(b_div_2) + static_cast<Int>(b - b_div_2); 56 } 57 58 // A class that generates uniform distribution random numbers from the 59 // underlying random integer generator. 60 // Arguments: 61 // Generator: a generator type that returns a number of uint32 upon each 62 // invocation. It needs to define kResultElementCount for the 63 // sample count for each invocation, and ResultType for the 64 // actual returned sample type. 65 // RealType: the data type of the real numbers that will be returned by the 66 // distribution. This could be either float or double for now. 67 // This class is meant to be implemented through specialization. The default 68 // is not defined by design. 69 template <class Generator, typename RealType> 70 class UniformDistribution; 71 72 template <class Generator> 73 class UniformDistribution<Generator, Eigen::half> { 74 public: 75 // The number of elements that will be returned. 76 static const int kResultElementCount = Generator::kResultElementCount; 77 // Cost of generation of a single element (in cycles). 78 static const int kElementCost = 3; 79 // Indicate that this distribution may take variable number of samples 80 // during the runtime. 81 static const bool kVariableSamplesPerOutput = false; 82 typedef Array<Eigen::half, kResultElementCount> ResultType; 83 typedef Eigen::half ResultElementType; 84 85 PHILOX_DEVICE_INLINE 86 ResultType operator()(Generator* gen) { 87 typename Generator::ResultType sample = (*gen)(); 88 ResultType result; 89 for (int i = 0; i < kResultElementCount; ++i) { 90 result[i] = Uint16ToHalf(sample[i]); // Truncate the upper 16 bits. 91 } 92 return result; 93 } 94 }; 95 96 template <class Generator> 97 class UniformDistribution<Generator, bfloat16> { 98 public: 99 // The number of elements that will be returned. 100 static const int kResultElementCount = Generator::kResultElementCount; 101 // Cost of generation of a single element (in cycles). 102 static const int kElementCost = 3; 103 // Indicate that this distribution may take variable number of samples 104 // during the runtime. 105 static const bool kVariableSamplesPerOutput = false; 106 typedef Array<bfloat16, kResultElementCount> ResultType; 107 typedef bfloat16 ResultElementType; 108 109 PHILOX_DEVICE_INLINE 110 ResultType operator()(Generator* gen) { 111 typename Generator::ResultType sample = (*gen)(); 112 ResultType result; 113 for (int i = 0; i < kResultElementCount; ++i) { 114 result[i] = Uint16ToGfloat16(sample[i]); 115 } 116 return result; 117 } 118 }; 119 120 template <class Generator> 121 class UniformDistribution<Generator, float> { 122 public: 123 // The number of elements that will be returned. 124 static const int kResultElementCount = Generator::kResultElementCount; 125 // Cost of generation of a single element (in cycles). 126 static const int kElementCost = 3; 127 // Indicate that this distribution may take variable number of samples 128 // during the runtime. 129 static const bool kVariableSamplesPerOutput = false; 130 typedef Array<float, kResultElementCount> ResultType; 131 typedef float ResultElementType; 132 133 PHILOX_DEVICE_INLINE 134 ResultType operator()(Generator* gen) { 135 typename Generator::ResultType sample = (*gen)(); 136 ResultType result; 137 for (int i = 0; i < kResultElementCount; ++i) { 138 result[i] = Uint32ToFloat(sample[i]); 139 } 140 return result; 141 } 142 }; 143 144 template <class Generator> 145 class UniformDistribution<Generator, double> { 146 public: 147 // The number of elements that will be returned. 148 static const int kResultElementCount = Generator::kResultElementCount / 2; 149 // Cost of generation of a single element (in cycles). 150 static const int kElementCost = 3; 151 // Indicate that this distribution may take variable number of samples 152 // during the runtime. 153 static const bool kVariableSamplesPerOutput = false; 154 typedef Array<double, kResultElementCount> ResultType; 155 typedef double ResultElementType; 156 157 PHILOX_DEVICE_INLINE 158 ResultType operator()(Generator* gen) { 159 typename Generator::ResultType sample = (*gen)(); 160 ResultType result; 161 for (int i = 0; i < kResultElementCount; ++i) { 162 result[i] = Uint64ToDouble(sample[2 * i], sample[2 * i + 1]); 163 } 164 return result; 165 } 166 }; 167 168 template <class Generator> 169 class UniformDistribution<Generator, int32> { 170 public: 171 // The number of elements that will be returned. 172 static const int kResultElementCount = Generator::kResultElementCount; 173 // Cost of generation of a single element (in cycles). 174 static const int kElementCost = 3; 175 // Indicate that this distribution may take variable number of samples 176 // during the runtime. 177 static const bool kVariableSamplesPerOutput = false; 178 typedef Array<int32, kResultElementCount> ResultType; 179 typedef int32 ResultElementType; 180 181 // Must have lo < hi 182 UniformDistribution(int32 lo, int32 hi) 183 : lo_(lo), range_(static_cast<uint32>(hi) - static_cast<uint32>(lo)) {} 184 185 PHILOX_DEVICE_INLINE 186 ResultType operator()(Generator* gen) { 187 typename Generator::ResultType sample = (*gen)(); 188 ResultType result; 189 for (int i = 0; i < kResultElementCount; ++i) { 190 result[i] = SignedAdd(lo_, sample[i] % range_); 191 } 192 return result; 193 } 194 195 private: 196 // Note that lo_ is intentionally signed while range_ is intentionally 197 // unsigned. This is because hi - lo can overflow signed integers if 198 // lo < 0 < hi, but always fits in unsigned. 199 int32 lo_; 200 uint32 range_; 201 }; 202 203 template <class Generator> 204 class UniformDistribution<Generator, int64> { 205 public: 206 // The number of elements that will be returned. 207 static const int kResultElementCount = Generator::kResultElementCount / 2; 208 // Cost of generation of a single element (in cycles). 209 static const int kElementCost = 3; 210 // Indicate that this distribution may take variable number of samples 211 // during the runtime. 212 static const bool kVariableSamplesPerOutput = false; 213 typedef Array<int64, kResultElementCount> ResultType; 214 typedef int64 ResultElementType; 215 216 // Must have lo < hi 217 UniformDistribution(int64 lo, int64 hi) 218 : lo_(lo), range_(static_cast<uint64>(hi) - static_cast<uint64>(lo)) {} 219 220 PHILOX_DEVICE_INLINE 221 ResultType operator()(Generator* gen) { 222 typename Generator::ResultType sample = (*gen)(); 223 ResultType result; 224 for (int i = 0; i < kResultElementCount; ++i) { 225 auto bits = sample[2 * i] | static_cast<uint64>(sample[2 * i + 1]) << 32; 226 result[i] = SignedAdd(lo_, bits % range_); 227 } 228 return result; 229 } 230 231 private: 232 // Note that lo_ is intentionally signed while range_ is intentionally 233 // unsigned. This is because hi - lo can overflow signed integers if 234 // lo < 0 < hi, but always fits in unsigned. 235 int64 lo_; 236 uint64 range_; 237 }; 238 239 // A class that adapts the underlying native multiple samples to return a single 240 // sample at a time. 241 template <class Generator> 242 class SingleSampleAdapter { 243 public: 244 // The number of elements that will be returned. 245 static const int kResultElementCount = 1; 246 // The number of elements that will be returned by the underlying generator. 247 static const int kNativeElementCount = Generator::kResultElementCount; 248 typedef typename Generator::ResultElementType ResultType; 249 typedef typename Generator::ResultElementType ResultElementType; 250 251 PHILOX_DEVICE_INLINE 252 explicit SingleSampleAdapter(Generator* gen) 253 : generator_(gen), used_result_index_(Generator::kResultElementCount) {} 254 255 PHILOX_DEVICE_INLINE 256 ResultType operator()() { 257 if (used_result_index_ == Generator::kResultElementCount) { 258 unused_results_ = (*generator_)(); 259 used_result_index_ = 0; 260 } 261 262 return unused_results_[used_result_index_++]; 263 } 264 265 PHILOX_DEVICE_INLINE 266 void Skip(uint64 num_skips) { 267 if (!num_skips) { 268 return; 269 } 270 int num_unused_results = kNativeElementCount - used_result_index_; 271 if (num_skips <= num_unused_results) { 272 used_result_index_ += num_skips; 273 return; 274 } 275 num_skips -= num_unused_results; 276 used_result_index_ = kNativeElementCount; 277 SkipFromGenerator(num_skips / kNativeElementCount); 278 num_skips = num_skips % kNativeElementCount; 279 if (num_skips) { 280 unused_results_ = (*generator_)(); 281 used_result_index_ = num_skips; 282 } 283 } 284 285 private: 286 // This implementation iteratively skips over `num_skips` samples 287 // from `generator_`. There is an O(1) implementation for PhiloxRandom 288 // in random_distributions.cc. 289 PHILOX_DEVICE_INLINE 290 void SkipFromGenerator(uint64 num_skips) { 291 while (num_skips--) { 292 (*generator_)(); 293 } 294 } 295 296 Generator* generator_; 297 typename Generator::ResultType unused_results_; 298 int used_result_index_; 299 }; 300 301 // A class that generates unit normal distribution random numbers from the 302 // underlying random integer generator. 303 // Arguments: 304 // Generator: a generator type that returns a number of uint32 upon each 305 // each invocation. It needs to define kResultElementCount for the 306 // sample count for each invocation, and ResultType for actual 307 // returned sample type. 308 // RealType: the data type of the real numbers that will be returned by the 309 // distribution. This could be either float or double for now. 310 // This class is meant to be implemented through specialization. The default 311 // is not defined by design. 312 template <class Generator, typename RealType> 313 class NormalDistribution; 314 315 PHILOX_DEVICE_INLINE 316 void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1); 317 318 PHILOX_DEVICE_INLINE 319 void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0, 320 double* d1); 321 322 // Exactly like the float version, except that we convert to half afterwards; 323 // since we don't have half-precision sin/cos even on GPUs, there's nothing to 324 // gain from working in half internally. 325 template <class Generator> 326 class NormalDistribution<Generator, Eigen::half> { 327 public: 328 // The number of elements that will be returned. 329 static const int kResultElementCount = Generator::kResultElementCount; 330 // Cost of generation of a single element (in cycles). 331 static const int kElementCost = 70; 332 // Indicate that this distribution may take variable number of samples 333 // during the runtime. 334 static const bool kVariableSamplesPerOutput = false; 335 typedef Array<Eigen::half, kResultElementCount> ResultType; 336 typedef Eigen::half ResultElementType; 337 338 PHILOX_DEVICE_INLINE 339 ResultType operator()(Generator* gen) { 340 typename Generator::ResultType sample = (*gen)(); 341 ResultType result; 342 for (int i = 0; i < kResultElementCount; i += 2) { 343 float f[2]; 344 BoxMullerFloat(sample[i], sample[i + 1], &f[0], &f[1]); 345 result[i] = Eigen::half(f[0]); 346 result[i + 1] = Eigen::half(f[1]); 347 } 348 return result; 349 } 350 }; 351 352 template <class Generator> 353 class NormalDistribution<Generator, bfloat16> { 354 public: 355 // The number of elements that will be returned. 356 static const int kResultElementCount = Generator::kResultElementCount; 357 // Cost of generation of a single element (in cycles). 358 static const int kElementCost = 70; 359 // Indicate that this distribution may take variable number of samples 360 // during the runtime. 361 static const bool kVariableSamplesPerOutput = false; 362 typedef Array<bfloat16, kResultElementCount> ResultType; 363 typedef bfloat16 ResultElementType; 364 365 PHILOX_DEVICE_INLINE 366 ResultType operator()(Generator* gen) { 367 typename Generator::ResultType sample = (*gen)(); 368 ResultType result; 369 static_assert(kResultElementCount % 2 == 0, 370 "kResultElementCount should be an even number"); 371 for (int i = 0; i < kResultElementCount; i += 2) { 372 float f[2]; 373 // Box-Muller transform requires processing 2 elements at a time. 374 BoxMullerFloat(sample[i], sample[i + 1], &f[0], &f[1]); 375 result[i] = bfloat16(f[0]); 376 result[i + 1] = bfloat16(f[1]); 377 } 378 return result; 379 } 380 }; 381 382 template <class Generator> 383 class NormalDistribution<Generator, float> { 384 public: 385 // The number of elements that will be returned. 386 static const int kResultElementCount = Generator::kResultElementCount; 387 // Cost of generation of a single element (in cycles). 388 static const int kElementCost = 70; 389 // Indicate that this distribution may take variable number of samples 390 // during the runtime. 391 static const bool kVariableSamplesPerOutput = false; 392 typedef Array<float, kResultElementCount> ResultType; 393 typedef float ResultElementType; 394 395 PHILOX_DEVICE_INLINE 396 ResultType operator()(Generator* gen) { 397 typename Generator::ResultType sample = (*gen)(); 398 ResultType result; 399 for (int i = 0; i < kResultElementCount; i += 2) { 400 BoxMullerFloat(sample[i], sample[i + 1], &result[i], &result[i + 1]); 401 } 402 return result; 403 } 404 }; 405 406 template <class Generator> 407 class NormalDistribution<Generator, double> { 408 public: 409 // The number of elements that will be returned. 410 static const int kResultElementCount = Generator::kResultElementCount / 2; 411 // Cost of generation of a single element (in cycles). 412 static const int kElementCost = 70; 413 // Indicate that this distribution may take variable number of samples 414 // during the runtime. 415 static const bool kVariableSamplesPerOutput = false; 416 typedef Array<double, kResultElementCount> ResultType; 417 typedef double ResultElementType; 418 419 PHILOX_DEVICE_INLINE 420 ResultType operator()(Generator* gen) { 421 typename Generator::ResultType sample = (*gen)(); 422 ResultType result; 423 for (int i = 0; i < kResultElementCount; i += 2) { 424 const int i2 = 2 * i; 425 BoxMullerDouble(sample[i2], sample[i2 + 1], sample[i2 + 2], 426 sample[i2 + 3], &result[i], &result[i + 1]); 427 } 428 return result; 429 } 430 }; 431 432 // A class that returns standard normal distribution between 433 // [-kTruncateValue, kTruncateValue]. 434 // Arguments: 435 // Generator: a generator type that returns a number of uint32 upon each 436 // each invocation. It needs to define kResultElementCount for the 437 // sample count for each invocation, and ResultType for actual 438 // returned sample type. 439 // RealType: the data type of the real numbers that will be returned by the 440 // distribution. This could be either float or double for now. 441 // This class is meant to be implemented through specialization. The default 442 // is not defined by design. 443 template <class SingleSampleGenerator, typename RealType> 444 class TruncatedNormalDistribution; 445 446 // Exactly like the float version, except that we convert to half afterwards; 447 // since we don't have half-precision sin/cos even on GPUs, there's nothing to 448 // gain from working in half internally. 449 template <class SingleSampleGenerator> 450 class TruncatedNormalDistribution<SingleSampleGenerator, Eigen::half> { 451 public: 452 // The number of elements that will be returned. 453 static const int kResultElementCount = 454 SingleSampleGenerator::kNativeElementCount; 455 // Cost of generation of a single element (in cycles). 456 static const int kElementCost = 90; 457 // Indicate that this distribution may take variable number of samples 458 // during the runtime. 459 static const bool kVariableSamplesPerOutput = true; 460 // The threshold where the normal distribution is truncated. 461 const float kTruncateValue = 2.0f; 462 463 typedef Array<Eigen::half, kResultElementCount> ResultType; 464 typedef Eigen::half ResultElementType; 465 466 PHILOX_DEVICE_INLINE 467 ResultType operator()(SingleSampleGenerator* gen) { 468 ResultType results; 469 int index = 0; 470 while (true) { 471 // Repeatedly take samples from the normal distribution, until we have 472 // the desired number of elements that fall within the pre-defined cutoff 473 // threshold. 474 const uint32 x0 = (*gen)(); 475 const uint32 x1 = (*gen)(); 476 float f[2]; 477 BoxMullerFloat(x0, x1, &f[0], &f[1]); 478 479 for (int i = 0; i < 2; ++i) { 480 if (Eigen::numext::abs(f[i]) < kTruncateValue) { 481 results[index++] = Eigen::half(f[i]); 482 if (index >= kResultElementCount) { 483 return results; 484 } 485 } 486 } 487 } 488 } 489 }; 490 491 template <class SingleSampleGenerator> 492 class TruncatedNormalDistribution<SingleSampleGenerator, bfloat16> { 493 public: 494 // The number of elements that will be returned. 495 static const int kResultElementCount = 496 SingleSampleGenerator::kNativeElementCount; 497 // Cost of generation of a single element (in cycles). 498 static const int kElementCost = 90; 499 // Indicate that this distribution may take variable number of samples 500 // during the runtime. 501 static const bool kVariableSamplesPerOutput = true; 502 // The threshold where the normal distribution is truncated. 503 const float kTruncateValue = 2.0f; 504 505 typedef Array<bfloat16, kResultElementCount> ResultType; 506 typedef bfloat16 ResultElementType; 507 508 PHILOX_DEVICE_INLINE 509 ResultType operator()(SingleSampleGenerator* gen) { 510 ResultType results; 511 int index = 0; 512 while (true) { 513 // Repeatedly take samples from the normal distribution, until we have 514 // the desired number of elements that fall within the pre-defined cutoff 515 // threshold. 516 const uint32 x0 = (*gen)(); 517 const uint32 x1 = (*gen)(); 518 float f[2]; 519 BoxMullerFloat(x0, x1, &f[0], &f[1]); 520 521 for (int i = 0; i < 2; ++i) { 522 if (Eigen::numext::abs(f[i]) < kTruncateValue) { 523 results[index++] = bfloat16(f[i]); 524 if (index >= kResultElementCount) { 525 return results; 526 } 527 } 528 } 529 } 530 } 531 }; 532 533 // Partial specialization for float. 534 template <class SingleSampleGenerator> 535 class TruncatedNormalDistribution<SingleSampleGenerator, float> { 536 public: 537 // The number of elements that will be returned. 538 static const int kResultElementCount = 539 SingleSampleGenerator::kNativeElementCount; 540 // Cost of generation of a single element (in cycles). 541 static const int kElementCost = 90; 542 // Indicate that this distribution may take variable number of samples 543 // during the runtime. 544 static const bool kVariableSamplesPerOutput = true; 545 // The threshold where the normal distribution is truncated. 546 const float kTruncateValue = 2.0f; 547 548 typedef Array<float, kResultElementCount> ResultType; 549 typedef float ResultElementType; 550 551 PHILOX_DEVICE_INLINE 552 ResultType operator()(SingleSampleGenerator* gen) { 553 ResultType results; 554 int index = 0; 555 while (true) { 556 // Repeatedly take samples from the normal distribution, until we have 557 // the desired number of elements that fall within the pre-defined cutoff 558 // threshold. 559 const uint32 x0 = (*gen)(); 560 const uint32 x1 = (*gen)(); 561 float f[2]; 562 BoxMullerFloat(x0, x1, &f[0], &f[1]); 563 564 for (int i = 0; i < 2; ++i) { 565 if (Eigen::numext::abs(f[i]) < kTruncateValue) { 566 results[index++] = f[i]; 567 if (index >= kResultElementCount) { 568 return results; 569 } 570 } 571 } 572 } 573 } 574 }; 575 576 // Partial specialization for double. 577 template <class SingleSampleGenerator> 578 class TruncatedNormalDistribution<SingleSampleGenerator, double> { 579 public: 580 // The number of elements that will be returned. 581 static const int kResultElementCount = 582 (SingleSampleGenerator::kNativeElementCount > 1) 583 ? SingleSampleGenerator::kNativeElementCount / 2 584 : 1; 585 // Cost of generation of a single element (in cycles). 586 static const int kElementCost = 90; 587 // Indicate that this distribution may take variable number of samples 588 // during the runtime. 589 static const bool kVariableSamplesPerOutput = true; 590 typedef Array<double, kResultElementCount> ResultType; 591 typedef double ResultElementType; 592 const double kTruncateValue = 2.0; 593 594 PHILOX_DEVICE_INLINE 595 ResultType operator()(SingleSampleGenerator* gen) { 596 ResultType results; 597 int index = 0; 598 while (1) { 599 const uint32 x0 = (*gen)(); 600 const uint32 x1 = (*gen)(); 601 const uint32 x2 = (*gen)(); 602 const uint32 x3 = (*gen)(); 603 double d[2]; 604 BoxMullerDouble(x0, x1, x2, x3, &d[0], &d[1]); 605 606 for (int i = 0; i < 2; ++i) { 607 if (Eigen::numext::abs(d[i]) < kTruncateValue) { 608 results[index++] = d[i]; 609 if (index >= kResultElementCount) { 610 return results; 611 } 612 } 613 } 614 } 615 } 616 }; 617 618 // Helper function to convert two 32-bit uniform integers to two floats 619 // under the unit normal distribution. 620 PHILOX_DEVICE_INLINE 621 void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1) { 622 // This function implements the Box-Muller transform: 623 // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form 624 // Do not send a really small number to log(). 625 // We cannot mark "epsilon" as "static const" because NVCC would complain 626 const float epsilon = 1.0e-7f; 627 float u1 = Uint32ToFloat(x0); 628 if (u1 < epsilon) { 629 u1 = epsilon; 630 } 631 const float v1 = 2.0f * M_PI * Uint32ToFloat(x1); 632 const float u2 = Eigen::numext::sqrt(-2.0f * Eigen::numext::log(u1)); 633 #if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__) 634 *f0 = Eigen::numext::sin(v1); 635 *f1 = Eigen::numext::cos(v1); 636 #else 637 sincosf(v1, f0, f1); 638 #endif 639 *f0 *= u2; 640 *f1 *= u2; 641 } 642 643 // Helper function to convert four 32-bit uniform integers to two doubles 644 // under the unit normal distribution. 645 PHILOX_DEVICE_INLINE 646 void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0, 647 double* d1) { 648 // This function implements the Box-Muller transform: 649 // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form 650 // Do not send a really small number to log(). 651 // We cannot mark "epsilon" as "static const" because NVCC would complain 652 const double epsilon = 1.0e-7; 653 double u1 = Uint64ToDouble(x0, x1); 654 if (u1 < epsilon) { 655 u1 = epsilon; 656 } 657 const double v1 = 2 * M_PI * Uint64ToDouble(x2, x3); 658 const double u2 = Eigen::numext::sqrt(-2.0 * Eigen::numext::log(u1)); 659 #if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__) 660 *d0 = Eigen::numext::sin(v1); 661 *d1 = Eigen::numext::cos(v1); 662 #else 663 sincos(v1, d0, d1); 664 #endif 665 *d0 *= u2; 666 *d1 *= u2; 667 } 668 669 // Helper function to convert an 16-bit integer to a half between [0..1). 670 PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x) { 671 // IEEE754 halfs are formatted as follows (MSB first): 672 // sign(1) exponent(5) mantissa(10) 673 // Conceptually construct the following: 674 // sign == 0 675 // exponent == 15 -- an excess 15 representation of a zero exponent 676 // mantissa == 10 random bits 677 const uint16 man = x & 0x3ffu; // 10 bit mantissa 678 const uint16 exp = static_cast<uint16>(15); 679 const uint16 val = (exp << 10) | man; 680 681 Eigen::half result; 682 result.x = val; 683 return result - Eigen::half(1.0); 684 } 685 686 // Helper function to convert an 16-bit integer to a bfloat16 between [0..1). 687 // This can create a uniform distribution of values between [0..1). 688 PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x) { 689 // bfloat are formatted as follows (MSB first): 690 // sign(1) exponent(8) mantissa(7) 691 // Conceptually construct the following: 692 // sign == 0 693 // exponent == 127 -- an excess 127 representation of a zero exponent 694 // mantissa == 7 random bits 695 const uint16 man = x & 0x7fu; // 7 bit mantissa 696 const uint16 exp = static_cast<uint16>(127); 697 const uint16 val = (exp << 7) | man; 698 699 bfloat16 result; 700 memcpy(&result, &val, sizeof(val)); 701 // The mantissa has an implicit leading 1, so the above code creates a value 702 // in [1, 2). The minus will not cause a rounding that makes the result 1. 703 // Instead it will just be close to 1. 704 return result - bfloat16(1.0); 705 } 706 707 // Helper function to convert an 32-bit integer to a float between [0..1). 708 PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x) { 709 // IEEE754 floats are formatted as follows (MSB first): 710 // sign(1) exponent(8) mantissa(23) 711 // Conceptually construct the following: 712 // sign == 0 713 // exponent == 127 -- an excess 127 representation of a zero exponent 714 // mantissa == 23 random bits 715 const uint32 man = x & 0x7fffffu; // 23 bit mantissa 716 const uint32 exp = static_cast<uint32>(127); 717 const uint32 val = (exp << 23) | man; 718 719 // Assumes that endian-ness is same for float and uint32. 720 float result; 721 memcpy(&result, &val, sizeof(val)); 722 return result - 1.0f; 723 } 724 725 // Helper function to convert two 32-bit integers to a double between [0..1). 726 PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1) { 727 // IEEE754 doubles are formatted as follows (MSB first): 728 // sign(1) exponent(11) mantissa(52) 729 // Conceptually construct the following: 730 // sign == 0 731 // exponent == 1023 -- an excess 1023 representation of a zero exponent 732 // mantissa == 52 random bits 733 const uint32 mhi = x0 & 0xfffffu; // upper 20 bits of mantissa 734 const uint32 mlo = x1; // lower 32 bits of mantissa 735 const uint64 man = (static_cast<uint64>(mhi) << 32) | mlo; // mantissa 736 const uint64 exp = static_cast<uint64>(1023); 737 const uint64 val = (exp << 52) | man; 738 // Assumes that endian-ness is same for double and uint64. 739 double result; 740 memcpy(&result, &val, sizeof(val)); 741 return result - 1.0; 742 } 743 744 } // namespace random 745 } // namespace tensorflow 746 747 #endif // TENSORFLOW_CORE_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_ 748