Home | History | Annotate | Download | only in random
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
     17 #define TENSORFLOW_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
     18 
     19 #define _USE_MATH_DEFINES
     20 #include <math.h>
     21 #include <cmath>
     22 #undef _USE_MATH_DEFINES
     23 
     24 #include <string.h>
     25 #include <algorithm>
     26 
     27 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
     28 #include "tensorflow/core/lib/random/philox_random.h"
     29 
     30 namespace tensorflow {
     31 namespace random {
     32 
     33 // Helper function to convert a 16-bit integer to a half between [0..1).
     34 PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x);
     35 // Helper function to convert a 32-bit integer to a float between [0..1).
     36 PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x);
     37 // Helper function to convert two 32-bit integers to a double between [0..1).
     38 PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1);
     39 
     40 // A class that generates uniform distribution random numbers from the
     41 // underlying random integer generator.
     42 // Arguments:
     43 //   Generator: a generator type that returns a number of uint32 upon each
     44 //              invocation. It needs to define kResultElementCount for the
     45 //              sample count for each invocation, and ResultType for the
     46 //              actual returned sample type.
     47 //   RealType: the data type of the real numbers that will be returned by the
     48 //             distribution. This could be either float or double for now.
     49 // This class is meant to be implemented through specialization. The default
     50 // is not defined by design.
     51 template <class Generator, typename RealType>
     52 class UniformDistribution;
     53 
     54 template <class Generator>
     55 class UniformDistribution<Generator, Eigen::half> {
     56  public:
     57   // The number of elements that will be returned.
     58   static const int kResultElementCount = Generator::kResultElementCount;
     59   // Cost of generation of a single element (in cycles).
     60   static const int kElementCost = 3;
     61   // Indicate that this distribution may take variable number of samples
     62   // during the runtime.
     63   static const bool kVariableSamplesPerOutput = false;
     64   typedef Array<Eigen::half, kResultElementCount> ResultType;
     65   typedef Eigen::half ResultElementType;
     66 
     67   PHILOX_DEVICE_INLINE
     68   ResultType operator()(Generator* gen) {
     69     typename Generator::ResultType sample = (*gen)();
     70     ResultType result;
     71     for (int i = 0; i < kResultElementCount; ++i) {
     72       result[i] = Uint16ToHalf(sample[i]);  // Truncate the upper 16 bits.
     73     }
     74     return result;
     75   }
     76 };
     77 
     78 template <class Generator>
     79 class UniformDistribution<Generator, float> {
     80  public:
     81   // The number of elements that will be returned.
     82   static const int kResultElementCount = Generator::kResultElementCount;
     83   // Cost of generation of a single element (in cycles).
     84   static const int kElementCost = 3;
     85   // Indicate that this distribution may take variable number of samples
     86   // during the runtime.
     87   static const bool kVariableSamplesPerOutput = false;
     88   typedef Array<float, kResultElementCount> ResultType;
     89   typedef float ResultElementType;
     90 
     91   PHILOX_DEVICE_INLINE
     92   ResultType operator()(Generator* gen) {
     93     typename Generator::ResultType sample = (*gen)();
     94     ResultType result;
     95     for (int i = 0; i < kResultElementCount; ++i) {
     96       result[i] = Uint32ToFloat(sample[i]);
     97     }
     98     return result;
     99   }
    100 };
    101 
    102 template <class Generator>
    103 class UniformDistribution<Generator, double> {
    104  public:
    105   // The number of elements that will be returned.
    106   static const int kResultElementCount = Generator::kResultElementCount / 2;
    107   // Cost of generation of a single element (in cycles).
    108   static const int kElementCost = 3;
    109   // Indicate that this distribution may take variable number of samples
    110   // during the runtime.
    111   static const bool kVariableSamplesPerOutput = false;
    112   typedef Array<double, kResultElementCount> ResultType;
    113   typedef double ResultElementType;
    114 
    115   PHILOX_DEVICE_INLINE
    116   ResultType operator()(Generator* gen) {
    117     typename Generator::ResultType sample = (*gen)();
    118     ResultType result;
    119     for (int i = 0; i < kResultElementCount; ++i) {
    120       result[i] = Uint64ToDouble(sample[2 * i], sample[2 * i + 1]);
    121     }
    122     return result;
    123   }
    124 };
    125 
    126 template <class Generator>
    127 class UniformDistribution<Generator, int32> {
    128  public:
    129   // The number of elements that will be returned.
    130   static const int kResultElementCount = Generator::kResultElementCount;
    131   // Cost of generation of a single element (in cycles).
    132   static const int kElementCost = 3;
    133   // Indicate that this distribution may take variable number of samples
    134   // during the runtime.
    135   static const bool kVariableSamplesPerOutput = false;
    136   typedef Array<int32, kResultElementCount> ResultType;
    137   typedef int32 ResultElementType;
    138 
    139   // Must have lo < hi
    140   UniformDistribution(int32 lo, int32 hi) : lo_(lo), range_(hi - lo) {}
    141 
    142   PHILOX_DEVICE_INLINE
    143   ResultType operator()(Generator* gen) {
    144     typename Generator::ResultType sample = (*gen)();
    145     ResultType result;
    146     for (int i = 0; i < kResultElementCount; ++i) {
    147       result[i] = lo_ + static_cast<int32>(sample[i] % range_);
    148     }
    149     return result;
    150   }
    151 
    152  private:
    153   // Note that lo_ is intentionally signed while range_ is intentionally
    154   // unsigned.  This is because hi - lo can overflow signed integers if
    155   // lo < 0 < hi, but always fits in unsigned.
    156   int32 lo_;
    157   uint32 range_;
    158 };
    159 
    160 template <class Generator>
    161 class UniformDistribution<Generator, int64> {
    162  public:
    163   // The number of elements that will be returned.
    164   static const int kResultElementCount = Generator::kResultElementCount / 2;
    165   // Cost of generation of a single element (in cycles).
    166   static const int kElementCost = 3;
    167   // Indicate that this distribution may take variable number of samples
    168   // during the runtime.
    169   static const bool kVariableSamplesPerOutput = false;
    170   typedef Array<int64, kResultElementCount> ResultType;
    171   typedef int64 ResultElementType;
    172 
    173   // Must have lo < hi
    174   UniformDistribution(int64 lo, int64 hi) : lo_(lo), range_(hi - lo) {}
    175 
    176   PHILOX_DEVICE_INLINE
    177   ResultType operator()(Generator* gen) {
    178     typename Generator::ResultType sample = (*gen)();
    179     ResultType result;
    180     for (int i = 0; i < kResultElementCount; ++i) {
    181       auto bits = sample[2 * i] | static_cast<uint64>(sample[2 * i + 1]) << 32;
    182       result[i] = lo_ + static_cast<int64>(bits % range_);
    183     }
    184     return result;
    185   }
    186 
    187  private:
    188   // Note that lo_ is intentionally signed while range_ is intentionally
    189   // unsigned.  This is because hi - lo can overflow signed integers if
    190   // lo < 0 < hi, but always fits in unsigned.
    191   int64 lo_;
    192   uint64 range_;
    193 };
    194 
    195 // A class that adapts the underlying native multiple samples to return a single
    196 // sample at a time.
    197 template <class Generator>
    198 class SingleSampleAdapter {
    199  public:
    200   // The number of elements that will be returned.
    201   static const int kResultElementCount = 1;
    202   // The number of elements that will be returned by the underlying generator.
    203   static const int kNativeElementCount = Generator::kResultElementCount;
    204   typedef typename Generator::ResultElementType ResultType;
    205   typedef typename Generator::ResultElementType ResultElementType;
    206 
    207   PHILOX_DEVICE_INLINE
    208   explicit SingleSampleAdapter(Generator* gen)
    209       : generator_(gen), used_result_index_(Generator::kResultElementCount) {}
    210 
    211   PHILOX_DEVICE_INLINE
    212   ResultType operator()() {
    213     if (used_result_index_ == Generator::kResultElementCount) {
    214       unused_results_ = (*generator_)();
    215       used_result_index_ = 0;
    216     }
    217 
    218     return unused_results_[used_result_index_++];
    219   }
    220 
    221   PHILOX_DEVICE_INLINE
    222   void Skip(uint64 num_skips) {
    223     if (!num_skips) {
    224       return;
    225     }
    226     int num_unused_results = kNativeElementCount - used_result_index_;
    227     if (num_skips <= num_unused_results) {
    228       used_result_index_ += num_skips;
    229       return;
    230     }
    231     num_skips -= num_unused_results;
    232     used_result_index_ = kNativeElementCount;
    233     SkipFromGenerator(num_skips / kNativeElementCount);
    234     num_skips = num_skips % kNativeElementCount;
    235     if (num_skips) {
    236       unused_results_ = (*generator_)();
    237       used_result_index_ = num_skips;
    238     }
    239   }
    240 
    241  private:
    242   // This implementation iteratively skips over `num_skips` samples
    243   // from `generator_`. There is an O(1) implementation for PhiloxRandom
    244   // in random_distributions.cc.
    245   PHILOX_DEVICE_INLINE
    246   void SkipFromGenerator(uint64 num_skips) {
    247     while (num_skips--) {
    248       (*generator_)();
    249     }
    250   }
    251 
    252   Generator* generator_;
    253   typename Generator::ResultType unused_results_;
    254   int used_result_index_;
    255 };
    256 
    257 // A class that generates unit normal distribution random numbers from the
    258 // underlying random integer generator.
    259 // Arguments:
    260 //   Generator: a generator type that returns a number of uint32 upon each
    261 //              each invocation. It needs to define kResultElementCount for the
    262 //              sample count for each invocation, and ResultType for actual
    263 //              returned sample type.
    264 //   RealType: the data type of the real numbers that will be returned by the
    265 //             distribution. This could be either float or double for now.
    266 // This class is meant to be implemented through specialization. The default
    267 // is not defined by design.
    268 template <class Generator, typename RealType>
    269 class NormalDistribution;
    270 
    271 PHILOX_DEVICE_INLINE
    272 void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1);
    273 
    274 PHILOX_DEVICE_INLINE
    275 void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0,
    276                      double* d1);
    277 
    278 // Exactly like the float version, except that we convert to half afterwards;
    279 // since we don't have half-precision sin/cos even on GPUs, there's nothing to
    280 // gain from working in half internally.
    281 template <class Generator>
    282 class NormalDistribution<Generator, Eigen::half> {
    283  public:
    284   // The number of elements that will be returned.
    285   static const int kResultElementCount = Generator::kResultElementCount;
    286   // Cost of generation of a single element (in cycles).
    287   static const int kElementCost = 70;
    288   // Indicate that this distribution may take variable number of samples
    289   // during the runtime.
    290   static const bool kVariableSamplesPerOutput = false;
    291   typedef Array<Eigen::half, kResultElementCount> ResultType;
    292   typedef Eigen::half ResultElementType;
    293 
    294   PHILOX_DEVICE_INLINE
    295   ResultType operator()(Generator* gen) {
    296     typename Generator::ResultType sample = (*gen)();
    297     ResultType result;
    298     for (int i = 0; i < kResultElementCount; i += 2) {
    299       float f[2];
    300       BoxMullerFloat(sample[i], sample[i + 1], &f[0], &f[1]);
    301       result[i] = Eigen::half(f[0]);
    302       result[i + 1] = Eigen::half(f[1]);
    303     }
    304     return result;
    305   }
    306 };
    307 
    308 template <class Generator>
    309 class NormalDistribution<Generator, float> {
    310  public:
    311   // The number of elements that will be returned.
    312   static const int kResultElementCount = Generator::kResultElementCount;
    313   // Cost of generation of a single element (in cycles).
    314   static const int kElementCost = 70;
    315   // Indicate that this distribution may take variable number of samples
    316   // during the runtime.
    317   static const bool kVariableSamplesPerOutput = false;
    318   typedef Array<float, kResultElementCount> ResultType;
    319   typedef float ResultElementType;
    320 
    321   PHILOX_DEVICE_INLINE
    322   ResultType operator()(Generator* gen) {
    323     typename Generator::ResultType sample = (*gen)();
    324     ResultType result;
    325     for (int i = 0; i < kResultElementCount; i += 2) {
    326       BoxMullerFloat(sample[i], sample[i + 1], &result[i], &result[i + 1]);
    327     }
    328     return result;
    329   }
    330 };
    331 
    332 template <class Generator>
    333 class NormalDistribution<Generator, double> {
    334  public:
    335   // The number of elements that will be returned.
    336   static const int kResultElementCount = Generator::kResultElementCount / 2;
    337   // Cost of generation of a single element (in cycles).
    338   static const int kElementCost = 70;
    339   // Indicate that this distribution may take variable number of samples
    340   // during the runtime.
    341   static const bool kVariableSamplesPerOutput = false;
    342   typedef Array<double, kResultElementCount> ResultType;
    343   typedef double ResultElementType;
    344 
    345   PHILOX_DEVICE_INLINE
    346   ResultType operator()(Generator* gen) {
    347     typename Generator::ResultType sample = (*gen)();
    348     ResultType result;
    349     for (int i = 0; i < kResultElementCount; i += 2) {
    350       const int i2 = 2 * i;
    351       BoxMullerDouble(sample[i2], sample[i2 + 1], sample[i2 + 2],
    352                       sample[i2 + 3], &result[i], &result[i + 1]);
    353     }
    354     return result;
    355   }
    356 };
    357 
    358 // A class that returns standard normal distribution between
    359 // [-kTruncateValue, kTruncateValue].
    360 // Arguments:
    361 //   Generator: a generator type that returns a number of uint32 upon each
    362 //              each invocation. It needs to define kResultElementCount for the
    363 //              sample count for each invocation, and ResultType for actual
    364 //              returned sample type.
    365 //   RealType: the data type of the real numbers that will be returned by the
    366 //             distribution. This could be either float or double for now.
    367 // This class is meant to be implemented through specialization. The default
    368 // is not defined by design.
    369 template <class SingleSampleGenerator, typename RealType>
    370 class TruncatedNormalDistribution;
    371 
    372 // Exactly like the float version, except that we convert to half afterwards;
    373 // since we don't have half-precision sin/cos even on GPUs, there's nothing to
    374 // gain from working in half internally.
    375 template <class SingleSampleGenerator>
    376 class TruncatedNormalDistribution<SingleSampleGenerator, Eigen::half> {
    377  public:
    378   // The number of elements that will be returned.
    379   static const int kResultElementCount =
    380       SingleSampleGenerator::kNativeElementCount;
    381   // Cost of generation of a single element (in cycles).
    382   static const int kElementCost = 90;
    383   // Indicate that this distribution may take variable number of samples
    384   // during the runtime.
    385   static const bool kVariableSamplesPerOutput = true;
    386   // The threshold where the normal distribution is truncated.
    387   const float kTruncateValue = 2.0f;
    388 
    389   typedef Array<Eigen::half, kResultElementCount> ResultType;
    390   typedef Eigen::half ResultElementType;
    391 
    392   PHILOX_DEVICE_INLINE
    393   ResultType operator()(SingleSampleGenerator* gen) {
    394     ResultType results;
    395     int index = 0;
    396     while (true) {
    397       // Repeatedly take samples from the normal distribution, until we have
    398       // the desired number of elements that fall within the pre-defined cutoff
    399       // threshold.
    400       const uint32 x0 = (*gen)();
    401       const uint32 x1 = (*gen)();
    402       float f[2];
    403       BoxMullerFloat(x0, x1, &f[0], &f[1]);
    404 
    405       for (int i = 0; i < 2; ++i) {
    406         if (Eigen::numext::abs(f[i]) < kTruncateValue) {
    407           results[index++] = Eigen::half(f[i]);
    408           if (index >= kResultElementCount) {
    409             return results;
    410           }
    411         }
    412       }
    413     }
    414   }
    415 };
    416 
    417 // Partial specialization for float.
    418 template <class SingleSampleGenerator>
    419 class TruncatedNormalDistribution<SingleSampleGenerator, float> {
    420  public:
    421   // The number of elements that will be returned.
    422   static const int kResultElementCount =
    423       SingleSampleGenerator::kNativeElementCount;
    424   // Cost of generation of a single element (in cycles).
    425   static const int kElementCost = 90;
    426   // Indicate that this distribution may take variable number of samples
    427   // during the runtime.
    428   static const bool kVariableSamplesPerOutput = true;
    429   // The threshold where the normal distribution is truncated.
    430   const float kTruncateValue = 2.0f;
    431 
    432   typedef Array<float, kResultElementCount> ResultType;
    433   typedef float ResultElementType;
    434 
    435   PHILOX_DEVICE_INLINE
    436   ResultType operator()(SingleSampleGenerator* gen) {
    437     ResultType results;
    438     int index = 0;
    439     while (true) {
    440       // Repeatedly take samples from the normal distribution, until we have
    441       // the desired number of elements that fall within the pre-defined cutoff
    442       // threshold.
    443       const uint32 x0 = (*gen)();
    444       const uint32 x1 = (*gen)();
    445       float f[2];
    446       BoxMullerFloat(x0, x1, &f[0], &f[1]);
    447 
    448       for (int i = 0; i < 2; ++i) {
    449         if (Eigen::numext::abs(f[i]) < kTruncateValue) {
    450           results[index++] = f[i];
    451           if (index >= kResultElementCount) {
    452             return results;
    453           }
    454         }
    455       }
    456     }
    457   }
    458 };
    459 
    460 // Partial specialization for double.
    461 template <class SingleSampleGenerator>
    462 class TruncatedNormalDistribution<SingleSampleGenerator, double> {
    463  public:
    464   // The number of elements that will be returned.
    465   static const int kResultElementCount =
    466       (SingleSampleGenerator::kNativeElementCount > 1)
    467           ? SingleSampleGenerator::kNativeElementCount / 2
    468           : 1;
    469   // Cost of generation of a single element (in cycles).
    470   static const int kElementCost = 90;
    471   // Indicate that this distribution may take variable number of samples
    472   // during the runtime.
    473   static const bool kVariableSamplesPerOutput = true;
    474   typedef Array<double, kResultElementCount> ResultType;
    475   typedef double ResultElementType;
    476   const double kTruncateValue = 2.0;
    477 
    478   PHILOX_DEVICE_INLINE
    479   ResultType operator()(SingleSampleGenerator* gen) {
    480     ResultType results;
    481     int index = 0;
    482     while (1) {
    483       const uint32 x0 = (*gen)();
    484       const uint32 x1 = (*gen)();
    485       const uint32 x2 = (*gen)();
    486       const uint32 x3 = (*gen)();
    487       double d[2];
    488       BoxMullerDouble(x0, x1, x2, x3, &d[0], &d[1]);
    489 
    490       for (int i = 0; i < 2; ++i) {
    491         if (Eigen::numext::abs(d[i]) < kTruncateValue) {
    492           results[index++] = d[i];
    493           if (index >= kResultElementCount) {
    494             return results;
    495           }
    496         }
    497       }
    498     }
    499   }
    500 };
    501 
    502 // Helper function to convert two 32-bit uniform integers to two floats
    503 // under the unit normal distribution.
    504 PHILOX_DEVICE_INLINE
    505 void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1) {
    506   // This function implements the Box-Muller transform:
    507   // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
    508   // Do not send a really small number to log().
    509   // We cannot mark "epsilon" as "static const" because NVCC would complain
    510   const float epsilon = 1.0e-7f;
    511   float u1 = Uint32ToFloat(x0);
    512   if (u1 < epsilon) {
    513     u1 = epsilon;
    514   }
    515   const float v1 = 2.0f * M_PI * Uint32ToFloat(x1);
    516   const float u2 = Eigen::numext::sqrt(-2.0f * Eigen::numext::log(u1));
    517 #if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
    518   *f0 = Eigen::numext::sin(v1);
    519   *f1 = Eigen::numext::cos(v1);
    520 #else
    521   sincosf(v1, f0, f1);
    522 #endif
    523   *f0 *= u2;
    524   *f1 *= u2;
    525 }
    526 
    527 // Helper function to convert four 32-bit uniform integers to two doubles
    528 // under the unit normal distribution.
    529 PHILOX_DEVICE_INLINE
    530 void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0,
    531                      double* d1) {
    532   // This function implements the Box-Muller transform:
    533   // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
    534   // Do not send a really small number to log().
    535   // We cannot mark "epsilon" as "static const" because NVCC would complain
    536   const double epsilon = 1.0e-7;
    537   double u1 = Uint64ToDouble(x0, x1);
    538   if (u1 < epsilon) {
    539     u1 = epsilon;
    540   }
    541   const double v1 = 2 * M_PI * Uint64ToDouble(x2, x3);
    542   const double u2 = Eigen::numext::sqrt(-2.0 * Eigen::numext::log(u1));
    543 #if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
    544   *d0 = Eigen::numext::sin(v1);
    545   *d1 = Eigen::numext::cos(v1);
    546 #else
    547   sincos(v1, d0, d1);
    548 #endif
    549   *d0 *= u2;
    550   *d1 *= u2;
    551 }
    552 
    553 // Helper function to convert an 16-bit integer to a half between [0..1).
    554 PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x) {
    555   // IEEE754 halfs are formatted as follows (MSB first):
    556   //    sign(1) exponent(5) mantissa(10)
    557   // Conceptually construct the following:
    558   //    sign == 0
    559   //    exponent == 15  -- an excess 15 representation of a zero exponent
    560   //    mantissa == 10 random bits
    561   const uint16 man = x & 0x3ffu;  // 10 bit mantissa
    562   const uint16 exp = static_cast<uint16>(15);
    563   const uint16 val = (exp << 10) | man;
    564 
    565   Eigen::half result;
    566   result.x = val;
    567   return result - Eigen::half(1.0);
    568 }
    569 
    570 // Helper function to convert an 32-bit integer to a float between [0..1).
    571 PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x) {
    572   // IEEE754 floats are formatted as follows (MSB first):
    573   //    sign(1) exponent(8) mantissa(23)
    574   // Conceptually construct the following:
    575   //    sign == 0
    576   //    exponent == 127  -- an excess 127 representation of a zero exponent
    577   //    mantissa == 23 random bits
    578   const uint32 man = x & 0x7fffffu;  // 23 bit mantissa
    579   const uint32 exp = static_cast<uint32>(127);
    580   const uint32 val = (exp << 23) | man;
    581 
    582   // Assumes that endian-ness is same for float and uint32.
    583   float result;
    584   memcpy(&result, &val, sizeof(val));
    585   return result - 1.0f;
    586 }
    587 
    588 // Helper function to convert two 32-bit integers to a double between [0..1).
    589 PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1) {
    590   // IEEE754 doubles are formatted as follows (MSB first):
    591   //    sign(1) exponent(11) mantissa(52)
    592   // Conceptually construct the following:
    593   //    sign == 0
    594   //    exponent == 1023  -- an excess 1023 representation of a zero exponent
    595   //    mantissa == 52 random bits
    596   const uint32 mhi = x0 & 0xfffffu;  // upper 20 bits of mantissa
    597   const uint32 mlo = x1;             // lower 32 bits of mantissa
    598   const uint64 man = (static_cast<uint64>(mhi) << 32) | mlo;  // mantissa
    599   const uint64 exp = static_cast<uint64>(1023);
    600   const uint64 val = (exp << 52) | man;
    601   // Assumes that endian-ness is same for double and uint64.
    602   double result;
    603   memcpy(&result, &val, sizeof(val));
    604   return result - 1.0;
    605 }
    606 
    607 }  // namespace random
    608 }  // namespace tensorflow
    609 
    610 #endif  // TENSORFLOW_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
    611