Home | History | Annotate | Download | only in kernels
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #include <random>
     17 
     18 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
     19 #include "tensorflow/core/framework/tensor.h"
     20 #include "tensorflow/core/lib/math/math_util.h"
     21 #include "tensorflow/core/lib/random/philox_random.h"
     22 #include "tensorflow/core/platform/test.h"
     23 #include "tensorflow/core/platform/test_benchmark.h"
     24 
     25 namespace tensorflow {
     26 namespace {
     27 
     28 Tensor VecShape(int64 v) {
     29   if (v >= std::numeric_limits<int32>::max()) {
     30     Tensor shape(DT_INT64, TensorShape({1}));
     31     shape.vec<int64>()(0) = v;
     32     return shape;
     33   } else {
     34     Tensor shape(DT_INT32, TensorShape({1}));
     35     shape.vec<int32>()(0) = v;
     36     return shape;
     37   }
     38 }
     39 
     40 Graph* RandomUniform(int64 n) {
     41   Graph* g = new Graph(OpRegistry::Global());
     42   test::graph::RandomUniform(g, test::graph::Constant(g, VecShape(n)),
     43                              DT_FLOAT);
     44   return g;
     45 }
     46 
     47 Graph* RandomNormal(int64 n) {
     48   Graph* g = new Graph(OpRegistry::Global());
     49   test::graph::RandomGaussian(g, test::graph::Constant(g, VecShape(n)),
     50                               DT_FLOAT);
     51   return g;
     52 }
     53 
     54 Graph* TruncatedNormal(int64 n) {
     55   Graph* g = new Graph(OpRegistry::Global());
     56   test::graph::TruncatedNormal(g, test::graph::Constant(g, VecShape(n)),
     57                                DT_FLOAT);
     58   return g;
     59 }
     60 
     61 #define BM_RNG(DEVICE, RNG)                                   \
     62   void BM_##DEVICE##_##RNG(int iters, int arg) {              \
     63     testing::ItemsProcessed(static_cast<int64>(iters) * arg); \
     64     test::Benchmark(#DEVICE, RNG(arg)).Run(iters);            \
     65   }                                                           \
     66   BENCHMARK(BM_##DEVICE##_##RNG)->Range(1 << 20, 8 << 20);
     67 
     68 BM_RNG(cpu, RandomUniform);
     69 BM_RNG(cpu, RandomNormal);
     70 BM_RNG(cpu, TruncatedNormal);
     71 
     72 BM_RNG(gpu, RandomUniform);
     73 BM_RNG(gpu, RandomNormal);
     74 BM_RNG(gpu, TruncatedNormal);
     75 
     76 Tensor VecAlphas(int64 n) {
     77   Tensor alphas(DT_DOUBLE, TensorShape({n}));
     78   for (int i = 0; i < n; i++) {
     79     // Alternate back and forth between small-and-growing (.25) and
     80     // large-and-shrinking (26.67) alpha.
     81     alphas.vec<double>()(i) =
     82         0.25 + MathUtil::IPow(1.1, i % 2 == 0 ? i : n - i);
     83   }
     84   return alphas;
     85 }
     86 
     87 void BM_cpu_RandomGamma(int iters, int nsamp, int nalpha) {
     88   testing::ItemsProcessed(static_cast<int64>(iters) * nsamp * nalpha);
     89   Graph* g = new Graph(OpRegistry::Global());
     90   test::graph::RandomGamma(g, test::graph::Constant(g, VecShape(nsamp)),
     91                            test::graph::Constant(g, VecAlphas(nalpha)));
     92   test::Benchmark("cpu", g).Run(iters);
     93 }
     94 BENCHMARK(BM_cpu_RandomGamma)->RangePair(1 << 14, 4 << 15, 2, 50);
     95 
     96 void BM_PhiloxRandom(int iters) {
     97   // Fill 2M random numbers
     98   int count = 2 << 20;
     99 
    100   testing::ItemsProcessed(static_cast<int64>(iters) * count);
    101 
    102   random::PhiloxRandom gen(0x12345);
    103 
    104   int val = 1;
    105   for (int i = 0; i < iters; ++i) {
    106     for (int j = 0; j < count; j += 4) {
    107       /// each invocation of gen() returns 128-bit samples
    108       auto samples = gen();
    109 
    110       // use the result trivially so the compiler does not optimize it away
    111       val ^= samples[0] ^ samples[1] ^ samples[2] ^ samples[3];
    112     }
    113   }
    114 
    115   // A anchor point to make sure the compiler does not cut corners
    116   CHECK(val) << val;
    117 }
    118 BENCHMARK(BM_PhiloxRandom);
    119 
    120 void BM_StdMTRandom(int iters) {
    121   // Fill 2M random numbers
    122   int count = 2 << 20;
    123 
    124   testing::ItemsProcessed(static_cast<int64>(iters) * count);
    125 
    126   std::mt19937 gen(0x12345);
    127 
    128   uint_fast32_t val = 1;
    129   for (int i = 0; i < iters; ++i) {
    130     for (int j = 0; j < count; ++j) {
    131       /// each invocation of gen() returns 32-bit sample
    132       uint_fast32_t sample = gen();
    133 
    134       // use the result trivially so the compiler does not optimize it away
    135       val ^= sample;
    136     }
    137   }
    138 
    139   // A anchor point to make sure the compiler does not cut corners
    140   CHECK(val) << val;
    141 }
    142 BENCHMARK(BM_StdMTRandom);
    143 
    144 }  // namespace
    145 }  // namespace tensorflow
    146