1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include <random> 17 18 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" 19 #include "tensorflow/core/framework/tensor.h" 20 #include "tensorflow/core/lib/math/math_util.h" 21 #include "tensorflow/core/lib/random/philox_random.h" 22 #include "tensorflow/core/platform/test.h" 23 #include "tensorflow/core/platform/test_benchmark.h" 24 25 namespace tensorflow { 26 namespace { 27 28 Tensor VecShape(int64 v) { 29 if (v >= std::numeric_limits<int32>::max()) { 30 Tensor shape(DT_INT64, TensorShape({1})); 31 shape.vec<int64>()(0) = v; 32 return shape; 33 } else { 34 Tensor shape(DT_INT32, TensorShape({1})); 35 shape.vec<int32>()(0) = v; 36 return shape; 37 } 38 } 39 40 Graph* RandomUniform(int64 n) { 41 Graph* g = new Graph(OpRegistry::Global()); 42 test::graph::RandomUniform(g, test::graph::Constant(g, VecShape(n)), 43 DT_FLOAT); 44 return g; 45 } 46 47 Graph* RandomNormal(int64 n) { 48 Graph* g = new Graph(OpRegistry::Global()); 49 test::graph::RandomGaussian(g, test::graph::Constant(g, VecShape(n)), 50 DT_FLOAT); 51 return g; 52 } 53 54 Graph* TruncatedNormal(int64 n) { 55 Graph* g = new Graph(OpRegistry::Global()); 56 test::graph::TruncatedNormal(g, test::graph::Constant(g, VecShape(n)), 57 DT_FLOAT); 58 return g; 59 } 60 61 #define BM_RNG(DEVICE, RNG) \ 62 void BM_##DEVICE##_##RNG(int iters, int arg) { \ 63 testing::ItemsProcessed(static_cast<int64>(iters) * arg); \ 64 test::Benchmark(#DEVICE, RNG(arg)).Run(iters); \ 65 } \ 66 BENCHMARK(BM_##DEVICE##_##RNG)->Range(1 << 20, 8 << 20); 67 68 BM_RNG(cpu, RandomUniform); 69 BM_RNG(cpu, RandomNormal); 70 BM_RNG(cpu, TruncatedNormal); 71 72 BM_RNG(gpu, RandomUniform); 73 BM_RNG(gpu, RandomNormal); 74 BM_RNG(gpu, TruncatedNormal); 75 76 Tensor VecAlphas(int64 n) { 77 Tensor alphas(DT_DOUBLE, TensorShape({n})); 78 for (int i = 0; i < n; i++) { 79 // Alternate back and forth between small-and-growing (.25) and 80 // large-and-shrinking (26.67) alpha. 81 alphas.vec<double>()(i) = 82 0.25 + MathUtil::IPow(1.1, i % 2 == 0 ? i : n - i); 83 } 84 return alphas; 85 } 86 87 void BM_cpu_RandomGamma(int iters, int nsamp, int nalpha) { 88 testing::ItemsProcessed(static_cast<int64>(iters) * nsamp * nalpha); 89 Graph* g = new Graph(OpRegistry::Global()); 90 test::graph::RandomGamma(g, test::graph::Constant(g, VecShape(nsamp)), 91 test::graph::Constant(g, VecAlphas(nalpha))); 92 test::Benchmark("cpu", g).Run(iters); 93 } 94 BENCHMARK(BM_cpu_RandomGamma)->RangePair(1 << 14, 4 << 15, 2, 50); 95 96 void BM_PhiloxRandom(int iters) { 97 // Fill 2M random numbers 98 int count = 2 << 20; 99 100 testing::ItemsProcessed(static_cast<int64>(iters) * count); 101 102 random::PhiloxRandom gen(0x12345); 103 104 int val = 1; 105 for (int i = 0; i < iters; ++i) { 106 for (int j = 0; j < count; j += 4) { 107 /// each invocation of gen() returns 128-bit samples 108 auto samples = gen(); 109 110 // use the result trivially so the compiler does not optimize it away 111 val ^= samples[0] ^ samples[1] ^ samples[2] ^ samples[3]; 112 } 113 } 114 115 // A anchor point to make sure the compiler does not cut corners 116 CHECK(val) << val; 117 } 118 BENCHMARK(BM_PhiloxRandom); 119 120 void BM_StdMTRandom(int iters) { 121 // Fill 2M random numbers 122 int count = 2 << 20; 123 124 testing::ItemsProcessed(static_cast<int64>(iters) * count); 125 126 std::mt19937 gen(0x12345); 127 128 uint_fast32_t val = 1; 129 for (int i = 0; i < iters; ++i) { 130 for (int j = 0; j < count; ++j) { 131 /// each invocation of gen() returns 32-bit sample 132 uint_fast32_t sample = gen(); 133 134 // use the result trivially so the compiler does not optimize it away 135 val ^= sample; 136 } 137 } 138 139 // A anchor point to make sure the compiler does not cut corners 140 CHECK(val) << val; 141 } 142 BENCHMARK(BM_StdMTRandom); 143 144 } // namespace 145 } // namespace tensorflow 146