1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" 17 #include "tensorflow/core/framework/tensor.h" 18 #include "tensorflow/core/kernels/ops_util.h" 19 #include "tensorflow/core/platform/test.h" 20 #include "tensorflow/core/platform/test_benchmark.h" 21 #include "tensorflow/core/public/session_options.h" 22 23 namespace tensorflow { 24 25 // We focus on the single thread performance of training ops. 26 static SessionOptions InitSingleThreadedOptions() { 27 SessionOptions opts; 28 opts.config.set_intra_op_parallelism_threads(1); 29 opts.config.set_inter_op_parallelism_threads(1); 30 return opts; 31 } 32 33 static SessionOptions* GetOptions() { 34 static SessionOptions opts = InitSingleThreadedOptions(); 35 return &opts; 36 } 37 38 static Node* Var(Graph* g, int n) { 39 return test::graph::Var(g, DT_FLOAT, TensorShape({n})); 40 } 41 42 static Node* Zeros(Graph* g, int n) { 43 Tensor data(DT_FLOAT, TensorShape({n})); 44 data.flat<float>().setZero(); 45 return test::graph::Constant(g, data); 46 } 47 48 static Node* Random(Graph* g, int n) { 49 Tensor data(DT_FLOAT, TensorShape({n})); 50 data.flat<float>().setRandom(); 51 return test::graph::Constant(g, data); 52 } 53 54 static Node* Scalar(Graph* g, float val) { 55 Tensor data(DT_FLOAT, TensorShape({})); 56 data.flat<float>()(0) = val; 57 return test::graph::Constant(g, data); 58 } 59 60 static void SGD(int32 n, Graph** init_g, Graph** train_g) { 61 { 62 Graph* g = new Graph(OpRegistry::Global()); 63 auto var = Var(g, n); 64 test::graph::Assign(g, var, Zeros(g, n)); 65 *init_g = g; 66 } 67 { 68 Graph* g = new Graph(OpRegistry::Global()); 69 auto var = Var(g, n); 70 auto lr = Scalar(g, 0.01); 71 auto grad = Random(g, n); 72 test::graph::Multi(g, "ApplyGradientDescent", {var, lr, grad}); 73 *train_g = g; 74 } 75 } 76 77 static void BM_SGD(int iters, int params) { 78 const int64 tot = static_cast<int64>(iters) * params; 79 testing::ItemsProcessed(tot); 80 testing::BytesProcessed(tot * sizeof(float)); 81 Graph* init; 82 Graph* train; 83 SGD(params, &init, &train); 84 test::Benchmark("cpu", train, GetOptions(), init).Run(iters); 85 } 86 BENCHMARK(BM_SGD)->Arg(128 << 10)->Arg(256 << 10); 87 88 static void Adagrad(int32 n, Graph** init_g, Graph** train_g) { 89 { 90 Graph* g = new Graph(OpRegistry::Global()); 91 auto var = Var(g, n); 92 auto accum = Var(g, n); 93 auto zero = Zeros(g, n); 94 test::graph::Assign(g, var, zero); 95 test::graph::Assign(g, accum, zero); 96 *init_g = g; 97 } 98 { 99 Graph* g = new Graph(OpRegistry::Global()); 100 auto var = Var(g, n); 101 auto accum = Var(g, n); 102 auto lr = Scalar(g, 0.01); 103 auto grad = Random(g, n); 104 test::graph::Multi(g, "ApplyAdagrad", {var, accum, lr, grad}); 105 *train_g = g; 106 } 107 } 108 109 static void BM_Adagrad(int iters, int params) { 110 const int64 tot = static_cast<int64>(iters) * params; 111 testing::ItemsProcessed(tot); 112 testing::BytesProcessed(tot * sizeof(float)); 113 Graph* init; 114 Graph* train; 115 Adagrad(params, &init, &train); 116 test::Benchmark("cpu", train, GetOptions(), init).Run(iters); 117 } 118 BENCHMARK(BM_Adagrad)->Arg(128 << 10)->Arg(256 << 10); 119 120 static void Momentum(int32 n, Graph** init_g, Graph** train_g) { 121 TensorShape shape({n}); 122 { 123 Graph* g = new Graph(OpRegistry::Global()); 124 auto var = Var(g, n); 125 auto accum = Var(g, n); 126 auto zero = Zeros(g, n); 127 test::graph::Assign(g, var, zero); 128 test::graph::Assign(g, accum, zero); 129 *init_g = g; 130 } 131 { 132 Graph* g = new Graph(OpRegistry::Global()); 133 auto var = Var(g, n); 134 auto accum = Var(g, n); 135 auto lr = Scalar(g, 0.01); 136 auto grad = Random(g, n); 137 auto mom = Scalar(g, 0.01); 138 test::graph::Multi(g, "ApplyMomentum", {var, accum, lr, grad, mom}); 139 *train_g = g; 140 } 141 } 142 143 static void BM_Momentum(int iters, int params) { 144 const int64 tot = static_cast<int64>(iters) * params; 145 testing::ItemsProcessed(tot); 146 testing::BytesProcessed(tot * sizeof(float)); 147 Graph* init; 148 Graph* train; 149 Momentum(params, &init, &train); 150 test::Benchmark("cpu", train, GetOptions(), init).Run(iters); 151 } 152 BENCHMARK(BM_Momentum)->Arg(128 << 10)->Arg(256 << 10); 153 154 static void Adam(int32 n, Graph** init_g, Graph** train_g) { 155 TensorShape shape({n}); 156 { 157 Graph* g = new Graph(OpRegistry::Global()); 158 auto var = Var(g, n); 159 auto m = Var(g, n); 160 auto v = Var(g, n); 161 auto zero = Zeros(g, n); 162 test::graph::Assign(g, var, zero); 163 test::graph::Assign(g, m, zero); 164 test::graph::Assign(g, v, zero); 165 *init_g = g; 166 } 167 { 168 Graph* g = new Graph(OpRegistry::Global()); 169 auto var = Var(g, n); 170 auto m = Var(g, n); 171 auto v = Var(g, n); 172 auto beta1_power = Scalar(g, 0.9); 173 auto beta2_power = Scalar(g, 0.99); 174 auto lr = Scalar(g, 0.01); 175 auto beta1 = Scalar(g, 0.9); 176 auto beta2 = Scalar(g, 0.99); 177 auto epsilon = Scalar(g, 1e-8); 178 auto grad = Random(g, n); 179 test::graph::Multi( 180 g, "ApplyAdam", 181 {var, m, v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad}); 182 *train_g = g; 183 } 184 } 185 186 static void BM_Adam(int iters, int params) { 187 const int64 tot = static_cast<int64>(iters) * params; 188 testing::ItemsProcessed(tot); 189 testing::BytesProcessed(tot * sizeof(float)); 190 Graph* init; 191 Graph* train; 192 Adam(params, &init, &train); 193 test::Benchmark("cpu", train, GetOptions(), init).Run(iters); 194 } 195 BENCHMARK(BM_Adam)->Arg(128 << 10)->Arg(256 << 10); 196 197 static void RMSProp(int32 n, Graph** init_g, Graph** train_g) { 198 TensorShape shape({n}); 199 { 200 Graph* g = new Graph(OpRegistry::Global()); 201 auto var = Var(g, n); 202 auto ms = Var(g, n); 203 auto mom = Var(g, n); 204 auto zero = Zeros(g, n); 205 test::graph::Assign(g, var, zero); 206 test::graph::Assign(g, ms, zero); 207 test::graph::Assign(g, mom, zero); 208 *init_g = g; 209 } 210 { 211 Graph* g = new Graph(OpRegistry::Global()); 212 auto var = Var(g, n); 213 auto ms = Var(g, n); 214 auto mom = Var(g, n); 215 auto lr = Scalar(g, 0.01); 216 auto rho = Scalar(g, 0.9); 217 auto momentum = Scalar(g, 0.9); 218 auto epsilon = Scalar(g, 1e-8); 219 auto grad = Random(g, n); 220 test::graph::Multi(g, "ApplyRMSProp", 221 {var, ms, mom, lr, rho, momentum, epsilon, grad}); 222 *train_g = g; 223 } 224 } 225 226 static void BM_RMSProp(int iters, int params) { 227 const int64 tot = static_cast<int64>(iters) * params; 228 testing::ItemsProcessed(tot); 229 testing::BytesProcessed(tot * sizeof(float)); 230 Graph* init; 231 Graph* train; 232 RMSProp(params, &init, &train); 233 test::Benchmark("cpu", train, GetOptions(), init).Run(iters); 234 } 235 BENCHMARK(BM_RMSProp)->Arg(128 << 10)->Arg(256 << 10); 236 237 static void AddSign(int32 n, Graph** init_g, Graph** train_g) { 238 TensorShape shape({n}); 239 { 240 Graph* g = new Graph(OpRegistry::Global()); 241 auto var = Var(g, n); 242 auto m = Var(g, n); 243 auto zero = Zeros(g, n); 244 test::graph::Assign(g, var, zero); 245 test::graph::Assign(g, m, zero); 246 *init_g = g; 247 } 248 { 249 Graph* g = new Graph(OpRegistry::Global()); 250 auto var = Var(g, n); 251 auto m = Var(g, n); 252 auto lr = Scalar(g, 0.01); 253 auto alpha = Scalar(g, 0.1); 254 auto sign_decay = Scalar(g, 0.9); 255 auto beta = Scalar(g, 0.8); 256 auto grad = Random(g, n); 257 test::graph::Multi(g, "ApplyAddSign", 258 {var, m, lr, alpha, sign_decay, beta, grad}); 259 *train_g = g; 260 } 261 } 262 263 static void BM_AddSign(int iters, int params) { 264 const int64 tot = static_cast<int64>(iters) * params; 265 testing::ItemsProcessed(tot); 266 testing::BytesProcessed(tot * sizeof(float)); 267 Graph* init; 268 Graph* train; 269 AddSign(params, &init, &train); 270 test::Benchmark("cpu", train, GetOptions(), init).Run(iters); 271 } 272 BENCHMARK(BM_AddSign)->Arg(128 << 10)->Arg(256 << 10); 273 274 static void PowerSign(int32 n, Graph** init_g, Graph** train_g) { 275 TensorShape shape({n}); 276 { 277 Graph* g = new Graph(OpRegistry::Global()); 278 auto var = Var(g, n); 279 auto m = Var(g, n); 280 auto zero = Zeros(g, n); 281 test::graph::Assign(g, var, zero); 282 test::graph::Assign(g, m, zero); 283 *init_g = g; 284 } 285 { 286 Graph* g = new Graph(OpRegistry::Global()); 287 auto var = Var(g, n); 288 auto m = Var(g, n); 289 auto lr = Scalar(g, 0.01); 290 auto logbase = Scalar(g, 2); 291 auto sign_decay = Scalar(g, 0.9); 292 auto beta = Scalar(g, 0.8); 293 auto grad = Random(g, n); 294 test::graph::Multi(g, "ApplyPowerSign", 295 {var, m, lr, logbase, sign_decay, beta, grad}); 296 *train_g = g; 297 } 298 } 299 300 static void BM_PowerSign(int iters, int params) { 301 const int64 tot = static_cast<int64>(iters) * params; 302 testing::ItemsProcessed(tot); 303 testing::BytesProcessed(tot * sizeof(float)); 304 Graph* init; 305 Graph* train; 306 PowerSign(params, &init, &train); 307 test::Benchmark("cpu", train, GetOptions(), init).Run(iters); 308 } 309 BENCHMARK(BM_PowerSign)->Arg(128 << 10)->Arg(256 << 10); 310 311 } // end namespace tensorflow 312