/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/public/session_options.h"

namespace tensorflow {

// We focus on the single-threaded performance of training ops.
static SessionOptions InitSingleThreadedOptions() {
  SessionOptions opts;
  opts.config.set_intra_op_parallelism_threads(1);
  opts.config.set_inter_op_parallelism_threads(1);
  return opts;
}

static SessionOptions* GetOptions() {
  static SessionOptions opts = InitSingleThreadedOptions();
  return &opts;
}

// Creates a DT_FLOAT variable of shape {n}.
static Node* Var(Graph* g, int n) {
  return test::graph::Var(g, DT_FLOAT, TensorShape({n}));
}

// Creates a length-n constant filled with zeros (used to initialize variables).
static Node* Zeros(Graph* g, int n) {
  Tensor data(DT_FLOAT, TensorShape({n}));
  data.flat<float>().setZero();
  return test::graph::Constant(g, data);
}

// Creates a length-n constant filled with random values (a stand-in gradient).
static Node* Random(Graph* g, int n) {
  Tensor data(DT_FLOAT, TensorShape({n}));
  data.flat<float>().setRandom();
  return test::graph::Constant(g, data);
}

// Creates a scalar constant holding `val`.
static Node* Scalar(Graph* g, float val) {
  Tensor data(DT_FLOAT, TensorShape({}));
  data.flat<float>()(0) = val;
  return test::graph::Constant(g, data);
}

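// Plain gradient descent. Per the op's documented semantics,
// ApplyGradientDescent performs the element-wise update
//   var -= lr * grad
// on the variable, here fed a random constant as the gradient.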
static void SGD(int32 n, Graph** init_g, Graph** train_g) {
  {
    Graph* g = new Graph(OpRegistry::Global());
    auto var = Var(g, n);
    test::graph::Assign(g, var, Zeros(g, n));
    *init_g = g;
  }
  {
    Graph* g = new Graph(OpRegistry::Global());
    auto var = Var(g, n);
    auto lr = Scalar(g, 0.01);
    auto grad = Random(g, n);
    test::graph::Multi(g, "ApplyGradientDescent", {var, lr, grad});
    *train_g = g;
  }
}

static void BM_SGD(int iters, int params) {
  const int64 tot = static_cast<int64>(iters) * params;
  testing::ItemsProcessed(tot);
  testing::BytesProcessed(tot * sizeof(float));
  Graph* init;
  Graph* train;
  SGD(params, &init, &train);
  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
}
BENCHMARK(BM_SGD)->Arg(128 << 10)->Arg(256 << 10);

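// Adagrad keeps a per-element accumulator of squared gradients. Per the op's
// documented semantics, ApplyAdagrad performs, element-wise:
//   accum += grad * grad
//   var   -= lr * grad / sqrt(accum)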
static void Adagrad(int32 n, Graph** init_g, Graph** train_g) {
  {
    Graph* g = new Graph(OpRegistry::Global());
    auto var = Var(g, n);
    auto accum = Var(g, n);
    auto zero = Zeros(g, n);
    test::graph::Assign(g, var, zero);
    test::graph::Assign(g, accum, zero);
    *init_g = g;
  }
  {
    Graph* g = new Graph(OpRegistry::Global());
    auto var = Var(g, n);
    auto accum = Var(g, n);
    auto lr = Scalar(g, 0.01);
    auto grad = Random(g, n);
    test::graph::Multi(g, "ApplyAdagrad", {var, accum, lr, grad});
    *train_g = g;
  }
}

static void BM_Adagrad(int iters, int params) {
  const int64 tot = static_cast<int64>(iters) * params;
  testing::ItemsProcessed(tot);
  testing::BytesProcessed(tot * sizeof(float));
  Graph* init;
  Graph* train;
  Adagrad(params, &init, &train);
  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
}
BENCHMARK(BM_Adagrad)->Arg(128 << 10)->Arg(256 << 10);

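// Momentum keeps a velocity accumulator. With the default (non-Nesterov)
// setting, ApplyMomentum performs, element-wise:
//   accum = momentum * accum + grad
//   var  -= lr * accum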
static void Momentum(int32 n, Graph** init_g, Graph** train_g) {
  TensorShape shape({n});
  {
    Graph* g = new Graph(OpRegistry::Global());
    auto var = Var(g, n);
    auto accum = Var(g, n);
    auto zero = Zeros(g, n);
    test::graph::Assign(g, var, zero);
    test::graph::Assign(g, accum, zero);
    *init_g = g;
  }
  {
    Graph* g = new Graph(OpRegistry::Global());
    auto var = Var(g, n);
    auto accum = Var(g, n);
    auto lr = Scalar(g, 0.01);
    auto grad = Random(g, n);
    auto mom = Scalar(g, 0.01);
    test::graph::Multi(g, "ApplyMomentum", {var, accum, lr, grad, mom});
    *train_g = g;
  }
}

static void BM_Momentum(int iters, int params) {
  const int64 tot = static_cast<int64>(iters) * params;
  testing::ItemsProcessed(tot);
  testing::BytesProcessed(tot * sizeof(float));
  Graph* init;
  Graph* train;
  Momentum(params, &init, &train);
  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
}
BENCHMARK(BM_Momentum)->Arg(128 << 10)->Arg(256 << 10);

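// Adam tracks first (m) and second (v) moment estimates of the gradient.
// Per the op's documented semantics, ApplyAdam performs, element-wise:
//   lr_t = lr * sqrt(1 - beta2_power) / (1 - beta1_power)
//   m    = beta1 * m + (1 - beta1) * grad
//   v    = beta2 * v + (1 - beta2) * grad * grad
//   var -= lr_t * m / (sqrt(v) + epsilon)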
static void Adam(int32 n, Graph** init_g, Graph** train_g) {
  TensorShape shape({n});
  {
    Graph* g = new Graph(OpRegistry::Global());
    auto var = Var(g, n);
    auto m = Var(g, n);
    auto v = Var(g, n);
    auto zero = Zeros(g, n);
    test::graph::Assign(g, var, zero);
    test::graph::Assign(g, m, zero);
    test::graph::Assign(g, v, zero);
    *init_g = g;
  }
  {
    Graph* g = new Graph(OpRegistry::Global());
    auto var = Var(g, n);
    auto m = Var(g, n);
    auto v = Var(g, n);
    auto beta1_power = Scalar(g, 0.9);
    auto beta2_power = Scalar(g, 0.99);
    auto lr = Scalar(g, 0.01);
    auto beta1 = Scalar(g, 0.9);
    auto beta2 = Scalar(g, 0.99);
    auto epsilon = Scalar(g, 1e-8);
    auto grad = Random(g, n);
    test::graph::Multi(
        g, "ApplyAdam",
        {var, m, v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad});
    *train_g = g;
  }
}

static void BM_Adam(int iters, int params) {
  const int64 tot = static_cast<int64>(iters) * params;
  testing::ItemsProcessed(tot);
  testing::BytesProcessed(tot * sizeof(float));
  Graph* init;
  Graph* train;
  Adam(params, &init, &train);
  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
}
BENCHMARK(BM_Adam)->Arg(128 << 10)->Arg(256 << 10);

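// RMSProp keeps a moving average of squared gradients (ms) and a momentum
// buffer (mom). Per the op's documented semantics, ApplyRMSProp performs,
// element-wise:
//   ms   = rho * ms + (1 - rho) * grad * grad
//   mom  = momentum * mom + lr * grad / sqrt(ms + epsilon)
//   var -= mom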
static void RMSProp(int32 n, Graph** init_g, Graph** train_g) {
  TensorShape shape({n});
  {
    Graph* g = new Graph(OpRegistry::Global());
    auto var = Var(g, n);
    auto ms = Var(g, n);
    auto mom = Var(g, n);
    auto zero = Zeros(g, n);
    test::graph::Assign(g, var, zero);
    test::graph::Assign(g, ms, zero);
    test::graph::Assign(g, mom, zero);
    *init_g = g;
  }
  {
    Graph* g = new Graph(OpRegistry::Global());
    auto var = Var(g, n);
    auto ms = Var(g, n);
    auto mom = Var(g, n);
    auto lr = Scalar(g, 0.01);
    auto rho = Scalar(g, 0.9);
    auto momentum = Scalar(g, 0.9);
    auto epsilon = Scalar(g, 1e-8);
    auto grad = Random(g, n);
    test::graph::Multi(g, "ApplyRMSProp",
                       {var, ms, mom, lr, rho, momentum, epsilon, grad});
    *train_g = g;
  }
}

static void BM_RMSProp(int iters, int params) {
  const int64 tot = static_cast<int64>(iters) * params;
  testing::ItemsProcessed(tot);
  testing::BytesProcessed(tot * sizeof(float));
  Graph* init;
  Graph* train;
  RMSProp(params, &init, &train);
  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
}
BENCHMARK(BM_RMSProp)->Arg(128 << 10)->Arg(256 << 10);

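// AddSign scales the gradient by a term based on whether sign(grad) and
// sign(m) agree. Per the op's documented semantics, ApplyAddSign performs,
// element-wise:
//   m    = beta * m + (1 - beta) * grad
//   var -= lr * (alpha + sign_decay * sign(grad) * sign(m)) * grad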
static void AddSign(int32 n, Graph** init_g, Graph** train_g) {
  TensorShape shape({n});
  {
    Graph* g = new Graph(OpRegistry::Global());
    auto var = Var(g, n);
    auto m = Var(g, n);
    auto zero = Zeros(g, n);
    test::graph::Assign(g, var, zero);
    test::graph::Assign(g, m, zero);
    *init_g = g;
  }
  {
    Graph* g = new Graph(OpRegistry::Global());
    auto var = Var(g, n);
    auto m = Var(g, n);
    auto lr = Scalar(g, 0.01);
    auto alpha = Scalar(g, 0.1);
    auto sign_decay = Scalar(g, 0.9);
    auto beta = Scalar(g, 0.8);
    auto grad = Random(g, n);
    test::graph::Multi(g, "ApplyAddSign",
                       {var, m, lr, alpha, sign_decay, beta, grad});
    *train_g = g;
  }
}

static void BM_AddSign(int iters, int params) {
  const int64 tot = static_cast<int64>(iters) * params;
  testing::ItemsProcessed(tot);
  testing::BytesProcessed(tot * sizeof(float));
  Graph* init;
  Graph* train;
  AddSign(params, &init, &train);
  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
}
BENCHMARK(BM_AddSign)->Arg(128 << 10)->Arg(256 << 10);

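// PowerSign is the multiplicative counterpart of AddSign. Per the op's
// documented semantics, ApplyPowerSign performs, element-wise:
//   m    = beta * m + (1 - beta) * grad
//   var -= lr * exp(logbase * sign_decay * sign(grad) * sign(m)) * grad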
static void PowerSign(int32 n, Graph** init_g, Graph** train_g) {
  TensorShape shape({n});
  {
    Graph* g = new Graph(OpRegistry::Global());
    auto var = Var(g, n);
    auto m = Var(g, n);
    auto zero = Zeros(g, n);
    test::graph::Assign(g, var, zero);
    test::graph::Assign(g, m, zero);
    *init_g = g;
  }
  {
    Graph* g = new Graph(OpRegistry::Global());
    auto var = Var(g, n);
    auto m = Var(g, n);
    auto lr = Scalar(g, 0.01);
    auto logbase = Scalar(g, 2);
    auto sign_decay = Scalar(g, 0.9);
    auto beta = Scalar(g, 0.8);
    auto grad = Random(g, n);
    test::graph::Multi(g, "ApplyPowerSign",
                       {var, m, lr, logbase, sign_decay, beta, grad});
    *train_g = g;
  }
}

static void BM_PowerSign(int iters, int params) {
  const int64 tot = static_cast<int64>(iters) * params;
  testing::ItemsProcessed(tot);
  testing::BytesProcessed(tot * sizeof(float));
  Graph* init;
  Graph* train;
  PowerSign(params, &init, &train);
  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
}
BENCHMARK(BM_PowerSign)->Arg(128 << 10)->Arg(256 << 10);

}  // end namespace tensorflow