Home | History | Annotate | Download | only in kernels
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
     17 #include "tensorflow/core/framework/fake_input.h"
     18 #include "tensorflow/core/framework/node_def_builder.h"
     19 #include "tensorflow/core/framework/tensor.h"
     20 #include "tensorflow/core/framework/types.pb.h"
     21 #include "tensorflow/core/kernels/ops_testutil.h"
     22 #include "tensorflow/core/kernels/ops_util.h"
     23 #include "tensorflow/core/platform/test.h"
     24 #include "tensorflow/core/platform/test_benchmark.h"
     25 
     26 using Eigen::half;
     27 
     28 namespace tensorflow {
     29 
     30 template <typename Src, typename Dst>
     31 static Graph* Cast(int num) {
     32   Graph* g = new Graph(OpRegistry::Global());
     33   Tensor data(DataTypeToEnum<Src>::value,
     34               TensorShape({64, 64, num / (64 * 64)}));
     35   data.flat<Src>().setRandom();
     36   test::graph::Cast(g, test::graph::Constant(g, data),
     37                     DataTypeToEnum<Dst>::value);
     38   return g;
     39 }
     40 
     41 class CastOpTest : public OpsTestBase {
     42  protected:
     43   void MakeOp(DataType src, DataType dst) {
     44     TF_EXPECT_OK(NodeDefBuilder("cast_op", "Cast")
     45                      .Input(FakeInput(src))
     46                      .Attr("SrcT", src)
     47                      .Attr("DstT", dst)
     48                      .Finalize(node_def()));
     49     TF_EXPECT_OK(InitOp());
     50   }
     51 
     52   template <typename INPUT, typename OUTPUT>
     53   void CheckCast() {
     54     DataType in_type = DataTypeToEnum<INPUT>::v();
     55     DataType out_type = DataTypeToEnum<OUTPUT>::v();
     56     MakeOp(in_type, out_type);
     57     AddInputFromArray<INPUT>(TensorShape({1, 2, 2, 1}),
     58                              {INPUT(1), INPUT(2), INPUT(3), INPUT(4)});
     59     TF_ASSERT_OK(RunOpKernel());
     60     Tensor expected(allocator(), out_type, TensorShape({1, 2, 2, 1}));
     61     test::FillValues<OUTPUT>(&expected,
     62                              {OUTPUT(1), OUTPUT(2), OUTPUT(3), OUTPUT(4)});
     63     test::ExpectTensorEqual<OUTPUT>(expected, *GetOutput(0));
     64   }
     65 };
     66 
     67 #define TEST_CAST(in, out) \
     68   TEST_F(CastOpTest, TestCast##_##in##_##out) { CheckCast<in, out>(); }
     69 
     70 #define TEST_ALL_CASTS_FROM(in) \
     71   TEST_CAST(in, uint8);         \
     72   TEST_CAST(in, uint16);        \
     73   TEST_CAST(in, int16);         \
     74   TEST_CAST(in, int32);         \
     75   TEST_CAST(in, int64);         \
     76   TEST_CAST(in, half);          \
     77   TEST_CAST(in, float);         \
     78   TEST_CAST(in, double)
     79 
     80 TEST_ALL_CASTS_FROM(uint8)
     81 TEST_ALL_CASTS_FROM(uint16)
     82 TEST_ALL_CASTS_FROM(int16)
     83 TEST_ALL_CASTS_FROM(int32)
     84 TEST_ALL_CASTS_FROM(int64)
     85 TEST_ALL_CASTS_FROM(half)
     86 TEST_ALL_CASTS_FROM(float)
     87 TEST_ALL_CASTS_FROM(double)
     88 
     89 #undef TEST_ALL_CASTS_FROM
     90 #undef TEST_CAST
     91 
     // TODO(wicke): check conversions from/to bool and bfloat16
     93 
     94 static void BM_cpu_float_int64(int iters, int num) {
     95   testing::ItemsProcessed(static_cast<int64>(iters) * num);
     96   testing::BytesProcessed(static_cast<int64>(iters) * num *
     97                           (sizeof(float) + sizeof(int64)));
     98   testing::UseRealTime();
     99   test::Benchmark("cpu", Cast<float, int64>(num)).Run(iters);
    100 }
    101 BENCHMARK(BM_cpu_float_int64)->Arg(64 << 10)->Arg(32 << 20);
    102 
    103 static void BM_gpu_float_int64(int iters, int num) {
    104   testing::ItemsProcessed(static_cast<int64>(iters) * num);
    105   testing::BytesProcessed(static_cast<int64>(iters) * num *
    106                           (sizeof(float) + sizeof(int64)));
    107   testing::UseRealTime();
    108 #if GOOGLE_CUDA
    109   test::Benchmark("gpu", Cast<float, int64>(num)).Run(iters);
    110 #endif  // GOOGLE_CUDA
    111 #ifdef TENSORFLOW_USE_SYCL
    112   test::Benchmark("sycl", Cast<float, int64>(num)).Run(iters);
    113 #endif  // TENSORFLOW_USE_SYCL
    114 }
    115 BENCHMARK(BM_gpu_float_int64)->Arg(64 << 10)->Arg(32 << 20);
    116 
    117 static void BM_cpu_bool_float(int iters, int num) {
    118   testing::ItemsProcessed(static_cast<int64>(iters) * num);
    119   testing::BytesProcessed(static_cast<int64>(iters) * num *
    120                           (sizeof(bool) + sizeof(float)));
    121   testing::UseRealTime();
    122   test::Benchmark("cpu", Cast<bool, float>(num)).Run(iters);
    123 }
    124 BENCHMARK(BM_cpu_bool_float)->Arg(64 << 10)->Arg(32 << 20);
    125 
    126 static void BM_gpu_bool_float(int iters, int num) {
    127   testing::ItemsProcessed(static_cast<int64>(iters) * num);
    128   testing::BytesProcessed(static_cast<int64>(iters) * num *
    129                           (sizeof(bool) + sizeof(float)));
    130   testing::UseRealTime();
    131 #if GOOGLE_CUDA
    132   test::Benchmark("gpu", Cast<bool, float>(num)).Run(iters);
    133 #endif  // GOOGLE_CUDA
    134 #ifdef TENSORFLOW_USE_SYCL
    135   test::Benchmark("sycl", Cast<bool, float>(num)).Run(iters);
    136 #endif  // TENSORFLOW_USE_SYCL
    137 }
    138 BENCHMARK(BM_gpu_bool_float)->Arg(64 << 10)->Arg(32 << 20);
    139 
    140 static void BM_cpu_float_bfloat16(int iters, int num) {
    141   testing::ItemsProcessed(static_cast<int64>(iters) * num);
    142   testing::BytesProcessed(static_cast<int64>(iters) * num *
    143                           (sizeof(float) + sizeof(bfloat16)));
    144   testing::UseRealTime();
    145   test::Benchmark("cpu", Cast<float, bfloat16>(num)).Run(iters);
    146 }
    147 BENCHMARK(BM_cpu_float_bfloat16)->Arg(64 << 10)->Arg(32 << 20);
    148 
    149 static void BM_cpu_bfloat16_float(int iters, int num) {
    150   testing::ItemsProcessed(static_cast<int64>(iters) * num);
    151   testing::BytesProcessed(static_cast<int64>(iters) * num *
    152                           (sizeof(float) + sizeof(bfloat16)));
    153   testing::UseRealTime();
    154   test::Benchmark("cpu", Cast<bfloat16, float>(num)).Run(iters);
    155 }
    156 BENCHMARK(BM_cpu_bfloat16_float)->Arg(64 << 10)->Arg(32 << 20);
    157 
    158 static void BM_cpu_float_half(int iters, int num) {
    159   testing::ItemsProcessed(static_cast<int64>(iters) * num);
    160   testing::BytesProcessed(static_cast<int64>(iters) * num *
    161                           (sizeof(float) + sizeof(Eigen::half)));
    162   testing::UseRealTime();
    163   test::Benchmark("cpu", Cast<float, Eigen::half>(num)).Run(iters);
    164 }
    165 BENCHMARK(BM_cpu_float_half)->Arg(64 << 10)->Arg(32 << 20);
    166 
    167 static void BM_cpu_half_float(int iters, int num) {
    168   testing::ItemsProcessed(static_cast<int64>(iters) * num);
    169   testing::BytesProcessed(static_cast<int64>(iters) * num *
    170                           (sizeof(float) + sizeof(Eigen::half)));
    171   testing::UseRealTime();
    172   test::Benchmark("cpu", Cast<Eigen::half, float>(num)).Run(iters);
    173 }
    174 BENCHMARK(BM_cpu_half_float)->Arg(64 << 10)->Arg(32 << 20);
    175 
    176 static void BM_gpu_float_half(int iters, int num) {
    177   testing::ItemsProcessed(static_cast<int64>(iters) * num);
    178   testing::BytesProcessed(static_cast<int64>(iters) * num *
    179                           (sizeof(float) + sizeof(Eigen::half)));
    180   testing::UseRealTime();
    181 #if GOOGLE_CUDA
    182   test::Benchmark("gpu", Cast<float, Eigen::half>(num)).Run(iters);
    183 #endif  // GOOGLE_CUDA
    184 }
    185 BENCHMARK(BM_gpu_float_half)->Arg(64 << 10)->Arg(32 << 20);
    186 
    187 static void BM_gpu_half_float(int iters, int num) {
    188   testing::ItemsProcessed(static_cast<int64>(iters) * num);
    189   testing::BytesProcessed(static_cast<int64>(iters) * num *
    190                           (sizeof(float) + sizeof(Eigen::half)));
    191   testing::UseRealTime();
    192 #if GOOGLE_CUDA
    193   test::Benchmark("gpu", Cast<Eigen::half, float>(num)).Run(iters);
    194 #endif  // GOOGLE_CUDA
    195 }
    196 BENCHMARK(BM_gpu_half_float)->Arg(64 << 10)->Arg(32 << 20);
    197 
    198 }  // end namespace tensorflow
    199