/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/platform/test.h"

#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/cc/profiler/profiler.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/graph/default_device.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/public/session.h"

namespace tensorflow {
namespace tfprof {

class ProfilerTest : public ::testing::Test {
 protected:
  ProfilerTest() {}
};

// Builds a small test graph: y = a * x, then y_normalized = y / ||y||.
GraphDef CreateGraphDef() {
  Scope root = Scope::NewRootScope();

  auto a = ops::Const<float>(root, {{3, 2}, {-1, 0}});

  auto x = ops::Const(root.WithOpName("x"), {{1.f}, {1.f}});

  auto y = ops::MatMul(root.WithOpName("y"), a, x);

  auto y2 = ops::Square(root, y);

  auto y2_sum = ops::Sum(root, y2, 0);

  auto y_norm = ops::Sqrt(root, y2_sum);

  auto y_div = ops::Div(root.WithOpName("y_normalized"), y, y_norm);

  GraphDef def;
  TF_CHECK_OK(root.ToGraphDef(&def));

  return def;
}

// Default profiler options: account and show every node, select exec micros.
Options Default() {
  Options opts(1000,        /* max_depth */
               0,           /* min_bytes */
               0,           /* min_peak_bytes */
               0,           /* min_residual_bytes */
               0,           /* min_output_bytes */
               0,           /* min_micros */
               0,           /* min_accelerator_micros */
               0,           /* min_cpu_micros */
               0,           /* min_params */
               0,           /* min_float_ops */
               0,           /* min_occurrence */
               0,           /* step */
               "name",      /* order_by */
               {".*"},      /* account_type_regexes */
               {".*"},      /* start_name_regexes */
               {},          /* trim_name_regexes */
               {".*"},      /* show_name_regexes */
               {},          /* hide_name_regexes */
               false,       /* account_displayed_op_only */
               {"micros"},  /* select */
               "none",      /* output_type */
               {});
  return opts;
}

// Recursively searches the profile tree for the node with the given name.
// Returns nullptr if no such node exists.
template <typename T>
const T* ExtractNode(const T& pb, const string& name) {
  if (pb.name() == name) {
    return &pb;
  }
  for (const T& c : pb.children()) {
    const T* ret = ExtractNode(c, name);
    if (ret) return ret;
  }
  return nullptr;
}

TEST_F(ProfilerTest, Basics) {
  SessionOptions options;
  options.config.set_allow_soft_placement(true);
  std::unique_ptr<Session> session(NewSession(options));
  GraphDef def = CreateGraphDef();
  if (options.target.empty()) {
    graph::SetDefaultDevice("/gpu:0", &def);
  }

  TF_CHECK_OK(session->Create(def));

  // Build a random feed vector and normalize it to unit L2 norm.
  Tensor x(DT_FLOAT, TensorShape({2, 1}));
  auto x_flat = x.flat<float>();
  x_flat.setRandom();
  Eigen::Tensor<float, 0, Eigen::RowMajor> inv_norm =
      x_flat.square().sum().sqrt().inverse();
  x_flat = x_flat * inv_norm();

  std::vector<Tensor> outputs;
  RunOptions run_options;
  run_options.set_trace_level(RunOptions::FULL_TRACE);
  RunMetadata run_metadata;
  outputs.clear();

  // Run the graph twice with full tracing and feed each step's RunMetadata
  // to the profiler.
  Profiler profiler(def);
  for (int i = 0; i < 2; ++i) {
    TF_CHECK_OK(session->Run(run_options, {{"x", x}}, {"y:0", "y_normalized:0"},
                             {}, &outputs, &run_metadata));
    profiler.AddStep(i, run_metadata);
    CHECK_EQ(size_t{2}, outputs.size());
  }

  std::vector<DeviceAttributes> resp;
  TF_CHECK_OK(session->ListDevices(&resp));
  bool has_gpu = false;
  for (const auto& dev : resp) {
    if (dev.device_type() == "GPU") {
      has_gpu = true;
    }
  }

  GraphNodeProto ret = profiler.ProfileNameScope(Default());
  const GraphNodeProto* matmul = ExtractNode(ret, "y");
  EXPECT_TRUE(matmul);
  EXPECT_GT(matmul->exec_micros(), 0);
  if (has_gpu) {
    EXPECT_GT(matmul->accelerator_exec_micros(), 0);
  } else {
    EXPECT_EQ(matmul->accelerator_exec_micros(), 0);
  }
  const GraphNodeProto* square = ExtractNode(ret, "Square");
  EXPECT_TRUE(square);
  EXPECT_GT(square->exec_micros(), 0);
  if (has_gpu) {
    EXPECT_GT(square->accelerator_exec_micros(), 0);
  } else {
    EXPECT_EQ(square->accelerator_exec_micros(), 0);
  }

  Options opts2 = Default();
  opts2.output_type = "timeline";
  string timeline_file = io::JoinPath(testing::TmpDir(), "timeline");
  opts2.output_options["outfile"] = timeline_file;
  GraphNodeProto ret2 = profiler.ProfileGraph(opts2);
  string s;
  TF_CHECK_OK(ReadFileToString(Env::Default(), timeline_file + "_0", &s));
  EXPECT_TRUE(s.find("Square") != s.npos);

  MultiGraphNodeProto ret3 = profiler.ProfileOperations(Default());
  const MultiGraphNodeProto* matmul2 = ExtractNode(ret3, "MatMul");
  EXPECT_TRUE(matmul2);
  EXPECT_GT(matmul2->exec_micros(), 0);
  if (has_gpu) {
    EXPECT_GT(matmul2->accelerator_exec_micros(), 0);
  } else {
    EXPECT_EQ(matmul2->accelerator_exec_micros(), 0);
  }

  TF_CHECK_OK(session->Close());
}

}  // namespace tfprof
}  // namespace tensorflow