1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include "tensorflow/core/platform/device_tracer.h" 17 18 #include <map> 19 #include <memory> 20 #include <string> 21 #include <unordered_map> 22 #include <vector> 23 24 #include "tensorflow/core/common_runtime/direct_session.h" 25 #include "tensorflow/core/common_runtime/step_stats_collector.h" 26 #include "tensorflow/core/framework/allocator.h" 27 #include "tensorflow/core/framework/graph.pb.h" 28 #include "tensorflow/core/framework/tensor.h" 29 #include "tensorflow/core/framework/tensor_testutil.h" 30 #include "tensorflow/core/framework/types.pb.h" 31 #include "tensorflow/core/graph/graph.h" 32 #include "tensorflow/core/graph/testlib.h" 33 #include "tensorflow/core/kernels/ops_util.h" 34 #include "tensorflow/core/lib/core/errors.h" 35 #include "tensorflow/core/lib/core/status.h" 36 #include "tensorflow/core/lib/core/status_test_util.h" 37 #include "tensorflow/core/lib/core/threadpool.h" 38 #include "tensorflow/core/platform/test.h" 39 #include "tensorflow/core/public/session_options.h" 40 #include "tensorflow/core/util/device_name_utils.h" 41 42 namespace tensorflow { 43 namespace { 44 45 std::unique_ptr<Session> CreateSession() { 46 SessionOptions options; 47 (*options.config.mutable_device_count())["CPU"] = 1; 48 (*options.config.mutable_device_count())["GPU"] = 1; 49 options.config.set_allow_soft_placement(true); 50 return std::unique_ptr<Session>(NewSession(options)); 51 } 52 53 class DeviceTracerTest : public ::testing::Test { 54 public: 55 void Initialize(std::initializer_list<float> a_values) { 56 Graph graph(OpRegistry::Global()); 57 58 Tensor a_tensor(DT_FLOAT, TensorShape({2, 2})); 59 test::FillValues<float>(&a_tensor, a_values); 60 Node* a = test::graph::Constant(&graph, a_tensor); 61 a->set_assigned_device_name("/job:localhost/replica:0/task:0/cpu:0"); 62 63 Tensor x_tensor(DT_FLOAT, TensorShape({2, 1})); 64 test::FillValues<float>(&x_tensor, {1, 1}); 65 Node* x = test::graph::Constant(&graph, x_tensor); 66 x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0"); 67 x_ = x->name(); 68 69 // y = A * x 70 Node* y = test::graph::Matmul(&graph, a, x, false, false); 71 y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0"); 72 y_ = y->name(); 73 74 // Use an Identity op to force a memcpy to CPU and back to GPU. 75 Node* i = test::graph::Identity(&graph, y); 76 i->set_assigned_device_name("/job:localhost/replica:0/task:0/cpu:0"); 77 78 Node* y_neg = test::graph::Unary(&graph, "Neg", i); 79 y_neg_ = y_neg->name(); 80 y_neg->set_assigned_device_name( 81 "/job:localhost/replica:0/task:0/device:GPU:0"); 82 83 test::graph::ToGraphDef(&graph, &def_); 84 } 85 86 protected: 87 void ExpectFailure(const Status& status, error::Code code) { 88 EXPECT_FALSE(status.ok()) << status.ToString(); 89 if (!status.ok()) { 90 LOG(INFO) << "Status message: " << status.error_message(); 91 EXPECT_EQ(code, status.code()) << status.ToString(); 92 } 93 } 94 95 string x_; 96 string y_; 97 string y_neg_; 98 GraphDef def_; 99 }; 100 101 TEST_F(DeviceTracerTest, StartStop) { 102 std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer()); 103 if (!tracer) return; 104 TF_EXPECT_OK(tracer->Start()); 105 TF_EXPECT_OK(tracer->Stop()); 106 } 107 108 TEST_F(DeviceTracerTest, StopBeforeStart) { 109 std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer()); 110 if (!tracer) return; 111 TF_EXPECT_OK(tracer->Stop()); 112 TF_EXPECT_OK(tracer->Stop()); 113 } 114 115 TEST_F(DeviceTracerTest, CollectBeforeStart) { 116 std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer()); 117 if (!tracer) return; 118 StepStats stats; 119 StepStatsCollector collector(&stats); 120 TF_EXPECT_OK(tracer->Collect(&collector)); 121 EXPECT_EQ(stats.dev_stats_size(), 0); 122 } 123 124 TEST_F(DeviceTracerTest, CollectBeforeStop) { 125 std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer()); 126 if (!tracer) return; 127 TF_EXPECT_OK(tracer->Start()); 128 StepStats stats; 129 StepStatsCollector collector(&stats); 130 Status status = tracer->Collect(&collector); 131 ExpectFailure(status, tensorflow::error::FAILED_PRECONDITION); 132 TF_EXPECT_OK(tracer->Stop()); 133 } 134 135 TEST_F(DeviceTracerTest, StartTwoTracers) { 136 std::unique_ptr<DeviceTracer> tracer1(CreateDeviceTracer()); 137 std::unique_ptr<DeviceTracer> tracer2(CreateDeviceTracer()); 138 if (!tracer1 || !tracer2) return; 139 140 TF_EXPECT_OK(tracer1->Start()); 141 Status status = tracer2->Start(); 142 ExpectFailure(status, tensorflow::error::UNAVAILABLE); 143 TF_EXPECT_OK(tracer1->Stop()); 144 TF_EXPECT_OK(tracer2->Start()); 145 TF_EXPECT_OK(tracer2->Stop()); 146 } 147 148 TEST_F(DeviceTracerTest, RunWithTracer) { 149 // On non-GPU platforms, we may not support DeviceTracer. 150 std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer()); 151 if (!tracer) return; 152 153 Initialize({3, 2, -1, 0}); 154 auto session = CreateSession(); 155 ASSERT_TRUE(session != nullptr); 156 TF_ASSERT_OK(session->Create(def_)); 157 std::vector<std::pair<string, Tensor>> inputs; 158 159 // Request two targets: one fetch output and one non-fetched output. 160 std::vector<string> output_names = {y_ + ":0"}; 161 std::vector<string> target_nodes = {y_neg_}; 162 std::vector<Tensor> outputs; 163 164 TF_ASSERT_OK(tracer->Start()); 165 Status s = session->Run(inputs, output_names, target_nodes, &outputs); 166 TF_ASSERT_OK(s); 167 TF_ASSERT_OK(tracer->Stop()); 168 ASSERT_EQ(1, outputs.size()); 169 // The first output should be initialized and have the correct 170 // output. 171 auto mat = outputs[0].matrix<float>(); 172 ASSERT_TRUE(outputs[0].IsInitialized()); 173 EXPECT_FLOAT_EQ(5.0, mat(0, 0)); 174 } 175 176 TEST_F(DeviceTracerTest, TraceToStepStatsCollector) { 177 std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer()); 178 if (!tracer) return; 179 180 Initialize({3, 2, -1, 0}); 181 auto session = CreateSession(); 182 ASSERT_TRUE(session != nullptr); 183 TF_ASSERT_OK(session->Create(def_)); 184 std::vector<std::pair<string, Tensor>> inputs; 185 186 // Request two targets: one fetch output and one non-fetched output. 187 std::vector<string> output_names = {y_ + ":0"}; 188 std::vector<string> target_nodes = {y_neg_}; 189 std::vector<Tensor> outputs; 190 191 TF_ASSERT_OK(tracer->Start()); 192 Status s = session->Run(inputs, output_names, target_nodes, &outputs); 193 TF_ASSERT_OK(s); 194 195 TF_ASSERT_OK(tracer->Stop()); 196 StepStats stats; 197 StepStatsCollector collector(&stats); 198 TF_ASSERT_OK(tracer->Collect(&collector)); 199 collector.Finalize(); 200 // Depending on whether this runs on CPU or GPU, we will have a 201 // different number of devices. 202 EXPECT_GE(stats.dev_stats_size(), 1) << "Saw stats: " << stats.DebugString(); 203 } 204 205 TEST_F(DeviceTracerTest, RunWithTraceOption) { 206 Initialize({3, 2, -1, 0}); 207 auto session = CreateSession(); 208 ASSERT_TRUE(session != nullptr); 209 TF_ASSERT_OK(session->Create(def_)); 210 std::vector<std::pair<string, Tensor>> inputs; 211 212 // Request two targets: one fetch output and one non-fetched output. 213 std::vector<string> output_names = {y_ + ":0"}; 214 std::vector<string> target_nodes = {y_neg_}; 215 std::vector<Tensor> outputs; 216 217 // Prepares RunOptions and RunOutputs 218 RunOptions run_options; 219 run_options.set_trace_level(RunOptions::FULL_TRACE); 220 RunMetadata run_metadata; 221 Status s = session->Run(run_options, inputs, output_names, target_nodes, 222 &outputs, &run_metadata); 223 TF_ASSERT_OK(s); 224 ASSERT_TRUE(run_metadata.has_step_stats()); 225 // Depending on whether this runs on CPU or GPU, we will have a 226 // different number of devices. 227 EXPECT_GE(run_metadata.step_stats().dev_stats_size(), 1); 228 } 229 230 } // namespace 231 } // namespace tensorflow 232