// NOTE: code-search viewer chrome removed (was: "Home | History | Annotate | Download | only in platform").
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #include "tensorflow/core/platform/device_tracer.h"
     17 
     18 #include <map>
     19 #include <memory>
     20 #include <string>
     21 #include <unordered_map>
     22 #include <vector>
     23 
     24 #include "tensorflow/core/common_runtime/direct_session.h"
     25 #include "tensorflow/core/common_runtime/step_stats_collector.h"
     26 #include "tensorflow/core/framework/allocator.h"
     27 #include "tensorflow/core/framework/graph.pb.h"
     28 #include "tensorflow/core/framework/tensor.h"
     29 #include "tensorflow/core/framework/tensor_testutil.h"
     30 #include "tensorflow/core/framework/types.pb.h"
     31 #include "tensorflow/core/graph/graph.h"
     32 #include "tensorflow/core/graph/testlib.h"
     33 #include "tensorflow/core/kernels/ops_util.h"
     34 #include "tensorflow/core/lib/core/errors.h"
     35 #include "tensorflow/core/lib/core/status.h"
     36 #include "tensorflow/core/lib/core/status_test_util.h"
     37 #include "tensorflow/core/lib/core/threadpool.h"
     38 #include "tensorflow/core/platform/test.h"
     39 #include "tensorflow/core/public/session_options.h"
     40 #include "tensorflow/core/util/device_name_utils.h"
     41 
     42 namespace tensorflow {
     43 namespace {
     44 
     45 std::unique_ptr<Session> CreateSession() {
     46   SessionOptions options;
     47   (*options.config.mutable_device_count())["CPU"] = 1;
     48   (*options.config.mutable_device_count())["GPU"] = 1;
     49   options.config.set_allow_soft_placement(true);
     50   return std::unique_ptr<Session>(NewSession(options));
     51 }
     52 
     53 class DeviceTracerTest : public ::testing::Test {
     54  public:
     55   void Initialize(std::initializer_list<float> a_values) {
     56     Graph graph(OpRegistry::Global());
     57 
     58     Tensor a_tensor(DT_FLOAT, TensorShape({2, 2}));
     59     test::FillValues<float>(&a_tensor, a_values);
     60     Node* a = test::graph::Constant(&graph, a_tensor);
     61     a->set_assigned_device_name("/job:localhost/replica:0/task:0/cpu:0");
     62 
     63     Tensor x_tensor(DT_FLOAT, TensorShape({2, 1}));
     64     test::FillValues<float>(&x_tensor, {1, 1});
     65     Node* x = test::graph::Constant(&graph, x_tensor);
     66     x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
     67     x_ = x->name();
     68 
     69     // y = A * x
     70     Node* y = test::graph::Matmul(&graph, a, x, false, false);
     71     y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
     72     y_ = y->name();
     73 
     74     // Use an Identity op to force a memcpy to CPU and back to GPU.
     75     Node* i = test::graph::Identity(&graph, y);
     76     i->set_assigned_device_name("/job:localhost/replica:0/task:0/cpu:0");
     77 
     78     Node* y_neg = test::graph::Unary(&graph, "Neg", i);
     79     y_neg_ = y_neg->name();
     80     y_neg->set_assigned_device_name(
     81         "/job:localhost/replica:0/task:0/device:GPU:0");
     82 
     83     test::graph::ToGraphDef(&graph, &def_);
     84   }
     85 
     86  protected:
     87   void ExpectFailure(const Status& status, error::Code code) {
     88     EXPECT_FALSE(status.ok()) << status.ToString();
     89     if (!status.ok()) {
     90       LOG(INFO) << "Status message: " << status.error_message();
     91       EXPECT_EQ(code, status.code()) << status.ToString();
     92     }
     93   }
     94 
     95   string x_;
     96   string y_;
     97   string y_neg_;
     98   GraphDef def_;
     99 };
    100 
    101 TEST_F(DeviceTracerTest, StartStop) {
    102   std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer());
    103   if (!tracer) return;
    104   TF_EXPECT_OK(tracer->Start());
    105   TF_EXPECT_OK(tracer->Stop());
    106 }
    107 
    108 TEST_F(DeviceTracerTest, StopBeforeStart) {
    109   std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer());
    110   if (!tracer) return;
    111   TF_EXPECT_OK(tracer->Stop());
    112   TF_EXPECT_OK(tracer->Stop());
    113 }
    114 
    115 TEST_F(DeviceTracerTest, CollectBeforeStart) {
    116   std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer());
    117   if (!tracer) return;
    118   StepStats stats;
    119   StepStatsCollector collector(&stats);
    120   TF_EXPECT_OK(tracer->Collect(&collector));
    121   EXPECT_EQ(stats.dev_stats_size(), 0);
    122 }
    123 
    124 TEST_F(DeviceTracerTest, CollectBeforeStop) {
    125   std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer());
    126   if (!tracer) return;
    127   TF_EXPECT_OK(tracer->Start());
    128   StepStats stats;
    129   StepStatsCollector collector(&stats);
    130   Status status = tracer->Collect(&collector);
    131   ExpectFailure(status, tensorflow::error::FAILED_PRECONDITION);
    132   TF_EXPECT_OK(tracer->Stop());
    133 }
    134 
    135 TEST_F(DeviceTracerTest, StartTwoTracers) {
    136   std::unique_ptr<DeviceTracer> tracer1(CreateDeviceTracer());
    137   std::unique_ptr<DeviceTracer> tracer2(CreateDeviceTracer());
    138   if (!tracer1 || !tracer2) return;
    139 
    140   TF_EXPECT_OK(tracer1->Start());
    141   Status status = tracer2->Start();
    142   ExpectFailure(status, tensorflow::error::UNAVAILABLE);
    143   TF_EXPECT_OK(tracer1->Stop());
    144   TF_EXPECT_OK(tracer2->Start());
    145   TF_EXPECT_OK(tracer2->Stop());
    146 }
    147 
    148 TEST_F(DeviceTracerTest, RunWithTracer) {
    149   // On non-GPU platforms, we may not support DeviceTracer.
    150   std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer());
    151   if (!tracer) return;
    152 
    153   Initialize({3, 2, -1, 0});
    154   auto session = CreateSession();
    155   ASSERT_TRUE(session != nullptr);
    156   TF_ASSERT_OK(session->Create(def_));
    157   std::vector<std::pair<string, Tensor>> inputs;
    158 
    159   // Request two targets: one fetch output and one non-fetched output.
    160   std::vector<string> output_names = {y_ + ":0"};
    161   std::vector<string> target_nodes = {y_neg_};
    162   std::vector<Tensor> outputs;
    163 
    164   TF_ASSERT_OK(tracer->Start());
    165   Status s = session->Run(inputs, output_names, target_nodes, &outputs);
    166   TF_ASSERT_OK(s);
    167   TF_ASSERT_OK(tracer->Stop());
    168   ASSERT_EQ(1, outputs.size());
    169   // The first output should be initialized and have the correct
    170   // output.
    171   auto mat = outputs[0].matrix<float>();
    172   ASSERT_TRUE(outputs[0].IsInitialized());
    173   EXPECT_FLOAT_EQ(5.0, mat(0, 0));
    174 }
    175 
    176 TEST_F(DeviceTracerTest, TraceToStepStatsCollector) {
    177   std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer());
    178   if (!tracer) return;
    179 
    180   Initialize({3, 2, -1, 0});
    181   auto session = CreateSession();
    182   ASSERT_TRUE(session != nullptr);
    183   TF_ASSERT_OK(session->Create(def_));
    184   std::vector<std::pair<string, Tensor>> inputs;
    185 
    186   // Request two targets: one fetch output and one non-fetched output.
    187   std::vector<string> output_names = {y_ + ":0"};
    188   std::vector<string> target_nodes = {y_neg_};
    189   std::vector<Tensor> outputs;
    190 
    191   TF_ASSERT_OK(tracer->Start());
    192   Status s = session->Run(inputs, output_names, target_nodes, &outputs);
    193   TF_ASSERT_OK(s);
    194 
    195   TF_ASSERT_OK(tracer->Stop());
    196   StepStats stats;
    197   StepStatsCollector collector(&stats);
    198   TF_ASSERT_OK(tracer->Collect(&collector));
    199   collector.Finalize();
    200   // Depending on whether this runs on CPU or GPU, we will have a
    201   // different number of devices.
    202   EXPECT_GE(stats.dev_stats_size(), 1) << "Saw stats: " << stats.DebugString();
    203 }
    204 
    205 TEST_F(DeviceTracerTest, RunWithTraceOption) {
    206   Initialize({3, 2, -1, 0});
    207   auto session = CreateSession();
    208   ASSERT_TRUE(session != nullptr);
    209   TF_ASSERT_OK(session->Create(def_));
    210   std::vector<std::pair<string, Tensor>> inputs;
    211 
    212   // Request two targets: one fetch output and one non-fetched output.
    213   std::vector<string> output_names = {y_ + ":0"};
    214   std::vector<string> target_nodes = {y_neg_};
    215   std::vector<Tensor> outputs;
    216 
    217   // Prepares RunOptions and RunOutputs
    218   RunOptions run_options;
    219   run_options.set_trace_level(RunOptions::FULL_TRACE);
    220   RunMetadata run_metadata;
    221   Status s = session->Run(run_options, inputs, output_names, target_nodes,
    222                           &outputs, &run_metadata);
    223   TF_ASSERT_OK(s);
    224   ASSERT_TRUE(run_metadata.has_step_stats());
    225   // Depending on whether this runs on CPU or GPU, we will have a
    226   // different number of devices.
    227   EXPECT_GE(run_metadata.step_stats().dev_stats_size(), 1);
    228 }
    229 
    230 }  // namespace
    231 }  // namespace tensorflow
    232