/* Copyright 2017 The TensorFlow Authors All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/contrib/tpu/profiler/dump_tpu_profile.h"

#include <cstdio>
#include <ctime>
#include <vector>

#include "tensorflow/contrib/tpu/profiler/op_profile.pb.h"
#include "tensorflow/contrib/tpu/profiler/trace_events.pb.h"
#include "tensorflow/contrib/tpu/profiler/trace_events_to_json.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/io/compression.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/io/zlib_compression_options.h"
#include "tensorflow/core/lib/io/zlib_outputbuffer.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/protobuf.h"
#include "tensorflow/core/protobuf/config.pb.h"
#include "tensorflow/core/util/event.pb.h"
#include "tensorflow/core/util/events_writer.h"

namespace tensorflow {
namespace tpu {
namespace {

using ::tensorflow::io::JoinPath;
using ::tensorflow::protobuf::util::JsonOptions;
using ::tensorflow::protobuf::util::MessageToJsonString;

constexpr char kGraphRunPrefix[] = "tpu_profiler.hlo_graph.";
constexpr char kJsonOpProfileFileName[] = "op_profile.json";
constexpr char kJsonTraceFileName[] = "trace.json.gz";
constexpr char kProfilePluginDirectory[] = "plugins/profile/";
constexpr char kProtoTraceFileName[] = "trace";

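// Writes |data| to |filename| as a gzip-compressed stream, using the default
// buffer sizes from ZlibCompressionOptions::GZIP().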
Status WriteGzippedDataToFile(const string& filename, const string& data) {
  std::unique_ptr<WritableFile> file;
  TF_RETURN_IF_ERROR(Env::Default()->NewWritableFile(filename, &file));
  io::ZlibCompressionOptions options = io::ZlibCompressionOptions::GZIP();
  io::ZlibOutputBuffer buffer(file.get(), options.input_buffer_size,
                              options.output_buffer_size, options);
  TF_RETURN_IF_ERROR(buffer.Init());
  TF_RETURN_IF_ERROR(buffer.Append(data));
  TF_RETURN_IF_ERROR(buffer.Close());
  TF_RETURN_IF_ERROR(file->Close());
  return Status::OK();
}

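// Writes the trace to |run_dir| twice: once as the raw serialized proto
// ("trace") and once as gzipped TraceViewer JSON ("trace.json.gz").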
Status DumpTraceToLogDirectory(StringPiece run_dir, const string& encoded_trace,
                               std::ostream* os) {
  string proto_path = JoinPath(run_dir, kProtoTraceFileName);
  TF_RETURN_IF_ERROR(
      WriteStringToFile(Env::Default(), proto_path, encoded_trace));
  LOG(INFO) << "Dumped raw-proto trace data to " << proto_path;

  string json_path = JoinPath(run_dir, kJsonTraceFileName);
  Trace trace;
  trace.ParseFromString(encoded_trace);
  *os << "Trace contains " << trace.trace_events_size() << " events."
      << std::endl;
  TF_RETURN_IF_ERROR(
      WriteGzippedDataToFile(json_path, TraceEventsToJson(trace)));
  *os << "Dumped JSON trace data to " << json_path << std::endl;
  return Status::OK();
}

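// Serializes |profile| to JSON (including primitive fields that hold default
// values) and writes it to "op_profile.json" under |run_dir|.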
Status DumpOpProfileToLogDirectory(StringPiece run_dir,
                                   const tpu::op_profile::Profile& profile,
                                   std::ostream* os) {
  string path = JoinPath(run_dir, kJsonOpProfileFileName);
  string json;
  JsonOptions options;
  options.always_print_primitive_fields = true;
  auto status = MessageToJsonString(profile, &json, options);
  if (!status.ok()) {
    return errors::Internal("Failed to convert op profile to JSON: ",
                            string(status.error_message()));
  }
  TF_RETURN_IF_ERROR(WriteStringToFile(Env::Default(), path, json));
  *os << "Dumped JSON op profile data to " << path << std::endl;
  return Status::OK();
}

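// Writes one tool's raw payload to a file named after the tool under
// |run_dir|.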
Status DumpToolDataToLogDirectory(StringPiece run_dir,
                                  const tensorflow::ProfileToolData& tool,
                                  std::ostream* os) {
  string path = JoinPath(run_dir, tool.name());
  TF_RETURN_IF_ERROR(WriteStringToFile(Env::Default(), path, tool.data()));
  *os << "Dumped tool data for " << tool.name() << " to " << path << std::endl;
  return Status::OK();
}

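// Writes the (first) HLO computation graph and, if present, the HLO run
// metadata as TensorBoard events so the graph plugin can display them.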
Status DumpGraphEvents(const string& logdir, const string& run,
                       const ProfileResponse& response, std::ostream* os) {
  int num_graphs = response.computation_graph_size();
  if (num_graphs == 0) return Status::OK();
  // The server might generate multiple graphs for one program; we simply
  // pick the first one.
  if (num_graphs > 1) {
    *os << num_graphs
        << " TPU program variants observed over the profiling period. "
        << "One computation graph will be chosen arbitrarily." << std::endl;
  }
  // The graph plugin expects the graph in <logdir>/<run>/<event.file>.
  string run_dir = JoinPath(logdir, strings::StrCat(kGraphRunPrefix, run));
  TF_RETURN_IF_ERROR(Env::Default()->RecursivelyCreateDir(run_dir));
  EventsWriter event_writer(JoinPath(run_dir, "events"));
  Event event;
  // Add the computation graph.
  event.set_graph_def(response.computation_graph(0).SerializeAsString());
  event_writer.WriteEvent(event);
  *os << "Wrote an HLO graph to " << event_writer.FileName() << std::endl;

  if (response.has_hlo_metadata()) {
    tensorflow::TaggedRunMetadata tagged_run_metadata;
    tagged_run_metadata.set_tag(run);
    tagged_run_metadata.set_run_metadata(
        response.hlo_metadata().SerializeAsString());
    tensorflow::Event meta_event;
    *meta_event.mutable_tagged_run_metadata() = tagged_run_metadata;
    event_writer.WriteEvent(meta_event);
    *os << "Wrote HLO ops run metadata to " << event_writer.FileName()
        << std::endl;
  }
  return Status::OK();
}

}  // namespace

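// Writes the profiling artifacts for |run| (trace, op profile, and per-tool
// data) under <logdir>/plugins/profile/<run>/ and, if a computation graph was
// returned, HLO graph events under <logdir>/<kGraphRunPrefix><run>/.
// Progress messages go to |os|.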
Status WriteTensorboardTPUProfile(const string& logdir, const string& run,
                                  const ProfileResponse& response,
                                  std::ostream* os) {
  // Dumps profile data to <logdir>/plugins/profile/<run>/.
  string profile_run_dir = JoinPath(logdir, kProfilePluginDirectory, run);
  TF_RETURN_IF_ERROR(Env::Default()->RecursivelyCreateDir(profile_run_dir));

  // The computation graph is not written into the profile plugin directory;
  // DumpGraphEvents below writes it for the graph plugin instead.
  if (!response.encoded_trace().empty()) {
    LOG(INFO) << "Converting trace events to TraceViewer JSON.";
    TF_RETURN_IF_ERROR(
        DumpTraceToLogDirectory(profile_run_dir, response.encoded_trace(), os));
  }
  if (response.has_op_profile() &&
      (response.op_profile().has_by_program_structure() ||
       response.op_profile().has_by_category())) {
    TF_RETURN_IF_ERROR(DumpOpProfileToLogDirectory(profile_run_dir,
                                                   response.op_profile(), os));
  }
  for (const auto& tool_data : response.tool_data()) {
    TF_RETURN_IF_ERROR(
        DumpToolDataToLogDirectory(profile_run_dir, tool_data, os));
  }
  TF_RETURN_IF_ERROR(DumpGraphEvents(logdir, run, response, os));

  return Status::OK();
}
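
// Example usage (illustrative only; the logdir, run name, and the way
// |response| is obtained are hypothetical, not part of this file):
//
//   ProfileResponse response = ...;  // e.g. returned by the profiler service
//   TF_CHECK_OK(WriteTensorboardTPUProfile("/tmp/tb_logdir", "2017_12_06",
//                                          response, &std::cout));
//
// TensorBoard's profile plugin then reads the files written under
// <logdir>/plugins/profile/<run>/.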

}  // namespace tpu
}  // namespace tensorflow