1 /* Copyright 2017 The TensorFlow Authors All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include "tensorflow/contrib/tpu/profiler/dump_tpu_profile.h" 17 18 #include <cstdio> 19 #include <ctime> 20 #include <vector> 21 22 #include "tensorflow/contrib/tpu/profiler/op_profile.pb.h" 23 #include "tensorflow/contrib/tpu/profiler/trace_events.pb.h" 24 #include "tensorflow/contrib/tpu/profiler/trace_events_to_json.h" 25 #include "tensorflow/core/framework/graph.pb.h" 26 #include "tensorflow/core/lib/core/errors.h" 27 #include "tensorflow/core/lib/io/compression.h" 28 #include "tensorflow/core/lib/io/path.h" 29 #include "tensorflow/core/lib/strings/str_util.h" 30 #include "tensorflow/core/lib/strings/strcat.h" 31 #include "tensorflow/core/platform/env.h" 32 #include "tensorflow/core/platform/protobuf.h" 33 #include "tensorflow/core/protobuf/config.pb.h" 34 #include "tensorflow/core/util/event.pb.h" 35 #include "tensorflow/core/util/events_writer.h" 36 37 namespace tensorflow { 38 namespace tpu { 39 namespace { 40 41 using ::tensorflow::io::JoinPath; 42 using ::tensorflow::protobuf::util::JsonOptions; 43 using ::tensorflow::protobuf::util::MessageToJsonString; 44 45 constexpr char kGraphRunPrefix[] = "tpu_profiler.hlo_graph."; 46 constexpr char kJsonOpProfileFileName[] = "op_profile.json"; 47 constexpr char kJsonTraceFileName[] = "trace.json.gz"; 48 constexpr char kProfilePluginDirectory[] = "plugins/profile/"; 49 constexpr char kProtoTraceFileName[] = "trace"; 50 51 Status WriteGzippedDataToFile(const string& filename, const string& data) { 52 std::unique_ptr<WritableFile> file; 53 TF_RETURN_IF_ERROR(Env::Default()->NewWritableFile(filename, &file)); 54 io::ZlibCompressionOptions options = io::ZlibCompressionOptions::GZIP(); 55 io::ZlibOutputBuffer buffer(file.get(), options.input_buffer_size, 56 options.output_buffer_size, options); 57 TF_RETURN_IF_ERROR(buffer.Init()); 58 TF_RETURN_IF_ERROR(buffer.Append(data)); 59 TF_RETURN_IF_ERROR(buffer.Close()); 60 TF_RETURN_IF_ERROR(file->Close()); 61 return Status::OK(); 62 } 63 64 Status DumpTraceToLogDirectory(StringPiece run_dir, const string& encoded_trace, 65 std::ostream* os) { 66 string proto_path = JoinPath(run_dir, kProtoTraceFileName); 67 TF_RETURN_IF_ERROR( 68 WriteStringToFile(Env::Default(), proto_path, encoded_trace)); 69 LOG(INFO) << "Dumped raw-proto trace data to " << proto_path; 70 71 string json_path = JoinPath(run_dir, kJsonTraceFileName); 72 Trace trace; 73 trace.ParseFromString(encoded_trace); 74 *os << "Trace contains " << trace.trace_events_size() << " events." 75 << std::endl; 76 TF_RETURN_IF_ERROR( 77 WriteGzippedDataToFile(json_path, TraceEventsToJson(trace))); 78 *os << "Dumped JSON trace data to " << json_path << std::endl; 79 return Status::OK(); 80 } 81 82 Status DumpOpProfileToLogDirectory(StringPiece run_dir, 83 const tpu::op_profile::Profile& profile, 84 std::ostream* os) { 85 string path = JoinPath(run_dir, kJsonOpProfileFileName); 86 string json; 87 JsonOptions options; 88 options.always_print_primitive_fields = true; 89 auto status = MessageToJsonString(profile, &json, options); 90 if (!status.ok()) { 91 return errors::Internal( 92 "Failed to convert op profile to json. Skipping... ", 93 string(status.error_message())); 94 } 95 TF_RETURN_IF_ERROR(WriteStringToFile(Env::Default(), path, json)); 96 *os << "Dumped json op profile data to " << path << std::endl; 97 return Status::OK(); 98 } 99 100 Status DumpToolDataToLogDirectory(StringPiece run_dir, 101 const tensorflow::ProfileToolData& tool, 102 std::ostream* os) { 103 string path = JoinPath(run_dir, tool.name()); 104 TF_RETURN_IF_ERROR(WriteStringToFile(Env::Default(), path, tool.data())); 105 *os << "Dumped tool data for " << tool.name() << " to " << path << std::endl; 106 return Status::OK(); 107 } 108 109 Status DumpGraphEvents(const string& logdir, const string& run, 110 const ProfileResponse& response, std::ostream* os) { 111 int num_graphs = response.computation_graph_size(); 112 if (response.computation_graph_size() == 0) return Status::OK(); 113 // The server might generates multiple graphs for one program; we simply 114 // pick the first one. 115 if (num_graphs > 1) { 116 *os << num_graphs 117 << " TPU program variants observed over the profiling period. " 118 << "One computation graph will be chosen arbitrarily." << std::endl; 119 } 120 // The graph plugin expects the graph in <logdir>/<run>/<event.file>. 121 string run_dir = JoinPath(logdir, strings::StrCat(kGraphRunPrefix, run)); 122 TF_RETURN_IF_ERROR(Env::Default()->RecursivelyCreateDir(run_dir)); 123 EventsWriter event_writer(JoinPath(run_dir, "events")); 124 Event event; 125 // Add the computation graph. 126 event.set_graph_def(response.computation_graph(0).SerializeAsString()); 127 event_writer.WriteEvent(event); 128 *os << "Wrote a HLO graph to " << event_writer.FileName() << std::endl; 129 130 if (response.has_hlo_metadata()) { 131 tensorflow::TaggedRunMetadata tagged_run_metadata; 132 tagged_run_metadata.set_tag(run); 133 tagged_run_metadata.set_run_metadata( 134 response.hlo_metadata().SerializeAsString()); 135 tensorflow::Event meta_event; 136 *meta_event.mutable_tagged_run_metadata() = tagged_run_metadata; 137 event_writer.WriteEvent(meta_event); 138 *os << "Wrote HLO ops run metadata to " << event_writer.FileName() 139 << std::endl; 140 } 141 return Status::OK(); 142 } 143 144 } // namespace 145 146 Status WriteTensorboardTPUProfile(const string& logdir, const string& run, 147 const ProfileResponse& response, 148 std::ostream* os) { 149 // Dumps profile data to <logdir>/plugins/profile/<run>/. 150 string profile_run_dir = JoinPath(logdir, kProfilePluginDirectory, run); 151 TF_RETURN_IF_ERROR(Env::Default()->RecursivelyCreateDir(profile_run_dir)); 152 153 // Ignore computation_graph for now. 154 if (!response.encoded_trace().empty()) { 155 LOG(INFO) << "Converting trace events to TraceViewer JSON."; 156 TF_RETURN_IF_ERROR( 157 DumpTraceToLogDirectory(profile_run_dir, response.encoded_trace(), os)); 158 } 159 if (response.has_op_profile() && 160 (response.op_profile().has_by_program_structure() || 161 response.op_profile().has_by_category())) { 162 TF_RETURN_IF_ERROR(DumpOpProfileToLogDirectory(profile_run_dir, 163 response.op_profile(), os)); 164 } 165 for (const auto& tool_data : response.tool_data()) { 166 TF_RETURN_IF_ERROR( 167 DumpToolDataToLogDirectory(profile_run_dir, tool_data, os)); 168 } 169 170 return Status::OK(); 171 } 172 173 } // namespace tpu 174 } // namespace tensorflow 175