/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/service/executable.h"

#include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h"
#include "tensorflow/compiler/xla/service/hlo_graph_dumper.h"
#include "tensorflow/compiler/xla/status.h"
#include "tensorflow/compiler/xla/status_macros.h"
#include "tensorflow/core/lib/hash/hash.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/platform/env.h"

using tensorflow::gtl::ArraySlice;

namespace xla {

StatusOr<std::vector<std::unique_ptr<ShapedBuffer>>>
Executable::ExecuteOnStreams(
    ArraySlice<const ServiceExecutableRunOptions> run_options,
    ArraySlice<ArraySlice<const ShapedBuffer*>> arguments) {
  TF_RET_CHECK(run_options.size() == arguments.size());

  std::vector<std::unique_ptr<ShapedBuffer>> return_values(run_options.size());

  if (run_options.size() == 1) {
    TF_ASSIGN_OR_RETURN(return_values[0],
                        ExecuteOnStream(&run_options[0], arguments[0],
                                        /*hlo_execution_profile=*/nullptr));
    return std::move(return_values);
  }

  for (size_t i = 0; i < run_options.size(); ++i) {
    // We cannot BlockHostUntilDone() on the already-launched executions if a
    // later launch fails: if the executions communicate with one another, the
    // initially launched ones may never complete unless all executions are
    // running.
    TF_ASSIGN_OR_RETURN(return_values[i],
                        ExecuteAsyncOnStream(&run_options[i], arguments[i]));
  }
  for (const auto& options : run_options) {
    TF_RET_CHECK(options.stream() != nullptr);
    TF_RETURN_IF_ERROR(options.stream()->BlockHostUntilDone());
  }
  return std::move(return_values);
}

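// A minimal usage sketch for ExecuteOnStreams (hypothetical caller, not part
// of this file): launching the same executable across several streams and
// gathering the per-stream results.  `executable`, `run_options`, and `args`
// are assumed to be populated elsewhere; std::vector converts implicitly to
// ArraySlice.
//
//   std::vector<ServiceExecutableRunOptions> run_options = ...;
//   std::vector<ArraySlice<const ShapedBuffer*>> args = ...;
//   TF_ASSIGN_OR_RETURN(auto results,
//                       executable->ExecuteOnStreams(run_options, args));
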
StatusOr<std::unique_ptr<ShapedBuffer>> Executable::ExecuteOnStreamWrapper(
    const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile,
    ArraySlice<const ShapedBuffer*> arguments) {
  perftools::gputools::Stream* stream = run_options->stream();
  std::unique_ptr<perftools::gputools::Timer> timer;
  if (profile != nullptr) {
    timer.reset(new perftools::gputools::Timer(stream->parent()));
    stream->InitTimer(timer.get()).ThenStartTimer(timer.get());
  }

  VLOG(1) << "enqueueing executable on stream...";
  // If the profiling flag isn't enabled, we pass nullptr as the profile to
  // indicate profiling is not requested.
  std::unique_ptr<HloExecutionProfile> profile_ptr =
      module_config().debug_options().xla_hlo_profile() &&
              hlo_profiling_enabled()
          ? MakeUnique<HloExecutionProfile>(&hlo_profile_printer_data(),
                                            &hlo_profile_index_map())
          : nullptr;

  StatusOr<std::unique_ptr<ShapedBuffer>> return_value =
      ExecuteOnStream(run_options, arguments, profile_ptr.get());

  if (profile != nullptr) {
    VLOG(1) << "enqueueing 'stop timer' and blocking host until done...";
    stream->ThenStopTimer(timer.get());
    TF_RETURN_IF_ERROR(stream->BlockHostUntilDone());
    VLOG(1) << "done with block-host-until-done";

    // Merge in run-time profile information from execution_profile.
    //
    // TODO(b/71713097): This is buggy -- even though the mutex takes care of
    // C++ level races, some other concurrent ExecuteOnStreamWrapper call could
    // have rewritten the execution_profile before we get to it.
    profile->MergeFrom(execution_profile());

    // Overall execution time (in nanoseconds) from the executor timer.
    if (stream->ok()) {
      // Don't read timer->Nanoseconds() if the stream isn't OK -- that's
      // illegal.
      profile->set_compute_and_transfer_time_ns(timer->Nanoseconds());
    }

    // TODO(b/28123297): On GPU we end up including transfer time in
    // the compute time this way. Instead, we should get the correct
    // value by measuring it. Setting the field here at least lets
    // benchmarks provide *some* value for GPU computations.
    //
    // TODO(b/28447609): The value in compute_and_transfer_time_ns is actually
    // the compute time without the transfer time, so this way we get the
    // correct compute time. We should instead have the correct value for
    // compute_and_transfer_time and set compute_time to the compute time.
    if (profile->compute_time_ns() == 0) {
      profile->set_compute_time_ns(profile->compute_and_transfer_time_ns());
    }
  }

  if (profile_ptr != nullptr) {
    XLA_LOG_LINES(
        tensorflow::INFO,
        profile_ptr->ToString(stream->parent()->GetDeviceDescription()));
    hlo_graph_dumper::MaybeDumpHloModule(module(), "Service::Execute",
                                         profile_ptr.get());
  }

  return return_value;
}

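// Sketch of how a caller might read the timings recorded above (hypothetical,
// for illustration only; assumes `executable`, `run_options`, and `args`
// exist, and that the module was built with the xla_hlo_profile debug option
// if the HLO-level profile path is wanted):
//
//   ExecutionProfile profile;
//   TF_ASSIGN_OR_RETURN(
//       auto result,
//       executable->ExecuteOnStreamWrapper(&run_options, &profile, args));
//   VLOG(1) << "compute time (ns): " << profile.compute_time_ns();
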
Status Executable::DumpSessionModule() {
  TF_RET_CHECK(dumping());
  const string& directory_path =
      module_config().debug_options().xla_dump_executions_to();
  VersionedComputationHandle versioned_handle = entry_computation_handle();
  // This filename does not include the version number because the computation
  // is only ever executed at one version.
  string filename = tensorflow::strings::Printf(
      "computation_%lld__%s__execution_%lld", versioned_handle.handle.handle(),
      session_module_->entry().name().c_str(), ++execution_count_);
  return Executable::DumpToDirectory(directory_path, filename,
                                     *session_module_);
}

/* static */ Status Executable::DumpToDirectory(
    const string& directory_path, string filename,
    const SessionModule& session_module) {
  tensorflow::Env* env = tensorflow::Env::Default();
  if (!env->IsDirectory(directory_path).ok()) {
    // NB! CreateDir does not work reliably with multiple XLA threads -- two
    // threads can race to observe the absence of the dump directory and
    // simultaneously try to create it, causing the "losing" thread to get a
    // "directory already exists" error.  RecursivelyCreateDir avoids this
    // race because it returns OK when the directory already exists.
    TF_RETURN_IF_ERROR(env->RecursivelyCreateDir(directory_path));
  }
  filename = SanitizeFileName(std::move(filename));
  string file_path = tensorflow::io::JoinPath(directory_path, filename);
  string result;
  TF_RET_CHECK(
      tensorflow::SerializeToStringDeterministic(session_module, &result));
  return tensorflow::WriteStringToFile(tensorflow::Env::Default(), file_path,
                                       result);
}

}  // namespace xla