/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_EXECUTABLE_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_EXECUTABLE_H_

#include <memory>
#include <utility>
#include <vector>

#include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h"
#include "tensorflow/compiler/xla/service/computation_layout.h"
#include "tensorflow/compiler/xla/service/device_memory_allocator.h"
#include "tensorflow/compiler/xla/service/hlo_cost_analysis.h"
#include "tensorflow/compiler/xla/service/hlo_execution_profile.h"
#include "tensorflow/compiler/xla/service/hlo_graph_dumper.h"
#include "tensorflow/compiler/xla/service/hlo_module.h"
#include "tensorflow/compiler/xla/service/service_executable_run_options.h"
#include "tensorflow/compiler/xla/service/session.pb.h"
#include "tensorflow/compiler/xla/service/shaped_buffer.h"
#include "tensorflow/compiler/xla/service/versioned_computation_handle.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/util.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
#include "tensorflow/core/platform/thread_annotations.h"

namespace xla 
{ 42 43 // A given platform's compiler will produce an Executable -- this is a uniform 44 // interface that is used for launching compiled programs across platforms. 45 class Executable { 46 public: 47 explicit Executable( 48 std::unique_ptr<const HloModule> hlo_module, 49 std::unique_ptr<HloProfilePrinterData> hlo_profile_printer_data, 50 std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map) 51 : hlo_module_(std::move(hlo_module)), 52 hlo_profile_printer_data_(std::move(hlo_profile_printer_data)), 53 hlo_profile_index_map_(std::move(hlo_profile_index_map)) { 54 CHECK_EQ(hlo_profile_printer_data_.get() == nullptr, 55 hlo_profile_index_map_.get() == nullptr); 56 } 57 virtual ~Executable() {} 58 59 // Enqueues the compilation result on the provided stream, passing the given 60 // arguments. This call is blocking and returns after the execution is done. 61 // 62 // If the hlo_execution_profile is provided as non-nullptr, profiling will be 63 // enabled. 64 // 65 // Returns a shaped buffer containing the result of the computation. 66 virtual StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStream( 67 const ServiceExecutableRunOptions* run_options, 68 tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, 69 HloExecutionProfile* hlo_execution_profile) = 0; 70 71 // Same as ExecuteOnStream(), but this call is non-blocking and returns as 72 // soon as all of the operations are enqueued for launch on the stream. 73 virtual StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteAsyncOnStream( 74 const ServiceExecutableRunOptions* run_options, 75 tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) = 0; 76 77 // Same as ExecuteOnStream(), but runs this executable on multiple 78 // streams. arguments[i] contains the arguments to the execution on 79 // run_options[i]->stream() and the returned value is at index i of the 80 // returned vector. 
81 virtual StatusOr<std::vector<std::unique_ptr<ShapedBuffer>>> ExecuteOnStreams( 82 tensorflow::gtl::ArraySlice<const ServiceExecutableRunOptions> 83 run_options, 84 tensorflow::gtl::ArraySlice< 85 tensorflow::gtl::ArraySlice<const ShapedBuffer*>> 86 arguments); 87 88 // Populates `hlo_execution_profile` from `executor`. This is implicit in any 89 // Execute* API call that takes a hlo_execution_profile argument, but must be 90 // called explicitly for other (async, for example) variants after the stream 91 // has completed. 92 virtual Status PopulateExecutionProfile( 93 HloExecutionProfile* hlo_execution_profile, 94 perftools::gputools::StreamExecutor* executor) { 95 return Status::OK(); 96 } 97 98 // Convenience wrapper for calling Executable::ExecuteOnStream. Sets up a 99 // timer for the execution, sets up HLO profiling if enabled, and fills in the 100 // given ExecutionProfile if non-null. 101 StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStreamWrapper( 102 const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile, 103 tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments); 104 105 // Returns the ExecutionProfile from executing on the device. This includes 106 // the number of cycles taken for the computation or the compilation time. 107 ExecutionProfile execution_profile() const { 108 tensorflow::mutex_lock lock(mutex_); 109 return execution_profile_; 110 } 111 112 // Returns Status::ok() if the two executables are equal to each other. 113 // 114 // An error status is returned otherwise. 
115 virtual const Status EqualOrFail(const Executable& executable) { 116 return Unimplemented( 117 "Equality test on this executable is not implemented."); 118 } 119 120 const HloProfilePrinterData& hlo_profile_printer_data() const { 121 CHECK(hlo_profiling_enabled()); 122 return *hlo_profile_printer_data_; 123 } 124 125 const HloProfileIndexMap& hlo_profile_index_map() const { 126 CHECK(hlo_profiling_enabled()); 127 return *hlo_profile_index_map_; 128 } 129 130 // Returns whether this executable was compiled with HLO profilings support 131 // enabled. If not, the caller should not expect an hlo_execution_profile 132 // passed to ExecuteOnStream above to be populated during execution. 133 bool hlo_profiling_enabled() const { 134 return hlo_profile_printer_data_ != nullptr; 135 } 136 137 const HloModule& module() const { return *hlo_module_; } 138 139 const bool has_module() const { return hlo_module_ != nullptr; } 140 141 const HloModuleConfig& module_config() const { return hlo_module_->config(); } 142 143 // Returns the versioned computation handle of the computation computed by 144 // this executable. 145 const VersionedComputationHandle& entry_computation_handle() const { 146 return hlo_module_->entry_computation_handle(); 147 } 148 149 // The shape (including layout) that results from this execution. This is the 150 // shape of the DeviceMemoryBase result value in ExecuteOnStream above. 151 const Shape& result_shape() const { 152 return hlo_module_->config().entry_computation_layout().result_shape(); 153 } 154 155 // Dumping helpers. 156 void set_session_module(std::unique_ptr<xla::SessionModule> session_module) { 157 session_module_ = std::move(session_module); 158 } 159 bool dumping() const { return session_module_ != nullptr; } 160 SessionModule* session_module() const { return session_module_.get(); } 161 Status DumpSessionModule(); 162 163 // Dump session_module to directory_path/filename. 
164 static Status DumpToDirectory(const string& directory_path, string filename, 165 const SessionModule& session_module); 166 167 protected: 168 mutable tensorflow::mutex mutex_; 169 170 // Execution profile data on the device. 171 ExecutionProfile execution_profile_ GUARDED_BY(mutex_); 172 173 // HloModule this was compiled from. BufferAssignment keeps pointers to 174 // HloInstructions owned by the HloModule so we need to keep the HloModule 175 // around. 176 const std::unique_ptr<const HloModule> hlo_module_; 177 178 // SessionModule this was compiled from. Null if not dumping executions. 179 std::unique_ptr<SessionModule> session_module_; 180 181 // Execution count, used to generate a unique filename for each dumped 182 // execution. 183 int64 execution_count_ = 0; 184 185 std::unique_ptr<HloProfilePrinterData> hlo_profile_printer_data_; 186 std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map_; 187 }; 188 189 } // namespace xla 190 191 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_EXECUTABLE_H_ 192