Home | History | Annotate | Download | only in service
      1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_EXECUTABLE_H_
     17 #define TENSORFLOW_COMPILER_XLA_SERVICE_EXECUTABLE_H_
     18 
#include <memory>
#include <utility>
#include <vector>

#include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h"
#include "tensorflow/compiler/xla/service/computation_layout.h"
#include "tensorflow/compiler/xla/service/device_memory_allocator.h"
#include "tensorflow/compiler/xla/service/hlo_cost_analysis.h"
#include "tensorflow/compiler/xla/service/hlo_execution_profile.h"
#include "tensorflow/compiler/xla/service/hlo_graph_dumper.h"
#include "tensorflow/compiler/xla/service/hlo_module.h"
#include "tensorflow/compiler/xla/service/service_executable_run_options.h"
#include "tensorflow/compiler/xla/service/session.pb.h"
#include "tensorflow/compiler/xla/service/shaped_buffer.h"
#include "tensorflow/compiler/xla/service/versioned_computation_handle.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/util.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
#include "tensorflow/core/platform/thread_annotations.h"
     40 
     41 namespace xla {
     42 
     43 // A given platform's compiler will produce an Executable -- this is a uniform
     44 // interface that is used for launching compiled programs across platforms.
     45 class Executable {
     46  public:
     47   explicit Executable(
     48       std::unique_ptr<const HloModule> hlo_module,
     49       std::unique_ptr<HloProfilePrinterData> hlo_profile_printer_data,
     50       std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map)
     51       : hlo_module_(std::move(hlo_module)),
     52         hlo_profile_printer_data_(std::move(hlo_profile_printer_data)),
     53         hlo_profile_index_map_(std::move(hlo_profile_index_map)) {
     54     CHECK_EQ(hlo_profile_printer_data_.get() == nullptr,
     55              hlo_profile_index_map_.get() == nullptr);
     56   }
     57   virtual ~Executable() {}
     58 
     59   // Enqueues the compilation result on the provided stream, passing the given
     60   // arguments. This call is blocking and returns after the execution is done.
     61   //
     62   // If the hlo_execution_profile is provided as non-nullptr, profiling will be
     63   // enabled.
     64   //
     65   // Returns a shaped buffer containing the result of the computation.
     66   virtual StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStream(
     67       const ServiceExecutableRunOptions* run_options,
     68       tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
     69       HloExecutionProfile* hlo_execution_profile) = 0;
     70 
     71   // Same as ExecuteOnStream(), but this call is non-blocking and returns as
     72   // soon as all of the operations are enqueued for launch on the stream.
     73   virtual StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteAsyncOnStream(
     74       const ServiceExecutableRunOptions* run_options,
     75       tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) = 0;
     76 
     77   // Same as ExecuteOnStream(), but runs this executable on multiple
     78   // streams. arguments[i] contains the arguments to the execution on
     79   // run_options[i]->stream() and the returned value is at index i of the
     80   // returned vector.
     81   virtual StatusOr<std::vector<std::unique_ptr<ShapedBuffer>>> ExecuteOnStreams(
     82       tensorflow::gtl::ArraySlice<const ServiceExecutableRunOptions>
     83           run_options,
     84       tensorflow::gtl::ArraySlice<
     85           tensorflow::gtl::ArraySlice<const ShapedBuffer*>>
     86           arguments);
     87 
     88   // Populates `hlo_execution_profile` from `executor`. This is implicit in any
     89   // Execute* API call that takes a hlo_execution_profile argument, but must be
     90   // called explicitly for other (async, for example) variants after the stream
     91   // has completed.
     92   virtual Status PopulateExecutionProfile(
     93       HloExecutionProfile* hlo_execution_profile,
     94       perftools::gputools::StreamExecutor* executor) {
     95     return Status::OK();
     96   }
     97 
     98   // Convenience wrapper for calling Executable::ExecuteOnStream. Sets up a
     99   // timer for the execution, sets up HLO profiling if enabled, and fills in the
    100   // given ExecutionProfile if non-null.
    101   StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStreamWrapper(
    102       const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile,
    103       tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments);
    104 
    105   // Returns the ExecutionProfile from executing on the device. This includes
    106   // the number of cycles taken for the computation or the compilation time.
    107   ExecutionProfile execution_profile() const {
    108     tensorflow::mutex_lock lock(mutex_);
    109     return execution_profile_;
    110   }
    111 
    112   // Returns Status::ok() if the two executables are equal to each other.
    113   //
    114   // An error status is returned otherwise.
    115   virtual const Status EqualOrFail(const Executable& executable) {
    116     return Unimplemented(
    117         "Equality test on this executable is not implemented.");
    118   }
    119 
    120   const HloProfilePrinterData& hlo_profile_printer_data() const {
    121     CHECK(hlo_profiling_enabled());
    122     return *hlo_profile_printer_data_;
    123   }
    124 
    125   const HloProfileIndexMap& hlo_profile_index_map() const {
    126     CHECK(hlo_profiling_enabled());
    127     return *hlo_profile_index_map_;
    128   }
    129 
    130   // Returns whether this executable was compiled with HLO profilings support
    131   // enabled. If not, the caller should not expect an hlo_execution_profile
    132   // passed to ExecuteOnStream above to be populated during execution.
    133   bool hlo_profiling_enabled() const {
    134     return hlo_profile_printer_data_ != nullptr;
    135   }
    136 
    137   const HloModule& module() const { return *hlo_module_; }
    138 
    139   const bool has_module() const { return hlo_module_ != nullptr; }
    140 
    141   const HloModuleConfig& module_config() const { return hlo_module_->config(); }
    142 
    143   // Returns the versioned computation handle of the computation computed by
    144   // this executable.
    145   const VersionedComputationHandle& entry_computation_handle() const {
    146     return hlo_module_->entry_computation_handle();
    147   }
    148 
    149   // The shape (including layout) that results from this execution. This is the
    150   // shape of the DeviceMemoryBase result value in ExecuteOnStream above.
    151   const Shape& result_shape() const {
    152     return hlo_module_->config().entry_computation_layout().result_shape();
    153   }
    154 
    155   // Dumping helpers.
    156   void set_session_module(std::unique_ptr<xla::SessionModule> session_module) {
    157     session_module_ = std::move(session_module);
    158   }
    159   bool dumping() const { return session_module_ != nullptr; }
    160   SessionModule* session_module() const { return session_module_.get(); }
    161   Status DumpSessionModule();
    162 
    163   // Dump session_module to directory_path/filename.
    164   static Status DumpToDirectory(const string& directory_path, string filename,
    165                                 const SessionModule& session_module);
    166 
    167  protected:
    168   mutable tensorflow::mutex mutex_;
    169 
    170   // Execution profile data on the device.
    171   ExecutionProfile execution_profile_ GUARDED_BY(mutex_);
    172 
    173   // HloModule this was compiled from. BufferAssignment keeps pointers to
    174   // HloInstructions owned by the HloModule so we need to keep the HloModule
    175   // around.
    176   const std::unique_ptr<const HloModule> hlo_module_;
    177 
    178   // SessionModule this was compiled from. Null if not dumping executions.
    179   std::unique_ptr<SessionModule> session_module_;
    180 
    181   // Execution count, used to generate a unique filename for each dumped
    182   // execution.
    183   int64 execution_count_ = 0;
    184 
    185   std::unique_ptr<HloProfilePrinterData> hlo_profile_printer_data_;
    186   std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map_;
    187 };
    188 
    189 }  // namespace xla
    190 
    191 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_EXECUTABLE_H_
    192