/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_EXECUTABLE_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_EXECUTABLE_H_

#include <cstddef>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "tensorflow/compiler/xla/service/buffer_assignment.h"
#include "tensorflow/compiler/xla/service/cpu/simple_orc_jit.h"
#include "tensorflow/compiler/xla/service/device_memory_allocator.h"
#include "tensorflow/compiler/xla/service/executable.h"
#include "tensorflow/compiler/xla/service/hlo_execution_profile.h"
#include "tensorflow/compiler/xla/service/hlo_instruction.h"
#include "tensorflow/compiler/xla/service/hlo_module.h"
#include "tensorflow/compiler/xla/service/shaped_buffer.h"
#include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
#include "tensorflow/core/platform/types.h"

namespace xla {
namespace cpu {

// CPU-targeting implementation of the XLA Executable interface.
//
// Wraps a JIT-ed object that can be executed "on device". We JIT for the host
// architecture, so JIT-ed code and host code share the same ABI.
class CpuExecutable : public Executable {
 public:
  CpuExecutable(std::unique_ptr<SimpleOrcJIT> jit,
                std::unique_ptr<const BufferAssignment> assignment,
                std::unique_ptr<const HloModule> hlo_module,
                const string& entry_function_name,
                std::unique_ptr<HloProfilePrinterData> hlo_profile_printer_data,
                std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map);
  ~CpuExecutable() override {}

  StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStream(
      const ServiceExecutableRunOptions* run_options,
      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
      HloExecutionProfile* hlo_execution_profile) override;

  StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteAsyncOnStream(
      const ServiceExecutableRunOptions* run_options,
      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) override;
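
  // A minimal usage sketch (illustrative only; `executable`, `run_options`,
  // and the argument ShapedBuffers are assumed to exist at the call site):
  //
  //   std::vector<const ShapedBuffer*> args = {&arg0, &arg1};
  //   TF_ASSIGN_OR_RETURN(
  //       std::unique_ptr<ShapedBuffer> result,
  //       executable->ExecuteOnStream(&run_options, args,
  //                                   /*hlo_execution_profile=*/nullptr));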

  // Returns the LLVM IR of the module compiled for this executable. Valid
  // only after set_ir_module_string() has been called.
  const string& ir_module_string() const { return ir_module_string_; }

  void set_ir_module_string(const string& ir_module_string) {
    ir_module_string_ = ir_module_string;
  }

  Status EqualOrFail(const Executable& executable) {
    // TODO(b/62952745) Implement equality test on CPU executable.
    return Unimplemented("Equality test on CPU executable is not implemented.");
  }

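  // Returns the size in bytes of the buffer the CPU backend uses to back a
  // value of the given shape (on the CPU, opaque values are pointer-sized).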
  static int64 ShapeSizeBytes(const Shape& shape);

  // Type of the computation function we expect in the JIT.
  using ComputeFunctionType = void (*)(
      void* /*result*/, const ExecutableRunOptions* /*run_options*/,
      const void** /*args*/, void** /*temps*/, int64* /*profile_counters*/);
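
  // For illustration, the runtime invokes such a function roughly as follows
  // (a sketch; the actual call site lives in ExecuteComputeFunction, and the
  // array names here are hypothetical):
  //
  //   compute_function()(result_buffer, run_options, args_array, temps_array,
  //                      profile_counters_array);
  //
  // where args_array holds one pointer per entry-computation parameter and
  // temps_array holds one pointer per allocation in the buffer assignment.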

  const ComputeFunctionType& compute_function() const {
    return compute_function_;
  }

  const BufferAssignment& buffer_assignment() const { return *assignment_; }

 private:
  // Allocates buffers required for execution and assigns them to the elements
  // of "buffers". "buffers" should be sized to the number of buffers in the
  // buffer assignment; each vector element corresponds to a particular
  // BufferAllocation::Index. If a vector element already contains a non-null
  // DeviceMemoryBase, no buffer is allocated for that element.
  Status AllocateBuffers(
      DeviceMemoryAllocator* memory_allocator, int device_ordinal,
      std::vector<perftools::gputools::DeviceMemoryBase>* buffers);
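
  // Expected call pattern (a sketch under the contract above; `stream` and
  // `memory_allocator` are assumed to come from the run options):
  //
  //   std::vector<perftools::gputools::DeviceMemoryBase> buffers(
  //       assignment_->Allocations().size());
  //   TF_RETURN_IF_ERROR(AllocateBuffers(
  //       memory_allocator, stream->parent()->device_ordinal(), &buffers));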

  // Calls the JIT-compiled function that performs the computation, passing the
  // given arguments and the supplied buffers.
  Status ExecuteComputeFunction(
      const ExecutableRunOptions* run_options,
      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
          buffers,
      HloExecutionProfile* hlo_execution_profile);

  // Creates a ShapedBuffer to hold the result of the computation. The
  // addresses (DeviceMemoryBases) are set according to the buffer assignment.
  // 'buffers_in_result' should point to a vector of the same size as
  // 'allocated_buffers'. An element of buffers_in_result is set to true if the
  // corresponding buffer is live out of the computation (and thus contained in
  // the returned ShapedBuffer).
  StatusOr<std::unique_ptr<ShapedBuffer>> CreateResultShapedBuffer(
      const ServiceExecutableRunOptions* run_options,
      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
          allocated_buffers,
      std::vector<bool>* buffers_in_result);
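
  // Illustrative follow-up (a sketch; `buffers` is the vector filled by
  // AllocateBuffers above): buffers not marked live-out may be released once
  // the result has been constructed.
  //
  //   std::vector<bool> buffers_in_result(buffers.size(), false);
  //   TF_ASSIGN_OR_RETURN(std::unique_ptr<ShapedBuffer> result,
  //                       CreateResultShapedBuffer(run_options, buffers,
  //                                                &buffers_in_result));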

  // Returns the points-to set of the root instruction of the entry
  // computation. Uses points-to analysis from buffer assignment.
  const PointsToSet& GetRootPointsToSet() const;

  // The JIT containing compiled modules.
  const std::unique_ptr<SimpleOrcJIT> jit_;

  // Buffer assignment for the buffers we need to allocate.
  const std::unique_ptr<const BufferAssignment> assignment_;

  // The LLVM IR, in string format, of the unoptimized module generated for this
  // CpuExecutable. We save a string instead of an llvm::Module* because leaving
  // llvm::Module* in a singleton can cause the heap checker to emit false
  // positives.
  string ir_module_string_;

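  // Entry point of the JIT-compiled computation, looked up in the JIT by
  // entry_function_name_ when this executable is constructed.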
  ComputeFunctionType compute_function_;

  // Entry function name for the computation.
  const string entry_function_name_;

  TF_DISALLOW_COPY_AND_ASSIGN(CpuExecutable);
};

}  // namespace cpu
}  // namespace xla

#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_EXECUTABLE_H_