/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Declares the HostExecutor class, which is a CPU-only implementation of
// the StreamExecutor interface. For now, this is used for testing and to
// examine the performance of host-based StreamExecutor code.
#ifndef TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_GPU_EXECUTOR_H_
#define TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_GPU_EXECUTOR_H_

#include "tensorflow/stream_executor/blas.h"
#include "tensorflow/stream_executor/host/host_stream.h"
#include "tensorflow/stream_executor/host/host_timer.h"
#include "tensorflow/stream_executor/lib/error.h"
#include "tensorflow/stream_executor/lib/status.h"
#include "tensorflow/stream_executor/rng.h"
#include "tensorflow/stream_executor/stream_executor.h"
#include "tensorflow/stream_executor/stream_executor_internal.h"

namespace perftools {
namespace gputools {
namespace host {

// An implementation of StreamExecutor that does no communication or interaction
// with a device, but DOES perform memory operations backed by the host.
// Plugin routines (RNG, BLAS) are also supported and functional.
// Kernel invocations will fail, but host callbacks may be enqueued on this
// executor and its associated stream, and should follow standard ordering
// semantics.
//
// This is useful for evaluating the performance of host-based or fallback
// routines executed in the context of a GPU executor.
// See stream_executor.h for a description of the operations below.
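//
// A minimal usage sketch (not part of this interface); it assumes the host
// platform is registered with MultiPlatformManager under the name "Host"
// and that the Stream methods shown match the current stream_executor.h API:
//
//   Platform *platform =
//       MultiPlatformManager::PlatformWithName("Host").ValueOrDie();
//   StreamExecutor *executor = platform->ExecutorForDevice(0).ValueOrDie();
//   Stream stream(executor);
//   stream.Init();
//   // Host callbacks enqueued on the same stream run in enqueue order.
//   stream.ThenDoHostCallback([] { LOG(INFO) << "first"; })
//       .ThenDoHostCallback([] { LOG(INFO) << "second"; });
//   stream.BlockHostUntilDone();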
class HostExecutor : public internal::StreamExecutorInterface {
 public:
  explicit HostExecutor(const PluginConfig &plugin_config);
  ~HostExecutor() override;

  port::Status Init(int device_ordinal, DeviceOptions device_options) override {
    return port::Status::OK();
  }

  bool GetKernel(const MultiKernelLoaderSpec &spec,
                 KernelBase *kernel) override {
    return false;
  }
  bool Launch(Stream *stream, const ThreadDim &thread_dims,
              const BlockDim &block_dims, const KernelBase &kernel,
              const KernelArgsArrayBase &args) override {
    return false;
  }

  void *Allocate(uint64 size) override;
  void *AllocateSubBuffer(DeviceMemoryBase *mem, uint64 offset_bytes,
                          uint64 size_bytes) override;
  void Deallocate(DeviceMemoryBase *mem) override;

  void *HostMemoryAllocate(uint64 size) override { return new char[size]; }
  void HostMemoryDeallocate(void *mem) override {
    delete[] static_cast<char *>(mem);
  }
  bool HostMemoryRegister(void *mem, uint64 size) override { return true; }
  bool HostMemoryUnregister(void *mem) override { return true; }

  bool Memcpy(Stream *stream, void *host_dst, const DeviceMemoryBase &gpu_src,
              uint64 size) override;
  bool Memcpy(Stream *stream, DeviceMemoryBase *gpu_dst, const void *host_src,
              uint64 size) override;
  bool MemcpyDeviceToDevice(Stream *stream, DeviceMemoryBase *gpu_dst,
                            const DeviceMemoryBase &host_src,
                            uint64 size) override;

  bool MemZero(Stream *stream, DeviceMemoryBase *location,
               uint64 size) override;
  bool Memset(Stream *stream, DeviceMemoryBase *location, uint8 pattern,
              uint64 size) override;
  bool Memset32(Stream *stream, DeviceMemoryBase *location, uint32 pattern,
                uint64 size) override;

  // No "synchronize all activity" implemented for this platform at the moment.
  bool SynchronizeAllActivity() override { return false; }
  bool SynchronousMemZero(DeviceMemoryBase *location, uint64 size) override;

  bool SynchronousMemSet(DeviceMemoryBase *location, int value,
                         uint64 size) override;

  port::Status SynchronousMemcpy(DeviceMemoryBase *gpu_dst,
                                 const void *host_src, uint64 size) override;
  port::Status SynchronousMemcpy(void *host_dst,
                                 const DeviceMemoryBase &gpu_src,
                                 uint64 size) override;
  port::Status SynchronousMemcpyDeviceToDevice(DeviceMemoryBase *gpu_dst,
                                               const DeviceMemoryBase &gpu_src,
                                               uint64 size) override;

  bool HostCallback(Stream *stream, std::function<void()> callback) override;

  port::Status AllocateEvent(Event *event) override {
    return port::Status{port::error::UNIMPLEMENTED, ""};
  }

  port::Status DeallocateEvent(Event *event) override {
    return port::Status{port::error::UNIMPLEMENTED, ""};
  }

  port::Status RecordEvent(Stream *stream, Event *event) override {
    return port::Status{port::error::UNIMPLEMENTED, ""};
  }

  port::Status WaitForEvent(Stream *stream, Event *event) override {
    return port::Status{port::error::UNIMPLEMENTED, ""};
  }

  Event::Status PollForEventStatus(Event *event) override {
    return Event::Status::kError;
  }

  bool AllocateStream(Stream *stream) override;
  void DeallocateStream(Stream *stream) override;
  bool CreateStreamDependency(Stream *dependent, Stream *other) override;

  // No special initialization is necessary for host timers.
  bool AllocateTimer(Timer *timer) override { return true; }

  void DeallocateTimer(Timer *timer) override {}

  bool StartTimer(Stream *stream, Timer *timer) override;

  bool StopTimer(Stream *stream, Timer *timer) override;

  port::Status BlockHostUntilDone(Stream *stream) override;

  int PlatformDeviceCount() override { return 1; }

  bool DeviceMemoryUsage(int64 *free, int64 *total) const override {
    return false;
  }

  DeviceDescription *PopulateDeviceDescription() const override;

  port::Status EnablePeerAccessTo(StreamExecutorInterface *other) override {
    return port::Status::OK();
  }

  bool CanEnablePeerAccessTo(StreamExecutorInterface *other) override {
    return true;
  }

  SharedMemoryConfig GetDeviceSharedMemoryConfig() override {
    LOG(INFO) << "Shared memory configuration is unsupported for host "
              << "executors.";
    return SharedMemoryConfig::kDefault;
  }

  port::Status SetDeviceSharedMemoryConfig(SharedMemoryConfig config) override {
    string error_msg{
        "Shared memory configuration is unsupported for host "
        "executors."};
    LOG(INFO) << error_msg;
    return port::Status{port::error::UNIMPLEMENTED, error_msg};
  }

  bool SupportsBlas() const override;
  blas::BlasSupport *CreateBlas() override;

  bool SupportsDnn() const override { return false; }
  dnn::DnnSupport *CreateDnn() override { return nullptr; }

  bool SupportsFft() const override;
  fft::FftSupport *CreateFft() override;

  bool SupportsRng() const override;
  rng::RngSupport *CreateRng() override;

  std::unique_ptr<internal::EventInterface> CreateEventImplementation()
      override {
    LOG(WARNING) << "Events not currently supported by HostExecutor.";
    return nullptr;
  }

  std::unique_ptr<internal::KernelInterface> CreateKernelImplementation()
      override {
    return nullptr;
  }

  std::unique_ptr<internal::StreamInterface> GetStreamImplementation()
      override {
    return std::unique_ptr<internal::StreamInterface>(new HostStream());
  }

  std::unique_ptr<internal::TimerInterface> GetTimerImplementation() override {
    return std::unique_ptr<internal::TimerInterface>(new HostTimer());
  }

  void *CudaContextHack() override { return nullptr; }

 private:
  const PluginConfig plugin_config_;
};
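
// A rough sketch (not part of this header) of timing host-side work through
// the Timer API that HostExecutor backs with HostTimer; the Stream/Timer
// method names below are assumptions drawn from stream_executor.h and may
// differ between versions, and RunHostWorkload is a hypothetical callback.
//
//   Timer timer(executor);
//   stream.InitTimer(&timer)
//       .ThenStartTimer(&timer)
//       .ThenDoHostCallback(RunHostWorkload)  // hypothetical host workload
//       .ThenStopTimer(&timer);
//   stream.BlockHostUntilDone();
//   LOG(INFO) << "host workload took " << timer.Microseconds() << " us";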

}  // namespace host
}  // namespace gputools
}  // namespace perftools

#endif  // TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_GPU_EXECUTOR_H_