1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 // Declares the HostExecutor class, which is a CPU-only implementation of 17 // the StreamExecutor interface. For now, this is used for testing and to 18 // examine the performance of host-based StreamExecutor code. 19 #ifndef TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_GPU_EXECUTOR_H_ 20 #define TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_GPU_EXECUTOR_H_ 21 22 #include "tensorflow/stream_executor/blas.h" 23 #include "tensorflow/stream_executor/host/host_stream.h" 24 #include "tensorflow/stream_executor/host/host_timer.h" 25 #include "tensorflow/stream_executor/lib/error.h" 26 #include "tensorflow/stream_executor/lib/status.h" 27 #include "tensorflow/stream_executor/rng.h" 28 #include "tensorflow/stream_executor/stream_executor.h" 29 #include "tensorflow/stream_executor/stream_executor_internal.h" 30 31 namespace perftools { 32 namespace gputools { 33 namespace host { 34 35 // An implementation of StreamExecutor that does no communication or interaction 36 // with a device, but DOES perform memory operations backed by the host. 37 // Plugin routines (RNG, BLAS) are also supported and functional. 38 // Kernel invocations will fail, but host callbacks may be enqueued on this 39 // executor and its associated stream, and should follow standard ordering 40 // semantics. 41 // 42 // This is useful for evaluating the performance of host-based or fallback 43 // routines executed under the context of a GPU executor. 44 // See stream_executor.h for description of the below operations. 45 class HostExecutor : public internal::StreamExecutorInterface { 46 public: 47 explicit HostExecutor(const PluginConfig &plugin_config); 48 ~HostExecutor() override; 49 50 port::Status Init(int device_ordinal, DeviceOptions device_options) override { 51 return port::Status::OK(); 52 } 53 54 bool GetKernel(const MultiKernelLoaderSpec &spec, 55 KernelBase *kernel) override { 56 return false; 57 } 58 bool Launch(Stream *stream, const ThreadDim &thread_dims, 59 const BlockDim &block_dims, const KernelBase &kernel, 60 const KernelArgsArrayBase &args) override { 61 return false; 62 } 63 64 void *Allocate(uint64 size) override; 65 void *AllocateSubBuffer(DeviceMemoryBase *mem, uint64 offset_bytes, 66 uint64 size_bytes) override; 67 void Deallocate(DeviceMemoryBase *mem) override; 68 69 void *HostMemoryAllocate(uint64 size) override { return new char[size]; } 70 void HostMemoryDeallocate(void *mem) override { 71 delete[] static_cast<char *>(mem); 72 } 73 bool HostMemoryRegister(void *mem, uint64 size) override { return true; } 74 bool HostMemoryUnregister(void *mem) override { return true; } 75 76 bool Memcpy(Stream *stream, void *host_dst, const DeviceMemoryBase &gpu_src, 77 uint64 size) override; 78 bool Memcpy(Stream *stream, DeviceMemoryBase *gpu_dst, const void *host_src, 79 uint64 size) override; 80 bool MemcpyDeviceToDevice(Stream *stream, DeviceMemoryBase *gpu_dst, 81 const DeviceMemoryBase &host_src, 82 uint64 size) override; 83 84 bool MemZero(Stream *stream, DeviceMemoryBase *location, 85 uint64 size) override; 86 bool Memset(Stream *stream, DeviceMemoryBase *location, uint8 pattern, 87 uint64 size) override; 88 bool Memset32(Stream *stream, DeviceMemoryBase *location, uint32 pattern, 89 uint64 size) override; 90 91 // No "synchronize all activity" implemented for this platform at the moment. 92 bool SynchronizeAllActivity() override { return false; } 93 bool SynchronousMemZero(DeviceMemoryBase *location, uint64 size) override; 94 95 bool SynchronousMemSet(DeviceMemoryBase *location, int value, 96 uint64 size) override; 97 98 port::Status SynchronousMemcpy(DeviceMemoryBase *gpu_dst, 99 const void *host_src, uint64 size) override; 100 port::Status SynchronousMemcpy(void *host_dst, 101 const DeviceMemoryBase &gpu_src, 102 uint64 size) override; 103 port::Status SynchronousMemcpyDeviceToDevice(DeviceMemoryBase *gpu_dst, 104 const DeviceMemoryBase &gpu_src, 105 uint64 size) override; 106 107 bool HostCallback(Stream *stream, std::function<void()> callback) override; 108 109 port::Status AllocateEvent(Event *event) override { 110 return port::Status{port::error::UNIMPLEMENTED, ""}; 111 } 112 113 port::Status DeallocateEvent(Event *event) override { 114 return port::Status{port::error::UNIMPLEMENTED, ""}; 115 } 116 117 port::Status RecordEvent(Stream *stream, Event *event) override { 118 return port::Status{port::error::UNIMPLEMENTED, ""}; 119 } 120 121 port::Status WaitForEvent(Stream *stream, Event *event) override { 122 return port::Status{port::error::UNIMPLEMENTED, ""}; 123 } 124 125 Event::Status PollForEventStatus(Event *event) override { 126 return Event::Status::kError; 127 } 128 129 bool AllocateStream(Stream *stream) override; 130 void DeallocateStream(Stream *stream) override; 131 bool CreateStreamDependency(Stream *dependent, Stream *other) override; 132 133 // No special initialization is necessary for host timers. 134 bool AllocateTimer(Timer *timer) override { return true; } 135 136 void DeallocateTimer(Timer *timer) override {} 137 138 bool StartTimer(Stream *stream, Timer *timer) override; 139 140 bool StopTimer(Stream *stream, Timer *timer) override; 141 142 port::Status BlockHostUntilDone(Stream *stream) override; 143 144 int PlatformDeviceCount() override { return 1; } 145 146 bool DeviceMemoryUsage(int64 *free, int64 *total) const override { 147 return false; 148 } 149 150 DeviceDescription *PopulateDeviceDescription() const override; 151 152 port::Status EnablePeerAccessTo(StreamExecutorInterface *other) override { 153 return port::Status::OK(); 154 } 155 156 bool CanEnablePeerAccessTo(StreamExecutorInterface *other) override { 157 return true; 158 } 159 160 SharedMemoryConfig GetDeviceSharedMemoryConfig() override { 161 LOG(INFO) << "Shared memory configuration is unsupported for host " 162 << "executors."; 163 return SharedMemoryConfig::kDefault; 164 } 165 166 port::Status SetDeviceSharedMemoryConfig(SharedMemoryConfig config) override { 167 string error_msg{ 168 "Shared memory configuration is unsupported for host " 169 "executors."}; 170 LOG(INFO) << error_msg; 171 return port::Status{port::error::UNIMPLEMENTED, error_msg}; 172 } 173 174 bool SupportsBlas() const override; 175 blas::BlasSupport *CreateBlas() override; 176 177 bool SupportsDnn() const override { return false; } 178 dnn::DnnSupport *CreateDnn() override { return nullptr; } 179 180 bool SupportsFft() const override; 181 fft::FftSupport *CreateFft() override; 182 183 bool SupportsRng() const override; 184 rng::RngSupport *CreateRng() override; 185 186 std::unique_ptr<internal::EventInterface> CreateEventImplementation() 187 override { 188 LOG(WARNING) << "Events not currently supported by HostExecutor."; 189 return nullptr; 190 } 191 192 std::unique_ptr<internal::KernelInterface> CreateKernelImplementation() 193 override { 194 return nullptr; 195 } 196 197 std::unique_ptr<internal::StreamInterface> GetStreamImplementation() 198 override { 199 return std::unique_ptr<internal::StreamInterface>(new HostStream()); 200 } 201 202 std::unique_ptr<internal::TimerInterface> GetTimerImplementation() override { 203 return std::unique_ptr<internal::TimerInterface>(new HostTimer()); 204 } 205 206 void *CudaContextHack() override { return nullptr; } 207 208 private: 209 const PluginConfig plugin_config_; 210 }; 211 212 } // namespace host 213 } // namespace gputools 214 } // namespace perftools 215 216 #endif // TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_GPU_EXECUTOR_H_ 217