Home | History | Annotate | Download | only in framework
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_CORE_FRAMEWORK_DEVICE_BASE_H_
     17 #define TENSORFLOW_CORE_FRAMEWORK_DEVICE_BASE_H_
     18 
     19 #include <memory>
     20 #include <string>
     21 #include <vector>
     22 
     23 #include "absl/base/macros.h"
     24 #include "tensorflow/core/framework/tensor.h"
     25 #include "tensorflow/core/lib/core/errors.h"
     26 #include "tensorflow/core/lib/core/refcount.h"
     27 #include "tensorflow/core/lib/core/status.h"
     28 #include "tensorflow/core/lib/core/stringpiece.h"
     29 #include "tensorflow/core/platform/logging.h"
     30 
     31 namespace Eigen {
     32 struct ThreadPoolDevice;
     33 #ifdef TENSORFLOW_USE_SYCL
     34 struct SyclDevice;
     35 #endif
     36 }  // end namespace Eigen
     37 
     38 namespace stream_executor {
     39 class Stream;
     40 }  // namespace stream_executor
     41 
     42 namespace tensorflow {
     43 
     44 class Device;
     45 class DeviceAttributes;
     46 class Env;
     47 class EventMgr;
     48 class OpKernelContext;
     49 class ResourceMgr;
     50 class ScopedAllocatorMgr;
     51 class TensorProto;
     52 
     53 namespace thread {
     54 class ThreadPool;
     55 }
     56 
     57 // A wrapper for an Eigen Gpu Device that includes per-op state. The
     58 // class is defined even for non-GPU devices since the
     59 // OpKernelContext::Params structure wants to fill it in.
     60 class PerOpGpuDevice {
     61  public:
     62   virtual ~PerOpGpuDevice() {}
     63   virtual const Eigen::GpuDevice& device() const = 0;
     64 };
     65 
     66 // A class that devices can subclass to pass around
     67 // Device-specific context to OpKernels.
     68 class DeviceContext : public core::RefCounted {
     69  public:
     70   ~DeviceContext() override {}
     71   virtual stream_executor::Stream* stream() const { return nullptr; }
     72   virtual void MaintainLifetimeOnStream(const Tensor* t,
     73                                         stream_executor::Stream* stream) const {
     74   }
     75 
     76   // "cpu_tensor" is a tensor on a CPU. Copies "cpu_tensor" into
     77   // "device_tensor" which is on a GPU device "device". "device_tensor"
     78   // must be allocated to be of the same size as "cpu_tensor".
     79   virtual void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device,
     80                                      Tensor* device_tensor,
     81                                      StatusCallback done) const {
     82     done(errors::Internal("Unrecognized device type in CPU-to-device Copy"));
     83   }
     84 
     85   // Copies a tensor in this device.
     86   virtual void CopyTensorInSameDevice(const Tensor* input_tensor,
     87                                       Device* device, Tensor* output_tensor,
     88                                       StatusCallback done) const {
     89     done(errors::Unimplemented("Copy in same device not implemented."));
     90   }
     91 
     92   // "device_tensor" is a tensor on a non-CPU device.  Copies
     93   // device_tensor into "cpu_tensor".  "cpu_tensor" must be allocated
     94   // to be of the same size as "device_tensor".
     95   virtual void CopyDeviceTensorToCPU(const Tensor* device_tensor,
     96                                      StringPiece tensor_name, Device* device,
     97                                      Tensor* cpu_tensor, StatusCallback done) {
     98     done(errors::Internal("Unrecognized device type in device-to-CPU Copy"));
     99   }
    100 
    101   // If possible, wait for all events on *stream to complete then execute func.
    102   // A non-OK Status is returned otherwise.  The stream argument should be the
    103   // one provided by GpuDeviceInfo.  This function is not applicable to devices
    104   // that don't provide such a value.
    105   virtual Status ThenExecute(Device* device, stream_executor::Stream* stream,
    106                              std::function<void()> func) {
    107     return errors::Internal("ThenExecute not supported by device");
    108   }
    109 };
    110 
    111 // map[i] is the DeviceContext* for the node with id i, if i < map.size().
    112 typedef std::vector<DeviceContext*> DeviceContextMap;
    113 
// Base interface shared by all TensorFlow device implementations.  Holds
// per-device resources (CPU worker threads, GPU/TPU stream info, Eigen
// devices, allocators) supplied by subclasses; every set_* method below
// explicitly does NOT take ownership of its argument.
class DeviceBase {
 public:
  explicit DeviceBase(Env* env) : env_(env) {}
  // Defined out of line; subclasses are deleted through this base.
  virtual ~DeviceBase();

  // The Env this device was constructed with.  Not owned.
  Env* env() const { return env_; }

  // Override this to return true for devices that require an Op's
  // compute method to save references to the temporary tensors it
  // allocates until the Op execution completes
  virtual bool RequiresRecordingAccessedTensors() const { return false; }

  // CPU thread pool and its size, provided by whoever configures the device.
  struct CpuWorkerThreads {
    int num_threads = 0;
    thread::ThreadPool* workers = nullptr;
  };

  // Does not take ownership.
  void set_tensorflow_cpu_worker_threads(CpuWorkerThreads* t) {
    cpu_worker_threads_ = t;
  }

  // CHECK-fails if set_tensorflow_cpu_worker_threads() was never called.
  virtual const CpuWorkerThreads* tensorflow_cpu_worker_threads() const {
    CHECK(cpu_worker_threads_ != nullptr);
    return cpu_worker_threads_;
  }

  // "stream" is used in special circumstances (such as the
  // constructors of Ops) where there is no available OpKernelContext.
  // "default_context" is used by OpKernelContext whenever a device does not
  // supply a DeviceContext for an op in FillContextMap (e.g. when only
  // using a single stream.)
  // "event_mgr" is used to delay deallocation of temporary GPU buffers.
  // TODO(pbar) Work out how to move this out of DeviceBase.
  // GpuDeviceInfo name is an unfortunate legacy, it is used not only by GPUs
  // but also by TPU devices (to provide default device context).
  struct GpuDeviceInfo {
    // Make sure all the defaults are NULL, so we can spot missing assignments.
    stream_executor::Stream* stream = nullptr;
    DeviceContext* default_context = nullptr;
    EventMgr* event_mgr = nullptr;
    int gpu_id = -1;
  };

  // Does not take ownership.
  void set_tensorflow_gpu_device_info(GpuDeviceInfo* g) {
    gpu_device_info_ = g;
  }

  // Returns nullptr for devices that never called
  // set_tensorflow_gpu_device_info() (i.e. non-GPU/TPU devices).
  virtual const GpuDeviceInfo* tensorflow_gpu_device_info() const {
    return gpu_device_info_;
  }

  // The preferred thread pool for this device. If it is nullptr, the system
  // automatically assigns a thread pool for execution.
  virtual thread::ThreadPool* tensorflow_device_thread_pool() {
    return device_thread_pool_;
  }

  // Does not take ownership.  Defined out of line (see also
  // eigen_cpu_device() / eigen_cpu_devices_ below).
  void set_eigen_cpu_device(Eigen::ThreadPoolDevice* d);

#ifdef TENSORFLOW_USE_SYCL
  // Does not take ownership.
  void set_eigen_sycl_device(Eigen::SyclDevice* d) { eigen_sycl_device_ = d; }
#endif

  // Return the Allocator implementation to use based on the allocator
  // attributes requested.  See allocator.h for more details.
  // LOG(FATAL)s unless overridden by a subclass.
  virtual Allocator* GetAllocator(AllocatorAttributes /*attr*/) {
    LOG(FATAL) << "GetAllocator() is not implemented.";
    return nullptr;
  }

  // This method is provided for backwards compatibility, and will be removed
  // in a future release.  The ResourceMgr argument is ignored.
  ABSL_DEPRECATED("Use `this->GetAllocator()` or `this->GetScopedAllocator()`.")
  Allocator* GetStepAllocator(AllocatorAttributes attr, ResourceMgr*) {
    return GetAllocator(attr);
  }

  // Return an Allocator prepared for use in particular places by graph
  // optimization
  // LOG(FATAL)s unless overridden by a subclass.
  virtual Allocator* GetScopedAllocator(AllocatorAttributes attr,
                                        int64 step_id) {
    LOG(FATAL) << "Device does not implement GetScopedAllocator()";
    return nullptr;
  }

  virtual ScopedAllocatorMgr* GetScopedAllocatorMgr() const { return nullptr; }

  // True iff set_eigen_cpu_device() has been called at least once.
  bool has_eigen_cpu_device() const { return !eigen_cpu_devices_.empty(); }

  // Defined out of line; selects from eigen_cpu_devices_.
  virtual const Eigen::ThreadPoolDevice* eigen_cpu_device();

#ifdef TENSORFLOW_USE_SYCL
  // CHECK-fails if set_eigen_sycl_device() was never called.
  virtual const Eigen::SyclDevice* eigen_sycl_device() const {
    CHECK(eigen_sycl_device_ != nullptr);
    return eigen_sycl_device_;
  }
#endif

  // Caller owns the return value. The OpKernelContext calls this even
  // for devices that do not implement an eigen_gpu_device. Overridden
  // by GPU devices to return a derived type.
  virtual PerOpGpuDevice* MakeGpuDevice() { return nullptr; }

  // Wrapper devices override these to expose the device they wrap;
  // the default is the identity.
  virtual DeviceBase* UnderlyingDevice() { return this; }
  virtual const DeviceBase* UnderlyingDevice() const { return this; }

  // This is overridden by GPU devices to reinitialize the derived
  // type returned by MakeGpuDevice.
  virtual Status ReinitializeGpuDevice(OpKernelContext* /*context*/,
                                       PerOpGpuDevice* /*device*/,
                                       DeviceContext* /*dc*/,
                                       Allocator* /*allocator*/) {
    return Status::OK();
  }

  // Unimplemented by default
  virtual const DeviceAttributes& attributes() const;
  virtual const string& name() const;

  // Materializes the given TensorProto into 'tensor' stored in Device
  // memory.  Most devices will want to override this.
  //
  // TODO(vrv): We should be able to put this function into
  // OpKernelContext and handle the copies from device memory via send
  // and receive nodes, instead of requiring that each device handle
  // the copies here as well as in copy ops.
  virtual Status MakeTensorFromProto(const TensorProto& tensor_proto,
                                     const AllocatorAttributes alloc_attrs,
                                     Tensor* tensor) {
    return errors::Internal("Device does not implement MakeTensorFromProto()");
  }

  // Some devices (i.e. GPUs) may free device memory prior to its actual use
  // being completed on the assumption that subsequent allocations can only be
  // used serially with respect to pending uses.  If this function returns a
  // non-zero value it is the value of a device-specific counter such that any
  // device memory tagged with an earlier freed-at count is really unencumbered
  // by pending uses.  For this to be useful the device memory allocator must
  // be tagging deallocated memory chunks using the same counter.
  virtual uint64 SafeAllocFrontier() { return 0; }

 protected:
  // Does not take ownership.
  void set_tensorflow_device_thread_pool(thread::ThreadPool* thread_pool) {
    device_thread_pool_ = thread_pool;
  }

 private:
  Env* const env_;  // Not owned.
  CpuWorkerThreads* cpu_worker_threads_ = nullptr;  // Not owned.
  // Set by GPUs as well as by TPU devices.
  GpuDeviceInfo* gpu_device_info_ = nullptr;  // Not owned.
  thread::ThreadPool* device_thread_pool_ = nullptr;  // Not owned.
  std::vector<Eigen::ThreadPoolDevice*> eigen_cpu_devices_;
#ifdef TENSORFLOW_USE_SYCL
  Eigen::SyclDevice* eigen_sycl_device_ = nullptr;  // Not owned.
#endif
};
    275 
    276 }  // namespace tensorflow
    277 
    278 #endif  // TENSORFLOW_CORE_FRAMEWORK_DEVICE_BASE_H_
    279