// File: tensorflow/core/framework/device_base.h
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_FRAMEWORK_DEVICE_BASE_H_
     17 #define TENSORFLOW_FRAMEWORK_DEVICE_BASE_H_
     18 
     19 #include <memory>
     20 #include <string>
     21 #include <unordered_map>
     22 
     23 #include "tensorflow/core/framework/tensor.h"
     24 #include "tensorflow/core/lib/core/errors.h"
     25 #include "tensorflow/core/lib/core/refcount.h"
     26 #include "tensorflow/core/lib/core/status.h"
     27 #include "tensorflow/core/lib/core/stringpiece.h"
     28 #include "tensorflow/core/platform/logging.h"
     29 
     30 namespace Eigen {
     31 struct ThreadPoolDevice;
     32 #ifdef TENSORFLOW_USE_SYCL
     33 struct SyclDevice;
     34 #endif
     35 }  // end namespace Eigen
     36 
     37 namespace perftools {
     38 namespace gputools {
     39 class Stream;
     40 }  // namespace gputools
     41 }  // namespace perftools
     42 
     43 namespace tensorflow {
     44 
     45 class Device;
     46 class DeviceAttributes;
     47 class Env;
     48 class EventMgr;
     49 class OpKernelContext;
     50 class ResourceMgr;
     51 class TensorProto;
     52 
     53 namespace thread {
     54 class ThreadPool;
     55 }
     56 
// A wrapper for an Eigen Gpu Device that includes per-op state. The
// class is defined even for non-GPU devices since the
// OpKernelContext::Params structure wants to fill it in.
class PerOpGpuDevice {
 public:
  virtual ~PerOpGpuDevice() {}
  // Returns the per-op Eigen GPU device. Pure virtual: concrete GPU
  // devices supply the real device via DeviceBase::MakeGpuDevice() /
  // ReinitializeGpuDevice().
  virtual const Eigen::GpuDevice& device() const = 0;
};
     65 
// A class that devices can subclass to pass around
// Device-specific context to OpKernels.
//
// Reference-counted (core::RefCounted): callers that retain a
// DeviceContext beyond the scope in which it was handed to them must
// Ref()/Unref() it accordingly.
class DeviceContext : public core::RefCounted {
 public:
  ~DeviceContext() override {}
  // Returns the device's compute stream, or nullptr when the device
  // has no notion of streams (the default).
  virtual perftools::gputools::Stream* stream() const { return nullptr; }
  // Requests that "*t" stay alive until work already enqueued on
  // "stream" completes. No-op by default; stream-based devices
  // override this.
  virtual void MaintainLifetimeOnStream(
      const Tensor* t, perftools::gputools::Stream* stream) const {}

  // "cpu_tensor" is a tensor on a CPU. Copies "cpu_tensor" into
  // "device_tensor" which is on a GPU device "device". "device_tensor"
  // must be allocated to be of the same size as "cpu_tensor".
  //
  // The default implementation immediately invokes "done" with an
  // Internal error; devices that support this copy must override.
  virtual void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device,
                                     Tensor* device_tensor,
                                     StatusCallback done) const {
    done(errors::Internal("Unrecognized device type in CPU-to-device Copy"));
  }

  // "device_tensor" is a tensor on a non-CPU device.  Copies
  // device_tensor into "cpu_tensor".  "cpu_tensor" must be allocated
  // to be of the same size as "device_tensor".
  //
  // The default implementation immediately invokes "done" with an
  // Internal error; devices that support this copy must override.
  virtual void CopyDeviceTensorToCPU(const Tensor* device_tensor,
                                     StringPiece tensor_name, Device* device,
                                     Tensor* cpu_tensor, StatusCallback done) {
    done(errors::Internal("Unrecognized device type in device-to-CPU Copy"));
  }
};

// map[i] is the DeviceContext* for the node with id i, if i < map.size().
typedef std::vector<DeviceContext*> DeviceContextMap;
     96 
// Base class for all devices. Holds per-device state that OpKernels
// and the runtime query: CPU worker threads, Eigen devices,
// allocators, and (for GPU devices) stream/event-manager info.
// None of the pointers passed to the set_* methods below are owned
// by DeviceBase.
class DeviceBase {
 public:
  // "env" is not owned and must outlive this device.
  explicit DeviceBase(Env* env) : env_(env) {}
  virtual ~DeviceBase();

  Env* env() const { return env_; }

  // Override this to return true for devices that require an Op's
  // compute method to save references to the temporary tensors it
  // allocates until the Op execution completes
  virtual bool RequiresRecordingAccessedTensors() const { return false; }

  // CPU thread pool made available to ops for host-side work.
  struct CpuWorkerThreads {
    int num_threads = 0;
    thread::ThreadPool* workers = nullptr;  // not owned
  };

  // Does not take ownership.
  void set_tensorflow_cpu_worker_threads(CpuWorkerThreads* t) {
    cpu_worker_threads_ = t;
  }

  // CHECK-fails if set_tensorflow_cpu_worker_threads() has not been
  // called first.
  virtual const CpuWorkerThreads* tensorflow_cpu_worker_threads() const {
    CHECK(cpu_worker_threads_ != nullptr);
    return cpu_worker_threads_;
  }

  // "stream" is used in special circumstances (such as the
  // constructors of Ops) where there is no available OpKernelContext.
  // "default_context" is used by OpKernelContext whenever a device does not
  // supply a DeviceContext for an op in FillContextMap (e.g. when only
  // using a single stream.)
  // "event_mgr" is used to delay deallocation of temporary GPU buffers.
  // TODO(pbar) Work out how to move this out of DeviceBase.
  struct GpuDeviceInfo {
    // Make sure all the defaults are NULL, so we can spot missing assignments.
    perftools::gputools::Stream* stream = nullptr;
    DeviceContext* default_context = nullptr;
    EventMgr* event_mgr = nullptr;
    int gpu_id = -1;
  };

  // Does not take ownership.
  void set_tensorflow_gpu_device_info(GpuDeviceInfo* g) {
    gpu_device_info_ = g;
  }

  // Returns the info set via set_tensorflow_gpu_device_info(), or
  // nullptr when none was set (e.g. on non-GPU devices).
  virtual const GpuDeviceInfo* tensorflow_gpu_device_info() const {
    return gpu_device_info_;
  }

  // The preferred thread pool for this device. If it is nullptr, the system
  // automatically assigns a thread pool for execution.
  virtual thread::ThreadPool* tensorflow_device_thread_pool() {
    return device_thread_pool_;
  }

  // Does not take ownership.
  void set_eigen_cpu_device(Eigen::ThreadPoolDevice* d) {
    eigen_cpu_device_ = d;
  }

#ifdef TENSORFLOW_USE_SYCL
  // Does not take ownership.
  void set_eigen_sycl_device(Eigen::SyclDevice* d) { eigen_sycl_device_ = d; }
#endif

  // Return the Allocator implementation to use based on the allocator
  // attributes requested.  See allocator.h for more details.
  // The base implementation LOG(FATAL)s: every concrete device must
  // override this.
  virtual Allocator* GetAllocator(AllocatorAttributes /*attr*/) {
    LOG(FATAL) << "GetAllocator() is not implemented.";
    return nullptr;
  }

  // Return the Allocator implementation to use based on the allocator
  // attributes requested and the supplied resource manager. By
  // default this ignores the resource manager and calls the base
  // implementation but devices can override if they want to consult
  // the resource manager when choosing the allocator.
  virtual Allocator* GetStepAllocator(AllocatorAttributes attr,
                                      ResourceMgr* /*step_resource_manager*/) {
    return GetAllocator(attr);
  }

  // CHECK-fails if set_eigen_cpu_device() has not been called first.
  virtual const Eigen::ThreadPoolDevice* eigen_cpu_device() {
    CHECK(eigen_cpu_device_ != nullptr);
    return eigen_cpu_device_;
  }

#ifdef TENSORFLOW_USE_SYCL
  // CHECK-fails if set_eigen_sycl_device() has not been called first.
  virtual const Eigen::SyclDevice* eigen_sycl_device() const {
    CHECK(eigen_sycl_device_ != nullptr);
    return eigen_sycl_device_;
  }
#endif

  // Caller owns the return value. The OpKernelContext calls this even
  // for devices that do not implement an eigen_gpu_device. Overridden
  // by GPU devices to return a derived type.
  // The base implementation returns nullptr.
  virtual PerOpGpuDevice* MakeGpuDevice() { return nullptr; }

  // By default a device is its own underlying device; subclasses that
  // wrap another device may override to expose the wrapped one.
  virtual DeviceBase* UnderlyingDevice() { return this; }
  virtual const DeviceBase* UnderlyingDevice() const { return this; }

  // This is overridden by GPU devices to reinitialize the derived
  // type returned by MakeGpuDevice.
  virtual void ReinitializeGpuDevice(OpKernelContext* /*context*/,
                                     PerOpGpuDevice* /*device*/,
                                     DeviceContext* /*dc*/,
                                     Allocator* /*allocator*/) {}

  // Unimplemented by default
  virtual const DeviceAttributes& attributes() const;
  virtual const string& name() const;

  // Materializes the given TensorProto into 'tensor' stored in Device
  // memory.  Most devices will want to override this.
  //
  // The base implementation returns errors::Internal.
  //
  // TODO(vrv): We should be able to put this function into
  // OpKernelContext and handle the copies from device memory via send
  // and receive nodes, instead of requiring that each device handle
  // the copies here as well as in copy ops.
  virtual Status MakeTensorFromProto(const TensorProto& tensor_proto,
                                     const AllocatorAttributes alloc_attrs,
                                     Tensor* tensor) {
    return errors::Internal("Device does not implement MakeTensorFromProto()");
  }

 protected:
  // Does not take ownership.
  void set_tensorflow_device_thread_pool(thread::ThreadPool* thread_pool) {
    device_thread_pool_ = thread_pool;
  }

 private:
  // All pointers below are unowned; they are installed via the set_*
  // methods above and must outlive this device.
  Env* const env_;
  CpuWorkerThreads* cpu_worker_threads_ = nullptr;
  GpuDeviceInfo* gpu_device_info_ = nullptr;
  thread::ThreadPool* device_thread_pool_ = nullptr;
  Eigen::ThreadPoolDevice* eigen_cpu_device_ = nullptr;
#ifdef TENSORFLOW_USE_SYCL
  Eigen::SyclDevice* eigen_sycl_device_ = nullptr;
#endif
};
    240 
    241 }  // namespace tensorflow
    242 
    243 #endif  // TENSORFLOW_FRAMEWORK_DEVICE_BASE_H_
    244