Home | History | Annotate | Download | only in gpu
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_COMMON_RUNTIME_GPU_GPU_UTIL_H_
     17 #define TENSORFLOW_COMMON_RUNTIME_GPU_GPU_UTIL_H_
     18 
     19 #include "tensorflow/core/common_runtime/device.h"
     20 #include "tensorflow/core/common_runtime/dma_helper.h"
     21 #include "tensorflow/core/framework/tensor.h"
     22 #include "tensorflow/core/lib/core/status.h"
     23 #include "tensorflow/core/platform/stream_executor.h"
     24 
     25 namespace tensorflow {
     26 
     27 class RecvTensorResponse;
     28 class TensorProto;
     29 
     30 namespace gpu = ::perftools::gputools;
     31 
     32 class GPUUtil {
     33  public:
     34   // "tensor" is GPU-local.  "dev" is the hosting GPU.
     35   // "device_context" should be the context of the GPU "_Send" op
     36   // which provides the Tensor.
     37   // Sets all necessary fields of "proto" by transferring value
     38   // bytes from GPU to CPU RAM. "is_dead" indicates that the
     39   // tensor is dead with an uninit value.
     40   static void SetProtoFromGPU(const Tensor& tensor, Device* dev,
     41                               const DeviceContext* device_context,
     42                               TensorProto* proto, bool is_dead,
     43                               StatusCallback done);
     44 
     45   // Copies the data in 'gpu_tensor' into 'cpu_tensor'.
     46   // 'gpu_tensor''s backing memory must be on 'gpu_device' and
     47   // 'cpu_tensor' must be allocated to be of the same size as
     48   // 'gpu_tensor'. Synchronous: may block.
     49   static void CopyGPUTensorToCPU(Device* gpu_device,
     50                                  const DeviceContext* device_context,
     51                                  const Tensor* gpu_tensor, Tensor* cpu_tensor,
     52                                  StatusCallback done);
     53 
     54   // Blocks until all operations queued on the stream associated with
     55   // "gpu_device" at the time of the call have completed.  Returns any
     56   // error pending on the stream at completion.
     57   static Status Sync(Device* gpu_device);
     58 
     59   // Blocks until all operations queued on all streams associated with the
     60   // corresponding GPU device at the time of call have completed.
     61   // Returns any error pending on the stream at completion.
     62   static Status SyncAll(Device* gpu_device);
     63 
     64   // For debugging purpose, given a "device" and a "tensor" allocated
     65   // on the device, return a string printing each byte in the tensor
     66   // (up to a limit).  "device" can be either a CPU or a GPU device.
     67   static string MemoryDebugString(const Device* device, Tensor* tensor);
     68 
     69   // Map a Tensor as a DeviceMemory object wrapping the given typed
     70   // buffer.
     71   //
     72   // NOTE: will be removed soon, see StreamExecutorUtil::AsDeviceMemory
     73   // instead.
     74   template <typename T>
     75   static perftools::gputools::DeviceMemory<T> AsDeviceMemory(const Tensor& t) {
     76     T* ptr = reinterpret_cast<T*>(const_cast<void*>(DMAHelper::base(&t)));
     77     return perftools::gputools::DeviceMemory<T>(
     78         perftools::gputools::DeviceMemoryBase(ptr, t.TotalBytes()));
     79   }
     80 
     81   // Computes a checksum over the contents of "tensor", which is allocated
     82   // on "gpu_device".
     83   static uint64 Checksum(Device* gpu_device,
     84                          const DeviceContext* device_context,
     85                          const Tensor& tensor);
     86 
     87   // Computes a checksum over the contents of "tensor", which is allocated
     88   // in local CPU RAM.
     89   static uint64 Checksum(const Tensor& tensor);
     90 
     91   static void CopyCPUTensorToGPU(const Tensor* cpu_tensor,
     92                                  const DeviceContext* device_context,
     93                                  Device* gpu_device, Tensor* gpu_tensor,
     94                                  StatusCallback done);
     95 
     96   static void DeviceToDeviceCopy(DeviceContext* send_dev_context,
     97                                  DeviceContext* recv_dev_context, Device* src,
     98                                  Device* dst,
     99                                  AllocatorAttributes src_alloc_attr,
    100                                  AllocatorAttributes dst_alloc_attr,
    101                                  const Tensor* input, Tensor* output,
    102                                  StatusCallback done);
    103 
    104   // Deep-copying of GPU tensor on the same device.
    105   // 'src_gpu_tensor''s and 'dst_gpu_tensor''s backing memory must be on
    106   // 'gpu_device' and 'dst_cpu_tensor' must be allocated to be of the same
    107   // size as 'src_gpu_tensor'.
    108   static void CopyGPUTensorToSameGPU(Device* gpu_device,
    109                                      const DeviceContext* device_context,
    110                                      const Tensor* src_gpu_tensor,
    111                                      Tensor* dst_gpu_tensor,
    112                                      StatusCallback done);
    113 };
    114 
    115 }  // namespace tensorflow
    116 #endif  // TENSORFLOW_COMMON_RUNTIME_GPU_GPU_UTIL_H_
    117