/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/service/transfer_manager.h"

#include <map>
#include <string>
#include <utility>
#include <vector>

#include "tensorflow/compiler/xla/shape_util.h"
#include "tensorflow/compiler/xla/status_macros.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/util.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/macros.h"

namespace se = ::perftools::gputools;

namespace xla {
/* static */ tensorflow::mutex
    TransferManager::platform_transfer_manager_mutex_(
        tensorflow::LINKER_INITIALIZED);

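// Returns the mutable, lazily allocated map of registered transfer managers,
// keyed by platform id. The map is intentionally never deleted.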
/* static */ std::map<perftools::gputools::Platform::Id,
                      TransferManager::State>*
TransferManager::GetPlatformTransferManagers() {
  static auto* r =
      new std::map<perftools::gputools::Platform::Id, TransferManager::State>;
  return r;
}

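// Transfers an array-shaped literal into the device allocation dest,
// verifying first that the on-device representation is an array and that
// dest is large enough to hold it.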
Status TransferManager::TransferArrayToDevice(
    perftools::gputools::StreamExecutor* executor, const Literal& literal,
    const perftools::gputools::DeviceMemoryBase& dest) {
  const Shape on_device_shape = HostShapeToDeviceShape(literal.shape());
  TF_RET_CHECK(ShapeUtil::IsArray(on_device_shape))
      << "On-device representation of "
      << ShapeUtil::HumanString(literal.shape())
      << " is not an array: " << ShapeUtil::HumanString(on_device_shape);
  if (dest.size() < GetByteSizeRequirement(on_device_shape)) {
    return FailedPrecondition(
        "Allocation on device not large enough for array: "
        "%lld < %lld",
        dest.size(), GetByteSizeRequirement(on_device_shape));
  }
  ShapedBuffer shaped_buffer(/*on_host_shape=*/literal.shape(), on_device_shape,
                             executor->platform(), executor->device_ordinal());
  shaped_buffer.set_buffer(dest, /*index=*/{});
  return TransferLiteralToDevice(executor, literal, shaped_buffer);
}

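// Transfers an array of the given shape back from the device allocation
// source and returns it as a literal. The shape must have the same
// representation on the host and on the device.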
StatusOr<std::unique_ptr<Literal>> TransferManager::TransferArrayFromDevice(
    perftools::gputools::StreamExecutor* executor, const Shape& shape,
    const perftools::gputools::DeviceMemoryBase& source) {
  TF_RET_CHECK(ShapeUtil::Equal(HostShapeToDeviceShape(shape), shape))
      << "Shape " << ShapeUtil::HumanString(shape)
      << " has a differently shaped representation on-device: "
      << ShapeUtil::HumanString(HostShapeToDeviceShape(shape));
  if (source.size() < GetByteSizeRequirement(shape)) {
    return FailedPrecondition(
        "Allocation on device not large enough for array: "
        "%lld < %lld",
        source.size(), GetByteSizeRequirement(shape));
  }
  ShapedBuffer shaped_buffer(/*on_host_shape=*/shape, shape,
                             executor->platform(), executor->device_ordinal());
  shaped_buffer.set_buffer(source, /*index=*/{});
  return TransferLiteralFromDevice(executor, shaped_buffer);
}

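// Registers the creation function for the transfer manager associated with
// platform_id. At most one registration per platform is allowed.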
/* static */ void TransferManager::RegisterTransferManager(
    se::Platform::Id platform_id,
    TransferManagerCreationFunction creation_function) {
  tensorflow::mutex_lock lock(
      TransferManager::platform_transfer_manager_mutex_);
  auto* managers = GetPlatformTransferManagers();
  CHECK(managers->find(platform_id) == managers->end());
  (*managers)[platform_id].creation_function = creation_function;
}

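// Returns the transfer manager registered for platform, creating it lazily on
// first use. Fails if no transfer manager was registered for the platform.
//
// Example usage (a sketch; assumes a platform pointer obtained elsewhere,
// e.g. from the StreamExecutor platform registry):
//
//   TF_ASSIGN_OR_RETURN(TransferManager* transfer_manager,
//                       TransferManager::GetForPlatform(platform));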
/* static */ StatusOr<TransferManager*> TransferManager::GetForPlatform(
    const se::Platform* platform) {
  tensorflow::mutex_lock lock(
      TransferManager::platform_transfer_manager_mutex_);
  auto* managers = GetPlatformTransferManagers();

  auto it = managers->find(platform->id());
  if (it == managers->end()) {
    return NotFound(
        "could not find registered transfer manager for platform %s -- check "
        "target linkage",
        platform->Name().c_str());
  }

  if (it->second.manager == nullptr) {
    // Lazily create the transfer manager the first time it is needed.
    it->second.manager = (*it->second.creation_function)();
  }

  return it->second.manager.get();
}

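// Writes the tuple index table (the array of element buffer addresses) for
// every tuple-shaped subshape of device_buffer.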
Status TransferManager::WriteTupleIndexTables(
    perftools::gputools::StreamExecutor* executor,
    const ShapedBuffer& device_buffer) {
  VLOG(2) << "Writing tuple index tables for " << device_buffer;

  TF_RET_CHECK(executor->device_ordinal() == device_buffer.device_ordinal());

  return ShapeUtil::ForEachSubshapeWithStatus(
      device_buffer.on_device_shape(),
      [&](const Shape& device_subshape, const ShapeIndex& index) -> Status {
        if (ShapeUtil::IsTuple(device_subshape)) {
          se::DeviceMemoryBase device_memory = device_buffer.buffer(index);
          TF_RET_CHECK(GetByteSizeRequirement(device_subshape) ==
                       device_memory.size());

          std::vector<se::DeviceMemoryBase> elements;
          ShapeIndex element_index = index;
          for (int64 i = 0; i < ShapeUtil::TupleElementCount(device_subshape);
               ++i) {
            element_index.push_back(i);
            elements.push_back(device_buffer.buffer(element_index));
            element_index.pop_back();
          }
          return WriteSingleTupleIndexTable(executor, elements, device_subshape,
                                            &device_memory);
        }

        return Status::OK();
      });
}

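// Copies size bytes from the device allocation source into the host buffer
// destination using a synchronous device-to-host memcpy.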
Status TransferManager::TransferBufferFromDevice(
    se::StreamExecutor* executor, const se::DeviceMemoryBase& source,
    int64 size, void* destination) {
  if (source.size() < size) {
    return FailedPrecondition(
        "Source allocation on device not large enough for data transfer: "
        "%lld < %lld",
        source.size(), size);
  }
  auto copy_status = executor->SynchronousMemcpyD2H(source, size, destination);
  if (!copy_status.ok()) {
    return AddStatus(
        Status(static_cast<tensorflow::error::Code>(copy_status.code()),
               copy_status.error_message()),
        "failed transfer from device to buffer");
  }
  return Status::OK();
}

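// Copies size bytes from the host buffer source into the device allocation
// destination using a synchronous host-to-device memcpy.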
Status TransferManager::TransferBufferToDevice(
    se::StreamExecutor* executor, int64 size, const void* source,
    se::DeviceMemoryBase* destination) {
  if (destination->size() < size) {
    return FailedPrecondition(
        "Destination allocation on device not large enough for data transfer: "
        "%lld < %lld",
        destination->size(), size);
  }
  auto copy_status = executor->SynchronousMemcpyH2D(source, size, destination);
  if (!copy_status.ok()) {
    return AddStatus(
        Status(static_cast<tensorflow::error::Code>(copy_status.code()),
               copy_status.error_message()),
        "failed transfer of buffer to device");
  }
  return Status::OK();
}

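// Allocates a ShapedBuffer for on_host_shape with one device allocation per
// leaf buffer and per tuple pointer table, each sized according to its
// on-device subshape.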
StatusOr<std::unique_ptr<ShapedBuffer>> TransferManager::AllocateShapedBuffer(
    const Shape& on_host_shape, DeviceMemoryAllocator* allocator,
    int device_ordinal) {
  if (!LayoutUtil::HasLayout(on_host_shape)) {
    return InvalidArgument(
        "Shape must have a layout: %s",
        ShapeUtil::HumanStringWithLayout(on_host_shape).c_str());
  }
  TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(on_host_shape));
  const Shape on_device_shape = HostShapeToDeviceShape(on_host_shape);
  TF_RET_CHECK(LayoutUtil::HasLayout(on_device_shape));

  auto shaped_buffer = WrapUnique(new ShapedBuffer(
      on_host_shape, on_device_shape, allocator->platform(), device_ordinal));

  // Allocate an appropriately sized buffer for each element in the shape,
  // including the tuple pointer arrays.
  for (auto& pair : shaped_buffer->buffers()) {
    const ShapeIndex& index = pair.first;
    se::DeviceMemoryBase& memory_base = pair.second;
    const Shape& subshape = ShapeUtil::GetSubshape(on_device_shape, index);
    TF_ASSIGN_OR_RETURN(memory_base,
                        allocator->Allocate(shaped_buffer->device_ordinal(),
                                            GetByteSizeRequirement(subshape)));
  }

  return std::move(shaped_buffer);
}

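// As AllocateShapedBuffer, but wraps the result in a ScopedShapedBuffer that
// releases the allocations through allocator when it is destroyed.
//
// Example (a sketch; assumes the caller supplies an allocator and a host
// Shape named shape):
//
//   TF_ASSIGN_OR_RETURN(
//       std::unique_ptr<ScopedShapedBuffer> buffer,
//       transfer_manager->AllocateScopedShapedBuffer(shape, allocator,
//                                                    /*device_ordinal=*/0));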
StatusOr<std::unique_ptr<ScopedShapedBuffer>>
TransferManager::AllocateScopedShapedBuffer(const Shape& on_host_shape,
                                            DeviceMemoryAllocator* allocator,
                                            int device_ordinal) {
  TF_ASSIGN_OR_RETURN(
      std::unique_ptr<ShapedBuffer> unscoped_buffer,
      AllocateShapedBuffer(on_host_shape, allocator, device_ordinal));
  return ScopedShapedBuffer::MakeScoped(unscoped_buffer.get(), allocator);
}

}  // namespace xla