/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/service/transfer_manager.h"

#include <string>
#include <utility>

#include "tensorflow/compiler/xla/layout_util.h"
#include "tensorflow/compiler/xla/shape_util.h"
#include "tensorflow/compiler/xla/status_macros.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/util.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/macros.h"

namespace se = ::perftools::gputools;

namespace xla {

/* static */ tensorflow::mutex
    TransferManager::platform_transfer_manager_mutex_(
        tensorflow::LINKER_INITIALIZED);

/* static */ std::map<perftools::gputools::Platform::Id,
                      TransferManager::State>*
TransferManager::GetPlatformTransferManagers() {
  static auto* r =
      new std::map<perftools::gputools::Platform::Id, TransferManager::State>;
  return r;
}

Status TransferManager::TransferArrayToDevice(
    perftools::gputools::StreamExecutor* executor, const Literal& literal,
    const perftools::gputools::DeviceMemoryBase& dest) {
  const Shape on_device_shape = HostShapeToDeviceShape(literal.shape());
  TF_RET_CHECK(ShapeUtil::IsArray(on_device_shape))
      << "On-device representation of "
      << ShapeUtil::HumanString(literal.shape())
      << " is not an array: " << ShapeUtil::HumanString(on_device_shape);
  if (dest.size() < GetByteSizeRequirement(on_device_shape)) {
    return FailedPrecondition(
        "Allocation on device not large enough for array: "
        "%lld < %lld",
        dest.size(), GetByteSizeRequirement(on_device_shape));
  }
  ShapedBuffer shaped_buffer(/*on_host_shape=*/literal.shape(),
                             on_device_shape, executor->platform(),
                             executor->device_ordinal());
  shaped_buffer.set_buffer(dest, /*index=*/{});
  return TransferLiteralToDevice(executor, literal, shaped_buffer);
}

StatusOr<std::unique_ptr<Literal>> TransferManager::TransferArrayFromDevice(
    perftools::gputools::StreamExecutor* executor, const Shape& shape,
    const perftools::gputools::DeviceMemoryBase& source) {
  TF_RET_CHECK(ShapeUtil::Equal(HostShapeToDeviceShape(shape), shape))
      << "Shape " << ShapeUtil::HumanString(shape)
      << " has a differently shaped representation on-device: "
      << ShapeUtil::HumanString(HostShapeToDeviceShape(shape));
  if (source.size() < GetByteSizeRequirement(shape)) {
    return FailedPrecondition(
        "Allocation on device not large enough for array: "
        "%lld < %lld",
        source.size(), GetByteSizeRequirement(shape));
  }
  ShapedBuffer shaped_buffer(/*on_host_shape=*/shape, shape,
                             executor->platform(), executor->device_ordinal());
  shaped_buffer.set_buffer(source, /*index=*/{});
  return TransferLiteralFromDevice(executor, shaped_buffer);
}
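// Illustrative round-trip through the two array helpers above. This is a
// sketch only: `executor` and `allocator` are assumed to be a valid
// StreamExecutor and DeviceMemoryAllocator for a registered platform, and
// error handling beyond the TF_* macros is elided.
//
//   TF_ASSIGN_OR_RETURN(TransferManager* manager,
//                       TransferManager::GetForPlatform(executor->platform()));
//   std::unique_ptr<Literal> literal = Literal::CreateR1<float>({1, 2, 3});
//   const Shape device_shape =
//       manager->HostShapeToDeviceShape(literal->shape());
//   TF_ASSIGN_OR_RETURN(
//       se::DeviceMemoryBase dest,
//       allocator->Allocate(executor->device_ordinal(),
//                           manager->GetByteSizeRequirement(device_shape)));
//   TF_RETURN_IF_ERROR(
//       manager->TransferArrayToDevice(executor, *literal, dest));
//   TF_ASSIGN_OR_RETURN(std::unique_ptr<Literal> result,
//                       manager->TransferArrayFromDevice(
//                           executor, literal->shape(), dest));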
/* static */ void TransferManager::RegisterTransferManager(
    se::Platform::Id platform_id,
    TransferManagerCreationFunction creation_function) {
  tensorflow::mutex_lock lock(
      TransferManager::platform_transfer_manager_mutex_);
  auto* managers = GetPlatformTransferManagers();
  CHECK(managers->find(platform_id) == managers->end());
  (*managers)[platform_id].creation_function = creation_function;
}

/* static */ StatusOr<TransferManager*> TransferManager::GetForPlatform(
    const se::Platform* platform) {
  tensorflow::mutex_lock lock(
      TransferManager::platform_transfer_manager_mutex_);
  auto* managers = GetPlatformTransferManagers();

  auto it = managers->find(platform->id());
  if (it == managers->end()) {
    return NotFound(
        "could not find registered transfer manager for platform %s -- check "
        "target linkage",
        platform->Name().c_str());
  }

  if (it->second.manager == nullptr) {
    // Lazily create the transfer manager the first time it is needed.
    it->second.manager = (*it->second.creation_function)();
  }

  return it->second.manager.get();
}

Status TransferManager::WriteTupleIndexTables(
    perftools::gputools::StreamExecutor* executor,
    const ShapedBuffer& device_buffer) {
  VLOG(2) << "Writing tuple index tables for " << device_buffer;

  TF_RET_CHECK(executor->device_ordinal() == device_buffer.device_ordinal());

  return ShapeUtil::ForEachSubshapeWithStatus(
      device_buffer.on_device_shape(),
      [&](const Shape& device_subshape, const ShapeIndex& index) -> Status {
        if (ShapeUtil::IsTuple(device_subshape)) {
          se::DeviceMemoryBase device_memory = device_buffer.buffer(index);
          TF_RET_CHECK(GetByteSizeRequirement(device_subshape) ==
                       device_memory.size());

          std::vector<se::DeviceMemoryBase> elements;
          ShapeIndex element_index = index;
          for (int64 i = 0; i < ShapeUtil::TupleElementCount(device_subshape);
               ++i) {
            element_index.push_back(i);
            elements.push_back(device_buffer.buffer(element_index));
            element_index.pop_back();
          }
          return WriteSingleTupleIndexTable(executor, elements,
                                            device_subshape, &device_memory);
        }

        return Status::OK();
      });
}

Status TransferManager::TransferBufferFromDevice(
    se::StreamExecutor* executor, const se::DeviceMemoryBase& source,
    int64 size, void* destination) {
  if (source.size() < size) {
    return FailedPrecondition(
        "Source allocation on device not large enough for data transfer: "
        "%lld < %lld",
        source.size(), size);
  }
  auto copy_status = executor->SynchronousMemcpyD2H(source, size, destination);
  if (!copy_status.ok()) {
    return AddStatus(
        Status(static_cast<tensorflow::error::Code>(copy_status.code()),
               copy_status.error_message()),
        "failed transfer from device to buffer");
  }
  return Status::OK();
}

Status TransferManager::TransferBufferToDevice(
    se::StreamExecutor* executor, int64 size, const void* source,
    se::DeviceMemoryBase* destination) {
  if (destination->size() < size) {
    return FailedPrecondition(
        "Destination allocation on device not large enough for data transfer: "
        "%lld < %lld",
        destination->size(), size);
  }
  auto copy_status = executor->SynchronousMemcpyH2D(source, size, destination);
  if (!copy_status.ok()) {
    return AddStatus(
        Status(static_cast<tensorflow::error::Code>(copy_status.code()),
               copy_status.error_message()),
        "failed transfer of buffer to device");
  }
  return Status::OK();
}
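// How a backend typically hooks into RegisterTransferManager above: a static
// initializer in the backend's translation unit registers a creation function
// for its platform id, and GetForPlatform later instantiates the manager
// lazily. A sketch, with `MyTransferManager` and `kMyPlatformId` as
// hypothetical stand-ins for a real backend's type and platform id:
//
//   static std::unique_ptr<xla::TransferManager> CreateMyTransferManager() {
//     return xla::MakeUnique<xla::MyTransferManager>();
//   }
//
//   static bool InitModule() {
//     xla::TransferManager::RegisterTransferManager(
//         kMyPlatformId, &CreateMyTransferManager);
//     return true;
//   }
//   static bool module_initialized = InitModule();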
StatusOr<std::unique_ptr<ShapedBuffer>> TransferManager::AllocateShapedBuffer(
    const Shape& on_host_shape, DeviceMemoryAllocator* allocator,
    int device_ordinal) {
  if (!LayoutUtil::HasLayout(on_host_shape)) {
    return InvalidArgument(
        "Shape must have a layout: %s",
        ShapeUtil::HumanStringWithLayout(on_host_shape).c_str());
  }
  TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(on_host_shape));
  const Shape on_device_shape = HostShapeToDeviceShape(on_host_shape);
  TF_RET_CHECK(LayoutUtil::HasLayout(on_device_shape));

  auto shaped_buffer = WrapUnique(new ShapedBuffer(
      on_host_shape, on_device_shape, allocator->platform(), device_ordinal));

  // Allocate an appropriately sized buffer for each element in the shape,
  // including the tuple pointer arrays.
  for (auto& pair : shaped_buffer->buffers()) {
    const ShapeIndex& index = pair.first;
    se::DeviceMemoryBase& memory_base = pair.second;
    const Shape& subshape = ShapeUtil::GetSubshape(on_device_shape, index);
    TF_ASSIGN_OR_RETURN(memory_base,
                        allocator->Allocate(shaped_buffer->device_ordinal(),
                                            GetByteSizeRequirement(subshape)));
  }

  return std::move(shaped_buffer);
}

StatusOr<std::unique_ptr<ScopedShapedBuffer>>
TransferManager::AllocateScopedShapedBuffer(const Shape& on_host_shape,
                                            DeviceMemoryAllocator* allocator,
                                            int device_ordinal) {
  TF_ASSIGN_OR_RETURN(
      std::unique_ptr<ShapedBuffer> unscoped_buffer,
      AllocateShapedBuffer(on_host_shape, allocator, device_ordinal));
  return ScopedShapedBuffer::MakeScoped(unscoped_buffer.get(), allocator);
}

}  // namespace xla
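// Putting the allocation and transfer pieces together (a sketch under the
// same assumptions as the earlier examples; `manager`, `executor`,
// `allocator`, and `literal` are hypothetical locals):
//
//   TF_ASSIGN_OR_RETURN(
//       std::unique_ptr<ScopedShapedBuffer> buffer,
//       manager->AllocateScopedShapedBuffer(literal->shape(), allocator,
//                                           executor->device_ordinal()));
//   // For tuple shapes, the on-device index tables must be written before
//   // the buffer is usable by generated code.
//   TF_RETURN_IF_ERROR(manager->WriteTupleIndexTables(executor, *buffer));
//   TF_RETURN_IF_ERROR(
//       manager->TransferLiteralToDevice(executor, *literal, *buffer));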