Home | History | Annotate | Download | only in gpu
      1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h"
     17 
     18 #include <utility>
     19 
     20 #include "tensorflow/compiler/xla/map_util.h"
     21 #include "tensorflow/compiler/xla/ptr_util.h"
     22 #include "tensorflow/compiler/xla/service/gpu/gpu_constants.h"
     23 #include "tensorflow/compiler/xla/status_macros.h"
     24 #include "tensorflow/compiler/xla/types.h"
     25 #include "tensorflow/compiler/xla/util.h"
     26 #include "tensorflow/core/lib/core/errors.h"
     27 #include "tensorflow/core/lib/strings/numbers.h"
     28 #include "tensorflow/core/platform/logging.h"
     29 #include "tensorflow/core/platform/types.h"
     30 
     31 namespace se = ::perftools::gputools;
     32 
     33 namespace xla {
     34 namespace gpu {
     35 
// Records that buffer `index` already has a device address (e.g. an
// externally supplied argument or result buffer), so Build() will use
// `address` instead of allocating fresh memory for it.  Registering the
// same index twice is a fatal error (InsertOrDie CHECK-fails on a
// duplicate key).
void BufferAllocations::Builder::RegisterBuffer(BufferAllocation::Index index,
                                                se::DeviceMemoryBase address) {
  InsertOrDie(&registered_buffers_, index, address);
}
     40 
     41 StatusOr<std::unique_ptr<BufferAllocations>> BufferAllocations::Builder::Build(
     42     const BufferAssignment& buffer_assignment, int device_ordinal,
     43     DeviceMemoryAllocator* memory_allocator) {
     44   const int64 num_buffers = buffer_assignment.Allocations().size();
     45   auto buffer_allocations = WrapUnique(
     46       new BufferAllocations(num_buffers, device_ordinal, memory_allocator));
     47 
     48   for (BufferAllocation::Index i = 0; i < num_buffers; ++i) {
     49     // If buffer #i's address is already registered (e.g. external arguments or
     50     // result buffers), use that registered buffer.
     51     if (registered_buffers_.count(i)) {
     52       se::DeviceMemoryBase address = FindOrDie(registered_buffers_, i);
     53       if (reinterpret_cast<uintptr_t>(address.opaque()) %
     54               kCudaMallocAlignBytes !=
     55           0) {
     56         return InternalError(
     57             "Address of registered buffer %lld must be a multiple of %llx, but "
     58             "was %p",
     59             i, kCudaMallocAlignBytes, address.opaque());
     60       }
     61       buffer_allocations->SetBuffer(i, FindOrDie(registered_buffers_, i));
     62       continue;
     63     }
     64 
     65     // Allocate each allocation that might escape, or is the temp buffer.
     66     bool seen_temp_buffer = false;
     67     const BufferAllocation& allocation = buffer_assignment.GetAllocation(i);
     68     if (allocation.maybe_live_out() || allocation.IsPreallocatedTempBuffer()) {
     69       const int64 buffer_size = allocation.size();
     70       se::DeviceMemoryBase buffer_address;
     71       if (buffer_size > 0) {
     72         TF_ASSIGN_OR_RETURN(buffer_address, memory_allocator->Allocate(
     73                                                 device_ordinal, buffer_size));
     74         if (buffer_address == nullptr) {
     75           return ResourceExhausted(
     76               "Out of memory when allocating %s for buffer %lld.",
     77               tensorflow::strings::HumanReadableNumBytes(buffer_size).c_str(),
     78               i);
     79         }
     80         if (reinterpret_cast<uintptr_t>(buffer_address.opaque()) %
     81                 kCudaMallocAlignBytes !=
     82             0) {
     83           return InternalError(
     84               "Address returned by memory_allocator->Allocate must be a "
     85               "multiple of %llx, but was %p",
     86               kCudaMallocAlignBytes, buffer_address.opaque());
     87         }
     88       }
     89       buffer_allocations->SetBuffer(i, buffer_address);
     90       if (allocation.IsPreallocatedTempBuffer()) {
     91         if (seen_temp_buffer) {
     92           LOG(FATAL) << "Multiple temporary buffers detected.  BufferAssigner "
     93                      << "must guarantee at most one temporary buffer.";
     94         }
     95         seen_temp_buffer = true;
     96         buffer_allocations->temp_buffer_base_ = buffer_address;
     97       }
     98     }
     99   }
    100 
    101   if (VLOG_IS_ON(2)) {
    102     for (BufferAllocation::Index i = 0; i < num_buffers; ++i) {
    103       const auto& buf = buffer_allocations->buffers_[i];
    104       VLOG(2) << "Buffer " << i << " -> " << buf.opaque() << " (" << buf.size()
    105               << "B)";
    106     }
    107   }
    108 
    109   return std::move(buffer_allocations);
    110 }
    111 
    112 tensorflow::Status BufferAllocations::TearDown(
    113     const std::set<se::DeviceMemoryBase>& live_addresses,
    114     const BufferAssignment& buffer_assignment) {
    115   // Deallocate temporary buffers.
    116   const int64 num_buffers = buffer_assignment.Allocations().size();
    117   for (BufferAllocation::Index i = 0; i < num_buffers; ++i) {
    118     const BufferAllocation& allocation = buffer_assignment.GetAllocation(i);
    119     se::DeviceMemoryBase buffer_address = GetDeviceAddress(allocation.index());
    120     // Deallocate buffers marked "maybe_live_out" but aren't actually live out,
    121     // and temp buffers.
    122     if ((allocation.maybe_live_out() &&
    123          !live_addresses.count(buffer_address)) ||
    124         allocation.IsPreallocatedTempBuffer()) {
    125       TF_RETURN_IF_ERROR(
    126           memory_allocator_->Deallocate(device_ordinal_, &buffer_address));
    127     }
    128   }
    129   return tensorflow::Status::OK();
    130 }
    131 
    132 se::DeviceMemoryBase BufferAllocations::GetDeviceAddress(
    133     BufferAllocation::Index buffer_index) const {
    134   CHECK_GE(buffer_index, 0);
    135   CHECK_LT(buffer_index, buffers_.size());
    136   return buffers_[buffer_index];
    137 }
    138 
    139 se::DeviceMemoryBase BufferAllocations::GetDeviceAddress(
    140     const BufferAllocation::Slice& buffer_slice) const {
    141   se::DeviceMemoryBase base = GetDeviceAddress(buffer_slice.index());
    142   CHECK_LE(buffer_slice.offset(), base.size());
    143   CHECK_LE(buffer_slice.offset() + buffer_slice.size(), base.size());
    144   return se::DeviceMemoryBase(
    145       static_cast<char*>(base.opaque()) + buffer_slice.offset(),
    146       buffer_slice.size(), /*is_sub_buffer=*/true);
    147 }
    148 
    149 void BufferAllocations::SetBuffer(BufferAllocation::Index buffer_index,
    150                                   se::DeviceMemoryBase buffer) {
    151   CHECK_GE(buffer_index, 0);
    152   CHECK_LT(buffer_index, buffers_.size());
    153   buffers_[buffer_index] = buffer;
    154 }
    155 
    156 }  // namespace gpu
    157 }  // namespace xla
    158