/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/common_runtime/gpu/pool_allocator.h"

#include <errno.h>
#ifndef _MSC_VER
#include <strings.h>
#include <sys/mman.h>  // for munmap
#endif

#include <map>
#include <utility>

#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

PoolAllocator::PoolAllocator(size_t pool_size_limit, bool auto_resize,
                             SubAllocator* allocator,
                             RoundUpInterface* size_rounder, string name)
    : name_(std::move(name)),
      has_size_limit_(pool_size_limit > 0),
      auto_resize_(auto_resize),
      pool_size_limit_(pool_size_limit),
      allocator_(allocator),
      size_rounder_(size_rounder),
      allocation_begun_(false) {
  if (auto_resize) {
    CHECK_LT(size_t{0}, pool_size_limit)
        << "size limit must be > 0 if auto_resize is true.";
  }
}

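// Illustrative usage sketch (the helper types BasicCPUAllocator and
// Pow2Rounder are assumed from pool_allocator.h; substitute whatever
// SubAllocator and RoundUpInterface implementations are available locally):
//
//   PoolAllocator pool(/*pool_size_limit=*/100, /*auto_resize=*/true,
//                      new BasicCPUAllocator(), new Pow2Rounder(), "cpu_pool");
//   void* p = pool.AllocateRaw(/*alignment=*/64, /*num_bytes=*/1 << 20);
//   pool.DeallocateRaw(p);  // Parks the chunk in the pool for reuse.
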
PoolAllocator::~PoolAllocator() { Clear(); }

namespace {
// Pools contain Chunks allocated from the underlying Allocator.
// Chunk alignment is always on kPoolAlignment boundaries.  Each Chunk
// begins with a descriptor (ChunkPrefix) that gives its size and a
// pointer to itself.  The pointer returned to the user is just past
// the ChunkPrefix.  If the user asks for a larger alignment, we will
// increase the size of the chunk, then adjust the returned user
// pointer and also re-write the ChunkPrefix.chunk_ptr value
// immediately before it.  This way the Chunk address and size can be
// recovered from the returned user pointer, regardless of alignment.
// Note that this dereferencing of the pointers means that we cannot
// handle GPU memory, only CPU memory.
struct ChunkPrefix {
  size_t num_bytes;
  void* chunk_ptr;
};
// kPoolAlignment cannot be less than the size of ChunkPrefix.
static const int kPoolAlignment = sizeof(ChunkPrefix);

void* PrepareChunk(void* chunk, size_t alignment, size_t num_bytes) {
  ChunkPrefix* cp = reinterpret_cast<ChunkPrefix*>(chunk);
  cp->num_bytes = num_bytes;
  cp->chunk_ptr = chunk;
  void* user_ptr = reinterpret_cast<void*>(cp + 1);
  if (alignment > kPoolAlignment) {
    // Move user_ptr forward to the first satisfying offset, and write
    // chunk_ptr just before it.
    size_t aligned_ptr = reinterpret_cast<size_t>(user_ptr) + alignment;
    user_ptr = reinterpret_cast<void*>(aligned_ptr & ~(alignment - 1));
    (reinterpret_cast<ChunkPrefix*>(user_ptr) - 1)->chunk_ptr = chunk;
  }
  // Safety check that user_ptr is always past the ChunkPrefix.
  CHECK_GE(user_ptr, reinterpret_cast<ChunkPrefix*>(chunk) + 1);
  return user_ptr;
}

ChunkPrefix* FindPrefix(void* user_ptr) {
  ChunkPrefix* cp = reinterpret_cast<ChunkPrefix*>(user_ptr) - 1;
  return reinterpret_cast<ChunkPrefix*>(cp->chunk_ptr);
}
}  // namespace

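// Worked example of the ChunkPrefix bookkeeping (assuming a 64-bit build, so
// sizeof(ChunkPrefix) == kPoolAlignment == 16): for a 100-byte request at
// alignment 64, AllocateRaw below inflates the request to 100 + 64 + 16 bytes
// before rounding.  If the underlying allocator returns chunk == 0x1000,
// PrepareChunk first sets user_ptr = 0x1010, then rounds 0x1010 + 64 down to a
// multiple of 64, yielding user_ptr = 0x1040, and rewrites chunk_ptr in the
// ChunkPrefix at 0x1030 to point back at 0x1000.  FindPrefix(0x1040) reads
// that prefix and recovers the chunk address 0x1000, whatever alignment was
// originally requested.
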
void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
  if (!allocation_begun_) allocation_begun_ = true;
  if (num_bytes == 0) return nullptr;

  // If alignment is larger than kPoolAlignment, increase num_bytes so that we
  // are guaranteed to be able to return an aligned ptr by advancing user_ptr
  // without overrunning the end of the chunk.
  if (alignment > kPoolAlignment) {
    num_bytes += alignment;
  }
  num_bytes += sizeof(ChunkPrefix);
  num_bytes = size_rounder_->RoundUp(num_bytes);
  PtrRecord* pr = nullptr;
  if (has_size_limit_) {
    {
      mutex_lock lock(mutex_);
      auto iter = pool_.find(num_bytes);
      if (iter == pool_.end()) {
        allocated_count_++;
        // Deliberately fall out of lock scope before
        // calling the allocator.  No further modification
        // to the pool will be performed.
      } else {
        get_from_pool_count_++;
        pr = iter->second;
        RemoveFromList(pr);
        pool_.erase(iter);
        // Fall out of lock scope and do the rest without holding the lock.
      }
    }
  }
  if (pr != nullptr) {
    void* r = pr->ptr;
    delete pr;
    return PrepareChunk(r, alignment, num_bytes);
  } else {
    void* ptr = allocator_->Alloc(kPoolAlignment, num_bytes);
    for (const auto& v : alloc_visitors_) {
      v(ptr, num_bytes);
    }
    return PrepareChunk(ptr, alignment, num_bytes);
  }
}

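// Returns a chunk obtained from AllocateRaw.  When the pool is unbounded and
// not auto-resizing there is nothing to cache, so the chunk goes straight back
// to the underlying allocator; otherwise it is parked in pool_ for reuse,
// evicting least-recently-used entries if the pool is already full.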
void PoolAllocator::DeallocateRaw(void* ptr) {
  if (ptr == nullptr) return;
  ChunkPrefix* cp = FindPrefix(ptr);
  CHECK_LE((void*)cp, (void*)ptr);
  if (!has_size_limit_ && !auto_resize_) {
    for (const auto& v : free_visitors_) {
      v(cp, cp->num_bytes);
    }
    allocator_->Free(cp, cp->num_bytes);
  } else {
    mutex_lock lock(mutex_);
    ++put_count_;
    while (pool_.size() >= pool_size_limit_) {
      EvictOne();
    }
    PtrRecord* pr = new PtrRecord;
    pr->num_bytes = cp->num_bytes;
    pr->ptr = cp;
    AddToList(pr);
    pool_.insert(std::make_pair(cp->num_bytes, pr));
  }
}

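// Illustrative round trip (a sketch, assuming the pool from the constructor
// example above and a rounder that maps equal requests to equal sizes): a
// freed chunk is keyed in pool_ by its rounded size, so a later request that
// rounds to the same size reuses it instead of hitting the SubAllocator:
//
//   void* a = pool.AllocateRaw(16, 1000);  // SubAllocator::Alloc is called.
//   pool.DeallocateRaw(a);                 // Chunk parked under its rounded size.
//   void* b = pool.AllocateRaw(16, 1000);  // Served from the pool, no new Alloc.
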
void PoolAllocator::Clear() {
  if (has_size_limit_) {
    mutex_lock lock(mutex_);
    for (auto iter : pool_) {
      PtrRecord* pr = iter.second;
      for (const auto& v : free_visitors_) {
        v(pr->ptr, pr->num_bytes);
      }
      allocator_->Free(pr->ptr, pr->num_bytes);
      delete pr;
    }
    pool_.clear();
    get_from_pool_count_ = 0;
    put_count_ = 0;
    allocated_count_ = 0;
    evicted_count_ = 0;
    lru_head_ = nullptr;
    lru_tail_ = nullptr;
  }
}

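// Unlinks pr from the doubly-linked LRU list, updating lru_head_ and
// lru_tail_ as needed.  The caller retains ownership of pr.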
void PoolAllocator::RemoveFromList(PtrRecord* pr) {
  if (pr->prev == nullptr) {
    DCHECK_EQ(lru_head_, pr);
    lru_head_ = nullptr;
  } else {
    pr->prev->next = pr->next;
  }
  if (pr->next == nullptr) {
    DCHECK_EQ(lru_tail_, pr);
    lru_tail_ = pr->prev;
  } else {
    pr->next->prev = pr->prev;
    if (lru_head_ == nullptr) {
      lru_head_ = pr->next;
    }
  }
}

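// Pushes pr onto the front (most recently used end) of the LRU list.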
void PoolAllocator::AddToList(PtrRecord* pr) {
  pr->prev = nullptr;
  if (lru_head_ == nullptr) {
    CHECK(lru_tail_ == nullptr);
    lru_tail_ = pr;
    pr->next = nullptr;
  } else {
    pr->next = lru_head_;
    pr->next->prev = pr;
  }
  lru_head_ = pr;
}

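// Frees the least recently used pooled chunk and, every kCheckInterval
// evictions, decides whether to grow pool_size_limit_ when auto_resize_ is
// set.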
void PoolAllocator::EvictOne() {
  DCHECK(lru_tail_ != nullptr);
  PtrRecord* prec = lru_tail_;
  RemoveFromList(prec);
  auto iter = pool_.find(prec->num_bytes);
  while (iter->second != prec) {
    ++iter;
    DCHECK(iter != pool_.end());
  }
  pool_.erase(iter);
  for (const auto& v : free_visitors_) {
    v(prec->ptr, prec->num_bytes);
  }
  allocator_->Free(prec->ptr, prec->num_bytes);
  delete prec;
  ++evicted_count_;
  // Auto-resizing, and warning messages.
  static const double kTolerable = 2e-3;
  static const int kCheckInterval = 1000;
  static const double kIncreaseFactor = 1.1;
  static const int kMinPoolSize = 100;
  if (0 == evicted_count_ % kCheckInterval) {
    const double eviction_rate =
        evicted_count_ / static_cast<double>(put_count_);
    const int64 alloc_request_count = allocated_count_ + get_from_pool_count_;
    const double alloc_rate =
        (alloc_request_count == 0)
            ? 0.0
            : allocated_count_ / static_cast<double>(alloc_request_count);
    // Can be turned on for debugging purposes.
    const bool kShouldLog = false;
    if (kShouldLog) {
      LOG(INFO) << "PoolAllocator: After " << alloc_request_count
                << " get requests, put_count=" << put_count_
                << " evicted_count=" << evicted_count_
                << " eviction_rate=" << eviction_rate
                << " and unsatisfied allocation rate=" << alloc_rate;
    }
    if (auto_resize_ && (eviction_rate > kTolerable) &&
        (alloc_rate > kTolerable)) {
      size_t new_size_limit = (pool_size_limit_ < kMinPoolSize)
                                  ? kMinPoolSize
                                  : (kIncreaseFactor * pool_size_limit_);
      if (kShouldLog) {
        LOG(INFO) << "Raising pool_size_limit_ from " << pool_size_limit_
                  << " to " << new_size_limit;
      }
      pool_size_limit_ = new_size_limit;
      // Reset all the counters so that ratios are relative to new sizes
      // at next test interval.
      put_count_ = 0;
      allocated_count_ = 0;
      evicted_count_ = 0;
      get_from_pool_count_ = 0;
    }
  }
}

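// Worked example of the auto-resize test above: with kCheckInterval == 1000,
// the check runs when evicted_count_ reaches 1000.  If put_count_ is 20000 at
// that point, eviction_rate == 0.05 > kTolerable (2e-3); if, say, 5000 of
// 100000 requests missed the pool, alloc_rate == 0.05 as well, so an
// auto-resizing pool grows its limit by kIncreaseFactor (10%), or jumps to
// kMinPoolSize (100) if it was smaller than that, and resets the counters.
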
void PoolAllocator::AddAllocVisitor(Visitor visitor) {
  mutex_lock lock(mutex_);
  CHECK(!allocation_begun_)
      << "AddAllocVisitor may not be called after pool allocation "
      << "has begun.";
  alloc_visitors_.push_back(visitor);
}

void PoolAllocator::AddFreeVisitor(Visitor visitor) {
  mutex_lock lock(mutex_);
  CHECK(!allocation_begun_)
      << "AddFreeVisitor may not be called after pool allocation "
      << "has begun.";
  free_visitors_.push_back(visitor);
}
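
// Illustrative visitor registration (a sketch; the call sites above imply a
// Visitor is callable as visitor(ptr, num_bytes), and RegisterWithDevice here
// is a hypothetical helper, e.g. something that pins host memory for a GPU):
//
//   pool.AddAllocVisitor([](void* ptr, size_t num_bytes) {
//     RegisterWithDevice(ptr, num_bytes);
//   });
//
// Visitors must be registered before the first AllocateRaw call, as enforced
// by the CHECKs above.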

}  // namespace tensorflow