/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/common_runtime/gpu/pool_allocator.h"

#include <errno.h>
#ifndef _MSC_VER
#include <strings.h>
#include <sys/mman.h>  // for munmap
#endif

#include <map>
#include <utility>

#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

PoolAllocator::PoolAllocator(size_t pool_size_limit, bool auto_resize,
                             SubAllocator* allocator,
                             RoundUpInterface* size_rounder, string name)
    : name_(std::move(name)),
      has_size_limit_(pool_size_limit > 0),
      auto_resize_(auto_resize),
      pool_size_limit_(pool_size_limit),
      allocator_(allocator),
      size_rounder_(size_rounder),
      allocation_begun_(false) {
  if (auto_resize) {
    CHECK_LT(size_t{0}, pool_size_limit)
        << "size limit must be > 0 if auto_resize is true.";
  }
}

PoolAllocator::~PoolAllocator() { Clear(); }

namespace {
// Pools contain Chunks allocated from the underlying Allocator.
// Chunk alignment is always on kPoolAlignment boundaries.  Each Chunk
// begins with a descriptor (ChunkPrefix) that gives its size and a
// pointer to itself.  The pointer returned to the user is just past
// the ChunkPrefix.  If the user asks for a larger alignment, we will
// increase the size of the chunk, then adjust the returned user
// pointer and also re-write the ChunkPrefix.chunk_ptr value
// immediately before it.  This way the Chunk address and size can be
// recovered from the returned user pointer, regardless of alignment.
// Note that this dereferencing of the pointers means that we cannot
// handle GPU memory, only CPU memory.
struct ChunkPrefix {
  size_t num_bytes;
  void* chunk_ptr;
};
// kPoolAlignment cannot be less than the size of ChunkPrefix.
static const int kPoolAlignment = sizeof(ChunkPrefix);

void* PrepareChunk(void* chunk, size_t alignment, size_t num_bytes) {
  ChunkPrefix* cp = reinterpret_cast<ChunkPrefix*>(chunk);
  cp->num_bytes = num_bytes;
  cp->chunk_ptr = chunk;
  void* user_ptr = reinterpret_cast<void*>(cp + 1);
  if (alignment > kPoolAlignment) {
    // Move user_ptr forward to the first satisfying offset, and write
    // chunk_ptr just before it.
    size_t aligned_ptr = reinterpret_cast<size_t>(user_ptr) + alignment;
    user_ptr = reinterpret_cast<void*>(aligned_ptr & ~(alignment - 1));
    (reinterpret_cast<ChunkPrefix*>(user_ptr) - 1)->chunk_ptr = chunk;
  }
  // Safety check that user_ptr is always past the ChunkPrefix.
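  // When alignment <= kPoolAlignment, user_ptr is exactly chunk +
  // sizeof(ChunkPrefix); with a larger alignment it is only rounded forward
  // into the extra bytes that AllocateRaw reserved, never backward.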
  CHECK_GE(user_ptr, reinterpret_cast<ChunkPrefix*>(chunk) + 1);
  return user_ptr;
}

ChunkPrefix* FindPrefix(void* user_ptr) {
  ChunkPrefix* cp = reinterpret_cast<ChunkPrefix*>(user_ptr) - 1;
  return reinterpret_cast<ChunkPrefix*>(cp->chunk_ptr);
}
}  // namespace

void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
  if (!allocation_begun_) allocation_begun_ = true;
  if (num_bytes == 0) return nullptr;

  // If alignment is larger than kPoolAlignment, increase num_bytes so that we
  // are guaranteed to be able to return an aligned ptr by advancing user_ptr
  // without overrunning the end of the chunk.
  if (alignment > kPoolAlignment) {
    num_bytes += alignment;
  }
  num_bytes += sizeof(ChunkPrefix);
  num_bytes = size_rounder_->RoundUp(num_bytes);
  PtrRecord* pr = nullptr;
  if (has_size_limit_) {
    {
      mutex_lock lock(mutex_);
      auto iter = pool_.find(num_bytes);
      if (iter == pool_.end()) {
        allocated_count_++;
        // Deliberately fall out of lock scope before
        // calling the allocator.  No further modification
        // to the pool will be performed.
      } else {
        get_from_pool_count_++;
        pr = iter->second;
        RemoveFromList(pr);
        pool_.erase(iter);
        // Fall out of lock scope and prepare the result without the lock held.
      }
    }
  }
  if (pr != nullptr) {
    void* r = pr->ptr;
    delete pr;
    return PrepareChunk(r, alignment, num_bytes);
  } else {
    void* ptr = allocator_->Alloc(kPoolAlignment, num_bytes);
    for (const auto& v : alloc_visitors_) {
      v(ptr, num_bytes);
    }
    return PrepareChunk(ptr, alignment, num_bytes);
  }
}

void PoolAllocator::DeallocateRaw(void* ptr) {
  if (ptr == nullptr) return;
  ChunkPrefix* cp = FindPrefix(ptr);
  CHECK_LE((void*)cp, (void*)ptr);
  if (!has_size_limit_ && !auto_resize_) {
    for (const auto& v : free_visitors_) {
      v(cp, cp->num_bytes);
    }
    allocator_->Free(cp, cp->num_bytes);
  } else {
    mutex_lock lock(mutex_);
    ++put_count_;
    while (pool_.size() >= pool_size_limit_) {
      EvictOne();
    }
    PtrRecord* pr = new PtrRecord;
    pr->num_bytes = cp->num_bytes;
    pr->ptr = cp;
    AddToList(pr);
    pool_.insert(std::make_pair(cp->num_bytes, pr));
  }
}

void PoolAllocator::Clear() {
  if (has_size_limit_) {
    mutex_lock lock(mutex_);
    for (auto iter : pool_) {
      PtrRecord* pr = iter.second;
      for (const auto& v : free_visitors_) {
        v(pr->ptr, pr->num_bytes);
      }
      allocator_->Free(pr->ptr, pr->num_bytes);
      delete pr;
    }
    pool_.clear();
    get_from_pool_count_ = 0;
    put_count_ = 0;
    allocated_count_ = 0;
    evicted_count_ = 0;
    lru_head_ = nullptr;
    lru_tail_ = nullptr;
  }
}

void PoolAllocator::RemoveFromList(PtrRecord* pr) {
  if (pr->prev == nullptr) {
    DCHECK_EQ(lru_head_, pr);
    lru_head_ = nullptr;
  } else {
    pr->prev->next = pr->next;
  }
  if (pr->next == nullptr) {
    DCHECK_EQ(lru_tail_, pr);
    lru_tail_ = pr->prev;
  } else {
    pr->next->prev = pr->prev;
    if (lru_head_ == nullptr) {
      lru_head_ = pr->next;
    }
  }
}

void PoolAllocator::AddToList(PtrRecord* pr) {
  pr->prev = nullptr;
  if (lru_head_ == nullptr) {
    CHECK(lru_tail_ == nullptr);
    lru_tail_ = pr;
    pr->next = nullptr;
  } else {
    pr->next = lru_head_;
    pr->next->prev = pr;
  }
  lru_head_ = pr;
}

void PoolAllocator::EvictOne() {
  DCHECK(lru_tail_ != nullptr);
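  // Evict the least recently used chunk: unlink it from the LRU list, erase
  // its entry from the size-keyed pool (which may hold several chunks of the
  // same size, hence the iterator walk below), then notify the free visitors
  // and return the memory to the underlying allocator.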
  PtrRecord* prec = lru_tail_;
  RemoveFromList(prec);
  auto iter = pool_.find(prec->num_bytes);
  while (iter->second != prec) {
    ++iter;
    DCHECK(iter != pool_.end());
  }
  pool_.erase(iter);
  for (const auto& v : free_visitors_) {
    v(prec->ptr, prec->num_bytes);
  }
  allocator_->Free(prec->ptr, prec->num_bytes);
  delete prec;
  ++evicted_count_;
  // Auto-resizing and warning messages.
  static const double kTolerable = 2e-3;
  static const int kCheckInterval = 1000;
  static const double kIncreaseFactor = 1.1;
  static const int kMinPoolSize = 100;
  if (0 == evicted_count_ % kCheckInterval) {
    const double eviction_rate =
        evicted_count_ / static_cast<double>(put_count_);
    const int64 alloc_request_count = allocated_count_ + get_from_pool_count_;
    const double alloc_rate =
        (alloc_request_count == 0)
            ? 0.0
            : allocated_count_ / static_cast<double>(alloc_request_count);
    // Can be turned on for debugging purposes.
    const bool kShouldLog = false;
    if (kShouldLog) {
      LOG(INFO) << "PoolAllocator: After " << alloc_request_count
                << " get requests, put_count=" << put_count_
                << " evicted_count=" << evicted_count_
                << " eviction_rate=" << eviction_rate
                << " and unsatisfied allocation rate=" << alloc_rate;
    }
    if (auto_resize_ && (eviction_rate > kTolerable) &&
        (alloc_rate > kTolerable)) {
      size_t new_size_limit = (pool_size_limit_ < kMinPoolSize)
                                  ? kMinPoolSize
                                  : (kIncreaseFactor * pool_size_limit_);
      if (kShouldLog) {
        LOG(INFO) << "Raising pool_size_limit_ from " << pool_size_limit_
                  << " to " << new_size_limit;
      }
      pool_size_limit_ = new_size_limit;
      // Reset all the counters so that ratios are relative to new sizes
      // at next test interval.
      put_count_ = 0;
      allocated_count_ = 0;
      evicted_count_ = 0;
      get_from_pool_count_ = 0;
    }
  }
}

void PoolAllocator::AddAllocVisitor(Visitor visitor) {
  mutex_lock lock(mutex_);
  CHECK(!allocation_begun_)
      << "AddAllocVisitor may not be called after pool allocation "
      << "has begun.";
  alloc_visitors_.push_back(visitor);
}

void PoolAllocator::AddFreeVisitor(Visitor visitor) {
  mutex_lock lock(mutex_);
  CHECK(!allocation_begun_)
      << "AddFreeVisitor may not be called after pool allocation "
      << "has begun.";
  free_visitors_.push_back(visitor);
}

}  // namespace tensorflow