/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/common_runtime/bfc_allocator.h"

#include <algorithm>
#include <array>
#include <atomic>
#include <map>

#include "tensorflow/core/common_runtime/allocator_retry.h"
#include "tensorflow/core/lib/core/bits.h"
#include "tensorflow/core/lib/gtl/stl_util.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

BFCAllocator::BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
                           bool allow_growth, const string& name)
    : suballocator_(sub_allocator),
      name_(name),
      free_chunks_list_(kInvalidChunkHandle),
      next_allocation_id_(1) {
  if (allow_growth) {
    // 1MiB smallest initial allocation, unless total memory available
    // is less.
    curr_region_allocation_bytes_ =
        RoundedBytes(std::min(total_memory, size_t{1048576}));
  } else {
    curr_region_allocation_bytes_ = RoundedBytes(total_memory);
  }

  // Allocate the requested amount of memory.
  memory_limit_ = total_memory;
  stats_.bytes_limit = static_cast<int64>(total_memory);

  // Create a bunch of bins of various good sizes.

  // We create bins to fit all possible ranges that cover the
  // memory_limit_ starting from allocations up to 256 bytes to
  // allocations up to (and including) the memory limit.
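  //
  // Bin sizes double from one bin to the next: bin 0 holds free chunks
  // of size [256B, 512B), bin 1 holds [512B, 1KiB), and so on, with the
  // last bin unbounded above.  The CHECKs in the loop below verify these
  // half-open boundaries.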
  for (BinNum b = 0; b < kNumBins; b++) {
    size_t bin_size = BinNumToSize(b);
    VLOG(1) << "Creating bin of max chunk size "
            << strings::HumanReadableNumBytes(bin_size);
    new (BinFromIndex(b)) Bin(this, bin_size);
    CHECK_EQ(BinForSize(bin_size), BinFromIndex(b));
    CHECK_EQ(BinForSize(bin_size + 255), BinFromIndex(b));
    CHECK_EQ(BinForSize(bin_size * 2 - 1), BinFromIndex(b));
    if (b + 1 < kNumBins) {
      CHECK_NE(BinForSize(bin_size * 2), BinFromIndex(b));
    }
  }
}

BFCAllocator::~BFCAllocator() {
  // Return memory back.
  VLOG(2) << "Number of regions allocated: "
          << region_manager_.regions().size();
  for (const auto& region : region_manager_.regions()) {
    suballocator_->Free(region.ptr(), region.memory_size());
  }

  for (BinNum b = 0; b < kNumBins; b++) {
    BinFromIndex(b)->~Bin();
  }
}

BFCAllocator::Chunk* BFCAllocator::ChunkFromHandle(ChunkHandle h) {
  DCHECK_GE(h, 0);
  DCHECK_LT(h, static_cast<int>(chunks_.size()));
  return &(chunks_[h]);
}

bool BFCAllocator::Extend(size_t rounded_bytes) {
  size_t available_bytes = memory_limit_ - total_region_allocated_bytes_;
  // Rounds available_bytes down to the nearest multiple of kMinAllocationSize.
  available_bytes = (available_bytes / kMinAllocationSize) * kMinAllocationSize;

  // Do we have enough space to handle the client's request?
  // If not, fail immediately.
  if (rounded_bytes > available_bytes) {
    return false;
  }

  // If curr_region_allocation_bytes_ is not enough to satisfy the
  // allocation, keep doubling it until it is.
  bool increased_allocation = false;
  while (rounded_bytes > curr_region_allocation_bytes_) {
    curr_region_allocation_bytes_ *= 2;
    increased_allocation = true;
  }

  // Try allocating.
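  // The request is capped at what remains under memory_limit_.  A fixed
  // 32-byte alignment is requested from the sub-allocator; per-allocation
  // alignment arguments are ignored (see 'unused_alignment' in
  // AllocateRaw below).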
  size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes);
  void* mem_addr = suballocator_->Alloc(32, bytes);
  if (mem_addr == nullptr && !started_backpedal_) {
    // Only backpedal once.
    started_backpedal_ = true;

    static constexpr float kBackpedalFactor = 0.9;
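    // Each retry below asks for 90% of the previous request, so requests
    // decay geometrically (bytes, 0.9 * bytes, 0.81 * bytes, ...) until
    // one succeeds or the request would no longer cover rounded_bytes.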

    // Try allocating less memory.
    while (mem_addr == nullptr) {
      bytes = RoundedBytes(bytes * kBackpedalFactor);
      if (bytes < rounded_bytes) break;
      mem_addr = suballocator_->Alloc(32, bytes);
    }
  }

  if (mem_addr == nullptr) {
    return false;
  }

  if (!increased_allocation) {
    // Increase the region size of the next required allocation.
    curr_region_allocation_bytes_ *= 2;
  }

  VLOG(1) << "Extending allocation by "
          << strings::HumanReadableNumBytes(bytes) << ".";

  total_region_allocated_bytes_ += bytes;
  VLOG(1) << "Total allocated bytes: "
          << strings::HumanReadableNumBytes(total_region_allocated_bytes_);

  VLOG(1) << "Allocated memory at " << mem_addr << " to "
          << static_cast<void*>(static_cast<char*>(mem_addr) + bytes);
  region_manager_.AddAllocationRegion(mem_addr, bytes);

  // Create one large chunk for the whole memory space that will
  // be chunked later.
  ChunkHandle h = AllocateChunk();
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  c->ptr = mem_addr;
  c->size = bytes;
  c->allocation_id = -1;
  c->prev = kInvalidChunkHandle;
  c->next = kInvalidChunkHandle;

  region_manager_.set_handle(c->ptr, h);

  // TODO(vrv): Try to merge this new region with an existing region,
  // if the address space is contiguous, to avoid fragmentation
  // across regions.

  // Insert the chunk into the right bin.
  InsertFreeChunkIntoBin(h);

  // Invoke visitors on newly allocated region.
  for (const auto& visitor : region_visitors_) {
    visitor(mem_addr, bytes);
  }
  return true;
}

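// Chunk metadata lives in the chunks_ vector and is addressed by handle
// (index) rather than by Chunk*, since chunks_ may reallocate its storage
// as it grows.  Slots freed by DeallocateChunk are threaded onto
// free_chunks_list_ through their 'next' field and reused before the
// vector is grown again.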
BFCAllocator::ChunkHandle BFCAllocator::AllocateChunk() {
  if (free_chunks_list_ != kInvalidChunkHandle) {
    ChunkHandle h = free_chunks_list_;
    Chunk* c = ChunkFromHandle(h);
    free_chunks_list_ = c->next;
    return h;
  } else {
    ChunkHandle h = chunks_.size();
    chunks_.resize(h + 1);
    return h;
  }
}

void BFCAllocator::DeallocateChunk(ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  c->next = free_chunks_list_;
  free_chunks_list_ = h;
}

void* BFCAllocator::AllocateRaw(size_t unused_alignment, size_t num_bytes) {
  // Fast path: Try once to allocate without getting the retry_helper_ involved
  void* r = AllocateRawInternal(unused_alignment, num_bytes, false);
  if (r != nullptr) {
    return r;
  } else {
    static const int64 kMaxMillisToWait = 10000;  // 10 seconds
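    // Slow path: retry_helper_ re-invokes the allocation each time another
    // thread frees memory (DeallocateRaw calls NotifyDealloc below), and
    // gives up after kMaxMillisToWait.  See allocator_retry.h.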
    return retry_helper_.AllocateRaw(
        [this](size_t a, size_t nb, bool v) {
          return AllocateRawInternal(a, nb, v);
        },
        kMaxMillisToWait, unused_alignment, num_bytes);
  }
}

void* BFCAllocator::AllocateRaw(size_t unused_alignment, size_t num_bytes,
                                const AllocationAttributes& allocation_attr) {
  if (allocation_attr.no_retry_on_failure) {
    // Return immediately upon the first failure if this is for allocating an
    // optional scratch space.
    bool dump_log_on_failure = VLOG_IS_ON(2);
    void* result =
        AllocateRawInternal(unused_alignment, num_bytes, dump_log_on_failure);
    if (result == nullptr) {
      static std::atomic<int32> log_counter{0};
      int32 counter_value = log_counter.load(std::memory_order_relaxed);
      if (counter_value < 10) {
        log_counter.store(counter_value + 1, std::memory_order_relaxed);
        LOG(WARNING)
            << "Allocator (" << Name() << ") ran out of memory trying "
            << "to allocate " << strings::HumanReadableNumBytes(num_bytes)
            << ". The caller indicates that this is not a failure, but"
            << " this may mean that there could be performance gains if"
            << " more memory were available.";
      }
    }
    return result;
  } else {
    return AllocateRaw(unused_alignment, num_bytes);
  }
}

// static
size_t BFCAllocator::RoundedBytes(size_t bytes) {
  size_t rounded_bytes =
      (kMinAllocationSize *
       ((bytes + kMinAllocationSize - 1) / kMinAllocationSize));
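  // With kMinAllocationSize == 256 (see bfc_allocator.h) this gives e.g.
  // RoundedBytes(1) == 256, RoundedBytes(256) == 256,
  // RoundedBytes(257) == 512.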
  DCHECK_EQ(size_t{0}, rounded_bytes % kMinAllocationSize);
  return rounded_bytes;
}

void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
                                        size_t num_bytes,
                                        bool dump_log_on_failure) {
  if (num_bytes == 0) {
    LOG(ERROR) << "tried to allocate 0 bytes";
    return nullptr;
  }
  // First, always allocate memory of at least kMinAllocationSize
  // bytes, and always allocate multiples of kMinAllocationSize bytes
  // so all memory addresses are nicely byte aligned.
  size_t rounded_bytes = RoundedBytes(num_bytes);

  // The BFC allocator tries to find the best fit first.
  BinNum bin_num = BinNumForSize(rounded_bytes);

  mutex_lock l(lock_);
  void* ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes);
  if (ptr != nullptr) {
    return ptr;
  }

  // Try to extend.
  if (Extend(rounded_bytes)) {
    ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes);
    if (ptr != nullptr) {
      return ptr;
    }
  }

  // We searched all bins for an existing free chunk to use and
  // couldn't find one.  This means we must have run out of memory.
  // Dump the memory log for analysis.
  if (dump_log_on_failure) {
    LOG(WARNING) << "Allocator (" << Name() << ") ran out of memory trying "
                 << "to allocate " << strings::HumanReadableNumBytes(num_bytes)
                 << ".  Current allocation summary follows.";
    DumpMemoryLog(rounded_bytes);
    LOG(WARNING) << RenderOccupancy();
  }
  return nullptr;
}

void* BFCAllocator::FindChunkPtr(BinNum bin_num, size_t rounded_bytes,
                                 size_t num_bytes) {
  // First identify the first bin that could satisfy rounded_bytes.
  for (; bin_num < kNumBins; bin_num++) {
    // Start searching from the first bin for the smallest chunk that fits
    // rounded_bytes.
    Bin* b = BinFromIndex(bin_num);
    for (auto citer = b->free_chunks.begin(); citer != b->free_chunks.end();
         ++citer) {
      const BFCAllocator::ChunkHandle h = (*citer);
      BFCAllocator::Chunk* chunk = ChunkFromHandle(h);
      DCHECK(!chunk->in_use());
      if (chunk->size >= rounded_bytes) {
        // We found a free chunk that is large enough, so remove it from
        // the free bin structure prior to using.
        RemoveFreeChunkIterFromBin(&b->free_chunks, citer);

        // If we can break the size of the chunk into two reasonably large
        // pieces, do so.  In any case don't waste more than
        // kMaxInternalFragmentation bytes on padding this alloc.
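        // E.g. a 1GiB chunk found for a 4MiB request is split, since
        // 1GiB >= 2 * 4MiB; a 130MiB chunk found for a 128MiB request is
        // returned whole, since 130MiB < 2 * 128MiB and the 2MiB of
        // padding is below kMaxInternalFragmentation.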
        const int64 kMaxInternalFragmentation = 128 << 20;  // 128MiB
        if (chunk->size >= rounded_bytes * 2 ||
            static_cast<int64>(chunk->size - rounded_bytes) >=
                kMaxInternalFragmentation) {
          SplitChunk(h, rounded_bytes);
          chunk = ChunkFromHandle(h);  // Update chunk pointer in case it moved
        }

        // The requested size of the returned chunk is what the user
        // has allocated.
        chunk->requested_size = num_bytes;
        // Assign a unique id and increment the id counter, marking the
        // chunk as being in use.
        chunk->allocation_id = next_allocation_id_++;

        // Update stats.
        ++stats_.num_allocs;
        stats_.bytes_in_use += chunk->size;
        stats_.max_bytes_in_use =
            std::max(stats_.max_bytes_in_use, stats_.bytes_in_use);
        stats_.max_alloc_size =
            std::max<std::size_t>(stats_.max_alloc_size, chunk->size);

        VLOG(4) << "Returning: " << chunk->ptr;
        if (VLOG_IS_ON(4)) {
          LOG(INFO) << "A: " << RenderOccupancy();
        }
        return chunk->ptr;
      }
    }
  }

  return nullptr;
}

void BFCAllocator::SplitChunk(BFCAllocator::ChunkHandle h, size_t num_bytes) {
  // Allocate the new chunk before we do any ChunkFromHandle.
  ChunkHandle h_new_chunk = AllocateChunk();

  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num == kInvalidBinNum));

  // Create a new chunk starting num_bytes after c.
  BFCAllocator::Chunk* new_chunk = ChunkFromHandle(h_new_chunk);
  new_chunk->ptr = static_cast<void*>(static_cast<char*>(c->ptr) + num_bytes);
  region_manager_.set_handle(new_chunk->ptr, h_new_chunk);

  // Set the new sizes of the chunks.
  new_chunk->size = c->size - num_bytes;
  c->size = num_bytes;

  // The new chunk is not in use.
  new_chunk->allocation_id = -1;

  // Maintain the pointers.
  // c <-> c_neighbor becomes
  // c <-> new_chunk <-> c_neighbor
  BFCAllocator::ChunkHandle h_neighbor = c->next;
  new_chunk->prev = h;
  new_chunk->next = h_neighbor;
  c->next = h_new_chunk;
  if (h_neighbor != kInvalidChunkHandle) {
    Chunk* c_neighbor = ChunkFromHandle(h_neighbor);
    c_neighbor->prev = h_new_chunk;
  }

  // Add the newly free chunk to the free bin.
  InsertFreeChunkIntoBin(h_new_chunk);
}

void BFCAllocator::DeallocateRaw(void* ptr) {
  DeallocateRawInternal(ptr);
  retry_helper_.NotifyDealloc();
}

void BFCAllocator::DeallocateRawInternal(void* ptr) {
  if (ptr == nullptr) {
    LOG(ERROR) << "tried to deallocate nullptr";
    return;
  }
  mutex_lock l(lock_);

  // Find the chunk from the ptr.
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle);

  // Consider coalescing it.
  FreeAndMaybeCoalesce(h);

  if (VLOG_IS_ON(4)) {
    LOG(INFO) << "F: " << RenderOccupancy();
  }
}

// Merges h1 and h2 when Chunk(h1)->next is h2 and Chunk(h2)->prev is h1.
// We merge Chunk(h2) into Chunk(h1).
void BFCAllocator::Merge(BFCAllocator::ChunkHandle h1,
                         BFCAllocator::ChunkHandle h2) {
  Chunk* c1 = ChunkFromHandle(h1);
  Chunk* c2 = ChunkFromHandle(h2);
  // We can only merge chunks that are not in use.
  CHECK(!c1->in_use() && !c2->in_use());

  // c1's prev doesn't change, still points to the same ptr, and is
  // still not in use.

  // Fix up neighbor pointers.
  //
  // c1 <-> c2 <-> c3 should become
  // c1 <-> c3

  BFCAllocator::ChunkHandle h3 = c2->next;
  c1->next = h3;
  CHECK(c2->prev == h1);
  if (h3 != kInvalidChunkHandle) {
    BFCAllocator::Chunk* c3 = ChunkFromHandle(h3);
    c3->prev = h1;
  }

  // Set the new size.
  c1->size += c2->size;

  DeleteChunk(h2);
}

void BFCAllocator::DeleteChunk(ChunkHandle h) {
  // Delete h and clean up all state.
  Chunk* c = ChunkFromHandle(h);
  // VLOG(4) << "Removing: " << c->ptr;
  region_manager_.erase(c->ptr);
  DeallocateChunk(h);
}

void BFCAllocator::InsertFreeChunkIntoBin(BFCAllocator::ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num == kInvalidBinNum));
  BinNum bin_num = BinNumForSize(c->size);
  Bin* new_bin = BinFromIndex(bin_num);
  c->bin_num = bin_num;
  new_bin->free_chunks.insert(h);
}

void BFCAllocator::RemoveFreeChunkIterFromBin(
    BFCAllocator::Bin::FreeChunkSet* free_chunks,
    const BFCAllocator::Bin::FreeChunkSet::iterator& citer) {
  ChunkHandle h = *citer;
  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num != kInvalidBinNum));
  free_chunks->erase(citer);
  c->bin_num = kInvalidBinNum;
}

void BFCAllocator::RemoveFreeChunkFromBin(BFCAllocator::ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num != kInvalidBinNum));
  CHECK_GT(BinFromIndex(c->bin_num)->free_chunks.erase(h), 0)
      << "Could not find chunk in bin";
  c->bin_num = kInvalidBinNum;
}

void BFCAllocator::FreeAndMaybeCoalesce(BFCAllocator::ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  CHECK(c->in_use() && (c->bin_num == kInvalidBinNum));

  // Mark the chunk as no longer in use.
  c->allocation_id = -1;

  // Update the stats.
  stats_.bytes_in_use -= c->size;

  // This chunk is no longer in use, consider coalescing the chunk
  // with adjacent chunks.
  ChunkHandle chunk_to_reassign = h;
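  // Note that Merge() keeps its first argument and deletes its second, so
  // h remains valid after coalescing with the next chunk, while coalescing
  // with the previous chunk leaves c->prev as the surviving handle.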

  // If the next chunk is free, coalesce the two.
  if (c->next != kInvalidChunkHandle) {
    Chunk* cnext = ChunkFromHandle(c->next);
    if (!cnext->in_use()) {
      // VLOG(8) << "Chunk at " << cnext->ptr << " merging with c " << c->ptr;

      chunk_to_reassign = h;

      // Deletes c->next.
      RemoveFreeChunkFromBin(c->next);
      Merge(h, ChunkFromHandle(h)->next);
    }
  }

  // If the previous chunk is free, coalesce the two.
  c = ChunkFromHandle(h);
  if (c->prev != kInvalidChunkHandle) {
    Chunk* cprev = ChunkFromHandle(c->prev);
    if (!cprev->in_use()) {
      // VLOG(8) << "Chunk at " << c->ptr << " merging into c->prev "
      //         << cprev->ptr;

      chunk_to_reassign = c->prev;

      // Deletes c; h is no longer a valid handle past this point.
      RemoveFreeChunkFromBin(c->prev);
      Merge(ChunkFromHandle(h)->prev, h);
    }
  }

  InsertFreeChunkIntoBin(chunk_to_reassign);
}

void BFCAllocator::AddAllocVisitor(Visitor visitor) {
  VLOG(1) << "AddVisitor";
  mutex_lock l(lock_);
  region_visitors_.push_back(visitor);
  for (const auto& region : region_manager_.regions()) {
    visitor(region.ptr(), region.memory_size());
  }
}

bool BFCAllocator::TracksAllocationSizes() { return true; }

size_t BFCAllocator::RequestedSize(const void* ptr) {
  mutex_lock l(lock_);
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle)
      << "Asked for requested size of pointer we never allocated: " << ptr;
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  return c->requested_size;
}

size_t BFCAllocator::AllocatedSize(const void* ptr) {
  mutex_lock l(lock_);
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle)
      << "Asked for allocated size of pointer we never allocated: " << ptr;
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  return c->size;
}

int64 BFCAllocator::AllocationId(const void* ptr) {
  mutex_lock l(lock_);
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle)
      << "Asked for allocation id of pointer we never allocated: " << ptr;
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  return c->allocation_id;
}

namespace {

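// Writes the character 'c' into the cells of rendered[] that correspond to
// the byte range [ptr, ptr + size) of a region starting at base_ptr, where
// 'offset' is the region's starting position in the combined address space
// being drawn and byte positions are scaled down to 'resolution' cells.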
void RenderRegion(char* rendered, const size_t resolution,
                  const size_t total_render_size, const size_t offset,
                  const void* base_ptr, const void* ptr, const size_t size,
                  const char c) {
  const char* base_ptr_c = static_cast<const char*>(base_ptr);
  const char* ptr_c = static_cast<const char*>(ptr);

  size_t start_location =
      ((ptr_c - base_ptr_c + offset) * resolution) / total_render_size;
  CHECK_GE(start_location, 0);
  CHECK_LT(start_location, resolution);
  size_t end_location =
      ((ptr_c + size - 1 - base_ptr_c + offset) * resolution) /
      total_render_size;
  CHECK_GE(end_location, 0);
  CHECK_LT(end_location, resolution);

  for (size_t i = start_location; i <= end_location; ++i) {
    rendered[i] = c;
  }
}

}  // namespace

string BFCAllocator::RenderOccupancy() {
  // Make a buffer for the ASCII-art representation.
  const size_t resolution = 100;
  char rendered[resolution];
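  // Legend: '_' is free space, '*' is the client-requested portion of an
  // in-use chunk, and 'x' is padding within an in-use chunk (allocated
  // but not requested).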

  // Compute the total region size to render over.
  size_t total_region_size = 0;
  for (const auto& region : region_manager_.regions()) {
    total_region_size += region.memory_size();
  }

  if (total_region_size == 0) {
    return "<allocator contains no memory>";
  }

  // Start out with everything empty.
  RenderRegion(rendered, resolution, total_region_size, 0, nullptr, nullptr,
               total_region_size, '_');

  size_t region_offset = 0;
  for (const auto& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    // Then render each chunk left to right.
    while (h != kInvalidChunkHandle) {
      Chunk* c = ChunkFromHandle(h);
      if (c->in_use()) {
        // Render the wasted space.
        size_t wasted = c->size - c->requested_size;
        if (wasted > 0) {
          RenderRegion(rendered, resolution, total_region_size,
                       region_offset + c->requested_size, region.ptr(), c->ptr,
                       wasted, 'x');
        }
        // Then the occupied space.
        RenderRegion(rendered, resolution, total_region_size, region_offset,
                     region.ptr(), c->ptr, c->requested_size, '*');
      }
      h = c->next;
    }
    region_offset += region.memory_size();
  }

  return StringPiece(rendered, resolution).ToString();
}

void BFCAllocator::DumpMemoryLog(size_t num_bytes) {
  const std::array<BinDebugInfo, kNumBins> bin_infos = get_bin_debug_info();
  for (BinNum bin_num = 0; bin_num < kNumBins; bin_num++) {
    Bin* b = BinFromIndex(bin_num);
    const BinDebugInfo& bin_info = bin_infos[bin_num];
    CHECK_EQ(b->free_chunks.size(),
             bin_info.total_chunks_in_bin - bin_info.total_chunks_in_use);

    LOG(INFO) << "Bin (" << b->bin_size
              << "): \tTotal Chunks: " << bin_info.total_chunks_in_bin
              << ", Chunks in use: " << bin_info.total_chunks_in_use << ". "
              << strings::HumanReadableNumBytes(bin_info.total_bytes_in_bin)
              << " allocated for chunks. "
              << strings::HumanReadableNumBytes(bin_info.total_bytes_in_use)
              << " in use in bin. "
              << strings::HumanReadableNumBytes(
                     bin_info.total_requested_bytes_in_use)
              << " client-requested in use in bin.";
  }

  // Find the bin that we would have liked to allocate in, so we
  // can get some further analysis about fragmentation.
  Bin* b = BinForSize(num_bytes);

  LOG(INFO) << "Bin for " << strings::HumanReadableNumBytes(num_bytes)
            << " was " << strings::HumanReadableNumBytes(b->bin_size)
            << ", Chunk State: ";

  for (ChunkHandle h : b->free_chunks) {
    Chunk* c = ChunkFromHandle(h);
    LOG(INFO) << c->DebugString(this, true);
  }

  // Next show the chunks that are in use, and also summarize their
  // number by size.
  std::map<size_t, int> in_use_by_size;
  for (const auto& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    while (h != kInvalidChunkHandle) {
      const Chunk* c = ChunkFromHandle(h);
      if (c->in_use()) {
        in_use_by_size[c->size]++;
      }
      LOG(INFO) << (c->in_use() ? "Chunk" : "Free ") << " at " << c->ptr
                << " of size " << c->size;
      h = c->next;
    }
  }

  LOG(INFO) << "     Summary of in-use Chunks by size: ";
  size_t total_bytes = 0;
  for (auto& it : in_use_by_size) {
    LOG(INFO) << it.second << " Chunks of size " << it.first << " totalling "
              << strings::HumanReadableNumBytes(it.first * it.second);
    total_bytes += (it.first * it.second);
  }
  LOG(INFO) << "Sum Total of in-use chunks: "
            << strings::HumanReadableNumBytes(total_bytes);
  LOG(INFO) << "Stats: \n" << stats_.DebugString();
}

void BFCAllocator::GetStats(AllocatorStats* stats) {
  mutex_lock l(lock_);
  *stats = stats_;
}

void BFCAllocator::ClearStats() {
  mutex_lock l(lock_);
  stats_.num_allocs = 0;
  stats_.max_bytes_in_use = stats_.bytes_in_use;
  stats_.max_alloc_size = 0;
}

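// Walks every chunk in every region, accumulating per-bin chunk and byte
// counts.  As a side effect, it sanity-checks the free-bin structures:
// each free chunk must be registered in exactly the bin its size maps to.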
std::array<BFCAllocator::BinDebugInfo, BFCAllocator::kNumBins>
BFCAllocator::get_bin_debug_info() {
  std::array<BinDebugInfo, kNumBins> bin_infos;
  for (const auto& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    while (h != kInvalidChunkHandle) {
      const Chunk* c = ChunkFromHandle(h);
      BinNum bin_num = BinNumForSize(c->size);
      BinDebugInfo& bin_info = bin_infos[bin_num];
      bin_info.total_bytes_in_bin += c->size;
      bin_info.total_chunks_in_bin++;
      if (c->in_use()) {
        bin_info.total_bytes_in_use += c->size;
        bin_info.total_requested_bytes_in_use += c->requested_size;
        bin_info.total_chunks_in_use++;
      } else {
        Bin* bin = BinFromIndex(bin_num);
        CHECK_EQ(bin->free_chunks.count(h), 1);
        CHECK_EQ(c->bin_num, bin_num);
      }
      h = c->next;
    }
  }
  return bin_infos;
}

}  // namespace tensorflow