      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "base/mutex-inl.h"
     18 #include "mirror/class-inl.h"
     19 #include "mirror/object.h"
     20 #include "mirror/object-inl.h"
     21 #include "thread-inl.h"
     22 #include "thread_list.h"
     23 #include "rosalloc.h"
     24 
     25 #include <map>
     26 #include <list>
     27 #include <vector>
     28 
     29 namespace art {
     30 namespace gc {
     31 namespace allocator {
     32 
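        // Heap callback used below in AllocPages(): when the footprint must grow, RosAlloc asks the
        // embedding heap to extend the storage backing this allocator by |increment| bytes.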
     33 extern "C" void* art_heap_rosalloc_morecore(RosAlloc* rosalloc, intptr_t increment);
     34 
     35 static constexpr bool kUsePrefetchDuringAllocRun = true;
     36 static constexpr bool kPrefetchNewRunDataByZeroing = false;
     37 static constexpr size_t kPrefetchStride = 64;
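        // The 64-byte stride is assumed to match a typical cache line size; AllocRun() prefetches a
        // new run's slot area one stride at a time when kUsePrefetchDuringAllocRun is enabled.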
     38 
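        // Per-size-bracket tables (slot size, pages per run, slots per run, header size and bitmap
        // offsets). They are indexed by size bracket and filled in once by Initialize(), which the
        // constructor below invokes the first time a RosAlloc instance is created.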
     39 size_t RosAlloc::bracketSizes[kNumOfSizeBrackets];
     40 size_t RosAlloc::numOfPages[kNumOfSizeBrackets];
     41 size_t RosAlloc::numOfSlots[kNumOfSizeBrackets];
     42 size_t RosAlloc::headerSizes[kNumOfSizeBrackets];
     43 size_t RosAlloc::bulkFreeBitMapOffsets[kNumOfSizeBrackets];
     44 size_t RosAlloc::threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
     45 bool RosAlloc::initialized_ = false;
     46 size_t RosAlloc::dedicated_full_run_storage_[kPageSize / sizeof(size_t)] = { 0 };
     47 RosAlloc::Run* RosAlloc::dedicated_full_run_ =
     48     reinterpret_cast<RosAlloc::Run*>(dedicated_full_run_storage_);
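        // dedicated_full_run_ is a statically backed placeholder run that always looks full:
        // AllocSlot() on it returns null, so code that finds it as the current or thread-local run is
        // forced into the refill path (see AllocFromCurrentRunUnlocked() and AllocFromRun() below).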
     49 
     50 RosAlloc::RosAlloc(void* base, size_t capacity, size_t max_capacity,
     51                    PageReleaseMode page_release_mode, size_t page_release_size_threshold)
     52     : base_(reinterpret_cast<byte*>(base)), footprint_(capacity),
     53       capacity_(capacity), max_capacity_(max_capacity),
     54       lock_("rosalloc global lock", kRosAllocGlobalLock),
     55       bulk_free_lock_("rosalloc bulk free lock", kRosAllocBulkFreeLock),
     56       page_release_mode_(page_release_mode),
     57       page_release_size_threshold_(page_release_size_threshold) {
     58   DCHECK_EQ(RoundUp(capacity, kPageSize), capacity);
     59   DCHECK_EQ(RoundUp(max_capacity, kPageSize), max_capacity);
     60   CHECK_LE(capacity, max_capacity);
     61   CHECK(IsAligned<kPageSize>(page_release_size_threshold_));
     62   if (!initialized_) {
     63     Initialize();
     64   }
     65   VLOG(heap) << "RosAlloc base="
     66              << std::hex << (intptr_t)base_ << ", end="
     67              << std::hex << (intptr_t)(base_ + capacity_)
     68              << ", capacity=" << std::dec << capacity_
     69              << ", max_capacity=" << std::dec << max_capacity_;
     70   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
     71     size_bracket_lock_names_[i] =
     72         StringPrintf("a rosalloc size bracket %d lock", static_cast<int>(i));
     73     size_bracket_locks_[i] = new Mutex(size_bracket_lock_names_[i].c_str(), kRosAllocBracketLock);
     74     current_runs_[i] = dedicated_full_run_;
     75   }
     76   DCHECK_EQ(footprint_, capacity_);
     77   size_t num_of_pages = footprint_ / kPageSize;
     78   size_t max_num_of_pages = max_capacity_ / kPageSize;
     79   std::string error_msg;
     80   page_map_mem_map_.reset(MemMap::MapAnonymous("rosalloc page map", NULL, RoundUp(max_num_of_pages, kPageSize),
     81                                                PROT_READ | PROT_WRITE, false, &error_msg));
     82   CHECK(page_map_mem_map_.get() != nullptr) << "Couldn't allocate the page map : " << error_msg;
     83   page_map_ = page_map_mem_map_->Begin();
     84   page_map_size_ = num_of_pages;
     85   max_page_map_size_ = max_num_of_pages;
     86   free_page_run_size_map_.resize(num_of_pages);
     87   FreePageRun* free_pages = reinterpret_cast<FreePageRun*>(base_);
     88   if (kIsDebugBuild) {
     89     free_pages->magic_num_ = kMagicNumFree;
     90   }
     91   free_pages->SetByteSize(this, capacity_);
     92   DCHECK_EQ(capacity_ % kPageSize, static_cast<size_t>(0));
     93   DCHECK(free_pages->IsFree());
     94   free_pages->ReleasePages(this);
     95   DCHECK(free_pages->IsFree());
     96   free_page_runs_.insert(free_pages);
     97   if (kTraceRosAlloc) {
     98     LOG(INFO) << "RosAlloc::RosAlloc() : Inserted run 0x" << std::hex
     99               << reinterpret_cast<intptr_t>(free_pages)
    100               << " into free_page_runs_";
    101   }
    102 }
    103 
    104 RosAlloc::~RosAlloc() {
    105   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
    106     delete size_bracket_locks_[i];
    107   }
    108 }
    109 
    110 void* RosAlloc::AllocPages(Thread* self, size_t num_pages, byte page_map_type) {
    111   lock_.AssertHeld(self);
    112   DCHECK(page_map_type == kPageMapRun || page_map_type == kPageMapLargeObject);
    113   FreePageRun* res = NULL;
    114   const size_t req_byte_size = num_pages * kPageSize;
    115   // Find the lowest address free page run that's large enough.
    116   for (auto it = free_page_runs_.begin(); it != free_page_runs_.end(); ) {
    117     FreePageRun* fpr = *it;
    118     DCHECK(fpr->IsFree());
    119     size_t fpr_byte_size = fpr->ByteSize(this);
    120     DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
    121     if (req_byte_size <= fpr_byte_size) {
    122       // Found one.
    123       free_page_runs_.erase(it++);
    124       if (kTraceRosAlloc) {
    125         LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x"
    126                   << std::hex << reinterpret_cast<intptr_t>(fpr)
    127                   << " from free_page_runs_";
    128       }
    129       if (req_byte_size < fpr_byte_size) {
    130         // Split.
    131         FreePageRun* remainder = reinterpret_cast<FreePageRun*>(reinterpret_cast<byte*>(fpr) + req_byte_size);
    132         if (kIsDebugBuild) {
    133           remainder->magic_num_ = kMagicNumFree;
    134         }
    135         remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
    136         DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
    137         // Don't need to call madvise on remainder here.
    138         free_page_runs_.insert(remainder);
    139         if (kTraceRosAlloc) {
    140           LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
    141                     << reinterpret_cast<intptr_t>(remainder)
    142                     << " into free_page_runs_";
    143         }
    144         fpr->SetByteSize(this, req_byte_size);
    145         DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
    146       }
    147       res = fpr;
    148       break;
    149     } else {
    150       ++it;
    151     }
    152   }
    153 
    154   // Failed to allocate pages. Grow the footprint, if possible.
    155   if (UNLIKELY(res == NULL && capacity_ > footprint_)) {
    156     FreePageRun* last_free_page_run = NULL;
    157     size_t last_free_page_run_size;
    158     auto it = free_page_runs_.rbegin();
    159     if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
    160       // There is a free page run at the end.
    161       DCHECK(last_free_page_run->IsFree());
    162       DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
    163       last_free_page_run_size = last_free_page_run->ByteSize(this);
    164     } else {
    165       // There is no free page run at the end.
    166       last_free_page_run_size = 0;
    167     }
    168     DCHECK_LT(last_free_page_run_size, req_byte_size);
    169     if (capacity_ - footprint_ + last_free_page_run_size >= req_byte_size) {
    170       // If we grow the heap, we can allocate it.
    171       size_t increment = std::min(std::max(2 * MB, req_byte_size - last_free_page_run_size),
    172                                   capacity_ - footprint_);
    173       DCHECK_EQ(increment % kPageSize, static_cast<size_t>(0));
    174       size_t new_footprint = footprint_ + increment;
    175       size_t new_num_of_pages = new_footprint / kPageSize;
    176       DCHECK_LT(page_map_size_, new_num_of_pages);
    177       DCHECK_LT(free_page_run_size_map_.size(), new_num_of_pages);
    178       page_map_size_ = new_num_of_pages;
    179       DCHECK_LE(page_map_size_, max_page_map_size_);
    180       free_page_run_size_map_.resize(new_num_of_pages);
    181       art_heap_rosalloc_morecore(this, increment);
    182       if (last_free_page_run_size > 0) {
    183         // There was a free page run at the end. Expand its size.
    184         DCHECK_EQ(last_free_page_run_size, last_free_page_run->ByteSize(this));
    185         last_free_page_run->SetByteSize(this, last_free_page_run_size + increment);
    186         DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
    187         DCHECK_EQ(last_free_page_run->End(this), base_ + new_footprint);
    188       } else {
    189         // Otherwise, insert a new free page run at the end.
    190         FreePageRun* new_free_page_run = reinterpret_cast<FreePageRun*>(base_ + footprint_);
    191         if (kIsDebugBuild) {
    192           new_free_page_run->magic_num_ = kMagicNumFree;
    193         }
    194         new_free_page_run->SetByteSize(this, increment);
    195         DCHECK_EQ(new_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
    196         free_page_runs_.insert(new_free_page_run);
    197         DCHECK_EQ(*free_page_runs_.rbegin(), new_free_page_run);
    198         if (kTraceRosAlloc) {
    199           LOG(INFO) << "RosAlloc::AllocPages() : Grew the heap by inserting run 0x"
    200                     << std::hex << reinterpret_cast<intptr_t>(new_free_page_run)
    201                     << " into free_page_runs_";
    202         }
    203       }
    204       DCHECK_LE(footprint_ + increment, capacity_);
    205       if (kTraceRosAlloc) {
    206         LOG(INFO) << "RosAlloc::AllocPages() : increased the footprint from "
    207                   << footprint_ << " to " << new_footprint;
    208       }
    209       footprint_ = new_footprint;
    210 
    211       // And retry the last free page run.
    212       it = free_page_runs_.rbegin();
    213       DCHECK(it != free_page_runs_.rend());
    214       FreePageRun* fpr = *it;
    215       if (kIsDebugBuild && last_free_page_run_size > 0) {
    216         DCHECK(last_free_page_run != NULL);
    217         DCHECK_EQ(last_free_page_run, fpr);
    218       }
    219       size_t fpr_byte_size = fpr->ByteSize(this);
    220       DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
    221       DCHECK_LE(req_byte_size, fpr_byte_size);
    222       free_page_runs_.erase(fpr);
    223       if (kTraceRosAlloc) {
    224         LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr)
    225                   << " from free_page_runs_";
    226       }
    227       if (req_byte_size < fpr_byte_size) {
    228         // Split if there's a remainder.
    229         FreePageRun* remainder = reinterpret_cast<FreePageRun*>(reinterpret_cast<byte*>(fpr) + req_byte_size);
    230         if (kIsDebugBuild) {
    231           remainder->magic_num_ = kMagicNumFree;
    232         }
    233         remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
    234         DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
    235         free_page_runs_.insert(remainder);
    236         if (kTraceRosAlloc) {
    237           LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
    238                     << reinterpret_cast<intptr_t>(remainder)
    239                     << " into free_page_runs_";
    240         }
    241         fpr->SetByteSize(this, req_byte_size);
    242         DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
    243       }
    244       res = fpr;
    245     }
    246   }
    247   if (LIKELY(res != NULL)) {
    248     // Update the page map.
    249     size_t page_map_idx = ToPageMapIndex(res);
    250     for (size_t i = 0; i < num_pages; i++) {
    251       DCHECK(IsFreePage(page_map_idx + i));
    252     }
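            // page_map_ keeps one byte of state per page: the first page of an allocation is tagged
            // kPageMapRun or kPageMapLargeObject and every following page of the same allocation is
            // tagged with the matching "part" type, which is what FreePages() later walks to recover
            // the allocation's page count.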
    253     switch (page_map_type) {
    254     case kPageMapRun:
    255       page_map_[page_map_idx] = kPageMapRun;
    256       for (size_t i = 1; i < num_pages; i++) {
    257         page_map_[page_map_idx + i] = kPageMapRunPart;
    258       }
    259       break;
    260     case kPageMapLargeObject:
    261       page_map_[page_map_idx] = kPageMapLargeObject;
    262       for (size_t i = 1; i < num_pages; i++) {
    263         page_map_[page_map_idx + i] = kPageMapLargeObjectPart;
    264       }
    265       break;
    266     default:
    267       LOG(FATAL) << "Unreachable - page map type: " << page_map_type;
    268       break;
    269     }
    270     if (kIsDebugBuild) {
    271       // Clear the first page since it is not madvised due to the magic number.
    272       memset(res, 0, kPageSize);
    273     }
    274     if (kTraceRosAlloc) {
    275       LOG(INFO) << "RosAlloc::AllocPages() : 0x" << std::hex << reinterpret_cast<intptr_t>(res)
    276                 << "-0x" << (reinterpret_cast<intptr_t>(res) + num_pages * kPageSize)
    277                 << "(" << std::dec << (num_pages * kPageSize) << ")";
    278     }
    279     return res;
    280   }
    281 
    282   // Fail.
    283   if (kTraceRosAlloc) {
    284     LOG(INFO) << "RosAlloc::AllocPages() : NULL";
    285   }
    286   return nullptr;
    287 }
    288 
    289 size_t RosAlloc::FreePages(Thread* self, void* ptr, bool already_zero) {
    290   lock_.AssertHeld(self);
    291   size_t pm_idx = ToPageMapIndex(ptr);
    292   DCHECK_LT(pm_idx, page_map_size_);
    293   byte pm_type = page_map_[pm_idx];
    294   DCHECK(pm_type == kPageMapRun || pm_type == kPageMapLargeObject);
    295   byte pm_part_type;
    296   switch (pm_type) {
    297   case kPageMapRun:
    298     pm_part_type = kPageMapRunPart;
    299     break;
    300   case kPageMapLargeObject:
    301     pm_part_type = kPageMapLargeObjectPart;
    302     break;
    303   default:
    304     LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << " : " << "pm_idx=" << pm_idx << ", pm_type="
    305                << static_cast<int>(pm_type) << ", ptr=" << std::hex
    306                << reinterpret_cast<intptr_t>(ptr);
    307     return 0;
    308   }
    309   // Update the page map and count the number of pages.
    310   size_t num_pages = 1;
    311   page_map_[pm_idx] = kPageMapEmpty;
    312   size_t idx = pm_idx + 1;
    313   size_t end = page_map_size_;
    314   while (idx < end && page_map_[idx] == pm_part_type) {
    315     page_map_[idx] = kPageMapEmpty;
    316     num_pages++;
    317     idx++;
    318   }
    319   const size_t byte_size = num_pages * kPageSize;
    320   if (already_zero) {
    321     if (kCheckZeroMemory) {
    322       const uword* word_ptr = reinterpret_cast<uword*>(ptr);
    323       for (size_t i = 0; i < byte_size / sizeof(uword); ++i) {
    324         CHECK_EQ(word_ptr[i], 0U) << "words don't match at index " << i;
    325       }
    326     }
    327   } else if (!DoesReleaseAllPages()) {
    328     memset(ptr, 0, byte_size);
    329   }
    330 
    331   if (kTraceRosAlloc) {
    332     LOG(INFO) << __PRETTY_FUNCTION__ << " : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
    333               << "-0x" << (reinterpret_cast<intptr_t>(ptr) + byte_size)
    334               << "(" << std::dec << (num_pages * kPageSize) << ")";
    335   }
    336 
    337   // Turn it into a free run.
    338   FreePageRun* fpr = reinterpret_cast<FreePageRun*>(ptr);
    339   if (kIsDebugBuild) {
    340     fpr->magic_num_ = kMagicNumFree;
    341   }
    342   fpr->SetByteSize(this, byte_size);
    343   DCHECK(IsAligned<kPageSize>(fpr->ByteSize(this)));
    344 
    345   DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
    346   if (!free_page_runs_.empty()) {
    347     // Try to coalesce in the higher address direction.
    348     if (kTraceRosAlloc) {
    349       LOG(INFO) << __PRETTY_FUNCTION__ << " : trying to coalesce a free page run 0x"
    350                 << std::hex << reinterpret_cast<uintptr_t>(fpr) << " [" << std::dec << pm_idx << "] -0x"
    351                 << std::hex << reinterpret_cast<uintptr_t>(fpr->End(this)) << " [" << std::dec
    352                 << (fpr->End(this) == End() ? page_map_size_ : ToPageMapIndex(fpr->End(this))) << "]";
    353     }
    354     auto higher_it = free_page_runs_.upper_bound(fpr);
    355     if (higher_it != free_page_runs_.end()) {
    356       for (auto it = higher_it; it != free_page_runs_.end(); ) {
    357         FreePageRun* h = *it;
    358         DCHECK_EQ(h->ByteSize(this) % kPageSize, static_cast<size_t>(0));
    359         if (kTraceRosAlloc) {
    360           LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a higher free page run 0x"
    361                     << std::hex << reinterpret_cast<uintptr_t>(h) << " [" << std::dec << ToPageMapIndex(h) << "] -0x"
    362                     << std::hex << reinterpret_cast<uintptr_t>(h->End(this)) << " [" << std::dec
    363                     << (h->End(this) == End() ? page_map_size_ : ToPageMapIndex(h->End(this))) << "]";
    364         }
    365         if (fpr->End(this) == h->Begin()) {
    366           if (kTraceRosAlloc) {
    367             LOG(INFO) << "Success";
    368           }
    369           // Clear magic num since this is no longer the start of a free page run.
    370           if (kIsDebugBuild) {
    371             h->magic_num_ = 0;
    372           }
    373           free_page_runs_.erase(it++);
    374           if (kTraceRosAlloc) {
    375             LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
    376                       << reinterpret_cast<intptr_t>(h)
    377                       << " from free_page_runs_";
    378           }
    379           fpr->SetByteSize(this, fpr->ByteSize(this) + h->ByteSize(this));
    380           DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
    381         } else {
    382           // Not adjacent. Stop.
    383           if (kTraceRosAlloc) {
    384             LOG(INFO) << "Fail";
    385           }
    386           break;
    387         }
    388       }
    389     }
    390     // Try to coalesce in the lower address direction.
    391     auto lower_it = free_page_runs_.upper_bound(fpr);
    392     if (lower_it != free_page_runs_.begin()) {
    393       --lower_it;
    394       for (auto it = lower_it; ; ) {
    395         // We want to try to coalesce with the first element but
    396         // there's no "<=" operator for the iterator.
    397         bool to_exit_loop = it == free_page_runs_.begin();
    398 
    399         FreePageRun* l = *it;
    400         DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
    401         if (kTraceRosAlloc) {
    402           LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a lower free page run 0x"
    403                     << std::hex << reinterpret_cast<uintptr_t>(l) << " [" << std::dec << ToPageMapIndex(l) << "] -0x"
    404                     << std::hex << reinterpret_cast<uintptr_t>(l->End(this)) << " [" << std::dec
    405                     << (l->End(this) == End() ? page_map_size_ : ToPageMapIndex(l->End(this))) << "]";
    406         }
    407         if (l->End(this) == fpr->Begin()) {
    408           if (kTraceRosAlloc) {
    409             LOG(INFO) << "Success";
    410           }
    411           free_page_runs_.erase(it--);
    412           if (kTraceRosAlloc) {
    413             LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
    414                       << reinterpret_cast<intptr_t>(l)
    415                       << " from free_page_runs_";
    416           }
    417           l->SetByteSize(this, l->ByteSize(this) + fpr->ByteSize(this));
    418           DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
    419           // Clear magic num since this is no longer the start of a free page run.
    420           if (kIsDebugBuild) {
    421             fpr->magic_num_ = 0;
    422           }
    423           fpr = l;
    424         } else {
    425           // Not adjacent. Stop.
    426           if (kTraceRosAlloc) {
    427             LOG(INFO) << "Fail";
    428           }
    429           break;
    430         }
    431         if (to_exit_loop) {
    432           break;
    433         }
    434       }
    435     }
    436   }
    437 
    438   // Insert it.
    439   DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
    440   DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
    441   DCHECK(fpr->IsFree());
    442   fpr->ReleasePages(this);
    443   DCHECK(fpr->IsFree());
    444   free_page_runs_.insert(fpr);
    445   DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
    446   if (kTraceRosAlloc) {
    447     LOG(INFO) << "RosAlloc::FreePages() : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr)
    448               << " into free_page_runs_";
    449   }
    450   return byte_size;
    451 }
    452 
    453 void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) {
    454   DCHECK_GT(size, kLargeSizeThreshold);
    455   size_t num_pages = RoundUp(size, kPageSize) / kPageSize;
    456   void* r;
    457   {
    458     MutexLock mu(self, lock_);
    459     r = AllocPages(self, num_pages, kPageMapLargeObject);
    460   }
    461   if (UNLIKELY(r == nullptr)) {
    462     if (kTraceRosAlloc) {
    463       LOG(INFO) << "RosAlloc::AllocLargeObject() : NULL";
    464     }
    465     return nullptr;
    466   }
    467   const size_t total_bytes = num_pages * kPageSize;
    468   *bytes_allocated = total_bytes;
    469   if (kTraceRosAlloc) {
    470     LOG(INFO) << "RosAlloc::AllocLargeObject() : 0x" << std::hex << reinterpret_cast<intptr_t>(r)
    471               << "-0x" << (reinterpret_cast<intptr_t>(r) + num_pages * kPageSize)
    472               << "(" << std::dec << (num_pages * kPageSize) << ")";
    473   }
    474   // Check if the returned memory is really all zero.
    475   if (kCheckZeroMemory) {
    476     CHECK_EQ(total_bytes % sizeof(uword), 0U);
    477     const uword* words = reinterpret_cast<uword*>(r);
    478     for (size_t i = 0; i < total_bytes / sizeof(uword); ++i) {
    479       CHECK_EQ(words[i], 0U);
    480     }
    481   }
    482   return r;
    483 }
    484 
    485 size_t RosAlloc::FreeInternal(Thread* self, void* ptr) {
    486   DCHECK_LE(base_, ptr);
    487   DCHECK_LT(ptr, base_ + footprint_);
    488   size_t pm_idx = RoundDownToPageMapIndex(ptr);
    489   Run* run = nullptr;
    490   {
    491     MutexLock mu(self, lock_);
    492     DCHECK_LT(pm_idx, page_map_size_);
    493     byte page_map_entry = page_map_[pm_idx];
    494     if (kTraceRosAlloc) {
    495       LOG(INFO) << "RosAlloc::FreeInternal() : " << std::hex << ptr << ", pm_idx=" << std::dec << pm_idx
    496                 << ", page_map_entry=" << static_cast<int>(page_map_entry);
    497     }
    498     switch (page_map_[pm_idx]) {
    499       case kPageMapLargeObject:
    500         return FreePages(self, ptr, false);
    501       case kPageMapLargeObjectPart:
    502         LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
    503         return 0;
    504       case kPageMapRunPart: {
    505         // Find the beginning of the run.
    506         do {
    507           --pm_idx;
    508           DCHECK_LT(pm_idx, capacity_ / kPageSize);
    509         } while (page_map_[pm_idx] != kPageMapRun);
    510         // Fall-through.
    511       case kPageMapRun:
    512         run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
    513         DCHECK_EQ(run->magic_num_, kMagicNum);
    514         break;
    515       case kPageMapReleased:
    516         // Fall-through.
    517       case kPageMapEmpty:
    518         LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
    519         return 0;
    520       }
    521       default:
    522         LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
    523         return 0;
    524     }
    525   }
    526   DCHECK(run != nullptr);
    527   return FreeFromRun(self, ptr, run);
    528 }
    529 
    530 size_t RosAlloc::Free(Thread* self, void* ptr) {
    531   ReaderMutexLock rmu(self, bulk_free_lock_);
    532   return FreeInternal(self, ptr);
    533 }
    534 
    535 RosAlloc::Run* RosAlloc::AllocRun(Thread* self, size_t idx) {
    536   RosAlloc::Run* new_run = nullptr;
    537   {
    538     MutexLock mu(self, lock_);
    539     new_run = reinterpret_cast<Run*>(AllocPages(self, numOfPages[idx], kPageMapRun));
    540   }
    541   if (LIKELY(new_run != nullptr)) {
    542     if (kIsDebugBuild) {
    543       new_run->magic_num_ = kMagicNum;
    544     }
    545     new_run->size_bracket_idx_ = idx;
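            // Mark the trailing bitmap bits that have no corresponding slot as allocated up front so
            // AllocSlot() can never hand them out (see GetBitmapLastVectorMask()).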
    546     new_run->SetAllocBitMapBitsForInvalidSlots();
    547     DCHECK(!new_run->IsThreadLocal());
    548     DCHECK_EQ(new_run->first_search_vec_idx_, 0U);
    549     DCHECK(!new_run->to_be_bulk_freed_);
    550     if (kUsePrefetchDuringAllocRun && idx < kNumThreadLocalSizeBrackets) {
    551       // Take ownership of the cache lines if we are likely to be thread local run.
    552       if (kPrefetchNewRunDataByZeroing) {
    553         // Zeroing the data is sometimes faster than prefetching but it increases memory usage
    554         // since we end up dirtying zero pages which may have been madvised.
    555         new_run->ZeroData();
    556       } else {
    557         const size_t num_of_slots = numOfSlots[idx];
    558         const size_t bracket_size = bracketSizes[idx];
    559         const size_t num_of_bytes = num_of_slots * bracket_size;
    560         byte* begin = reinterpret_cast<byte*>(new_run) + headerSizes[idx];
    561         for (size_t i = 0; i < num_of_bytes; i += kPrefetchStride) {
    562           __builtin_prefetch(begin + i);
    563         }
    564       }
    565     }
    566   }
    567   return new_run;
    568 }
    569 
    570 RosAlloc::Run* RosAlloc::RefillRun(Thread* self, size_t idx) {
    571   // Get the lowest address non-full run from the binary tree.
    572   std::set<Run*>* const bt = &non_full_runs_[idx];
    573   if (!bt->empty()) {
    574     // If there's one, use it as the current run.
    575     auto it = bt->begin();
    576     Run* non_full_run = *it;
    577     DCHECK(non_full_run != nullptr);
    578     DCHECK(!non_full_run->IsThreadLocal());
    579     bt->erase(it);
    580     return non_full_run;
    581   }
    582   // If there's none, allocate a new run and use it as the current run.
    583   return AllocRun(self, idx);
    584 }
    585 
    586 inline void* RosAlloc::AllocFromCurrentRunUnlocked(Thread* self, size_t idx) {
    587   Run* current_run = current_runs_[idx];
    588   DCHECK(current_run != nullptr);
    589   void* slot_addr = current_run->AllocSlot();
    590   if (UNLIKELY(slot_addr == nullptr)) {
    591     // The current run got full. Try to refill it.
    592     DCHECK(current_run->IsFull());
    593     if (kIsDebugBuild && current_run != dedicated_full_run_) {
    594       full_runs_[idx].insert(current_run);
    595       if (kTraceRosAlloc) {
    596         LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
    597                   << reinterpret_cast<intptr_t>(current_run)
    598                   << " into full_runs_[" << std::dec << idx << "]";
    599       }
    600       DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
    601       DCHECK(full_runs_[idx].find(current_run) != full_runs_[idx].end());
    602     }
    603     current_run = RefillRun(self, idx);
    604     if (UNLIKELY(current_run == nullptr)) {
    605       // Failed to allocate a new run, make sure that it is the dedicated full run.
    606       current_runs_[idx] = dedicated_full_run_;
    607       return nullptr;
    608     }
    609     DCHECK(current_run != nullptr);
    610     DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
    611     DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
    612     current_run->SetIsThreadLocal(false);
    613     current_runs_[idx] = current_run;
    614     DCHECK(!current_run->IsFull());
    615     slot_addr = current_run->AllocSlot();
    616     // Must succeed now with a new run.
    617     DCHECK(slot_addr != nullptr);
    618   }
    619   return slot_addr;
    620 }
    621 
    622 void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated) {
    623   DCHECK_LE(size, kLargeSizeThreshold);
    624   size_t bracket_size;
    625   size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
    626   DCHECK_EQ(idx, SizeToIndex(size));
    627   DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
    628   DCHECK_EQ(bracket_size, bracketSizes[idx]);
    629   DCHECK_LE(size, bracket_size);
    630   DCHECK(size > 512 || bracket_size - size < 16);
    631   Locks::mutator_lock_->AssertExclusiveHeld(self);
    632   void* slot_addr = AllocFromCurrentRunUnlocked(self, idx);
    633   if (LIKELY(slot_addr != nullptr)) {
    634     DCHECK(bytes_allocated != nullptr);
    635     *bytes_allocated = bracket_size;
    636     // Caller verifies that it is all 0.
    637   }
    638   return slot_addr;
    639 }
    640 
    641 void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) {
    642   DCHECK_LE(size, kLargeSizeThreshold);
    643   size_t bracket_size;
    644   size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
    645   DCHECK_EQ(idx, SizeToIndex(size));
    646   DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
    647   DCHECK_EQ(bracket_size, bracketSizes[idx]);
    648   DCHECK_LE(size, bracket_size);
    649   DCHECK(size > 512 || bracket_size - size < 16);
    650 
    651   void* slot_addr;
    652 
    653   if (LIKELY(idx < kNumThreadLocalSizeBrackets)) {
    654     // Use a thread-local run.
    655     Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx));
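            // Each thread caches one run per small size bracket. A thread that has no real run for
            // this bracket holds dedicated_full_run_ here, so the fast-path AllocSlot() below simply
            // fails and we fall into the refill path.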
    656     // Allow invalid since this will always fail the allocation.
    657     if (kIsDebugBuild) {
    658       // Need the lock to prevent race conditions.
    659       MutexLock mu(self, *size_bracket_locks_[idx]);
    660       CHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
    661       CHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
    662     }
    663     DCHECK(thread_local_run != nullptr);
    664     DCHECK(thread_local_run->IsThreadLocal() || thread_local_run == dedicated_full_run_);
    665     slot_addr = thread_local_run->AllocSlot();
    666     // The allocation must fail if the run is invalid.
    667     DCHECK(thread_local_run != dedicated_full_run_ || slot_addr == nullptr)
    668         << "allocated from an invalid run";
    669     if (UNLIKELY(slot_addr == nullptr)) {
    670       // The run got full. Try to free slots.
    671       DCHECK(thread_local_run->IsFull());
    672       MutexLock mu(self, *size_bracket_locks_[idx]);
    673       bool is_all_free_after_merge;
    674       // This is safe to do for the dedicated_full_run_ since the bitmaps are empty.
    675       if (thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&is_all_free_after_merge)) {
    676         DCHECK_NE(thread_local_run, dedicated_full_run_);
    677         // Some slot got freed. Keep it.
    678         DCHECK(!thread_local_run->IsFull());
    679         DCHECK_EQ(is_all_free_after_merge, thread_local_run->IsAllFree());
    680         if (is_all_free_after_merge) {
    681           // Check that the bitmap idx is back at 0 if it's all free.
    682           DCHECK_EQ(thread_local_run->first_search_vec_idx_, 0U);
    683         }
    684       } else {
    685         // No slots got freed. Try to refill the thread-local run.
    686         DCHECK(thread_local_run->IsFull());
    687         if (thread_local_run != dedicated_full_run_) {
    688           thread_local_run->SetIsThreadLocal(false);
    689           if (kIsDebugBuild) {
    690             full_runs_[idx].insert(thread_local_run);
    691             if (kTraceRosAlloc) {
    692               LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex
    693                         << reinterpret_cast<intptr_t>(thread_local_run)
    694                         << " into full_runs_[" << std::dec << idx << "]";
    695             }
    696           }
    697           DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
    698           DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end());
    699         }
    700 
    701         thread_local_run = RefillRun(self, idx);
    702         if (UNLIKELY(thread_local_run == nullptr)) {
    703           self->SetRosAllocRun(idx, dedicated_full_run_);
    704           return nullptr;
    705         }
    706         DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
    707         DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
    708         thread_local_run->SetIsThreadLocal(true);
    709         self->SetRosAllocRun(idx, thread_local_run);
    710         DCHECK(!thread_local_run->IsFull());
    711       }
    712 
    713       DCHECK(thread_local_run != nullptr);
    714       DCHECK(!thread_local_run->IsFull());
    715       DCHECK(thread_local_run->IsThreadLocal());
    716       slot_addr = thread_local_run->AllocSlot();
    717       // Must succeed now with a new run.
    718       DCHECK(slot_addr != nullptr);
    719     }
    720     if (kTraceRosAlloc) {
    721       LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
    722                 << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
    723                 << "(" << std::dec << (bracket_size) << ")";
    724     }
    725   } else {
    726     // Use the (shared) current run.
    727     MutexLock mu(self, *size_bracket_locks_[idx]);
    728     slot_addr = AllocFromCurrentRunUnlocked(self, idx);
    729     if (kTraceRosAlloc) {
    730       LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
    731                 << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
    732                 << "(" << std::dec << (bracket_size) << ")";
    733     }
    734   }
    735   DCHECK(bytes_allocated != nullptr);
    736   *bytes_allocated = bracket_size;
    737   // Caller verifies that it is all 0.
    738   return slot_addr;
    739 }
    740 
    741 size_t RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) {
    742   DCHECK_EQ(run->magic_num_, kMagicNum);
    743   DCHECK_LT(run, ptr);
    744   DCHECK_LT(ptr, run->End());
    745   const size_t idx = run->size_bracket_idx_;
    746   const size_t bracket_size = bracketSizes[idx];
    747   bool run_was_full = false;
    748   MutexLock mu(self, *size_bracket_locks_[idx]);
    749   if (kIsDebugBuild) {
    750     run_was_full = run->IsFull();
    751   }
    752   if (kTraceRosAlloc) {
    753     LOG(INFO) << "RosAlloc::FreeFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr);
    754   }
    755   if (LIKELY(run->IsThreadLocal())) {
    756     // It's a thread-local run. Just mark the thread-local free bit map and return.
    757     DCHECK_LT(run->size_bracket_idx_, kNumThreadLocalSizeBrackets);
    758     DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
    759     DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
    760     run->MarkThreadLocalFreeBitMap(ptr);
    761     if (kTraceRosAlloc) {
    762       LOG(INFO) << "RosAlloc::FreeFromRun() : Freed a slot in a thread local run 0x" << std::hex
    763                 << reinterpret_cast<intptr_t>(run);
    764     }
    765     // A thread local run will be kept as a thread local even if it's become all free.
    766     return bracket_size;
    767   }
    768   // Free the slot in the run.
    769   run->FreeSlot(ptr);
    770   std::set<Run*>* non_full_runs = &non_full_runs_[idx];
    771   if (run->IsAllFree()) {
    772     // It has just become completely free. Free the pages of this run.
    773     std::set<Run*>::iterator pos = non_full_runs->find(run);
    774     if (pos != non_full_runs->end()) {
    775       non_full_runs->erase(pos);
    776       if (kTraceRosAlloc) {
    777         LOG(INFO) << "RosAlloc::FreeFromRun() : Erased run 0x" << std::hex
    778                   << reinterpret_cast<intptr_t>(run) << " from non_full_runs_";
    779       }
    780     }
    781     if (run == current_runs_[idx]) {
    782       current_runs_[idx] = dedicated_full_run_;
    783     }
    784     DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
    785     DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
    786     run->ZeroHeader();
    787     {
    788       MutexLock mu(self, lock_);
    789       FreePages(self, run, true);
    790     }
    791   } else {
    792     // It is not completely free. If it wasn't the current run or
    793     // already in the non-full run set (i.e., it was full) insert it
    794     // into the non-full run set.
    795     if (run != current_runs_[idx]) {
    796       std::unordered_set<Run*, hash_run, eq_run>* full_runs =
    797           kIsDebugBuild ? &full_runs_[idx] : NULL;
    798       std::set<Run*>::iterator pos = non_full_runs->find(run);
    799       if (pos == non_full_runs->end()) {
    800         DCHECK(run_was_full);
    801         DCHECK(full_runs->find(run) != full_runs->end());
    802         if (kIsDebugBuild) {
    803           full_runs->erase(run);
    804           if (kTraceRosAlloc) {
    805             LOG(INFO) << "RosAlloc::FreeFromRun() : Erased run 0x" << std::hex
    806                       << reinterpret_cast<intptr_t>(run) << " from full_runs_";
    807           }
    808         }
    809         non_full_runs->insert(run);
    810         DCHECK(!run->IsFull());
    811         if (kTraceRosAlloc) {
    812           LOG(INFO) << "RosAlloc::FreeFromRun() : Inserted run 0x" << std::hex
    813                     << reinterpret_cast<intptr_t>(run)
    814                     << " into non_full_runs_[" << std::dec << idx << "]";
    815         }
    816       }
    817     }
    818   }
    819   return bracket_size;
    820 }
    821 
    822 std::string RosAlloc::Run::BitMapToStr(uint32_t* bit_map_base, size_t num_vec) {
    823   std::string bit_map_str;
    824   for (size_t v = 0; v < num_vec; v++) {
    825     uint32_t vec = bit_map_base[v];
    826     if (v != num_vec - 1) {
    827       bit_map_str.append(StringPrintf("%x-", vec));
    828     } else {
    829       bit_map_str.append(StringPrintf("%x", vec));
    830     }
    831   }
    832   return bit_map_str;
    833 }
    834 
    835 std::string RosAlloc::Run::Dump() {
    836   size_t idx = size_bracket_idx_;
    837   size_t num_slots = numOfSlots[idx];
    838   size_t num_vec = RoundUp(num_slots, 32) / 32;
    839   std::ostringstream stream;
    840   stream << "RosAlloc Run = " << reinterpret_cast<void*>(this)
    841          << "{ magic_num=" << static_cast<int>(magic_num_)
    842          << " size_bracket_idx=" << idx
    843          << " is_thread_local=" << static_cast<int>(is_thread_local_)
    844          << " to_be_bulk_freed=" << static_cast<int>(to_be_bulk_freed_)
    845          << " first_search_vec_idx=" << first_search_vec_idx_
    846          << " alloc_bit_map=" << BitMapToStr(alloc_bit_map_, num_vec)
    847          << " bulk_free_bit_map=" << BitMapToStr(BulkFreeBitMap(), num_vec)
    848          << " thread_local_bit_map=" << BitMapToStr(ThreadLocalFreeBitMap(), num_vec)
    849          << " }" << std::endl;
    850   return stream.str();
    851 }
    852 
    853 inline void* RosAlloc::Run::AllocSlot() {
    854   const size_t idx = size_bracket_idx_;
    855   while (true) {
    856     if (kIsDebugBuild) {
    857       // Make sure that no slots leaked, the bitmap should be full for all previous vectors.
    858       for (size_t i = 0; i < first_search_vec_idx_; ++i) {
    859         CHECK_EQ(~alloc_bit_map_[i], 0U);
    860       }
    861     }
    862     uint32_t* const alloc_bitmap_ptr = &alloc_bit_map_[first_search_vec_idx_];
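            // __builtin_ffs(x) yields one plus the index of the least significant set bit of x, or 0
            // when x is 0. Applying it to the inverted word finds the lowest clear bit, i.e. the first
            // free slot tracked by this 32-slot word (~word == 0 means the word is completely full).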
    863     uint32_t ffz1 = __builtin_ffs(~*alloc_bitmap_ptr);
    864     if (LIKELY(ffz1 != 0)) {
    865       const uint32_t ffz = ffz1 - 1;
    866       const uint32_t slot_idx = ffz + first_search_vec_idx_ * sizeof(*alloc_bitmap_ptr) * kBitsPerByte;
    867       const uint32_t mask = 1U << ffz;
    868       DCHECK_LT(slot_idx, numOfSlots[idx]) << "out of range";
    869       // Found an empty slot. Set the bit.
    870       DCHECK_EQ(*alloc_bitmap_ptr & mask, 0U);
    871       *alloc_bitmap_ptr |= mask;
    872       DCHECK_NE(*alloc_bitmap_ptr & mask, 0U);
    873       byte* slot_addr = reinterpret_cast<byte*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx];
    874       if (kTraceRosAlloc) {
    875         LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
    876                   << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
    877       }
    878       return slot_addr;
    879     }
    880     const size_t num_words = RoundUp(numOfSlots[idx], 32) / 32;
    881     if (first_search_vec_idx_ + 1 >= num_words) {
    882       DCHECK(IsFull());
    883       // Already at the last word, return null.
    884       return nullptr;
    885     }
    886     // Increase the index to the next word and try again.
    887     ++first_search_vec_idx_;
    888   }
    889 }
    890 
    891 void RosAlloc::Run::FreeSlot(void* ptr) {
    892   DCHECK(!IsThreadLocal());
    893   const byte idx = size_bracket_idx_;
    894   const size_t bracket_size = bracketSizes[idx];
    895   const size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
    896       - (reinterpret_cast<byte*>(this) + headerSizes[idx]);
    897   DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
    898   size_t slot_idx = offset_from_slot_base / bracket_size;
    899   DCHECK_LT(slot_idx, numOfSlots[idx]);
    900   size_t vec_idx = slot_idx / 32;
    901   if (kIsDebugBuild) {
    902     size_t num_vec = RoundUp(numOfSlots[idx], 32) / 32;
    903     DCHECK_LT(vec_idx, num_vec);
    904   }
    905   size_t vec_off = slot_idx % 32;
    906   uint32_t* vec = &alloc_bit_map_[vec_idx];
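          // Remember the lowest bitmap word that now contains a free bit so the next AllocSlot()
          // scan can start from it instead of rescanning earlier, still-full words.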
    907   first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(vec_idx));
    908   const uint32_t mask = 1U << vec_off;
    909   DCHECK_NE(*vec & mask, 0U);
    910   *vec &= ~mask;
    911   DCHECK_EQ(*vec & mask, 0U);
    912   // Zero out the memory.
    913   // TODO: Investigate alternate memset since ptr is guaranteed to be aligned to 16.
    914   memset(ptr, 0, bracket_size);
    915   if (kTraceRosAlloc) {
    916     LOG(INFO) << "RosAlloc::Run::FreeSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
    917               << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
    918   }
    919 }
    920 
    921 inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out) {
    922   DCHECK(IsThreadLocal());
    923   // Free slots in the alloc bit map based on the thread local free bit map.
    924   const size_t idx = size_bracket_idx_;
    925   const size_t num_of_slots = numOfSlots[idx];
    926   const size_t num_vec = RoundUp(num_of_slots, 32) / 32;
    927   bool changed = false;
    928   uint32_t* vecp = &alloc_bit_map_[0];
    929   uint32_t* tl_free_vecp = &ThreadLocalFreeBitMap()[0];
    930   bool is_all_free_after = true;
    931   for (size_t v = 0; v < num_vec; v++, vecp++, tl_free_vecp++) {
    932     uint32_t tl_free_vec = *tl_free_vecp;
    933     uint32_t vec_before = *vecp;
    934     uint32_t vec_after;
    935     if (tl_free_vec != 0) {
    936       first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v));
    937       vec_after = vec_before & ~tl_free_vec;
    938       *vecp = vec_after;
    939       changed = true;
    940       *tl_free_vecp = 0;  // clear the thread local free bit map.
    941     } else {
    942       vec_after = vec_before;
    943     }
    944     if (vec_after != 0) {
    945       if (v == num_vec - 1) {
    946         // The run can only be "not all free" if some bit other than the always-set mask bits is set.
    947         is_all_free_after =
    948             is_all_free_after && GetBitmapLastVectorMask(num_of_slots, num_vec) == vec_after;
    949       } else {
    950         is_all_free_after = false;
    951       }
    952     }
    953     DCHECK_EQ(*tl_free_vecp, static_cast<uint32_t>(0));
    954   }
    955   *is_all_free_after_out = is_all_free_after;
    956   // Return true if there was at least a bit set in the thread-local
    957   // free bit map and at least a bit in the alloc bit map changed.
    958   return changed;
    959 }
    960 
    961 inline void RosAlloc::Run::MergeBulkFreeBitMapIntoAllocBitMap() {
    962   DCHECK(!IsThreadLocal());
    963   // Free slots in the alloc bit map based on the bulk free bit map.
    964   const size_t num_vec = NumberOfBitmapVectors();
    965   uint32_t* vecp = &alloc_bit_map_[0];
    966   uint32_t* free_vecp = &BulkFreeBitMap()[0];
    967   for (size_t v = 0; v < num_vec; v++, vecp++, free_vecp++) {
    968     uint32_t free_vec = *free_vecp;
    969     if (free_vec != 0) {
    970       first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v));
    971       *vecp &= ~free_vec;
    972       *free_vecp = 0;  // clear the bulk free bit map.
    973     }
    974     DCHECK_EQ(*free_vecp, static_cast<uint32_t>(0));
    975   }
    976 }
    977 
    978 inline void RosAlloc::Run::UnionBulkFreeBitMapToThreadLocalFreeBitMap() {
    979   DCHECK(IsThreadLocal());
    980   // Union the thread local bit map with the bulk free bit map.
    981   size_t num_vec = NumberOfBitmapVectors();
    982   uint32_t* to_vecp = &ThreadLocalFreeBitMap()[0];
    983   uint32_t* from_vecp = &BulkFreeBitMap()[0];
    984   for (size_t v = 0; v < num_vec; v++, to_vecp++, from_vecp++) {
    985     uint32_t from_vec = *from_vecp;
    986     if (from_vec != 0) {
    987       *to_vecp |= from_vec;
    988       *from_vecp = 0;  // clear the bulk free bit map.
    989     }
    990     DCHECK_EQ(*from_vecp, static_cast<uint32_t>(0));
    991   }
    992 }
    993 
    994 inline void RosAlloc::Run::MarkThreadLocalFreeBitMap(void* ptr) {
    995   DCHECK(IsThreadLocal());
    996   MarkFreeBitMapShared(ptr, ThreadLocalFreeBitMap(), "MarkThreadLocalFreeBitMap");
    997 }
    998 
    999 inline size_t RosAlloc::Run::MarkBulkFreeBitMap(void* ptr) {
   1000   return MarkFreeBitMapShared(ptr, BulkFreeBitMap(), "MarkFreeBitMap");
   1001 }
   1002 
   1003 inline size_t RosAlloc::Run::MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base,
   1004                                                   const char* caller_name) {
   1005   const byte idx = size_bracket_idx_;
   1006   const size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
   1007       - (reinterpret_cast<byte*>(this) + headerSizes[idx]);
   1008   const size_t bracket_size = bracketSizes[idx];
   1009   memset(ptr, 0, bracket_size);
   1010   DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
   1011   size_t slot_idx = offset_from_slot_base / bracket_size;
   1012   DCHECK_LT(slot_idx, numOfSlots[idx]);
   1013   size_t vec_idx = slot_idx / 32;
   1014   if (kIsDebugBuild) {
   1015     size_t num_vec = NumberOfBitmapVectors();
   1016     DCHECK_LT(vec_idx, num_vec);
   1017   }
   1018   size_t vec_off = slot_idx % 32;
   1019   uint32_t* vec = &free_bit_map_base[vec_idx];
   1020   const uint32_t mask = 1U << vec_off;
   1021   DCHECK_EQ(*vec & mask, 0U);
   1022   *vec |= mask;
   1023   DCHECK_NE(*vec & mask, 0U);
   1024   if (kTraceRosAlloc) {
   1025     LOG(INFO) << "RosAlloc::Run::" << caller_name << "() : 0x" << std::hex
   1026               << reinterpret_cast<intptr_t>(ptr)
   1027               << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
   1028   }
   1029   return bracket_size;
   1030 }
   1031 
   1032 inline uint32_t RosAlloc::Run::GetBitmapLastVectorMask(size_t num_slots, size_t num_vec) {
   1033   const size_t kBitsPerVec = 32;
   1034   DCHECK_GE(num_vec * kBitsPerVec, num_slots);
   1035   size_t remain = num_vec * kBitsPerVec - num_slots;
   1036   DCHECK_NE(remain, kBitsPerVec);
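          // The returned mask has a 1 for every bit of the last vector that does not correspond to a
          // real slot. As a hypothetical example, with num_slots = 70 and num_vec = 3, remain is
          // 96 - 70 = 26 and the mask sets bits 6..31, i.e. the nonexistent slots 70..95 of the run.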
   1037   return ((1U << remain) - 1) << (kBitsPerVec - remain);
   1038 }
   1039 
   1040 inline bool RosAlloc::Run::IsAllFree() {
   1041   const byte idx = size_bracket_idx_;
   1042   const size_t num_slots = numOfSlots[idx];
   1043   const size_t num_vec = NumberOfBitmapVectors();
   1044   DCHECK_NE(num_vec, 0U);
   1045   // Check the last vector after the loop since it uses a special case for the masked bits.
   1046   for (size_t v = 0; v < num_vec - 1; v++) {
   1047     uint32_t vec = alloc_bit_map_[v];
   1048     if (vec != 0) {
   1049       return false;
   1050     }
   1051   }
   1052   // Make sure the last word is equal to the mask, all other bits must be 0.
   1053   return alloc_bit_map_[num_vec - 1] == GetBitmapLastVectorMask(num_slots, num_vec);
   1054 }
   1055 
   1056 inline bool RosAlloc::Run::IsFull() {
   1057   const size_t num_vec = NumberOfBitmapVectors();
   1058   for (size_t v = 0; v < num_vec; ++v) {
   1059     if (~alloc_bit_map_[v] != 0) {
   1060       return false;
   1061     }
   1062   }
   1063   return true;
   1064 }
   1065 
   1066 inline bool RosAlloc::Run::IsBulkFreeBitmapClean() {
   1067   const size_t num_vec = NumberOfBitmapVectors();
   1068   for (size_t v = 0; v < num_vec; v++) {
   1069     uint32_t vec = BulkFreeBitMap()[v];
   1070     if (vec != 0) {
   1071       return false;
   1072     }
   1073   }
   1074   return true;
   1075 }
   1076 
   1077 inline bool RosAlloc::Run::IsThreadLocalFreeBitmapClean() {
   1078   const size_t num_vec = NumberOfBitmapVectors();
   1079   for (size_t v = 0; v < num_vec; v++) {
   1080     uint32_t vec = ThreadLocalFreeBitMap()[v];
   1081     if (vec != 0) {
   1082       return false;
   1083     }
   1084   }
   1085   return true;
   1086 }
   1087 
   1088 inline void RosAlloc::Run::SetAllocBitMapBitsForInvalidSlots() {
   1089   const size_t idx = size_bracket_idx_;
   1090   const size_t num_slots = numOfSlots[idx];
   1091   const size_t num_vec = RoundUp(num_slots, 32) / 32;
   1092   DCHECK_NE(num_vec, 0U);
   1093   // Make sure to set the bits at the end of the bitmap so that we don't allocate there since they
   1094   // don't represent valid slots.
   1095   alloc_bit_map_[num_vec - 1] |= GetBitmapLastVectorMask(num_slots, num_vec);
   1096 }
   1097 
   1098 inline void RosAlloc::Run::ZeroHeader() {
   1099   const byte idx = size_bracket_idx_;
   1100   memset(this, 0, headerSizes[idx]);
   1101 }
   1102 
   1103 inline void RosAlloc::Run::ZeroData() {
   1104   const byte idx = size_bracket_idx_;
   1105   byte* slot_begin = reinterpret_cast<byte*>(this) + headerSizes[idx];
   1106   memset(slot_begin, 0, numOfSlots[idx] * bracketSizes[idx]);
   1107 }
   1108 
   1109 inline void RosAlloc::Run::FillAllocBitMap() {
   1110   size_t num_vec = NumberOfBitmapVectors();
   1111   memset(alloc_bit_map_, 0xFF, sizeof(uint32_t) * num_vec);
   1112   first_search_vec_idx_ = num_vec - 1;  // No free bits in any of the bitmap words.
   1113 }
   1114 
   1115 void RosAlloc::Run::InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
   1116                                     void* arg) {
   1117   size_t idx = size_bracket_idx_;
   1118   byte* slot_base = reinterpret_cast<byte*>(this) + headerSizes[idx];
   1119   size_t num_slots = numOfSlots[idx];
   1120   size_t bracket_size = IndexToBracketSize(idx);
   1121   DCHECK_EQ(slot_base + num_slots * bracket_size, reinterpret_cast<byte*>(this) + numOfPages[idx] * kPageSize);
   1122   size_t num_vec = RoundUp(num_slots, 32) / 32;
   1123   size_t slots = 0;
   1124   for (size_t v = 0; v < num_vec; v++, slots += 32) {
   1125     DCHECK_GE(num_slots, slots);
   1126     uint32_t vec = alloc_bit_map_[v];
   1127     size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
   1128     for (size_t i = 0; i < end; ++i) {
   1129       bool is_allocated = ((vec >> i) & 0x1) != 0;
   1130       byte* slot_addr = slot_base + (slots + i) * bracket_size;
   1131       if (is_allocated) {
   1132         handler(slot_addr, slot_addr + bracket_size, bracket_size, arg);
   1133       } else {
   1134         handler(slot_addr, slot_addr + bracket_size, 0, arg);
   1135       }
   1136     }
   1137   }
   1138 }
   1139 
   1140 // If true, read the page map entries in BulkFree() without using the
   1141 // lock for better performance, assuming that the existence of an
   1142 // allocated chunk/pointer being freed in BulkFree() guarantees that
   1143 // the page map entry won't change. Enabled for now.
   1144 static constexpr bool kReadPageMapEntryWithoutLockInBulkFree = true;
   1145 
   1146 size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
   1147   size_t freed_bytes = 0;
   1148   if (false) {
   1149     // Used only to test Free() as GC uses only BulkFree().
   1150     for (size_t i = 0; i < num_ptrs; ++i) {
   1151       freed_bytes += FreeInternal(self, ptrs[i]);
   1152     }
   1153     return freed_bytes;
   1154   }
   1155 
   1156   WriterMutexLock wmu(self, bulk_free_lock_);
   1157 
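          // BulkFree() works in two phases: it first tags every pointer in its run's bulk free bit
          // map (or frees large-object pages directly), and only then walks the affected runs once,
          // taking each size bracket lock a single time per run to merge the bulk free bits.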
   1158   // First mark slots to free in the bulk free bit map without locking the
   1159   // size bracket locks. On host, unordered_set is faster than vector + flag.
   1160 #ifdef HAVE_ANDROID_OS
   1161   std::vector<Run*> runs;
   1162 #else
   1163   std::unordered_set<Run*, hash_run, eq_run> runs;
   1164 #endif
   1165   for (size_t i = 0; i < num_ptrs; i++) {
   1166     void* ptr = ptrs[i];
   1167     DCHECK_LE(base_, ptr);
   1168     DCHECK_LT(ptr, base_ + footprint_);
   1169     size_t pm_idx = RoundDownToPageMapIndex(ptr);
   1170     Run* run = nullptr;
   1171     if (kReadPageMapEntryWithoutLockInBulkFree) {
   1172       // Read the page map entries without locking the lock.
   1173       byte page_map_entry = page_map_[pm_idx];
   1174       if (kTraceRosAlloc) {
   1175         LOG(INFO) << "RosAlloc::BulkFree() : " << std::hex << ptr << ", pm_idx="
   1176                   << std::dec << pm_idx
   1177                   << ", page_map_entry=" << static_cast<int>(page_map_entry);
   1178       }
   1179       if (LIKELY(page_map_entry == kPageMapRun)) {
   1180         run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
   1181       } else if (LIKELY(page_map_entry == kPageMapRunPart)) {
   1182         size_t pi = pm_idx;
   1183         // Find the beginning of the run.
   1184         do {
   1185           --pi;
   1186           DCHECK_LT(pi, capacity_ / kPageSize);
   1187         } while (page_map_[pi] != kPageMapRun);
   1188         run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
   1189       } else if (page_map_entry == kPageMapLargeObject) {
   1190         MutexLock mu(self, lock_);
   1191         freed_bytes += FreePages(self, ptr, false);
   1192         continue;
   1193       } else {
   1194         LOG(FATAL) << "Unreachable - page map type: " << page_map_entry;
   1195       }
   1196     } else {
   1197       // Read the page map entries with a lock.
   1198       MutexLock mu(self, lock_);
   1199       DCHECK_LT(pm_idx, page_map_size_);
   1200       byte page_map_entry = page_map_[pm_idx];
   1201       if (kTraceRosAlloc) {
   1202         LOG(INFO) << "RosAlloc::BulkFree() : " << std::hex << ptr << ", pm_idx="
   1203                   << std::dec << pm_idx
   1204                   << ", page_map_entry=" << static_cast<int>(page_map_entry);
   1205       }
   1206       if (LIKELY(page_map_entry == kPageMapRun)) {
   1207         run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
   1208       } else if (LIKELY(page_map_entry == kPageMapRunPart)) {
   1209         size_t pi = pm_idx;
   1210         // Find the beginning of the run.
   1211         do {
   1212           --pi;
   1213           DCHECK_LT(pi, capacity_ / kPageSize);
   1214         } while (page_map_[pi] != kPageMapRun);
   1215         run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
   1216       } else if (page_map_entry == kPageMapLargeObject) {
   1217         freed_bytes += FreePages(self, ptr, false);
   1218         continue;
   1219       } else {
   1220         LOG(FATAL) << "Unreachable - page map type: " << page_map_entry;
   1221       }
   1222     }
   1223     DCHECK(run != nullptr);
   1224     DCHECK_EQ(run->magic_num_, kMagicNum);
   1225     // Set the bit in the bulk free bit map.
   1226     freed_bytes += run->MarkBulkFreeBitMap(ptr);
   1227 #ifdef HAVE_ANDROID_OS
   1228     if (!run->to_be_bulk_freed_) {
   1229       run->to_be_bulk_freed_ = true;
   1230       runs.push_back(run);
   1231     }
   1232 #else
   1233     runs.insert(run);
   1234 #endif
   1235   }
   1236 
   1237   // Now, iterate over the affected runs and update the alloc bit map
   1238   // based on the bulk free bit map (for non-thread-local runs) and
   1239   // union the bulk free bit map into the thread-local free bit map
   1240   // (for thread-local runs.)
   1241   for (Run* run : runs) {
   1242 #ifdef HAVE_ANDROID_OS
   1243     DCHECK(run->to_be_bulk_freed_);
   1244     run->to_be_bulk_freed_ = false;
   1245 #endif
   1246     size_t idx = run->size_bracket_idx_;
   1247     MutexLock mu(self, *size_bracket_locks_[idx]);
   1248     if (run->IsThreadLocal()) {
   1249       DCHECK_LT(run->size_bracket_idx_, kNumThreadLocalSizeBrackets);
   1250       DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
   1251       DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
   1252       run->UnionBulkFreeBitMapToThreadLocalFreeBitMap();
   1253       if (kTraceRosAlloc) {
   1254         LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a thread local run 0x"
   1255                   << std::hex << reinterpret_cast<intptr_t>(run);
   1256       }
   1257       DCHECK(run->IsThreadLocal());
   1258       // A thread local run will be kept as a thread local even if
   1259       // it's become all free.
   1260     } else {
   1261       bool run_was_full = run->IsFull();
   1262       run->MergeBulkFreeBitMapIntoAllocBitMap();
   1263       if (kTraceRosAlloc) {
   1264         LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a run 0x" << std::hex
   1265                   << reinterpret_cast<intptr_t>(run);
   1266       }
   1267       // Check if the run should be moved to non_full_runs_ or
   1268       // free_page_runs_.
   1269       std::set<Run*>* non_full_runs = &non_full_runs_[idx];
   1270       std::unordered_set<Run*, hash_run, eq_run>* full_runs =
   1271           kIsDebugBuild ? &full_runs_[idx] : NULL;
   1272       if (run->IsAllFree()) {
   1273         // It has just become completely free. Free the pages of the
   1274         // run.
   1275         bool run_was_current = run == current_runs_[idx];
   1276         if (run_was_current) {
   1277           DCHECK(full_runs->find(run) == full_runs->end());
   1278           DCHECK(non_full_runs->find(run) == non_full_runs->end());
   1279           // If it was a current run, reuse it.
   1280         } else if (run_was_full) {
   1281           // If it was full, remove it from the full run set (debug
   1282           // only.)
   1283           if (kIsDebugBuild) {
   1284             std::unordered_set<Run*, hash_run, eq_run>::iterator pos = full_runs->find(run);
   1285             DCHECK(pos != full_runs->end());
   1286             full_runs->erase(pos);
   1287             if (kTraceRosAlloc) {
   1288               LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
   1289                         << reinterpret_cast<intptr_t>(run)
   1290                         << " from full_runs_";
   1291             }
   1292             DCHECK(full_runs->find(run) == full_runs->end());
   1293           }
   1294         } else {
    1295           // If it was in the non-full run set, remove it from the set.
   1296           DCHECK(full_runs->find(run) == full_runs->end());
   1297           DCHECK(non_full_runs->find(run) != non_full_runs->end());
   1298           non_full_runs->erase(run);
   1299           if (kTraceRosAlloc) {
   1300             LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
   1301                       << reinterpret_cast<intptr_t>(run)
   1302                       << " from non_full_runs_";
   1303           }
   1304           DCHECK(non_full_runs->find(run) == non_full_runs->end());
   1305         }
   1306         if (!run_was_current) {
   1307           run->ZeroHeader();
   1308           MutexLock mu(self, lock_);
   1309           FreePages(self, run, true);
   1310         }
   1311       } else {
    1312         // It is not completely free. If it isn't the current run and
    1313         // wasn't already in the non-full run set (i.e., it was full),
    1314         // insert it into the non-full run set.
   1315         if (run == current_runs_[idx]) {
   1316           DCHECK(non_full_runs->find(run) == non_full_runs->end());
   1317           DCHECK(full_runs->find(run) == full_runs->end());
   1318           // If it was a current run, keep it.
   1319         } else if (run_was_full) {
   1320           // If it was full, remove it from the full run set (debug
   1321           // only) and insert into the non-full run set.
   1322           DCHECK(full_runs->find(run) != full_runs->end());
   1323           DCHECK(non_full_runs->find(run) == non_full_runs->end());
   1324           if (kIsDebugBuild) {
   1325             full_runs->erase(run);
   1326             if (kTraceRosAlloc) {
   1327               LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
   1328                         << reinterpret_cast<intptr_t>(run)
   1329                         << " from full_runs_";
   1330             }
   1331           }
   1332           non_full_runs->insert(run);
   1333           if (kTraceRosAlloc) {
   1334             LOG(INFO) << "RosAlloc::BulkFree() : Inserted run 0x" << std::hex
   1335                       << reinterpret_cast<intptr_t>(run)
    1336                       << " into non_full_runs_[" << std::dec << idx << "]";
   1337           }
   1338         } else {
    1339           // It was not full, so leave it in the non-full run set.
   1340           DCHECK(full_runs->find(run) == full_runs->end());
   1341           DCHECK(non_full_runs->find(run) != non_full_runs->end());
   1342         }
   1343       }
   1344     }
   1345   }
   1346   return freed_bytes;
   1347 }
   1348 
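         // Renders one line per page map entry, e.g. "[5]=Large (start)" followed by
         // "[6]=Large (part)" for a two-page large object. The caller must hold lock_ (asserted below).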
   1349 std::string RosAlloc::DumpPageMap() {
   1350   std::ostringstream stream;
   1351   stream << "RosAlloc PageMap: " << std::endl;
   1352   lock_.AssertHeld(Thread::Current());
   1353   size_t end = page_map_size_;
   1354   FreePageRun* curr_fpr = NULL;
   1355   size_t curr_fpr_size = 0;
   1356   size_t remaining_curr_fpr_size = 0;
   1357   size_t num_running_empty_pages = 0;
   1358   for (size_t i = 0; i < end; ++i) {
   1359     byte pm = page_map_[i];
   1360     switch (pm) {
   1361       case kPageMapReleased:
   1362         // Fall-through.
   1363       case kPageMapEmpty: {
   1364         FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
   1365         if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
   1366           // Encountered a fresh free page run.
   1367           DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
   1368           DCHECK(fpr->IsFree());
   1369           DCHECK(curr_fpr == NULL);
   1370           DCHECK_EQ(curr_fpr_size, static_cast<size_t>(0));
   1371           curr_fpr = fpr;
   1372           curr_fpr_size = fpr->ByteSize(this);
   1373           DCHECK_EQ(curr_fpr_size % kPageSize, static_cast<size_t>(0));
   1374           remaining_curr_fpr_size = curr_fpr_size - kPageSize;
   1375           stream << "[" << i << "]=" << (pm == kPageMapReleased ? "Released" : "Empty")
   1376                  << " (FPR start) fpr_size=" << curr_fpr_size
   1377                  << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
   1378           if (remaining_curr_fpr_size == 0) {
   1379             // Reset at the end of the current free page run.
   1380             curr_fpr = NULL;
   1381             curr_fpr_size = 0;
   1382           }
   1383           stream << "curr_fpr=0x" << std::hex << reinterpret_cast<intptr_t>(curr_fpr) << std::endl;
   1384           DCHECK_EQ(num_running_empty_pages, static_cast<size_t>(0));
   1385         } else {
   1386           // Still part of the current free page run.
   1387           DCHECK_NE(num_running_empty_pages, static_cast<size_t>(0));
   1388           DCHECK(curr_fpr != NULL && curr_fpr_size > 0 && remaining_curr_fpr_size > 0);
   1389           DCHECK_EQ(remaining_curr_fpr_size % kPageSize, static_cast<size_t>(0));
   1390           DCHECK_GE(remaining_curr_fpr_size, static_cast<size_t>(kPageSize));
   1391           remaining_curr_fpr_size -= kPageSize;
    1392           stream << "[" << i << "]=" << (pm == kPageMapReleased ? "Released" : "Empty") << " (FPR part)"
   1393                  << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
   1394           if (remaining_curr_fpr_size == 0) {
   1395             // Reset at the end of the current free page run.
   1396             curr_fpr = NULL;
   1397             curr_fpr_size = 0;
   1398           }
   1399         }
   1400         num_running_empty_pages++;
   1401         break;
   1402       }
   1403       case kPageMapLargeObject: {
   1404         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
   1405         num_running_empty_pages = 0;
   1406         stream << "[" << i << "]=Large (start)" << std::endl;
   1407         break;
   1408       }
   1409       case kPageMapLargeObjectPart:
   1410         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
   1411         num_running_empty_pages = 0;
   1412         stream << "[" << i << "]=Large (part)" << std::endl;
   1413         break;
   1414       case kPageMapRun: {
   1415         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
   1416         num_running_empty_pages = 0;
   1417         Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
   1418         size_t idx = run->size_bracket_idx_;
   1419         stream << "[" << i << "]=Run (start)"
   1420                << " idx=" << idx
   1421                << " numOfPages=" << numOfPages[idx]
   1422                << " is_thread_local=" << run->is_thread_local_
   1423                << " is_all_free=" << (run->IsAllFree() ? 1 : 0)
   1424                << std::endl;
   1425         break;
   1426       }
   1427       case kPageMapRunPart:
   1428         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
   1429         num_running_empty_pages = 0;
   1430         stream << "[" << i << "]=Run (part)" << std::endl;
   1431         break;
   1432       default:
    1433         stream << "[" << i << "]=Unrecognizable page map type: " << static_cast<int>(pm) << std::endl;
   1434         break;
   1435     }
   1436   }
   1437   return stream.str();
   1438 }
   1439 
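         // Returns the usable size of the allocation containing ptr: the whole page span for a large
         // object, or the bracket size for a run slot.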
   1440 size_t RosAlloc::UsableSize(void* ptr) {
   1441   DCHECK_LE(base_, ptr);
   1442   DCHECK_LT(ptr, base_ + footprint_);
   1443   size_t pm_idx = RoundDownToPageMapIndex(ptr);
   1444   MutexLock mu(Thread::Current(), lock_);
   1445   switch (page_map_[pm_idx]) {
   1446     case kPageMapReleased:
   1447       // Fall-through.
   1448     case kPageMapEmpty:
   1449       LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << ": pm_idx=" << pm_idx << ", ptr="
   1450                  << std::hex << reinterpret_cast<intptr_t>(ptr);
   1451       break;
   1452     case kPageMapLargeObject: {
   1453       size_t num_pages = 1;
   1454       size_t idx = pm_idx + 1;
   1455       size_t end = page_map_size_;
   1456       while (idx < end && page_map_[idx] == kPageMapLargeObjectPart) {
   1457         num_pages++;
   1458         idx++;
   1459       }
   1460       return num_pages * kPageSize;
   1461     }
   1462     case kPageMapLargeObjectPart:
   1463       LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << ": pm_idx=" << pm_idx << ", ptr="
   1464                  << std::hex << reinterpret_cast<intptr_t>(ptr);
   1465       break;
   1466     case kPageMapRun:
   1467     case kPageMapRunPart: {
   1468       // Find the beginning of the run.
   1469       while (page_map_[pm_idx] != kPageMapRun) {
   1470         pm_idx--;
   1471         DCHECK_LT(pm_idx, capacity_ / kPageSize);
   1472       }
   1473       DCHECK_EQ(page_map_[pm_idx], kPageMapRun);
   1474       Run* run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
   1475       DCHECK_EQ(run->magic_num_, kMagicNum);
   1476       size_t idx = run->size_bracket_idx_;
   1477       size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
   1478           - (reinterpret_cast<byte*>(run) + headerSizes[idx]);
   1479       DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
   1480       return IndexToBracketSize(idx);
   1481     }
   1482     default: {
   1483       LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
   1484       break;
   1485     }
   1486   }
   1487   return 0;
   1488 }
   1489 
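         // Releases the trailing free page run, if any, back through art_heap_rosalloc_morecore() and
         // shrinks the footprint and the page map accordingly. Returns true if the footprint shrank.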
   1490 bool RosAlloc::Trim() {
   1491   MutexLock mu(Thread::Current(), lock_);
   1492   FreePageRun* last_free_page_run;
   1493   DCHECK_EQ(footprint_ % kPageSize, static_cast<size_t>(0));
   1494   auto it = free_page_runs_.rbegin();
   1495   if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
   1496     // Remove the last free page run, if any.
   1497     DCHECK(last_free_page_run->IsFree());
   1498     DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
   1499     DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
   1500     DCHECK_EQ(last_free_page_run->End(this), base_ + footprint_);
   1501     free_page_runs_.erase(last_free_page_run);
   1502     size_t decrement = last_free_page_run->ByteSize(this);
   1503     size_t new_footprint = footprint_ - decrement;
   1504     DCHECK_EQ(new_footprint % kPageSize, static_cast<size_t>(0));
   1505     size_t new_num_of_pages = new_footprint / kPageSize;
   1506     DCHECK_GE(page_map_size_, new_num_of_pages);
   1507     // Zero out the tail of the page map.
   1508     byte* zero_begin = const_cast<byte*>(page_map_) + new_num_of_pages;
   1509     byte* madvise_begin = AlignUp(zero_begin, kPageSize);
   1510     DCHECK_LE(madvise_begin, page_map_mem_map_->End());
   1511     size_t madvise_size = page_map_mem_map_->End() - madvise_begin;
   1512     if (madvise_size > 0) {
   1513       DCHECK_ALIGNED(madvise_begin, kPageSize);
   1514       DCHECK_EQ(RoundUp(madvise_size, kPageSize), madvise_size);
   1515       if (!kMadviseZeroes) {
   1516         memset(madvise_begin, 0, madvise_size);
   1517       }
   1518       CHECK_EQ(madvise(madvise_begin, madvise_size, MADV_DONTNEED), 0);
   1519     }
    1520     if (madvise_begin > zero_begin) {
   1521       memset(zero_begin, 0, madvise_begin - zero_begin);
   1522     }
   1523     page_map_size_ = new_num_of_pages;
   1524     free_page_run_size_map_.resize(new_num_of_pages);
   1525     DCHECK_EQ(free_page_run_size_map_.size(), new_num_of_pages);
   1526     art_heap_rosalloc_morecore(this, -(static_cast<intptr_t>(decrement)));
   1527     if (kTraceRosAlloc) {
   1528       LOG(INFO) << "RosAlloc::Trim() : decreased the footprint from "
   1529                 << footprint_ << " to " << new_footprint;
   1530     }
   1531     DCHECK_LT(new_footprint, footprint_);
   1532     DCHECK_LT(new_footprint, capacity_);
   1533     footprint_ = new_footprint;
   1534     return true;
   1535   }
   1536   return false;
   1537 }
   1538 
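         // Walks the page map and invokes the handler for every free page run, large object, and run
         // slot, passing used_bytes == 0 for free regions. A minimal usage sketch (hypothetical caller;
         // assumes the GC already holds whatever locks it needs for a consistent view):
         //   size_t bytes = 0;
         //   rosalloc->InspectAll(RosAlloc::BytesAllocatedCallback, &bytes);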
   1539 void RosAlloc::InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
   1540                           void* arg) {
   1541   // Note: no need to use this to release pages as we already do so in FreePages().
   1542   if (handler == NULL) {
   1543     return;
   1544   }
   1545   MutexLock mu(Thread::Current(), lock_);
   1546   size_t pm_end = page_map_size_;
   1547   size_t i = 0;
   1548   while (i < pm_end) {
   1549     byte pm = page_map_[i];
   1550     switch (pm) {
   1551       case kPageMapReleased:
   1552         // Fall-through.
   1553       case kPageMapEmpty: {
   1554         // The start of a free page run.
   1555         FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
   1556         DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
   1557         size_t fpr_size = fpr->ByteSize(this);
   1558         DCHECK(IsAligned<kPageSize>(fpr_size));
   1559         void* start = fpr;
   1560         if (kIsDebugBuild) {
   1561           // In the debug build, the first page of a free page run
   1562           // contains a magic number for debugging. Exclude it.
   1563           start = reinterpret_cast<byte*>(fpr) + kPageSize;
   1564         }
   1565         void* end = reinterpret_cast<byte*>(fpr) + fpr_size;
   1566         handler(start, end, 0, arg);
   1567         size_t num_pages = fpr_size / kPageSize;
   1568         if (kIsDebugBuild) {
   1569           for (size_t j = i + 1; j < i + num_pages; ++j) {
   1570             DCHECK(IsFreePage(j));
   1571           }
   1572         }
   1573         i += fpr_size / kPageSize;
   1574         DCHECK_LE(i, pm_end);
   1575         break;
   1576       }
   1577       case kPageMapLargeObject: {
   1578         // The start of a large object.
   1579         size_t num_pages = 1;
   1580         size_t idx = i + 1;
   1581         while (idx < pm_end && page_map_[idx] == kPageMapLargeObjectPart) {
   1582           num_pages++;
   1583           idx++;
   1584         }
   1585         void* start = base_ + i * kPageSize;
   1586         void* end = base_ + (i + num_pages) * kPageSize;
   1587         size_t used_bytes = num_pages * kPageSize;
   1588         handler(start, end, used_bytes, arg);
   1589         if (kIsDebugBuild) {
   1590           for (size_t j = i + 1; j < i + num_pages; ++j) {
   1591             DCHECK_EQ(page_map_[j], kPageMapLargeObjectPart);
   1592           }
   1593         }
   1594         i += num_pages;
   1595         DCHECK_LE(i, pm_end);
   1596         break;
   1597       }
   1598       case kPageMapLargeObjectPart:
   1599         LOG(FATAL) << "Unreachable - page map type: " << pm;
   1600         break;
   1601       case kPageMapRun: {
   1602         // The start of a run.
   1603         Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
   1604         DCHECK_EQ(run->magic_num_, kMagicNum);
    1605         // The dedicated full run doesn't contain any real allocations and never appears in the
    1606         // page map (it lives in static storage), so it is not visited here.
   1607         run->InspectAllSlots(handler, arg);
   1608         size_t num_pages = numOfPages[run->size_bracket_idx_];
   1609         if (kIsDebugBuild) {
   1610           for (size_t j = i + 1; j < i + num_pages; ++j) {
   1611             DCHECK_EQ(page_map_[j], kPageMapRunPart);
   1612           }
   1613         }
   1614         i += num_pages;
   1615         DCHECK_LE(i, pm_end);
   1616         break;
   1617       }
   1618       case kPageMapRunPart:
   1619         LOG(FATAL) << "Unreachable - page map type: " << pm;
   1620         break;
   1621       default:
   1622         LOG(FATAL) << "Unreachable - page map type: " << pm;
   1623         break;
   1624     }
   1625   }
   1626 }
   1627 
   1628 size_t RosAlloc::Footprint() {
   1629   MutexLock mu(Thread::Current(), lock_);
   1630   return footprint_;
   1631 }
   1632 
   1633 size_t RosAlloc::FootprintLimit() {
   1634   MutexLock mu(Thread::Current(), lock_);
   1635   return capacity_;
   1636 }
   1637 
   1638 void RosAlloc::SetFootprintLimit(size_t new_capacity) {
   1639   MutexLock mu(Thread::Current(), lock_);
   1640   DCHECK_EQ(RoundUp(new_capacity, kPageSize), new_capacity);
   1641   // Only growing is supported here. But Trim() is supported.
   1642   if (capacity_ < new_capacity) {
   1643     CHECK_LE(new_capacity, max_capacity_);
   1644     capacity_ = new_capacity;
   1645     VLOG(heap) << "new capacity=" << capacity_;
   1646   }
   1647 }
   1648 
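         // Detaches the given thread's thread-local runs, replacing each with the dedicated full run,
         // merges their pending thread-local and bulk free bits into the alloc bit maps, and re-files
         // the runs via RevokeRun().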
   1649 void RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
   1650   Thread* self = Thread::Current();
   1651   // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
   1652   ReaderMutexLock wmu(self, bulk_free_lock_);
   1653   for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
   1654     MutexLock mu(self, *size_bracket_locks_[idx]);
   1655     Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
   1656     CHECK(thread_local_run != nullptr);
    1657     // Pointing at the dedicated full run means the run was already revoked.
   1658     DCHECK(thread_local_run->IsThreadLocal());
   1659     if (thread_local_run != dedicated_full_run_) {
   1660       thread->SetRosAllocRun(idx, dedicated_full_run_);
   1661       DCHECK_EQ(thread_local_run->magic_num_, kMagicNum);
   1662       // Note the thread local run may not be full here.
   1663       bool dont_care;
   1664       thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&dont_care);
   1665       thread_local_run->SetIsThreadLocal(false);
   1666       thread_local_run->MergeBulkFreeBitMapIntoAllocBitMap();
   1667       DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
   1668       DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
   1669       RevokeRun(self, idx, thread_local_run);
   1670     }
   1671   }
   1672 }
   1673 
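         // Re-files a revoked run: into full_runs_ (debug builds only) if it is full, back to the free
         // page runs if it is completely free, or into non_full_runs_ otherwise.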
   1674 void RosAlloc::RevokeRun(Thread* self, size_t idx, Run* run) {
   1675   size_bracket_locks_[idx]->AssertHeld(self);
   1676   DCHECK(run != dedicated_full_run_);
   1677   if (run->IsFull()) {
   1678     if (kIsDebugBuild) {
   1679       full_runs_[idx].insert(run);
   1680       DCHECK(full_runs_[idx].find(run) != full_runs_[idx].end());
   1681       if (kTraceRosAlloc) {
   1682         LOG(INFO) << __PRETTY_FUNCTION__  << " : Inserted run 0x" << std::hex
   1683                   << reinterpret_cast<intptr_t>(run)
   1684                   << " into full_runs_[" << std::dec << idx << "]";
   1685       }
   1686     }
   1687   } else if (run->IsAllFree()) {
   1688     run->ZeroHeader();
   1689     MutexLock mu(self, lock_);
   1690     FreePages(self, run, true);
   1691   } else {
   1692     non_full_runs_[idx].insert(run);
   1693     DCHECK(non_full_runs_[idx].find(run) != non_full_runs_[idx].end());
   1694     if (kTraceRosAlloc) {
   1695       LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
   1696                 << reinterpret_cast<intptr_t>(run)
   1697                 << " into non_full_runs_[" << std::dec << idx << "]";
   1698     }
   1699   }
   1700 }
   1701 
   1702 void RosAlloc::RevokeThreadUnsafeCurrentRuns() {
   1703   // Revoke the current runs which share the same idx as thread local runs.
   1704   Thread* self = Thread::Current();
   1705   for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; ++idx) {
   1706     MutexLock mu(self, *size_bracket_locks_[idx]);
   1707     if (current_runs_[idx] != dedicated_full_run_) {
   1708       RevokeRun(self, idx, current_runs_[idx]);
   1709       current_runs_[idx] = dedicated_full_run_;
   1710     }
   1711   }
   1712 }
   1713 
   1714 void RosAlloc::RevokeAllThreadLocalRuns() {
    1715   // This is called when mutator threads won't allocate, such as at
    1716   // Zygote creation time or during a GC pause.
   1717   MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
   1718   MutexLock mu2(Thread::Current(), *Locks::thread_list_lock_);
   1719   std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
   1720   for (Thread* thread : thread_list) {
   1721     RevokeThreadLocalRuns(thread);
   1722   }
   1723   RevokeThreadUnsafeCurrentRuns();
   1724 }
   1725 
   1726 void RosAlloc::AssertThreadLocalRunsAreRevoked(Thread* thread) {
   1727   if (kIsDebugBuild) {
   1728     Thread* self = Thread::Current();
   1729     // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
   1730     ReaderMutexLock wmu(self, bulk_free_lock_);
   1731     for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
   1732       MutexLock mu(self, *size_bracket_locks_[idx]);
   1733       Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
   1734       DCHECK(thread_local_run == nullptr || thread_local_run == dedicated_full_run_);
   1735     }
   1736   }
   1737 }
   1738 
   1739 void RosAlloc::AssertAllThreadLocalRunsAreRevoked() {
   1740   if (kIsDebugBuild) {
   1741     Thread* self = Thread::Current();
   1742     MutexLock mu(self, *Locks::runtime_shutdown_lock_);
   1743     MutexLock mu2(self, *Locks::thread_list_lock_);
   1744     std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
   1745     for (Thread* t : thread_list) {
   1746       AssertThreadLocalRunsAreRevoked(t);
   1747     }
   1748     for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; ++idx) {
   1749       MutexLock mu(self, *size_bracket_locks_[idx]);
   1750       CHECK_EQ(current_runs_[idx], dedicated_full_run_);
   1751     }
   1752   }
   1753 }
   1754 
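         // Computes the static per-bracket tables (bracketSizes, numOfPages, numOfSlots, headerSizes and
         // the bit map offsets) and sets up the dedicated full run.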
   1755 void RosAlloc::Initialize() {
   1756   // bracketSizes.
   1757   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
   1758     if (i < kNumOfSizeBrackets - 2) {
   1759       bracketSizes[i] = 16 * (i + 1);
   1760     } else if (i == kNumOfSizeBrackets - 2) {
   1761       bracketSizes[i] = 1 * KB;
   1762     } else {
   1763       DCHECK_EQ(i, kNumOfSizeBrackets - 1);
   1764       bracketSizes[i] = 2 * KB;
   1765     }
   1766     if (kTraceRosAlloc) {
   1767       LOG(INFO) << "bracketSizes[" << i << "]=" << bracketSizes[i];
   1768     }
   1769   }
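           // This yields 16, 32, ..., 512 bytes in 16-byte steps for the first brackets, then 1 KB and
           // 2 KB for the last two.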
   1770   // numOfPages.
   1771   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
   1772     if (i < 4) {
   1773       numOfPages[i] = 1;
   1774     } else if (i < 8) {
   1775       numOfPages[i] = 1;
   1776     } else if (i < 16) {
   1777       numOfPages[i] = 4;
   1778     } else if (i < 32) {
   1779       numOfPages[i] = 8;
   1780     } else if (i == 32) {
   1781       DCHECK_EQ(i, kNumOfSizeBrackets - 2);
   1782       numOfPages[i] = 16;
   1783     } else {
   1784       DCHECK_EQ(i, kNumOfSizeBrackets - 1);
   1785       numOfPages[i] = 32;
   1786     }
   1787     if (kTraceRosAlloc) {
   1788       LOG(INFO) << "numOfPages[" << i << "]=" << numOfPages[i];
   1789     }
   1790   }
   1791   // Compute numOfSlots and slotOffsets.
   1792   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
   1793     size_t bracket_size = bracketSizes[i];
   1794     size_t run_size = kPageSize * numOfPages[i];
   1795     size_t max_num_of_slots = run_size / bracket_size;
   1796     // Compute the actual number of slots by taking the header and
   1797     // alignment into account.
   1798     size_t fixed_header_size = RoundUp(Run::fixed_header_size(), sizeof(uint32_t));
   1799     DCHECK_EQ(fixed_header_size, static_cast<size_t>(8));
   1800     size_t header_size = 0;
   1801     size_t bulk_free_bit_map_offset = 0;
   1802     size_t thread_local_free_bit_map_offset = 0;
   1803     size_t num_of_slots = 0;
   1804     // Search for the maximum number of slots that allows enough space
   1805     // for the header (including the bit maps.)
   1806     for (int s = max_num_of_slots; s >= 0; s--) {
   1807       size_t tmp_slots_size = bracket_size * s;
   1808       size_t tmp_bit_map_size = RoundUp(s, sizeof(uint32_t) * kBitsPerByte) / kBitsPerByte;
   1809       size_t tmp_bulk_free_bit_map_size = tmp_bit_map_size;
   1810       size_t tmp_bulk_free_bit_map_off = fixed_header_size + tmp_bit_map_size;
   1811       size_t tmp_thread_local_free_bit_map_size = tmp_bit_map_size;
   1812       size_t tmp_thread_local_free_bit_map_off = tmp_bulk_free_bit_map_off + tmp_bulk_free_bit_map_size;
   1813       size_t tmp_unaligned_header_size = tmp_thread_local_free_bit_map_off + tmp_thread_local_free_bit_map_size;
   1814       // Align up the unaligned header size. bracket_size may not be a power of two.
   1815       size_t tmp_header_size = (tmp_unaligned_header_size % bracket_size == 0) ?
   1816           tmp_unaligned_header_size :
   1817           tmp_unaligned_header_size + (bracket_size - tmp_unaligned_header_size % bracket_size);
   1818       DCHECK_EQ(tmp_header_size % bracket_size, static_cast<size_t>(0));
   1819       DCHECK_EQ(tmp_header_size % 8, static_cast<size_t>(0));
   1820       if (tmp_slots_size + tmp_header_size <= run_size) {
   1821         // Found the right number of slots, that is, there was enough
   1822         // space for the header (including the bit maps.)
   1823         num_of_slots = s;
   1824         header_size = tmp_header_size;
   1825         bulk_free_bit_map_offset = tmp_bulk_free_bit_map_off;
   1826         thread_local_free_bit_map_offset = tmp_thread_local_free_bit_map_off;
   1827         break;
   1828       }
   1829     }
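             // Worked example (assuming 4 KB pages): for the 16-byte bracket, a one-page run needs an
             // 8-byte fixed header plus three 32-byte bit maps = 104 bytes, aligned up to 112, leaving
             // room for 249 slots (112 + 249 * 16 == 4096).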
   1830     DCHECK(num_of_slots > 0 && header_size > 0 && bulk_free_bit_map_offset > 0);
   1831     // Add the padding for the alignment remainder.
   1832     header_size += run_size % bracket_size;
   1833     DCHECK_EQ(header_size + num_of_slots * bracket_size, run_size);
   1834     numOfSlots[i] = num_of_slots;
   1835     headerSizes[i] = header_size;
   1836     bulkFreeBitMapOffsets[i] = bulk_free_bit_map_offset;
   1837     threadLocalFreeBitMapOffsets[i] = thread_local_free_bit_map_offset;
   1838     if (kTraceRosAlloc) {
   1839       LOG(INFO) << "numOfSlots[" << i << "]=" << numOfSlots[i]
   1840                 << ", headerSizes[" << i << "]=" << headerSizes[i]
   1841                 << ", bulkFreeBitMapOffsets[" << i << "]=" << bulkFreeBitMapOffsets[i]
    1842                 << ", threadLocalFreeBitMapOffsets[" << i << "]=" << threadLocalFreeBitMapOffsets[i];
   1843     }
   1844   }
    1845   // Set up the dedicated full run: fill its alloc bitmap so nobody can successfully allocate from it.
   1846   if (kIsDebugBuild) {
   1847     dedicated_full_run_->magic_num_ = kMagicNum;
   1848   }
   1849   // It doesn't matter which size bracket we use since the main goal is to have the allocation
   1850   // fail 100% of the time you attempt to allocate into the dedicated full run.
   1851   dedicated_full_run_->size_bracket_idx_ = 0;
   1852   dedicated_full_run_->FillAllocBitMap();
   1853   dedicated_full_run_->SetIsThreadLocal(true);
   1854 }
   1855 
   1856 void RosAlloc::BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) {
   1857   if (used_bytes == 0) {
   1858     return;
   1859   }
   1860   size_t* bytes_allocated = reinterpret_cast<size_t*>(arg);
   1861   *bytes_allocated += used_bytes;
   1862 }
   1863 
   1864 void RosAlloc::ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) {
   1865   if (used_bytes == 0) {
   1866     return;
   1867   }
   1868   size_t* objects_allocated = reinterpret_cast<size_t*>(arg);
   1869   ++(*objects_allocated);
   1870 }
   1871 
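         // Heavyweight consistency check, called with the mutator lock exclusively held: validates the
         // page map, the free page run set, the thread-local and current runs, and then each run.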
   1872 void RosAlloc::Verify() {
   1873   Thread* self = Thread::Current();
   1874   CHECK(Locks::mutator_lock_->IsExclusiveHeld(self))
    1875       << "The mutator lock isn't exclusively held at " << __PRETTY_FUNCTION__;
   1876   MutexLock mu(self, *Locks::thread_list_lock_);
   1877   ReaderMutexLock wmu(self, bulk_free_lock_);
   1878   std::vector<Run*> runs;
   1879   {
   1880     MutexLock mu(self, lock_);
   1881     size_t pm_end = page_map_size_;
   1882     size_t i = 0;
   1883     while (i < pm_end) {
   1884       byte pm = page_map_[i];
   1885       switch (pm) {
   1886         case kPageMapReleased:
   1887           // Fall-through.
   1888         case kPageMapEmpty: {
   1889           // The start of a free page run.
   1890           FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
   1891           DCHECK_EQ(fpr->magic_num_, kMagicNumFree);
   1892           CHECK(free_page_runs_.find(fpr) != free_page_runs_.end())
   1893               << "An empty page must belong to the free page run set";
   1894           size_t fpr_size = fpr->ByteSize(this);
   1895           CHECK(IsAligned<kPageSize>(fpr_size))
   1896               << "A free page run size isn't page-aligned : " << fpr_size;
   1897           size_t num_pages = fpr_size / kPageSize;
   1898           CHECK_GT(num_pages, static_cast<uintptr_t>(0))
   1899               << "A free page run size must be > 0 : " << fpr_size;
   1900           for (size_t j = i + 1; j < i + num_pages; ++j) {
   1901             CHECK(IsFreePage(j))
   1902                 << "A mismatch between the page map table for kPageMapEmpty "
   1903                 << " at page index " << j
   1904                 << " and the free page run size : page index range : "
   1905                 << i << " to " << (i + num_pages) << std::endl << DumpPageMap();
   1906           }
   1907           i += num_pages;
   1908           CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
   1909                               << std::endl << DumpPageMap();
   1910           break;
   1911         }
   1912         case kPageMapLargeObject: {
   1913           // The start of a large object.
   1914           size_t num_pages = 1;
   1915           size_t idx = i + 1;
   1916           while (idx < pm_end && page_map_[idx] == kPageMapLargeObjectPart) {
   1917             num_pages++;
   1918             idx++;
   1919           }
   1920           void* start = base_ + i * kPageSize;
   1921           mirror::Object* obj = reinterpret_cast<mirror::Object*>(start);
   1922           size_t obj_size = obj->SizeOf();
   1923           CHECK_GT(obj_size, kLargeSizeThreshold)
   1924               << "A rosalloc large object size must be > " << kLargeSizeThreshold;
   1925           CHECK_EQ(num_pages, RoundUp(obj_size, kPageSize) / kPageSize)
   1926               << "A rosalloc large object size " << obj_size
   1927               << " does not match the page map table " << (num_pages * kPageSize)
   1928               << std::endl << DumpPageMap();
   1929           i += num_pages;
   1930           CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
   1931                               << std::endl << DumpPageMap();
   1932           break;
   1933         }
   1934         case kPageMapLargeObjectPart:
   1935           LOG(FATAL) << "Unreachable - page map type: " << pm << std::endl << DumpPageMap();
   1936           break;
   1937         case kPageMapRun: {
   1938           // The start of a run.
   1939           Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
   1940           DCHECK_EQ(run->magic_num_, kMagicNum);
   1941           size_t idx = run->size_bracket_idx_;
   1942           CHECK_LT(idx, kNumOfSizeBrackets) << "Out of range size bracket index : " << idx;
   1943           size_t num_pages = numOfPages[idx];
   1944           CHECK_GT(num_pages, static_cast<uintptr_t>(0))
   1945               << "Run size must be > 0 : " << num_pages;
   1946           for (size_t j = i + 1; j < i + num_pages; ++j) {
   1947             CHECK_EQ(page_map_[j], kPageMapRunPart)
   1948                 << "A mismatch between the page map table for kPageMapRunPart "
   1949                 << " at page index " << j
   1950                 << " and the run size : page index range " << i << " to " << (i + num_pages)
   1951                 << std::endl << DumpPageMap();
   1952           }
   1953           // Don't verify the dedicated_full_run_ since it doesn't have any real allocations.
   1954           runs.push_back(run);
   1955           i += num_pages;
   1956           CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
   1957                               << std::endl << DumpPageMap();
   1958           break;
   1959         }
   1960         case kPageMapRunPart:
   1961           // Fall-through.
   1962         default:
   1963           LOG(FATAL) << "Unreachable - page map type: " << pm << std::endl << DumpPageMap();
   1964           break;
   1965       }
   1966     }
   1967   }
   1968   std::list<Thread*> threads = Runtime::Current()->GetThreadList()->GetList();
   1969   for (Thread* thread : threads) {
   1970     for (size_t i = 0; i < kNumThreadLocalSizeBrackets; ++i) {
   1971       MutexLock mu(self, *size_bracket_locks_[i]);
   1972       Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(i));
   1973       CHECK(thread_local_run != nullptr);
   1974       CHECK(thread_local_run->IsThreadLocal());
   1975       CHECK(thread_local_run == dedicated_full_run_ ||
   1976             thread_local_run->size_bracket_idx_ == i);
   1977     }
   1978   }
   1979   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
   1980     MutexLock mu(self, *size_bracket_locks_[i]);
   1981     Run* current_run = current_runs_[i];
   1982     CHECK(current_run != nullptr);
   1983     if (current_run != dedicated_full_run_) {
   1984       // The dedicated full run is currently marked as thread local.
   1985       CHECK(!current_run->IsThreadLocal());
   1986       CHECK_EQ(current_run->size_bracket_idx_, i);
   1987     }
   1988   }
    1989   // Call Run::Verify() here, after lock_ has been released above, to respect the lock order.
   1990   for (auto& run : runs) {
   1991     run->Verify(self, this);
   1992   }
   1993 }
   1994 
   1995 void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc) {
   1996   DCHECK_EQ(magic_num_, kMagicNum) << "Bad magic number : " << Dump();
   1997   const size_t idx = size_bracket_idx_;
   1998   CHECK_LT(idx, kNumOfSizeBrackets) << "Out of range size bracket index : " << Dump();
   1999   byte* slot_base = reinterpret_cast<byte*>(this) + headerSizes[idx];
   2000   const size_t num_slots = numOfSlots[idx];
   2001   const size_t num_vec = RoundUp(num_slots, 32) / 32;
   2002   CHECK_GT(num_vec, 0U);
   2003   size_t bracket_size = IndexToBracketSize(idx);
   2004   CHECK_EQ(slot_base + num_slots * bracket_size,
   2005            reinterpret_cast<byte*>(this) + numOfPages[idx] * kPageSize)
   2006       << "Mismatch in the end address of the run " << Dump();
   2007   // Check that the bulk free bitmap is clean. It's only used during BulkFree().
   2008   CHECK(IsBulkFreeBitmapClean()) << "The bulk free bit map isn't clean " << Dump();
   2009   uint32_t last_word_mask = GetBitmapLastVectorMask(num_slots, num_vec);
   2010   // Make sure all the bits at the end of the run are set so that we don't allocate there.
   2011   CHECK_EQ(alloc_bit_map_[num_vec - 1] & last_word_mask, last_word_mask);
   2012   // Ensure that the first bitmap index is valid.
   2013   CHECK_LT(first_search_vec_idx_, num_vec);
   2014   // Check the thread local runs, the current runs, and the run sets.
   2015   if (IsThreadLocal()) {
   2016     // If it's a thread local run, then it must be pointed to by an owner thread.
   2017     bool owner_found = false;
   2018     std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
   2019     for (auto it = thread_list.begin(); it != thread_list.end(); ++it) {
   2020       Thread* thread = *it;
   2021       for (size_t i = 0; i < kNumThreadLocalSizeBrackets; i++) {
   2022         MutexLock mu(self, *rosalloc->size_bracket_locks_[i]);
   2023         Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(i));
   2024         if (thread_local_run == this) {
   2025           CHECK(!owner_found)
   2026               << "A thread local run has more than one owner thread " << Dump();
   2027           CHECK_EQ(i, idx)
   2028               << "A mismatching size bracket index in a thread local run " << Dump();
   2029           owner_found = true;
   2030         }
   2031       }
   2032     }
   2033     CHECK(owner_found) << "A thread local run has no owner thread " << Dump();
   2034   } else {
   2035     // If it's not thread local, check that the thread local free bitmap is clean.
   2036     CHECK(IsThreadLocalFreeBitmapClean())
   2037         << "A non-thread-local run's thread local free bitmap isn't clean "
   2038         << Dump();
   2039     // Check if it's a current run for the size bucket.
   2040     bool is_current_run = false;
   2041     for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
   2042       MutexLock mu(self, *rosalloc->size_bracket_locks_[i]);
   2043       Run* current_run = rosalloc->current_runs_[i];
   2044       if (idx == i) {
   2045         if (this == current_run) {
   2046           is_current_run = true;
   2047         }
   2048       } else {
   2049         // If the size bucket index does not match, then it must not
   2050         // be a current run.
   2051         CHECK_NE(this, current_run)
   2052             << "A current run points to a run with a wrong size bracket index " << Dump();
   2053       }
   2054     }
    2055     // If it's neither a thread local nor a current run, then it must be
   2056     // in a run set.
   2057     if (!is_current_run) {
   2058       MutexLock mu(self, rosalloc->lock_);
   2059       std::set<Run*>& non_full_runs = rosalloc->non_full_runs_[idx];
   2060       // If it's all free, it must be a free page run rather than a run.
   2061       CHECK(!IsAllFree()) << "A free run must be in a free page run set " << Dump();
   2062       if (!IsFull()) {
    2063         // If it's not full, it must be in the non-full run set.
   2064         CHECK(non_full_runs.find(this) != non_full_runs.end())
   2065             << "A non-full run isn't in the non-full run set " << Dump();
   2066       } else {
    2067         // If it's full, it must be in the full run set (debug build only).
   2068         if (kIsDebugBuild) {
   2069           std::unordered_set<Run*, hash_run, eq_run>& full_runs = rosalloc->full_runs_[idx];
   2070           CHECK(full_runs.find(this) != full_runs.end())
   2071               << " A full run isn't in the full run set " << Dump();
   2072         }
   2073       }
   2074     }
   2075   }
   2076   // Check each slot.
   2077   size_t slots = 0;
   2078   for (size_t v = 0; v < num_vec; v++, slots += 32) {
   2079     DCHECK_GE(num_slots, slots) << "Out of bounds";
   2080     uint32_t vec = alloc_bit_map_[v];
   2081     uint32_t thread_local_free_vec = ThreadLocalFreeBitMap()[v];
   2082     size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
   2083     for (size_t i = 0; i < end; ++i) {
   2084       bool is_allocated = ((vec >> i) & 0x1) != 0;
   2085       // If a thread local run, slots may be marked freed in the
   2086       // thread local free bitmap.
   2087       bool is_thread_local_freed = IsThreadLocal() && ((thread_local_free_vec >> i) & 0x1) != 0;
   2088       if (is_allocated && !is_thread_local_freed) {
   2089         byte* slot_addr = slot_base + (slots + i) * bracket_size;
   2090         mirror::Object* obj = reinterpret_cast<mirror::Object*>(slot_addr);
   2091         size_t obj_size = obj->SizeOf();
   2092         CHECK_LE(obj_size, kLargeSizeThreshold)
   2093             << "A run slot contains a large object " << Dump();
   2094         CHECK_EQ(SizeToIndex(obj_size), idx)
   2095             << PrettyTypeOf(obj) << " "
   2096             << "obj_size=" << obj_size << ", idx=" << idx << " "
   2097             << "A run slot contains an object with wrong size " << Dump();
   2098       }
   2099     }
   2100   }
   2101 }
   2102 
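         // Scans the page map for free page runs and returns their pages to the kernel via
         // ReleasePageRange(). Returns the number of bytes newly marked as released.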
   2103 size_t RosAlloc::ReleasePages() {
   2104   VLOG(heap) << "RosAlloc::ReleasePages()";
   2105   DCHECK(!DoesReleaseAllPages());
   2106   Thread* self = Thread::Current();
   2107   size_t reclaimed_bytes = 0;
   2108   size_t i = 0;
   2109   // Check the page map size which might have changed due to grow/shrink.
   2110   while (i < page_map_size_) {
   2111     // Reading the page map without a lock is racy but the race is benign since it should only
   2112     // result in occasionally not releasing pages which we could release.
   2113     byte pm = page_map_[i];
   2114     switch (pm) {
   2115       case kPageMapReleased:
   2116         // Fall through.
   2117       case kPageMapEmpty: {
   2118         // This is currently the start of a free page run.
   2119         // Acquire the lock to prevent other threads racing in and modifying the page map.
   2120         MutexLock mu(self, lock_);
   2121         // Check that it's still empty after we acquired the lock since another thread could have
   2122         // raced in and placed an allocation here.
   2123         if (IsFreePage(i)) {
   2124           // Free page runs can start with a released page if we coalesced a released page free
   2125           // page run with an empty page run.
   2126           FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
   2127           // There is a race condition where FreePage can coalesce fpr with the previous
   2128           // free page run before we acquire lock_. In that case free_page_runs_.find will not find
   2129           // a run starting at fpr. To handle this race, we skip reclaiming the page range and go
   2130           // to the next page.
   2131           if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
   2132             size_t fpr_size = fpr->ByteSize(this);
   2133             DCHECK(IsAligned<kPageSize>(fpr_size));
   2134             byte* start = reinterpret_cast<byte*>(fpr);
   2135             reclaimed_bytes += ReleasePageRange(start, start + fpr_size);
   2136             size_t pages = fpr_size / kPageSize;
   2137             CHECK_GT(pages, 0U) << "Infinite loop probable";
   2138             i += pages;
   2139             DCHECK_LE(i, page_map_size_);
   2140             break;
   2141           }
   2142         }
   2143         // Fall through.
   2144       }
   2145       case kPageMapLargeObject:      // Fall through.
   2146       case kPageMapLargeObjectPart:  // Fall through.
   2147       case kPageMapRun:              // Fall through.
   2148       case kPageMapRunPart:          // Fall through.
   2149         ++i;
   2150         break;  // Skip.
   2151       default:
   2152         LOG(FATAL) << "Unreachable - page map type: " << pm;
   2153         break;
   2154     }
   2155   }
   2156   return reclaimed_bytes;
   2157 }
   2158 
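         // madvise()s [start, end) with MADV_DONTNEED (skipping the debug magic-number page) and flips
         // kPageMapEmpty entries to kPageMapReleased. Returns the number of bytes newly released.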
   2159 size_t RosAlloc::ReleasePageRange(byte* start, byte* end) {
   2160   DCHECK_ALIGNED(start, kPageSize);
   2161   DCHECK_ALIGNED(end, kPageSize);
   2162   DCHECK_LT(start, end);
   2163   if (kIsDebugBuild) {
   2164     // In the debug build, the first page of a free page run
   2165     // contains a magic number for debugging. Exclude it.
   2166     start += kPageSize;
   2167   }
   2168   if (!kMadviseZeroes) {
   2169     // TODO: Do this when we resurrect the page instead.
   2170     memset(start, 0, end - start);
   2171   }
   2172   CHECK_EQ(madvise(start, end - start, MADV_DONTNEED), 0);
   2173   size_t pm_idx = ToPageMapIndex(start);
   2174   size_t reclaimed_bytes = 0;
    2175   // Calculate reclaimed bytes and update the page map.
   2176   const size_t max_idx = pm_idx + (end - start) / kPageSize;
   2177   for (; pm_idx < max_idx; ++pm_idx) {
   2178     DCHECK(IsFreePage(pm_idx));
   2179     if (page_map_[pm_idx] == kPageMapEmpty) {
   2180       // Mark the page as released and update how many bytes we released.
   2181       reclaimed_bytes += kPageSize;
   2182       page_map_[pm_idx] = kPageMapReleased;
   2183     }
   2184   }
   2185   return reclaimed_bytes;
   2186 }
   2187 
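         // Appends a diagnostic to os when a failed allocation of failed_alloc_bytes appears to be due
         // to fragmentation, i.e. when no contiguous free page run is large enough to satisfy it.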
   2188 void RosAlloc::LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) {
   2189   Thread* self = Thread::Current();
   2190   size_t largest_continuous_free_pages = 0;
   2191   WriterMutexLock wmu(self, bulk_free_lock_);
   2192   MutexLock mu(self, lock_);
   2193   for (FreePageRun* fpr : free_page_runs_) {
   2194     largest_continuous_free_pages = std::max(largest_continuous_free_pages,
   2195                                              fpr->ByteSize(this));
   2196   }
   2197   if (failed_alloc_bytes > kLargeSizeThreshold) {
   2198     // Large allocation.
   2199     size_t required_bytes = RoundUp(failed_alloc_bytes, kPageSize);
   2200     if (required_bytes > largest_continuous_free_pages) {
    2201       os << "; failed due to fragmentation (required contiguous free "
   2202          << required_bytes << " bytes where largest contiguous free "
   2203          <<  largest_continuous_free_pages << " bytes)";
   2204     }
   2205   } else {
   2206     // Non-large allocation.
   2207     size_t required_bytes = numOfPages[SizeToIndex(failed_alloc_bytes)] * kPageSize;
   2208     if (required_bytes > largest_continuous_free_pages) {
    2209       os << "; failed due to fragmentation (required contiguous free "
   2210          << required_bytes << " bytes for a new buffer where largest contiguous free "
   2211          <<  largest_continuous_free_pages << " bytes)";
   2212     }
   2213   }
   2214 }
   2215 
   2216 }  // namespace allocator
   2217 }  // namespace gc
   2218 }  // namespace art
   2219