/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
#define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_

#include <stdint.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <memory>
#include <set>
#include <string>
#include <unordered_set>
#include <vector>

#include <android-base/logging.h>

#include "base/allocator.h"
#include "base/bit_utils.h"
#include "base/mutex.h"
#include "globals.h"
#include "thread.h"

namespace art {

class MemMap;

namespace gc {
namespace allocator {

// A runs-of-slots memory allocator.
class RosAlloc {
 private:
  // Represents a run of free pages.
  class FreePageRun {
   public:
    uint8_t magic_num_;  // The magic number used for debugging only.

    bool IsFree() const {
      return !kIsDebugBuild || magic_num_ == kMagicNumFree;
    }
    size_t ByteSize(RosAlloc* rosalloc) const REQUIRES(rosalloc->lock_) {
      const uint8_t* fpr_base = reinterpret_cast<const uint8_t*>(this);
      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
      size_t byte_size = rosalloc->free_page_run_size_map_[pm_idx];
      DCHECK_GE(byte_size, static_cast<size_t>(0));
      DCHECK_ALIGNED(byte_size, kPageSize);
      return byte_size;
    }
    void SetByteSize(RosAlloc* rosalloc, size_t byte_size)
        REQUIRES(rosalloc->lock_) {
      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
      uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
      rosalloc->free_page_run_size_map_[pm_idx] = byte_size;
    }
    void* Begin() {
      return reinterpret_cast<void*>(this);
    }
    void* End(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
      uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
      uint8_t* end = fpr_base + ByteSize(rosalloc);
      return end;
    }
    bool IsLargerThanPageReleaseThreshold(RosAlloc* rosalloc)
        REQUIRES(rosalloc->lock_) {
      return ByteSize(rosalloc) >= rosalloc->page_release_size_threshold_;
    }
    bool IsAtEndOfSpace(RosAlloc* rosalloc)
        REQUIRES(rosalloc->lock_) {
      return reinterpret_cast<uint8_t*>(this) + ByteSize(rosalloc) == rosalloc->base_ + rosalloc->footprint_;
    }
    bool ShouldReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
      switch (rosalloc->page_release_mode_) {
        case kPageReleaseModeNone:
          return false;
        case kPageReleaseModeEnd:
          return IsAtEndOfSpace(rosalloc);
        case kPageReleaseModeSize:
          return IsLargerThanPageReleaseThreshold(rosalloc);
        case kPageReleaseModeSizeAndEnd:
          return IsLargerThanPageReleaseThreshold(rosalloc) && IsAtEndOfSpace(rosalloc);
        case kPageReleaseModeAll:
          return true;
        default:
          LOG(FATAL) << "Unexpected page release mode "
                     << static_cast<int>(rosalloc->page_release_mode_);
          return false;
      }
    }
    void ReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
      uint8_t* start = reinterpret_cast<uint8_t*>(this);
      size_t byte_size = ByteSize(rosalloc);
      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
      if (ShouldReleasePages(rosalloc)) {
        rosalloc->ReleasePageRange(start, start + byte_size);
      }
    }

   private:
    DISALLOW_COPY_AND_ASSIGN(FreePageRun);
  };
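
  // A minimal sketch (not part of the original source) of how the release decision composes for a
  // RosAlloc configured with kPageReleaseModeSizeAndEnd and the default 4 MB threshold ('rosalloc'
  // and 'fpr' are hypothetical):
  //
  //   FreePageRun* fpr = ...;
  //   // ReleasePages() releases the range only if both conditions hold:
  //   //   fpr->ByteSize(rosalloc) >= 4 * MB                             (size threshold)
  //   //   fpr->End(rosalloc) == rosalloc->base_ + rosalloc->footprint_  (at the end of the space)
  //   fpr->ReleasePages(rosalloc);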

  // The slot header.
  class Slot {
   public:
    Slot* Next() const {
      return next_;
    }
    void SetNext(Slot* next) {
      next_ = next;
    }
    // The slot right before this slot in terms of the address.
    Slot* Left(size_t bracket_size) {
      return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) - bracket_size);
    }
    void Clear() {
      next_ = nullptr;
    }

   private:
    Slot* next_;  // Next slot in the list.
    friend class RosAlloc;
  };

  // We use the tail (kUseTail == true) for the bulk and thread-local free lists to avoid the need
  // to traverse the list from the head to the tail when merging free lists.
  // We don't use the tail (kUseTail == false) for the allocation free list, so that the
  // allocation fast path does not have to manage the tail; this is purely for performance.
  template<bool kUseTail = true>
  class SlotFreeList {
   public:
    SlotFreeList() : head_(0U), tail_(0), size_(0), padding_(0) {}
    Slot* Head() const {
      return reinterpret_cast<Slot*>(head_);
    }
    Slot* Tail() const {
      CHECK(kUseTail);
      return reinterpret_cast<Slot*>(tail_);
    }
    size_t Size() const {
      return size_;
    }
    // Removes from the head of the free list.
    Slot* Remove() {
      Slot* slot;
      if (kIsDebugBuild) {
        Verify();
      }
      Slot** headp = reinterpret_cast<Slot**>(&head_);
      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
      Slot* old_head = *headp;
      if (old_head == nullptr) {
        // List was empty.
        if (kUseTail) {
          DCHECK(*tailp == nullptr);
        }
        return nullptr;
      } else {
        // List wasn't empty.
        if (kUseTail) {
          DCHECK(*tailp != nullptr);
        }
        Slot* old_head_next = old_head->Next();
        slot = old_head;
        *headp = old_head_next;
        if (kUseTail && old_head_next == nullptr) {
          // List becomes empty.
          *tailp = nullptr;
        }
      }
      slot->Clear();
      --size_;
      if (kIsDebugBuild) {
        Verify();
      }
      return slot;
    }
    void Add(Slot* slot) {
      if (kIsDebugBuild) {
        Verify();
      }
      DCHECK(slot != nullptr);
      DCHECK(slot->Next() == nullptr);
      Slot** headp = reinterpret_cast<Slot**>(&head_);
      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
      Slot* old_head = *headp;
      if (old_head == nullptr) {
        // List was empty.
        if (kUseTail) {
          DCHECK(*tailp == nullptr);
        }
        *headp = slot;
        if (kUseTail) {
          *tailp = slot;
        }
      } else {
        // List wasn't empty.
        if (kUseTail) {
          DCHECK(*tailp != nullptr);
        }
        *headp = slot;
        slot->SetNext(old_head);
      }
      ++size_;
      if (kIsDebugBuild) {
        Verify();
      }
    }
    // Merge the given list into this list and empty the given list.
    // We deliberately support only a kUseTail == true SlotFreeList parameter because 1) we don't
    // currently have a situation where we need a kUseTail == false SlotFreeList parameter, and 2)
    // supporting a kUseTail == false parameter would require an O(n) linked list traversal to do
    // the merge if 'this' SlotFreeList has kUseTail == false, which we'd like to avoid.
    void Merge(SlotFreeList<true>* list) {
      if (kIsDebugBuild) {
        Verify();
        CHECK(list != nullptr);
        list->Verify();
      }
      if (list->Size() == 0) {
        return;
      }
      Slot** headp = reinterpret_cast<Slot**>(&head_);
      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
      Slot* old_head = *headp;
      if (old_head == nullptr) {
        // List was empty.
        *headp = list->Head();
        if (kUseTail) {
          *tailp = list->Tail();
        }
        size_ = list->Size();
      } else {
        // List wasn't empty.
        DCHECK(list->Head() != nullptr);
        *headp = list->Head();
        DCHECK(list->Tail() != nullptr);
        list->Tail()->SetNext(old_head);
        // if kUseTail, no change to tailp.
        size_ += list->Size();
      }
      list->Reset();
      if (kIsDebugBuild) {
        Verify();
      }
    }

    void Reset() {
      head_ = 0;
      if (kUseTail) {
        tail_ = 0;
      }
      size_ = 0;
    }

    void Verify() {
      Slot* head = reinterpret_cast<Slot*>(head_);
      Slot* tail = kUseTail ? reinterpret_cast<Slot*>(tail_) : nullptr;
      if (size_ == 0) {
        CHECK(head == nullptr);
        if (kUseTail) {
          CHECK(tail == nullptr);
        }
      } else {
        CHECK(head != nullptr);
        if (kUseTail) {
          CHECK(tail != nullptr);
        }
        size_t count = 0;
        for (Slot* slot = head; slot != nullptr; slot = slot->Next()) {
          ++count;
          if (kUseTail && slot->Next() == nullptr) {
            CHECK_EQ(slot, tail);
          }
        }
        CHECK_EQ(size_, count);
      }
    }

   private:
    // A pointer (Slot*) to the head of the list. Always 8 bytes so that we have the same layout
    // between 32 bit and 64 bit. This is not strictly necessary, but we do so for 1) uniformity,
    // 2) so we won't need to change this code if we move to a non-low-4G heap in the future, and
    // 3) because the space savings from using 32 bit fields on 32 bit would be lost in the noise
    // (they wouldn't open up enough space to make an extra slot available).
    uint64_t head_;
    // A pointer (Slot*) to the tail of the list. Always 8 bytes so that we will have the same
    // layout between 32 bit and 64 bit. The tail is stored to speed up merging of lists.
    // Unused if kUseTail is false.
    uint64_t tail_;
    // The number of slots in the list. This makes it fast to check whether a run is completely
    // free (see Run::IsAllFree()) without traversing the whole list.
    uint32_t size_;
    uint32_t padding_ ATTRIBUTE_UNUSED;
    friend class RosAlloc;
  };
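
  // A minimal sketch (not from the original source) of why the stored tail makes Merge() O(1):
  // the donor list is spliced in front of this list's head with a single pointer update instead
  // of a traversal.
  //
  //   SlotFreeList<false> free_list;         // e.g. a run's allocation free list (no tail)
  //   SlotFreeList<true> thread_local_list;  // e.g. a thread-local free list (with tail)
  //   // Merge() points free_list's head at thread_local_list.Head(), links
  //   // thread_local_list.Tail()->SetNext(old_head), and then Reset()s the donor list.
  //   free_list.Merge(&thread_local_list);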

  // Represents a run of memory slots of the same size.
  //
  // A run's memory layout:
  //
  // +-------------------+
  // | magic_num         |
  // +-------------------+
  // | size_bracket_idx  |
  // +-------------------+
  // | is_thread_local   |
  // +-------------------+
  // | to_be_bulk_freed  |
  // +-------------------+
  // |                   |
  // | free list         |
  // |                   |
  // +-------------------+
  // |                   |
  // | bulk free list    |
  // |                   |
  // +-------------------+
  // |                   |
  // | thread-local free |
  // | list              |
  // |                   |
  // +-------------------+
  // | padding due to    |
  // | alignment         |
  // +-------------------+
  // | slot 0            |
  // +-------------------+
  // | slot 1            |
  // +-------------------+
  // | slot 2            |
  // +-------------------+
  // ...
  // +-------------------+
  // | last slot         |
  // +-------------------+
  //
  class Run {
   public:
    uint8_t magic_num_;                 // The magic number used for debugging.
    uint8_t size_bracket_idx_;          // The index of the size bracket of this run.
    uint8_t is_thread_local_;           // True if this run is used as a thread-local run.
    uint8_t to_be_bulk_freed_;          // Used within BulkFree() to flag a run that's involved with a bulk free.
    uint32_t padding_ ATTRIBUTE_UNUSED;
    // Use a tailless free list for free_list_ so that the alloc fast path does not manage the tail.
    SlotFreeList<false> free_list_;
    SlotFreeList<true> bulk_free_list_;
    SlotFreeList<true> thread_local_free_list_;
    // Padding due to alignment
    // Slot 0
    // Slot 1
    // ...

    // Returns the byte size of the header.
    static size_t fixed_header_size() {
      return sizeof(Run);
    }
    Slot* FirstSlot() const {
      const uint8_t idx = size_bracket_idx_;
      return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) + headerSizes[idx]);
    }
    Slot* LastSlot() {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      uintptr_t end = reinterpret_cast<uintptr_t>(End());
      Slot* last_slot = reinterpret_cast<Slot*>(end - bracket_size);
      DCHECK_LE(FirstSlot(), last_slot);
      return last_slot;
    }
    SlotFreeList<false>* FreeList() {
      return &free_list_;
    }
    SlotFreeList<true>* BulkFreeList() {
      return &bulk_free_list_;
    }
    SlotFreeList<true>* ThreadLocalFreeList() {
      return &thread_local_free_list_;
    }
    void* End() {
      return reinterpret_cast<uint8_t*>(this) + kPageSize * numOfPages[size_bracket_idx_];
    }
    void SetIsThreadLocal(bool is_thread_local) {
      is_thread_local_ = is_thread_local ? 1 : 0;
    }
    bool IsThreadLocal() const {
      return is_thread_local_ != 0;
    }
    // Set up the free list for a new/empty run.
    void InitFreeList() {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      Slot* first_slot = FirstSlot();
      // Add backwards so the first slot is at the head of the list.
      for (Slot* slot = LastSlot(); slot >= first_slot; slot = slot->Left(bracket_size)) {
        free_list_.Add(slot);
      }
    }
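    // For illustration (hypothetical 16-byte bracket with 3 slots; comment not in the original
    // source): InitFreeList() adds slot 2, then slot 1, then slot 0. Since Add() pushes at the
    // head, the resulting list is slot 0 -> slot 1 -> slot 2, so slots are handed out in
    // address order.
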
    // Merge the thread-local free list into the free list. Used when a thread-local run becomes
    // full.
    bool MergeThreadLocalFreeListToFreeList(bool* is_all_free_after_out);
    // Merge the bulk free list into the free list. Used in a bulk free.
    void MergeBulkFreeListToFreeList();
    // Merge the bulk free list into the thread-local free list. In a bulk free, as a two-step
    // process, the GC first records all the slots to free in a run in the bulk free list, where
    // it can write without a lock, and later acquires a lock once per run to merge the bulk free
    // list into the thread-local free list.
    void MergeBulkFreeListToThreadLocalFreeList();
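    // A minimal sketch (not from the original source) of the two-step bulk free protocol for a
    // thread-local run; locking is simplified and 'bracket_lock' is hypothetical:
    //
    //   run->AddToBulkFreeList(ptr1);       // step 1: record slots, no per-run lock needed
    //   run->AddToBulkFreeList(ptr2);
    //   MutexLock mu(self, *bracket_lock);  // step 2: one lock acquisition per run
    //   run->MergeBulkFreeListToThreadLocalFreeList();
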
    // Allocates a slot in a run.
    ALWAYS_INLINE void* AllocSlot();
    // Frees a slot in a run. This is used in a non-bulk free.
    void FreeSlot(void* ptr);
    // Add the given slot to the bulk free list. Returns the bracket size.
    size_t AddToBulkFreeList(void* ptr);
    // Add the given slot to the thread-local free list.
    void AddToThreadLocalFreeList(void* ptr);
    // Returns true if none of the slots in the run are in use, i.e. the run is completely free.
    bool IsAllFree() const {
      return free_list_.Size() == numOfSlots[size_bracket_idx_];
    }
    // Returns the number of free slots.
    size_t NumberOfFreeSlots() {
      return free_list_.Size();
    }
    // Returns true if all the slots in the run are in use.
    ALWAYS_INLINE bool IsFull();
    // Returns true if the bulk free list is empty.
    bool IsBulkFreeListEmpty() const {
      return bulk_free_list_.Size() == 0;
    }
    // Returns true if the thread local free list is empty.
    bool IsThreadLocalFreeListEmpty() const {
      return thread_local_free_list_.Size() == 0;
    }
    // Zero the run's data.
    void ZeroData();
    // Zero the run's header and the slot headers.
    void ZeroHeaderAndSlotHeaders();
    // Iterate over all the slots and apply the given function.
    void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg);
    // Dump the run metadata for debugging.
    std::string Dump();
    // Verify for debugging.
    void Verify(Thread* self, RosAlloc* rosalloc, bool running_on_memory_tool)
        REQUIRES(Locks::mutator_lock_)
        REQUIRES(Locks::thread_list_lock_);

   private:
    // The common part of AddToBulkFreeList() and AddToThreadLocalFreeList(). Returns the bracket
    // size.
    size_t AddToFreeListShared(void* ptr, SlotFreeList<true>* free_list, const char* caller_name);
    // Turns a FreeList into a string for debugging.
    template<bool kUseTail>
    std::string FreeListToStr(SlotFreeList<kUseTail>* free_list);
    // Checks that a given pointer is a valid slot address and returns it as a Slot*.
    Slot* ToSlot(void* ptr) {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr)
          - reinterpret_cast<uint8_t*>(FirstSlot());
      DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
      size_t slot_idx = offset_from_slot_base / bracket_size;
      DCHECK_LT(slot_idx, numOfSlots[idx]);
      return reinterpret_cast<Slot*>(ptr);
    }
    size_t SlotIndex(Slot* slot) const {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(slot)
          - reinterpret_cast<uint8_t*>(FirstSlot());
      DCHECK_EQ(offset_from_slot_base % bracket_size, 0U);
      size_t slot_idx = offset_from_slot_base / bracket_size;
      DCHECK_LT(slot_idx, numOfSlots[idx]);
      return slot_idx;
    }

    // TODO: DISALLOW_COPY_AND_ASSIGN(Run);
  };

  // The magic number for a run.
  static constexpr uint8_t kMagicNum = 42;
  // The magic number for free pages.
  static constexpr uint8_t kMagicNumFree = 43;
  // The number of size brackets.
  static constexpr size_t kNumOfSizeBrackets = 42;
  // The sizes (the slot sizes, in bytes) of the size brackets.
  static size_t bracketSizes[kNumOfSizeBrackets];
  // The numbers of pages that are used for runs for each size bracket.
  static size_t numOfPages[kNumOfSizeBrackets];
  // The numbers of slots of the runs for each size bracket.
  static size_t numOfSlots[kNumOfSizeBrackets];
  // The header sizes in bytes of the runs for each size bracket.
  static size_t headerSizes[kNumOfSizeBrackets];

  // Initialize the run specs (the above arrays).
  static void Initialize();
  static bool initialized_;

  // Returns the bracket size in bytes for the given index.
  static size_t IndexToBracketSize(size_t idx) {
    DCHECK_LT(idx, kNumOfSizeBrackets);
    return bracketSizes[idx];
  }
  // Returns the index of the size bracket from the bracket size.
  static size_t BracketSizeToIndex(size_t size) {
    DCHECK(8 <= size &&
           ((size <= kMaxThreadLocalBracketSize && size % kThreadLocalBracketQuantumSize == 0) ||
            (size <= kMaxRegularBracketSize && size % kBracketQuantumSize == 0) ||
            size == 1 * KB || size == 2 * KB));
    size_t idx;
    if (UNLIKELY(size == 1 * KB)) {
      idx = kNumOfSizeBrackets - 2;
    } else if (UNLIKELY(size == 2 * KB)) {
      idx = kNumOfSizeBrackets - 1;
    } else if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      DCHECK_EQ(size % kThreadLocalBracketQuantumSize, 0U);
      idx = size / kThreadLocalBracketQuantumSize - 1;
    } else {
      DCHECK(size <= kMaxRegularBracketSize);
      DCHECK_EQ((size - kMaxThreadLocalBracketSize) % kBracketQuantumSize, 0U);
      idx = ((size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
          + kNumThreadLocalSizeBrackets;
    }
    DCHECK(bracketSizes[idx] == size);
    return idx;
  }
  // Returns true if the given allocation size is for a thread local allocation.
  static bool IsSizeForThreadLocal(size_t size) {
    bool is_size_for_thread_local = size <= kMaxThreadLocalBracketSize;
    DCHECK(size > kLargeSizeThreshold ||
           (is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets)));
    return is_size_for_thread_local;
  }
  // Rounds up the size to the nearest bracket size.
  static size_t RoundToBracketSize(size_t size) {
    DCHECK(size <= kLargeSizeThreshold);
    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      return RoundUp(size, kThreadLocalBracketQuantumSize);
    } else if (size <= kMaxRegularBracketSize) {
      return RoundUp(size, kBracketQuantumSize);
    } else if (UNLIKELY(size <= 1 * KB)) {
      return 1 * KB;
    } else {
      DCHECK_LE(size, 2 * KB);
      return 2 * KB;
    }
  }
  // Returns the size bracket index from the byte size with rounding.
  static size_t SizeToIndex(size_t size) {
    DCHECK(size <= kLargeSizeThreshold);
    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      return RoundUp(size, kThreadLocalBracketQuantumSize) / kThreadLocalBracketQuantumSize - 1;
    } else if (size <= kMaxRegularBracketSize) {
      return (RoundUp(size, kBracketQuantumSize) - kMaxThreadLocalBracketSize) / kBracketQuantumSize
          - 1 + kNumThreadLocalSizeBrackets;
    } else if (size <= 1 * KB) {
      return kNumOfSizeBrackets - 2;
    } else {
      DCHECK_LE(size, 2 * KB);
      return kNumOfSizeBrackets - 1;
    }
  }
  // A combination of SizeToIndex() and RoundToBracketSize().
  static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
    DCHECK(size <= kLargeSizeThreshold);
    size_t idx;
    size_t bracket_size;
    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      bracket_size = RoundUp(size, kThreadLocalBracketQuantumSize);
      idx = bracket_size / kThreadLocalBracketQuantumSize - 1;
    } else if (size <= kMaxRegularBracketSize) {
      bracket_size = RoundUp(size, kBracketQuantumSize);
      idx = ((bracket_size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
          + kNumThreadLocalSizeBrackets;
    } else if (size <= 1 * KB) {
      bracket_size = 1 * KB;
      idx = kNumOfSizeBrackets - 2;
    } else {
      DCHECK(size <= 2 * KB);
      bracket_size = 2 * KB;
      idx = kNumOfSizeBrackets - 1;
    }
    DCHECK_EQ(idx, SizeToIndex(size)) << idx;
    DCHECK_EQ(bracket_size, IndexToBracketSize(idx)) << idx;
    DCHECK_EQ(bracket_size, bracketSizes[idx]) << idx;
    DCHECK_LE(size, bracket_size) << idx;
    DCHECK(size > kMaxRegularBracketSize ||
           (size <= kMaxThreadLocalBracketSize &&
            bracket_size - size < kThreadLocalBracketQuantumSize) ||
           (size <= kMaxRegularBracketSize && bracket_size - size < kBracketQuantumSize)) << idx;
    *bracket_size_out = bracket_size;
    return idx;
  }
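
  // Worked examples (derived from the bracket constants declared below; comment not in the
  // original source):
  //   size 17  -> bracket size 24  = RoundUp(17, 8),   idx = 24 / 8 - 1 = 2
  //   size 200 -> bracket size 208 = RoundUp(200, 16), idx = (208 - 128) / 16 - 1 + 16 = 20
  //   size 600 -> bracket size 1 KB, idx = 40;  size 2048 -> bracket size 2 KB, idx = 41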

  // Returns the page map index from an address. Requires that the
  // address is page size aligned.
  size_t ToPageMapIndex(const void* addr) const {
    DCHECK_LE(base_, addr);
    DCHECK_LT(addr, base_ + capacity_);
    size_t byte_offset = reinterpret_cast<const uint8_t*>(addr) - base_;
    DCHECK_EQ(byte_offset % static_cast<size_t>(kPageSize), static_cast<size_t>(0));
    return byte_offset / kPageSize;
  }
  // Returns the page map index from an address, rounding down to a page boundary.
  size_t RoundDownToPageMapIndex(const void* addr) const {
    DCHECK(base_ <= addr && addr < reinterpret_cast<uint8_t*>(base_) + capacity_);
    return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize;
  }
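
  // For illustration (hypothetical values; comment not in the original source): with a 4 KB
  // kPageSize, ToPageMapIndex(base_ + 3 * kPageSize) == 3, and
  // RoundDownToPageMapIndex(base_ + 3 * kPageSize + 100) == 3 as well.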

  // A memory allocation request larger than this size is treated as a large object and allocated
  // at page granularity.
  static const size_t kLargeSizeThreshold = 2048;

  // If true, check that the returned memory is actually zero.
  static constexpr bool kCheckZeroMemory = kIsDebugBuild;
  // Valgrind protects memory, so do not check memory when running under Valgrind. In a normal
  // build with kCheckZeroMemory enabled, the whole test should be optimized away.
  // TODO: Unprotect before checks.
  ALWAYS_INLINE bool ShouldCheckZeroMemory();

  // If true, log verbose details of operations.
  static constexpr bool kTraceRosAlloc = false;

  struct hash_run {
    size_t operator()(const RosAlloc::Run* r) const {
      return reinterpret_cast<size_t>(r);
    }
  };

  struct eq_run {
    bool operator()(const RosAlloc::Run* r1, const RosAlloc::Run* r2) const {
      return r1 == r2;
    }
  };

 public:
  // Different page release modes.
  enum PageReleaseMode {
    kPageReleaseModeNone,         // Release no empty pages.
    kPageReleaseModeEnd,          // Release empty pages at the end of the space.
    kPageReleaseModeSize,         // Release empty pages that are larger than the threshold.
    kPageReleaseModeSizeAndEnd,   // Release empty pages that are larger than the threshold and
                                  // at the end of the space.
    kPageReleaseModeAll,          // Release all empty pages.
  };

  // The default value for page_release_size_threshold_.
  static constexpr size_t kDefaultPageReleaseSizeThreshold = 4 * MB;

  // We use thread-local runs for the size brackets whose indexes
  // are less than this index. We use shared (current) runs for the rest.
  // Sync this with the length of Thread::rosalloc_runs_.
  static const size_t kNumThreadLocalSizeBrackets = 16;
  static_assert(kNumThreadLocalSizeBrackets == kNumRosAllocThreadLocalSizeBracketsInThread,
                "Mismatch between kNumThreadLocalSizeBrackets and "
                "kNumRosAllocThreadLocalSizeBracketsInThread");

  // The size of the largest bracket we use thread-local runs for.
  // This should be equal to bracketSizes[kNumThreadLocalSizeBrackets - 1].
  static const size_t kMaxThreadLocalBracketSize = 128;

  // We use regular (8- or 16-byte increment) runs for the size brackets whose indexes are less
  // than this index.
  static const size_t kNumRegularSizeBrackets = 40;

  // The size of the largest regular (8- or 16-byte increment) bracket. Non-regular brackets are
  // the 1 KB and the 2 KB brackets. This should be equal to bracketSizes[kNumRegularSizeBrackets - 1].
  static const size_t kMaxRegularBracketSize = 512;

  // The bracket size increment for the thread-local brackets (<= kMaxThreadLocalBracketSize bytes).
  static constexpr size_t kThreadLocalBracketQuantumSize = 8;

  // Equal to Log2(kThreadLocalBracketQuantumSize).
  static constexpr size_t kThreadLocalBracketQuantumSizeShift = 3;

  // The bracket size increment for the non-thread-local, regular brackets (of size <=
  // kMaxRegularBracketSize bytes and > kMaxThreadLocalBracketSize bytes).
  static constexpr size_t kBracketQuantumSize = 16;

  // Equal to Log2(kBracketQuantumSize).
  static constexpr size_t kBracketQuantumSizeShift = 4;
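
  // Putting the above constants together (comment not in the original source), the 42 size
  // brackets are laid out as follows:
  //   indexes  0-15: 8, 16, ..., 128 bytes    (8-byte quantum, thread-local runs)
  //   indexes 16-39: 144, 160, ..., 512 bytes (16-byte quantum, regular shared runs)
  //   index 40: 1 KB; index 41: 2 KB          (non-regular brackets)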

 private:
  // The base address of the memory region that's managed by this allocator.
  uint8_t* base_;

  // The footprint in bytes of the currently allocated portion of the
  // memory region.
  size_t footprint_;

  // The maximum footprint. The address, base_ + capacity_, indicates
  // the end of the memory region that's currently managed by this allocator.
  size_t capacity_;

  // The maximum capacity. The address, base_ + max_capacity_, indicates
  // the end of the memory region that's ever managed by this allocator.
  size_t max_capacity_;

  template<class Key, AllocatorTag kTag, class Compare = std::less<Key>>
  using AllocationTrackingSet = std::set<Key, Compare, TrackingAllocator<Key, kTag>>;

  // The run sets that hold the runs whose slots are not all
  // full. non_full_runs_[i] is guarded by size_bracket_locks_[i].
  AllocationTrackingSet<Run*, kAllocatorTagRosAlloc> non_full_runs_[kNumOfSizeBrackets];
  // The run sets that hold the runs whose slots are all full. This is
  // debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
  std::unordered_set<Run*, hash_run, eq_run, TrackingAllocator<Run*, kAllocatorTagRosAlloc>>
      full_runs_[kNumOfSizeBrackets];
  // The set of free pages.
  AllocationTrackingSet<FreePageRun*, kAllocatorTagRosAlloc> free_page_runs_ GUARDED_BY(lock_);
  // The dedicated full run. It is always full and shared by all threads when revoking happens.
  // This is an optimization since it enables us to avoid a null check for revoked runs.
  static Run* dedicated_full_run_;
  // Using size_t to ensure that it is at least word aligned.
  static size_t dedicated_full_run_storage_[];
  // The current runs where the allocations are first attempted for
  // the size brackets that do not use thread-local
  // runs. current_runs_[i] is guarded by size_bracket_locks_[i].
  Run* current_runs_[kNumOfSizeBrackets];
  // The mutexes, one per size bracket.
  Mutex* size_bracket_locks_[kNumOfSizeBrackets];
  // Bracket lock names (since locks only have char* names).
  std::string size_bracket_lock_names_[kNumOfSizeBrackets];
  // The types of page map entries.
  enum PageMapKind {
    kPageMapReleased = 0,     // Zero and released back to the OS.
    kPageMapEmpty,            // Zero but probably dirty.
    kPageMapRun,              // The beginning of a run.
    kPageMapRunPart,          // The non-beginning part of a run.
    kPageMapLargeObject,      // The beginning of a large object.
    kPageMapLargeObjectPart,  // The non-beginning part of a large object.
  };
  // The table that indicates what pages are currently used for.
  volatile uint8_t* page_map_;  // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
  size_t page_map_size_;
  size_t max_page_map_size_;
  std::unique_ptr<MemMap> page_map_mem_map_;

  // The table that indicates the sizes of free page runs. The sizes
  // are stored here rather than in the free page headers so that the
  // backing pages can be released.
  std::vector<size_t, TrackingAllocator<size_t, kAllocatorTagRosAlloc>> free_page_run_size_map_
      GUARDED_BY(lock_);
  // The global lock. Used to guard the page map, the free page set,
  // and the footprint.
  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
  // The reader-writer lock to allow one bulk free at a time while
  // allowing multiple individual frees at the same time. Also, this
  // is used to avoid race conditions between BulkFree() and
  // RevokeThreadLocalRuns() on the bulk free list.
  ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;

  // The page release mode.
  const PageReleaseMode page_release_mode_;
  // Under kPageReleaseModeSize(AndEnd), if the free page run size is
  // greater than or equal to this value, release pages.
  const size_t page_release_size_threshold_;

  // Whether this allocator is running on a memory tool such as Valgrind.
  bool is_running_on_memory_tool_;

  // The base address of the memory region that's managed by this allocator.
  uint8_t* Begin() { return base_; }
  // The end address of the memory region that's managed by this allocator.
  uint8_t* End() { return base_ + capacity_; }

  // Page-granularity alloc/free
  void* AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type)
      REQUIRES(lock_);
  // Returns how many bytes were freed.
  size_t FreePages(Thread* self, void* ptr, bool already_zero) REQUIRES(lock_);

  // Allocate/free a run slot.
  void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
                     size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);
  // Allocate/free a run slot without acquiring locks.
  // TODO: REQUIRES(Locks::mutator_lock_)
  void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated,
                                 size_t* usable_size, size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);
  void* AllocFromCurrentRunUnlocked(Thread* self, size_t idx) REQUIRES(!lock_);

  // Returns the bracket size.
  size_t FreeFromRun(Thread* self, void* ptr, Run* run)
      REQUIRES(!lock_);

  // Used to allocate a new thread local run for a size bracket.
  Run* AllocRun(Thread* self, size_t idx) REQUIRES(!lock_);

  // Used to acquire a new/reused run for a size bracket. Used when a
  // thread-local or current run gets full.
  Run* RefillRun(Thread* self, size_t idx) REQUIRES(!lock_);

  // The internal implementation of the non-bulk Free().
  size_t FreeInternal(Thread* self, void* ptr) REQUIRES(!lock_);

  // Allocates large objects.
  void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated,
                         size_t* usable_size, size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);

  // Revoke a run by adding it to non_full_runs_ or freeing the pages.
  void RevokeRun(Thread* self, size_t idx, Run* run) REQUIRES(!lock_);

  // Revoke the current runs which share an index with the thread local runs.
  void RevokeThreadUnsafeCurrentRuns() REQUIRES(!lock_);

  // Release a range of pages.
  size_t ReleasePageRange(uint8_t* start, uint8_t* end) REQUIRES(lock_);

  // Dumps the page map for debugging.
  std::string DumpPageMap() REQUIRES(lock_);

 public:
  RosAlloc(void* base, size_t capacity, size_t max_capacity,
           PageReleaseMode page_release_mode,
           bool running_on_memory_tool,
           size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold);
  ~RosAlloc();

  static size_t RunFreeListOffset() {
    return OFFSETOF_MEMBER(Run, free_list_);
  }
  static size_t RunFreeListHeadOffset() {
    return OFFSETOF_MEMBER(SlotFreeList<false>, head_);
  }
  static size_t RunFreeListSizeOffset() {
    return OFFSETOF_MEMBER(SlotFreeList<false>, size_);
  }
  static size_t RunSlotNextOffset() {
    return OFFSETOF_MEMBER(Slot, next_);
  }

  // If kThreadSafe is false then the allocator may avoid acquiring some locks as an optimization.
  // If used, this may cause race conditions if multiple threads are allocating at the same time.
  template<bool kThreadSafe = true>
  void* Alloc(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
              size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);
  size_t Free(Thread* self, void* ptr)
      REQUIRES(!bulk_free_lock_, !lock_);
  size_t BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
      REQUIRES(!bulk_free_lock_, !lock_);
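
  // A minimal usage sketch (not from the original source; 'self' and 'rosalloc' stand for a
  // valid Thread* and RosAlloc*):
  //
  //   size_t bytes_allocated, usable_size, bytes_tl_bulk_allocated;
  //   void* p = rosalloc->Alloc<true>(self, 40, &bytes_allocated, &usable_size,
  //                                   &bytes_tl_bulk_allocated);
  //   // For a 40-byte request, bytes_allocated and usable_size report the 40-byte bracket size.
  //   rosalloc->Free(self, p);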

  // Returns true if the given allocation request can be allocated in
  // an existing thread local run without allocating a new run.
  ALWAYS_INLINE bool CanAllocFromThreadLocalRun(Thread* self, size_t size);
  // Allocate the given allocation request in an existing thread local
  // run without allocating a new run.
  ALWAYS_INLINE void* AllocFromThreadLocalRun(Thread* self, size_t size, size_t* bytes_allocated);

  // Returns the maximum bytes that could be allocated for the given
  // size in bulk, that is the maximum value for the
  // bytes_tl_bulk_allocated out param returned by RosAlloc::Alloc().
  ALWAYS_INLINE size_t MaxBytesBulkAllocatedFor(size_t size);

  // Returns the size of the allocated slot for a given allocated memory chunk.
  size_t UsableSize(const void* ptr) REQUIRES(!lock_);
  // Returns the size of the allocated slot for a given size.
  size_t UsableSize(size_t bytes) {
    if (UNLIKELY(bytes > kLargeSizeThreshold)) {
      return RoundUp(bytes, kPageSize);
    } else {
      return RoundToBracketSize(bytes);
    }
  }
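
  // For illustration (hypothetical values): UsableSize(200) == 208 (the 208-byte bracket), while
  // UsableSize(3000) == RoundUp(3000, kPageSize), since 3000 > kLargeSizeThreshold.
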
  // Try to reduce the current footprint by releasing the free page
  // run at the end of the memory region, if any.
  bool Trim() REQUIRES(!lock_);
  // Iterates over all the memory slots and applies the given function.
  void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
                  void* arg)
      REQUIRES(!lock_);

  // Release empty pages.
  size_t ReleasePages() REQUIRES(!lock_);
  // Returns the current footprint.
  size_t Footprint() REQUIRES(!lock_);
  // Returns the current capacity, i.e. the maximum footprint.
  size_t FootprintLimit() REQUIRES(!lock_);
  // Update the current capacity.
  void SetFootprintLimit(size_t bytes) REQUIRES(!lock_);

  // Releases the thread-local runs assigned to the given thread back to the common set of runs.
  // Returns the total bytes of free slots in the revoked thread local runs. This is to be
  // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
  size_t RevokeThreadLocalRuns(Thread* thread) REQUIRES(!lock_, !bulk_free_lock_);
  // Releases the thread-local runs assigned to all the threads back to the common set of runs.
  // Returns the total bytes of free slots in the revoked thread local runs. This is to be
  // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
  size_t RevokeAllThreadLocalRuns() REQUIRES(!Locks::thread_list_lock_, !lock_, !bulk_free_lock_);
  // Assert the thread local runs of a thread are revoked.
  void AssertThreadLocalRunsAreRevoked(Thread* thread) REQUIRES(!bulk_free_lock_);
  // Assert all the thread local runs are revoked.
  void AssertAllThreadLocalRunsAreRevoked() REQUIRES(!Locks::thread_list_lock_, !bulk_free_lock_);

  static Run* GetDedicatedFullRun() {
    return dedicated_full_run_;
  }
  bool IsFreePage(size_t idx) const {
    DCHECK_LT(idx, capacity_ / kPageSize);
    uint8_t pm_type = page_map_[idx];
    return pm_type == kPageMapReleased || pm_type == kPageMapEmpty;
  }

  // Callbacks for InspectAll that will count the number of bytes
  // allocated and objects allocated, respectively.
  static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
  static void ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);

  bool DoesReleaseAllPages() const {
    return page_release_mode_ == kPageReleaseModeAll;
  }

  // Verify for debugging.
  void Verify() REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !bulk_free_lock_,
                         !lock_);

  void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes)
      REQUIRES(!bulk_free_lock_, !lock_);

  void DumpStats(std::ostream& os)
      REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_) REQUIRES(!bulk_free_lock_);

 private:
  friend std::ostream& operator<<(std::ostream& os, const RosAlloc::PageMapKind& rhs);

  DISALLOW_COPY_AND_ASSIGN(RosAlloc);
};
std::ostream& operator<<(std::ostream& os, const RosAlloc::PageMapKind& rhs);

// Callback from rosalloc when it needs to increase the footprint. Must be implemented somewhere
// else (currently rosalloc_space.cc).
void* ArtRosAllocMoreCore(allocator::RosAlloc* rosalloc, intptr_t increment);

}  // namespace allocator
}  // namespace gc
}  // namespace art

#endif  // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_