Home | History | Annotate | Download | only in wtf
      1 /*
      2  * Copyright (C) 2013 Google Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Redistributions in binary form must reproduce the above
     11  * copyright notice, this list of conditions and the following disclaimer
     12  * in the documentation and/or other materials provided with the
     13  * distribution.
     14  *     * Neither the name of Google Inc. nor the names of its
     15  * contributors may be used to endorse or promote products derived from
     16  * this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 #include "config.h"
     32 #include "wtf/PartitionAlloc.h"
     33 
     34 #include <string.h>
     35 
     36 #ifndef NDEBUG
     37 #include <stdio.h>
     38 #endif
     39 
     40 // Two partition pages are used as guard / metadata page so make sure the super
     41 // page size is bigger.
     42 COMPILE_ASSERT(WTF::kPartitionPageSize * 4 <= WTF::kSuperPageSize, ok_super_page_size);
     43 COMPILE_ASSERT(!(WTF::kSuperPageSize % WTF::kPartitionPageSize), ok_super_page_multiple);
     44 // Four system pages gives us room to hack out a still-guard-paged piece
     45 // of metadata in the middle of a guard partition page.
     46 COMPILE_ASSERT(WTF::kSystemPageSize * 4 <= WTF::kPartitionPageSize, ok_partition_page_size);
     47 COMPILE_ASSERT(!(WTF::kPartitionPageSize % WTF::kSystemPageSize), ok_partition_page_multiple);
     48 COMPILE_ASSERT(sizeof(WTF::PartitionPage) <= WTF::kPageMetadataSize, PartitionPage_not_too_big);
     49 COMPILE_ASSERT(sizeof(WTF::PartitionBucket) <= WTF::kPageMetadataSize, PartitionBucket_not_too_big);
     50 COMPILE_ASSERT(sizeof(WTF::PartitionSuperPageExtentEntry) <= WTF::kPageMetadataSize, PartitionSuperPageExtentEntry_not_too_big);
     51 COMPILE_ASSERT(WTF::kPageMetadataSize * WTF::kNumPartitionPagesPerSuperPage <= WTF::kSystemPageSize, page_metadata_fits_in_hole);
     52 // Check that some of our zanier calculations worked out as expected.
     53 COMPILE_ASSERT(WTF::kGenericSmallestBucket == 8, generic_smallest_bucket);
     54 COMPILE_ASSERT(WTF::kGenericMaxBucketed == 983040, generic_max_bucketed);
     55 
     56 namespace WTF {
     57 
     58 int PartitionRootBase::gInitializedLock = 0;
     59 bool PartitionRootBase::gInitialized = false;
     60 PartitionPage PartitionRootBase::gSeedPage;
     61 PartitionBucket PartitionRootBase::gPagedBucket;
     62 
     63 static size_t partitionBucketNumSystemPages(size_t size)
     64 {
     65     // This works out reasonably for the current bucket sizes of the generic
     66     // allocator, and the current values of partition page size and constants.
     67     // Specifically, we have enough room to always pack the slots perfectly into
     68     // some number of system pages. The only waste is the waste associated with
     69     // unfaulted pages (i.e. wasted address space).
     70     // TODO: we end up using a lot of system pages for very small sizes. For
     71     // example, we'll use 12 system pages for slot size 24. The slot size is
     72     // so small that the waste would be tiny with just 4, or 1, system pages.
     73     // Later, we can investigate whether there are anti-fragmentation benefits
     74     // to using fewer system pages.
     75     double bestWasteRatio = 1.0f;
     76     size_t bestPages = 0;
     77     if (size > kMaxSystemPagesPerSlotSpan * kSystemPageSize) {
     78         ASSERT(!(size % kSystemPageSize));
     79         return size / kSystemPageSize;
     80     }
     81     ASSERT(size <= kMaxSystemPagesPerSlotSpan * kSystemPageSize);
     82     for (size_t i = kNumSystemPagesPerPartitionPage - 1; i <= kMaxSystemPagesPerSlotSpan; ++i) {
     83         size_t pageSize = kSystemPageSize * i;
     84         size_t numSlots = pageSize / size;
     85         size_t waste = pageSize - (numSlots * size);
     86         // Leaving a page unfaulted is not free; the page will occupy an empty page table entry.
     87         // Make a simple attempt to account for that.
     88         size_t numRemainderPages = i & (kNumSystemPagesPerPartitionPage - 1);
     89         size_t numUnfaultedPages = numRemainderPages ? (kNumSystemPagesPerPartitionPage - numRemainderPages) : 0;
     90         waste += sizeof(void*) * numUnfaultedPages;
     91         double wasteRatio = (double) waste / (double) pageSize;
     92         if (wasteRatio < bestWasteRatio) {
     93             bestWasteRatio = wasteRatio;
     94             bestPages = i;
     95         }
     96     }
     97     ASSERT(bestPages > 0);
     98     return bestPages;
     99 }
    100 
    101 static void parititonAllocBaseInit(PartitionRootBase* root)
    102 {
    103     ASSERT(!root->initialized);
    104 
    105     spinLockLock(&PartitionRootBase::gInitializedLock);
    106     if (!PartitionRootBase::gInitialized) {
    107         PartitionRootBase::gInitialized = true;
    108         // We mark the seed page as free to make sure it is skipped by our
    109         // logic to find a new active page.
    110         PartitionRootBase::gPagedBucket.activePagesHead = &PartitionRootGeneric::gSeedPage;
    111     }
    112     spinLockUnlock(&PartitionRootBase::gInitializedLock);
    113 
    114     root->initialized = true;
    115     root->totalSizeOfCommittedPages = 0;
    116     root->totalSizeOfSuperPages = 0;
    117     root->nextSuperPage = 0;
    118     root->nextPartitionPage = 0;
    119     root->nextPartitionPageEnd = 0;
    120     root->firstExtent = 0;
    121     root->currentExtent = 0;
    122 
    123     memset(&root->globalEmptyPageRing, '\0', sizeof(root->globalEmptyPageRing));
    124     root->globalEmptyPageRingIndex = 0;
    125 
    126     // This is a "magic" value so we can test if a root pointer is valid.
    127     root->invertedSelf = ~reinterpret_cast<uintptr_t>(root);
    128 }
    129 
    130 static void partitionBucketInitBase(PartitionBucket* bucket, PartitionRootBase* root)
    131 {
    132     bucket->activePagesHead = &PartitionRootGeneric::gSeedPage;
    133     bucket->freePagesHead = 0;
    134     bucket->numFullPages = 0;
    135     bucket->numSystemPagesPerSlotSpan = partitionBucketNumSystemPages(bucket->slotSize);
    136 }
    137 
    138 void partitionAllocInit(PartitionRoot* root, size_t numBuckets, size_t maxAllocation)
    139 {
    140     parititonAllocBaseInit(root);
    141 
    142     root->numBuckets = numBuckets;
    143     root->maxAllocation = maxAllocation;
    144     size_t i;
    145     for (i = 0; i < root->numBuckets; ++i) {
    146         PartitionBucket* bucket = &root->buckets()[i];
    147         if (!i)
    148             bucket->slotSize = kAllocationGranularity;
    149         else
    150             bucket->slotSize = i << kBucketShift;
    151         partitionBucketInitBase(bucket, root);
    152     }
    153 }
    154 
    155 void partitionAllocGenericInit(PartitionRootGeneric* root)
    156 {
    157     parititonAllocBaseInit(root);
    158 
    159     root->lock = 0;
    160 
    161     // Precalculate some shift and mask constants used in the hot path.
    162     // Example: malloc(41) == 101001 binary.
    163     // Order is 6 (1 << 6-1)==32 is highest bit set.
    164     // orderIndex is the next three MSB == 010 == 2.
    165     // subOrderIndexMask is a mask for the remaining bits == 11 (masking to 01 for the subOrderIndex).
    166     size_t order;
    167     for (order = 0; order <= kBitsPerSizet; ++order) {
    168         size_t orderIndexShift;
    169         if (order < kGenericNumBucketsPerOrderBits + 1)
    170             orderIndexShift = 0;
    171         else
    172             orderIndexShift = order - (kGenericNumBucketsPerOrderBits + 1);
    173         root->orderIndexShifts[order] = orderIndexShift;
    174         size_t subOrderIndexMask;
    175         if (order == kBitsPerSizet) {
    176             // This avoids invoking undefined behavior for an excessive shift.
    177             subOrderIndexMask = static_cast<size_t>(-1) >> (kGenericNumBucketsPerOrderBits + 1);
    178         } else {
    179             subOrderIndexMask = ((1 << order) - 1) >> (kGenericNumBucketsPerOrderBits + 1);
    180         }
    181         root->orderSubIndexMasks[order] = subOrderIndexMask;
    182     }
    183 
    184     // Set up the actual usable buckets first.
    185     // Note that typical values (i.e. min allocation size of 8) will result in
    186     // invalid buckets (size==9 etc. or more generally, size is not a multiple
    187     // of the smallest allocation granularity).
    188     // We avoid them in the bucket lookup map, but we tolerate them to keep the
    189     // code simpler and the structures more generic.
    190     size_t i, j;
    191     size_t currentSize = kGenericSmallestBucket;
    192     size_t currentIncrement = kGenericSmallestBucket >> kGenericNumBucketsPerOrderBits;
    193     PartitionBucket* bucket = &root->buckets[0];
    194     for (i = 0; i < kGenericNumBucketedOrders; ++i) {
    195         for (j = 0; j < kGenericNumBucketsPerOrder; ++j) {
    196             bucket->slotSize = currentSize;
    197             partitionBucketInitBase(bucket, root);
    198             // Disable invalid buckets so that touching them faults.
    199             if (currentSize % kGenericSmallestBucket)
    200                 bucket->activePagesHead = 0;
    201             currentSize += currentIncrement;
    202             ++bucket;
    203         }
    204         currentIncrement <<= 1;
    205     }
    206     ASSERT(currentSize == 1 << kGenericMaxBucketedOrder);
    207     ASSERT(bucket == &root->buckets[0] + (kGenericNumBucketedOrders * kGenericNumBucketsPerOrder));
    208 
    209     // Then set up the fast size -> bucket lookup table.
    210     bucket = &root->buckets[0];
    211     PartitionBucket** bucketPtr = &root->bucketLookups[0];
    212     for (order = 0; order <= kBitsPerSizet; ++order) {
    213         for (j = 0; j < kGenericNumBucketsPerOrder; ++j) {
    214             if (order < kGenericMinBucketedOrder) {
    215                 // Use the bucket of finest granularity for malloc(0) etc.
    216                 *bucketPtr++ = &root->buckets[0];
    217             } else if (order > kGenericMaxBucketedOrder) {
    218                 *bucketPtr++ = &PartitionRootGeneric::gPagedBucket;
    219             } else {
    220                 PartitionBucket* validBucket = bucket;
    221                 // Skip over invalid buckets.
    222                 while (validBucket->slotSize % kGenericSmallestBucket)
    223                     validBucket++;
    224                 *bucketPtr++ = validBucket;
    225                 bucket++;
    226             }
    227         }
    228     }
    229     ASSERT(bucket == &root->buckets[0] + (kGenericNumBucketedOrders * kGenericNumBucketsPerOrder));
    230     ASSERT(bucketPtr == &root->bucketLookups[0] + ((kBitsPerSizet + 1) * kGenericNumBucketsPerOrder));
    231     // And there's one last bucket lookup that will be hit for e.g. malloc(-1),
    232     // which tries to overflow to a non-existant order.
    233     *bucketPtr = &PartitionRootGeneric::gPagedBucket;
    234 }
    235 
    236 static bool partitionAllocShutdownBucket(PartitionBucket* bucket)
    237 {
    238     // Failure here indicates a memory leak.
    239     bool noLeaks = !bucket->numFullPages;
    240     PartitionPage* page = bucket->activePagesHead;
    241     while (page) {
    242         if (page->numAllocatedSlots)
    243             noLeaks = false;
    244         page = page->nextPage;
    245     }
    246 
    247     return noLeaks;
    248 }
    249 
    250 static void partitionAllocBaseShutdown(PartitionRootBase* root)
    251 {
    252     ASSERT(root->initialized);
    253     root->initialized = false;
    254 
    255     // Now that we've examined all partition pages in all buckets, it's safe
    256     // to free all our super pages. We first collect the super page pointers
    257     // on the stack because some of them are themselves store in super pages.
    258     char* superPages[kMaxPartitionSize / kSuperPageSize];
    259     size_t numSuperPages = 0;
    260     PartitionSuperPageExtentEntry* entry = root->firstExtent;
    261     while (entry) {
    262         char* superPage = entry->superPageBase;
    263         while (superPage != entry->superPagesEnd) {
    264             superPages[numSuperPages] = superPage;
    265             numSuperPages++;
    266             superPage += kSuperPageSize;
    267         }
    268         entry = entry->next;
    269     }
    270     ASSERT(numSuperPages == root->totalSizeOfSuperPages / kSuperPageSize);
    271     for (size_t i = 0; i < numSuperPages; ++i)
    272         freePages(superPages[i], kSuperPageSize);
    273 }
    274 
    275 bool partitionAllocShutdown(PartitionRoot* root)
    276 {
    277     bool noLeaks = true;
    278     size_t i;
    279     for (i = 0; i < root->numBuckets; ++i) {
    280         PartitionBucket* bucket = &root->buckets()[i];
    281         if (!partitionAllocShutdownBucket(bucket))
    282             noLeaks = false;
    283     }
    284 
    285     partitionAllocBaseShutdown(root);
    286     return noLeaks;
    287 }
    288 
    289 bool partitionAllocGenericShutdown(PartitionRootGeneric* root)
    290 {
    291     bool noLeaks = true;
    292     size_t i;
    293     for (i = 0; i < kGenericNumBucketedOrders * kGenericNumBucketsPerOrder; ++i) {
    294         PartitionBucket* bucket = &root->buckets[i];
    295         if (!partitionAllocShutdownBucket(bucket))
    296             noLeaks = false;
    297     }
    298     partitionAllocBaseShutdown(root);
    299     return noLeaks;
    300 }
    301 
    302 static NEVER_INLINE void partitionOutOfMemory()
    303 {
    304     IMMEDIATE_CRASH();
    305 }
    306 
    307 static NEVER_INLINE void partitionFull()
    308 {
    309     IMMEDIATE_CRASH();
    310 }
    311 
    312 static ALWAYS_INLINE void partitionDecommitSystemPages(PartitionRootBase* root, void* addr, size_t len)
    313 {
    314     decommitSystemPages(addr, len);
    315     ASSERT(root->totalSizeOfCommittedPages > len);
    316     root->totalSizeOfCommittedPages -= len;
    317 }
    318 
    319 static ALWAYS_INLINE void partitionRecommitSystemPages(PartitionRootBase* root, void* addr, size_t len)
    320 {
    321     recommitSystemPages(addr, len);
    322     root->totalSizeOfCommittedPages += len;
    323 }
    324 
    325 static ALWAYS_INLINE void* partitionAllocPartitionPages(PartitionRootBase* root, int flags, size_t numPartitionPages)
    326 {
    327     ASSERT(!(reinterpret_cast<uintptr_t>(root->nextPartitionPage) % kPartitionPageSize));
    328     ASSERT(!(reinterpret_cast<uintptr_t>(root->nextPartitionPageEnd) % kPartitionPageSize));
    329     RELEASE_ASSERT(numPartitionPages <= kNumPartitionPagesPerSuperPage);
    330     size_t totalSize = kPartitionPageSize * numPartitionPages;
    331     root->totalSizeOfCommittedPages += totalSize;
    332     size_t numPartitionPagesLeft = (root->nextPartitionPageEnd - root->nextPartitionPage) >> kPartitionPageShift;
    333     if (LIKELY(numPartitionPagesLeft >= numPartitionPages)) {
    334         // In this case, we can still hand out pages from the current super page
    335         // allocation.
    336         char* ret = root->nextPartitionPage;
    337         root->nextPartitionPage += totalSize;
    338         return ret;
    339     }
    340 
    341     // Need a new super page.
    342     root->totalSizeOfSuperPages += kSuperPageSize;
    343     if (root->totalSizeOfSuperPages > kMaxPartitionSize)
    344         partitionFull();
    345     char* requestedAddress = root->nextSuperPage;
    346     char* superPage = reinterpret_cast<char*>(allocPages(requestedAddress, kSuperPageSize, kSuperPageSize));
    347     if (UNLIKELY(!superPage)) {
    348         if (flags & PartitionAllocReturnNull)
    349             return 0;
    350         partitionOutOfMemory();
    351     }
    352     root->nextSuperPage = superPage + kSuperPageSize;
    353     char* ret = superPage + kPartitionPageSize;
    354     root->nextPartitionPage = ret + totalSize;
    355     root->nextPartitionPageEnd = root->nextSuperPage - kPartitionPageSize;
    356     // Make the first partition page in the super page a guard page, but leave a
    357     // hole in the middle.
    358     // This is where we put page metadata and also a tiny amount of extent
    359     // metadata.
    360     setSystemPagesInaccessible(superPage, kSystemPageSize);
    361     setSystemPagesInaccessible(superPage + (kSystemPageSize * 2), kPartitionPageSize - (kSystemPageSize * 2));
    362     // Also make the last partition page a guard page.
    363     setSystemPagesInaccessible(superPage + (kSuperPageSize - kPartitionPageSize), kPartitionPageSize);
    364 
    365     // If we were after a specific address, but didn't get it, assume that
    366     // the system chose a lousy address and re-randomize the next
    367     // allocation.
    368     if (requestedAddress && requestedAddress != superPage)
    369         root->nextSuperPage = 0;
    370 
    371     // We allocated a new super page so update super page metadata.
    372     // First check if this is a new extent or not.
    373     PartitionSuperPageExtentEntry* latestExtent = reinterpret_cast<PartitionSuperPageExtentEntry*>(partitionSuperPageToMetadataArea(superPage));
    374     PartitionSuperPageExtentEntry* currentExtent = root->currentExtent;
    375     bool isNewExtent = (superPage != requestedAddress);
    376     if (UNLIKELY(isNewExtent)) {
    377         latestExtent->next = 0;
    378         if (UNLIKELY(!currentExtent)) {
    379             root->firstExtent = latestExtent;
    380         } else {
    381             ASSERT(currentExtent->superPageBase);
    382             currentExtent->next = latestExtent;
    383         }
    384         root->currentExtent = latestExtent;
    385         currentExtent = latestExtent;
    386         currentExtent->superPageBase = superPage;
    387         currentExtent->superPagesEnd = superPage + kSuperPageSize;
    388     } else {
    389         // We allocated next to an existing extent so just nudge the size up a little.
    390         currentExtent->superPagesEnd += kSuperPageSize;
    391         ASSERT(ret >= currentExtent->superPageBase && ret < currentExtent->superPagesEnd);
    392     }
    393     // By storing the root in every extent metadata object, we have a fast way
    394     // to go from a pointer within the partition to the root object.
    395     latestExtent->root = root;
    396 
    397     return ret;
    398 }
    399 
    400 static ALWAYS_INLINE void partitionUnusePage(PartitionRootBase* root, PartitionPage* page)
    401 {
    402     ASSERT(page->bucket->numSystemPagesPerSlotSpan);
    403     void* addr = partitionPageToPointer(page);
    404     partitionDecommitSystemPages(root, addr, page->bucket->numSystemPagesPerSlotSpan * kSystemPageSize);
    405 }
    406 
    407 static ALWAYS_INLINE size_t partitionBucketSlots(const PartitionBucket* bucket)
    408 {
    409     return (bucket->numSystemPagesPerSlotSpan * kSystemPageSize) / bucket->slotSize;
    410 }
    411 
    412 static ALWAYS_INLINE size_t partitionBucketPartitionPages(const PartitionBucket* bucket)
    413 {
    414     return (bucket->numSystemPagesPerSlotSpan + (kNumSystemPagesPerPartitionPage - 1)) / kNumSystemPagesPerPartitionPage;
    415 }
    416 
    417 static ALWAYS_INLINE void partitionPageReset(PartitionPage* page, PartitionBucket* bucket)
    418 {
    419     ASSERT(page != &PartitionRootGeneric::gSeedPage);
    420     page->numAllocatedSlots = 0;
    421     page->numUnprovisionedSlots = partitionBucketSlots(bucket);
    422     ASSERT(page->numUnprovisionedSlots);
    423     page->bucket = bucket;
    424     page->nextPage = 0;
    425     // NULLing the freelist is not strictly necessary but it makes an ASSERT in partitionPageFillFreelist simpler.
    426     page->freelistHead = 0;
    427     page->pageOffset = 0;
    428     page->freeCacheIndex = -1;
    429     size_t numPartitionPages = partitionBucketPartitionPages(bucket);
    430     size_t i;
    431     char* pageCharPtr = reinterpret_cast<char*>(page);
    432     for (i = 1; i < numPartitionPages; ++i) {
    433         pageCharPtr += kPageMetadataSize;
    434         PartitionPage* secondaryPage = reinterpret_cast<PartitionPage*>(pageCharPtr);
    435         secondaryPage->pageOffset = i;
    436     }
    437 }
    438 
    439 static ALWAYS_INLINE char* partitionPageAllocAndFillFreelist(PartitionPage* page)
    440 {
    441     ASSERT(page != &PartitionRootGeneric::gSeedPage);
    442     size_t numSlots = page->numUnprovisionedSlots;
    443     ASSERT(numSlots);
    444     PartitionBucket* bucket = page->bucket;
    445     // We should only get here when _every_ slot is either used or unprovisioned.
    446     // (The third state is "on the freelist". If we have a non-empty freelist, we should not get here.)
    447     ASSERT(numSlots + page->numAllocatedSlots == partitionBucketSlots(bucket));
    448     // Similarly, make explicitly sure that the freelist is empty.
    449     ASSERT(!page->freelistHead);
    450     ASSERT(page->numAllocatedSlots >= 0);
    451 
    452     size_t size = bucket->slotSize;
    453     char* base = reinterpret_cast<char*>(partitionPageToPointer(page));
    454     char* returnObject = base + (size * page->numAllocatedSlots);
    455     char* firstFreelistPointer = returnObject + size;
    456     char* firstFreelistPointerExtent = firstFreelistPointer + sizeof(PartitionFreelistEntry*);
    457     // Our goal is to fault as few system pages as possible. We calculate the
    458     // page containing the "end" of the returned slot, and then allow freelist
    459     // pointers to be written up to the end of that page.
    460     char* subPageLimit = reinterpret_cast<char*>((reinterpret_cast<uintptr_t>(firstFreelistPointer) + kSystemPageOffsetMask) & kSystemPageBaseMask);
    461     char* slotsLimit = returnObject + (size * page->numUnprovisionedSlots);
    462     char* freelistLimit = subPageLimit;
    463     if (UNLIKELY(slotsLimit < freelistLimit))
    464         freelistLimit = slotsLimit;
    465 
    466     size_t numNewFreelistEntries = 0;
    467     if (LIKELY(firstFreelistPointerExtent <= freelistLimit)) {
    468         // Only consider used space in the slot span. If we consider wasted
    469         // space, we may get an off-by-one when a freelist pointer fits in the
    470         // wasted space, but a slot does not.
    471         // We know we can fit at least one freelist pointer.
    472         numNewFreelistEntries = 1;
    473         // Any further entries require space for the whole slot span.
    474         numNewFreelistEntries += (freelistLimit - firstFreelistPointerExtent) / size;
    475     }
    476 
    477     // We always return an object slot -- that's the +1 below.
    478     // We do not neccessarily create any new freelist entries, because we cross sub page boundaries frequently for large bucket sizes.
    479     ASSERT(numNewFreelistEntries + 1 <= numSlots);
    480     numSlots -= (numNewFreelistEntries + 1);
    481     page->numUnprovisionedSlots = numSlots;
    482     page->numAllocatedSlots++;
    483 
    484     if (LIKELY(numNewFreelistEntries)) {
    485         char* freelistPointer = firstFreelistPointer;
    486         PartitionFreelistEntry* entry = reinterpret_cast<PartitionFreelistEntry*>(freelistPointer);
    487         page->freelistHead = entry;
    488         while (--numNewFreelistEntries) {
    489             freelistPointer += size;
    490             PartitionFreelistEntry* nextEntry = reinterpret_cast<PartitionFreelistEntry*>(freelistPointer);
    491             entry->next = partitionFreelistMask(nextEntry);
    492             entry = nextEntry;
    493         }
    494         entry->next = partitionFreelistMask(0);
    495     } else {
    496         page->freelistHead = 0;
    497     }
    498     return returnObject;
    499 }
    500 
    501 // This helper function scans the active page list for a suitable new active
    502 // page, starting at the passed in page.
    503 // When it finds a suitable new active page (one that has free slots), it is
    504 // set as the new active page and true is returned. If there is no suitable new
    505 // active page, false is returned and the current active page is set to null.
    506 // As potential pages are scanned, they are tidied up according to their state.
    507 // Freed pages are swept on to the free page list and full pages are unlinked
    508 // from any list.
    509 static ALWAYS_INLINE bool partitionSetNewActivePage(PartitionPage* page)
    510 {
    511     if (page == &PartitionRootBase::gSeedPage) {
    512         ASSERT(!page->nextPage);
    513         return false;
    514     }
    515 
    516     PartitionPage* nextPage = 0;
    517     PartitionBucket* bucket = page->bucket;
    518 
    519     for (; page; page = nextPage) {
    520         nextPage = page->nextPage;
    521         ASSERT(page->bucket == bucket);
    522         ASSERT(page != bucket->freePagesHead);
    523         ASSERT(!bucket->freePagesHead || page != bucket->freePagesHead->nextPage);
    524 
    525         // Page is usable if it has something on the freelist, or unprovisioned
    526         // slots that can be turned into a freelist.
    527         if (LIKELY(page->freelistHead != 0) || LIKELY(page->numUnprovisionedSlots)) {
    528             bucket->activePagesHead = page;
    529             return true;
    530         }
    531 
    532         ASSERT(page->numAllocatedSlots >= 0);
    533         if (LIKELY(page->numAllocatedSlots == 0)) {
    534             ASSERT(page->freeCacheIndex == -1);
    535             // We hit a free page, so shepherd it on to the free page list.
    536             page->nextPage = bucket->freePagesHead;
    537             bucket->freePagesHead = page;
    538         } else {
    539             // If we get here, we found a full page. Skip over it too, and also
    540             // tag it as full (via a negative value). We need it tagged so that
    541             // free'ing can tell, and move it back into the active page list.
    542             ASSERT(page->numAllocatedSlots == static_cast<int>(partitionBucketSlots(bucket)));
    543             page->numAllocatedSlots = -page->numAllocatedSlots;
    544             ++bucket->numFullPages;
    545             // numFullPages is a uint16_t for efficient packing so guard against
    546             // overflow to be safe.
    547             RELEASE_ASSERT(bucket->numFullPages);
    548             // Not necessary but might help stop accidents.
    549             page->nextPage = 0;
    550         }
    551     }
    552 
    553     bucket->activePagesHead = 0;
    554     return false;
    555 }
    556 
    557 struct PartitionDirectMapExtent {
    558     size_t mapSize; // Mapped size, not including guard pages and meta-data.
    559 };
    560 
    561 static ALWAYS_INLINE PartitionDirectMapExtent* partitionPageToDirectMapExtent(PartitionPage* page)
    562 {
    563     ASSERT(partitionBucketIsDirectMapped(page->bucket));
    564     return reinterpret_cast<PartitionDirectMapExtent*>(reinterpret_cast<char*>(page) + 2 * kPageMetadataSize);
    565 }
    566 
    567 static ALWAYS_INLINE void* partitionDirectMap(PartitionRootBase* root, int flags, size_t size)
    568 {
    569     size = partitionDirectMapSize(size);
    570 
    571     // Because we need to fake looking like a super page, We need to allocate
    572     // a bunch of system pages more than "size":
    573     // - The first few system pages are the partition page in which the super
    574     // page metadata is stored. We fault just one system page out of a partition
    575     // page sized clump.
    576     // - We add a trailing guard page.
    577     size_t mapSize = size + kPartitionPageSize + kSystemPageSize;
    578     // Round up to the allocation granularity.
    579     mapSize += kPageAllocationGranularityOffsetMask;
    580     mapSize &= kPageAllocationGranularityBaseMask;
    581 
    582     // TODO: we may want to let the operating system place these allocations
    583     // where it pleases. On 32-bit, this might limit address space
    584     // fragmentation and on 64-bit, this might have useful savings for TLB
    585     // and page table overhead.
    586     // TODO: if upsizing realloc()s are common on large sizes, we could
    587     // consider over-allocating address space on 64-bit, "just in case".
    588     // TODO: consider pre-populating page tables (e.g. MAP_POPULATE on Linux,
    589     // MADV_WILLNEED on POSIX).
    590     // TODO: these pages will be zero-filled. Consider internalizing an
    591     // allocZeroed() API so we can avoid a memset() entirely in this case.
    592     char* ptr = reinterpret_cast<char*>(allocPages(0, mapSize, kSuperPageSize));
    593     if (!ptr) {
    594         if (flags & PartitionAllocReturnNull)
    595             return 0;
    596         partitionOutOfMemory();
    597     }
    598     char* ret = ptr + kPartitionPageSize;
    599     // TODO: due to all the guard paging, this arrangement creates 4 mappings.
    600     // We could get it down to three by using read-only for the metadata page,
    601     // or perhaps two by leaving out the trailing guard page on 64-bit.
    602     setSystemPagesInaccessible(ptr, kSystemPageSize);
    603     setSystemPagesInaccessible(ptr + (kSystemPageSize * 2), kPartitionPageSize - (kSystemPageSize * 2));
    604     setSystemPagesInaccessible(ret + size, kSystemPageSize);
    605 
    606     PartitionSuperPageExtentEntry* extent = reinterpret_cast<PartitionSuperPageExtentEntry*>(partitionSuperPageToMetadataArea(ptr));
    607     extent->root = root;
    608     PartitionPage* page = partitionPointerToPageNoAlignmentCheck(ret);
    609     PartitionBucket* bucket = reinterpret_cast<PartitionBucket*>(reinterpret_cast<char*>(page) + kPageMetadataSize);
    610     page->freelistHead = 0;
    611     page->nextPage = 0;
    612     page->bucket = bucket;
    613     page->numAllocatedSlots = 1;
    614     page->numUnprovisionedSlots = 0;
    615     page->pageOffset = 0;
    616     page->freeCacheIndex = 0;
    617 
    618     bucket->activePagesHead = 0;
    619     bucket->freePagesHead = 0;
    620     bucket->slotSize = size;
    621     bucket->numSystemPagesPerSlotSpan = 0;
    622     bucket->numFullPages = 0;
    623 
    624     PartitionDirectMapExtent* mapExtent = partitionPageToDirectMapExtent(page);
    625     mapExtent->mapSize = mapSize - kPartitionPageSize - kSystemPageSize;
    626 
    627     return ret;
    628 }
    629 
    630 static ALWAYS_INLINE void partitionDirectUnmap(PartitionPage* page)
    631 {
    632     size_t unmapSize = partitionPageToDirectMapExtent(page)->mapSize;
    633 
    634     // Add on the size of the trailing guard page and preceeding partition
    635     // page.
    636     unmapSize += kPartitionPageSize + kSystemPageSize;
    637 
    638     ASSERT(!(unmapSize & kPageAllocationGranularityOffsetMask));
    639 
    640     char* ptr = reinterpret_cast<char*>(partitionPageToPointer(page));
    641     // Account for the mapping starting a partition page before the actual
    642     // allocation address.
    643     ptr -= kPartitionPageSize;
    644 
    645     freePages(ptr, unmapSize);
    646 }
    647 
    648 void* partitionAllocSlowPath(PartitionRootBase* root, int flags, size_t size, PartitionBucket* bucket)
    649 {
    650     // The slow path is called when the freelist is empty.
    651     ASSERT(!bucket->activePagesHead->freelistHead);
    652 
    653     // For the partitionAllocGeneric API, we have a bunch of buckets marked
    654     // as special cases. We bounce them through to the slow path so that we
    655     // can still have a blazing fast hot path due to lack of corner-case
    656     // branches.
    657     bool returnNull = flags & PartitionAllocReturnNull;
    658     if (UNLIKELY(partitionBucketIsDirectMapped(bucket))) {
    659         ASSERT(size > kGenericMaxBucketed);
    660         ASSERT(bucket == &PartitionRootBase::gPagedBucket);
    661         if (size > kGenericMaxDirectMapped) {
    662             if (returnNull)
    663                 return 0;
    664             RELEASE_ASSERT(false);
    665         }
    666         return partitionDirectMap(root, flags, size);
    667     }
    668 
    669     // First, look for a usable page in the existing active pages list.
    670     // Change active page, accepting the current page as a candidate.
    671     if (LIKELY(partitionSetNewActivePage(bucket->activePagesHead))) {
    672         PartitionPage* newPage = bucket->activePagesHead;
    673         if (LIKELY(newPage->freelistHead != 0)) {
    674             PartitionFreelistEntry* ret = newPage->freelistHead;
    675             newPage->freelistHead = partitionFreelistMask(ret->next);
    676             newPage->numAllocatedSlots++;
    677             return ret;
    678         }
    679         ASSERT(newPage->numUnprovisionedSlots);
    680         return partitionPageAllocAndFillFreelist(newPage);
    681     }
    682 
    683     // Second, look in our list of freed but reserved pages.
    684     PartitionPage* newPage = bucket->freePagesHead;
    685     if (LIKELY(newPage != 0)) {
    686         ASSERT(newPage != &PartitionRootGeneric::gSeedPage);
    687         ASSERT(!newPage->freelistHead);
    688         ASSERT(!newPage->numAllocatedSlots);
    689         ASSERT(!newPage->numUnprovisionedSlots);
    690         ASSERT(newPage->freeCacheIndex == -1);
    691         bucket->freePagesHead = newPage->nextPage;
    692         void* addr = partitionPageToPointer(newPage);
    693         partitionRecommitSystemPages(root, addr, newPage->bucket->numSystemPagesPerSlotSpan * kSystemPageSize);
    694     } else {
    695         // Third. If we get here, we need a brand new page.
    696         size_t numPartitionPages = partitionBucketPartitionPages(bucket);
    697         void* rawNewPage = partitionAllocPartitionPages(root, flags, numPartitionPages);
    698         if (UNLIKELY(!rawNewPage)) {
    699             ASSERT(returnNull);
    700             return 0;
    701         }
    702         // Skip the alignment check because it depends on page->bucket, which is not yet set.
    703         newPage = partitionPointerToPageNoAlignmentCheck(rawNewPage);
    704     }
    705 
    706     partitionPageReset(newPage, bucket);
    707     bucket->activePagesHead = newPage;
    708     return partitionPageAllocAndFillFreelist(newPage);
    709 }
    710 
    711 static ALWAYS_INLINE void partitionFreePage(PartitionRootBase* root, PartitionPage* page)
    712 {
    713     ASSERT(page->freelistHead);
    714     ASSERT(!page->numAllocatedSlots);
    715     partitionUnusePage(root, page);
    716     // We actually leave the freed page in the active list. We'll sweep it on
    717     // to the free page list when we next walk the active page list. Pulling
    718     // this trick enables us to use a singly-linked page list for all cases,
    719     // which is critical in keeping the page metadata structure down to 32
    720     // bytes in size.
    721     page->freelistHead = 0;
    722     page->numUnprovisionedSlots = 0;
    723 }
    724 
    725 static ALWAYS_INLINE void partitionRegisterEmptyPage(PartitionPage* page)
    726 {
    727     PartitionRootBase* root = partitionPageToRoot(page);
    728 
    729     // If the page is already registered as empty, give it another life.
    730     if (page->freeCacheIndex != -1) {
    731         ASSERT(page->freeCacheIndex >= 0);
    732         ASSERT(static_cast<unsigned>(page->freeCacheIndex) < kMaxFreeableSpans);
    733         ASSERT(root->globalEmptyPageRing[page->freeCacheIndex] == page);
    734         root->globalEmptyPageRing[page->freeCacheIndex] = 0;
    735     }
    736 
    737     size_t currentIndex = root->globalEmptyPageRingIndex;
    738     PartitionPage* pageToFree = root->globalEmptyPageRing[currentIndex];
    739     // The page might well have been re-activated, filled up, etc. before we get
    740     // around to looking at it here.
    741     if (pageToFree) {
    742         ASSERT(pageToFree != &PartitionRootBase::gSeedPage);
    743         ASSERT(pageToFree->freeCacheIndex >= 0);
    744         ASSERT(static_cast<unsigned>(pageToFree->freeCacheIndex) < kMaxFreeableSpans);
    745         ASSERT(pageToFree == root->globalEmptyPageRing[pageToFree->freeCacheIndex]);
    746         if (!pageToFree->numAllocatedSlots && pageToFree->freelistHead) {
    747             // The page is still empty, and not freed, so _really_ free it.
    748             partitionFreePage(root, pageToFree);
    749         }
    750         pageToFree->freeCacheIndex = -1;
    751     }
    752 
    753     // We put the empty slot span on our global list of "pages that were once
    754     // empty". thus providing it a bit of breathing room to get re-used before
    755     // we really free it. This improves performance, particularly on Mac OS X
    756     // which has subpar memory management performance.
    757     root->globalEmptyPageRing[currentIndex] = page;
    758     page->freeCacheIndex = currentIndex;
    759     ++currentIndex;
    760     if (currentIndex == kMaxFreeableSpans)
    761         currentIndex = 0;
    762     root->globalEmptyPageRingIndex = currentIndex;
    763 }
    764 
    765 void partitionFreeSlowPath(PartitionPage* page)
    766 {
    767     PartitionBucket* bucket = page->bucket;
    768     ASSERT(page != &PartitionRootGeneric::gSeedPage);
    769     ASSERT(bucket->activePagesHead != &PartitionRootGeneric::gSeedPage);
    770     if (LIKELY(page->numAllocatedSlots == 0)) {
    771         // Page became fully unused.
    772         if (UNLIKELY(partitionBucketIsDirectMapped(bucket))) {
    773             partitionDirectUnmap(page);
    774             return;
    775         }
    776         // If it's the current active page, attempt to change it. We'd prefer to leave
    777         // the page empty as a gentle force towards defragmentation.
    778         if (LIKELY(page == bucket->activePagesHead) && page->nextPage) {
    779             if (partitionSetNewActivePage(page->nextPage)) {
    780                 ASSERT(bucket->activePagesHead != page);
    781                 // Link the empty page back in after the new current page, to
    782                 // avoid losing a reference to it.
    783                 // TODO: consider walking the list to link the empty page after
    784                 // all non-empty pages?
    785                 PartitionPage* currentPage = bucket->activePagesHead;
    786                 page->nextPage = currentPage->nextPage;
    787                 currentPage->nextPage = page;
    788             } else {
    789                 bucket->activePagesHead = page;
    790                 page->nextPage = 0;
    791             }
    792         }
    793         partitionRegisterEmptyPage(page);
    794     } else {
    795         // Ensure that the page is full. That's the only valid case if we
    796         // arrive here.
    797         ASSERT(page->numAllocatedSlots < 0);
    798         // A transition of numAllocatedSlots from 0 to -1 is not legal, and
    799         // likely indicates a double-free.
    800         RELEASE_ASSERT(page->numAllocatedSlots != -1);
    801         page->numAllocatedSlots = -page->numAllocatedSlots - 2;
    802         ASSERT(page->numAllocatedSlots == static_cast<int>(partitionBucketSlots(bucket) - 1));
    803         // Fully used page became partially used. It must be put back on the
    804         // non-full page list. Also make it the current page to increase the
    805         // chances of it being filled up again. The old current page will be
    806         // the next page.
    807         page->nextPage = bucket->activePagesHead;
    808         bucket->activePagesHead = page;
    809         --bucket->numFullPages;
    810         // Special case: for a partition page with just a single slot, it may
    811         // now be empty and we want to run it through the empty logic.
    812         if (UNLIKELY(page->numAllocatedSlots == 0))
    813             partitionFreeSlowPath(page);
    814     }
    815 }
    816 
    817 bool partitionReallocDirectMappedInPlace(PartitionRootGeneric* root, PartitionPage* page, size_t newSize)
    818 {
    819     ASSERT(partitionBucketIsDirectMapped(page->bucket));
    820 
    821     newSize = partitionCookieSizeAdjustAdd(newSize);
    822 
    823     // Note that the new size might be a bucketed size; this function is called
    824     // whenever we're reallocating a direct mapped allocation.
    825     newSize = partitionDirectMapSize(newSize);
    826     if (newSize < kGenericMinDirectMappedDownsize)
    827         return false;
    828 
    829     // bucket->slotSize is the current size of the allocation.
    830     size_t currentSize = page->bucket->slotSize;
    831     if (newSize == currentSize)
    832         return true;
    833 
    834     char* charPtr = static_cast<char*>(partitionPageToPointer(page));
    835 
    836     if (newSize < currentSize) {
    837         size_t mapSize = partitionPageToDirectMapExtent(page)->mapSize;
    838 
    839         // Don't reallocate in-place if new size is less than 80 % of the full
    840         // map size, to avoid holding on to too much unused address space.
    841         if ((newSize / kSystemPageSize) * 5 < (mapSize / kSystemPageSize) * 4)
    842             return false;
    843 
    844         // Shrink by decommitting unneeded pages and making them inaccessible.
    845         size_t decommitSize = currentSize - newSize;
    846         partitionDecommitSystemPages(root, charPtr + newSize, decommitSize);
    847         setSystemPagesInaccessible(charPtr + newSize, decommitSize);
    848     } else if (newSize <= partitionPageToDirectMapExtent(page)->mapSize) {
    849         // Grow within the actually allocated memory. Just need to make the
    850         // pages accessible again.
    851         size_t recommitSize = newSize - currentSize;
    852         setSystemPagesAccessible(charPtr + currentSize, recommitSize);
    853         partitionRecommitSystemPages(root, charPtr + currentSize, recommitSize);
    854 
    855 #ifndef NDEBUG
    856         memset(charPtr + currentSize, kUninitializedByte, recommitSize);
    857 #endif
    858     } else {
    859         // We can't perform the realloc in-place.
    860         // TODO: support this too when possible.
    861         return false;
    862     }
    863 
    864 #ifndef NDEBUG
    865     // Write a new trailing cookie.
    866     partitionCookieWriteValue(charPtr + newSize - kCookieSize);
    867 #endif
    868 
    869     page->bucket->slotSize = newSize;
    870     return true;
    871 }
    872 
    873 void* partitionReallocGeneric(PartitionRootGeneric* root, void* ptr, size_t newSize)
    874 {
    875 #if defined(MEMORY_TOOL_REPLACES_ALLOCATOR)
    876     return realloc(ptr, newSize);
    877 #else
    878     if (UNLIKELY(!ptr))
    879         return partitionAllocGeneric(root, newSize);
    880     if (UNLIKELY(!newSize)) {
    881         partitionFreeGeneric(root, ptr);
    882         return 0;
    883     }
    884 
    885     RELEASE_ASSERT(newSize <= kGenericMaxDirectMapped);
    886 
    887     ASSERT(partitionPointerIsValid(partitionCookieFreePointerAdjust(ptr)));
    888 
    889     PartitionPage* page = partitionPointerToPage(partitionCookieFreePointerAdjust(ptr));
    890 
    891     if (UNLIKELY(partitionBucketIsDirectMapped(page->bucket))) {
    892         // We may be able to perform the realloc in place by changing the
    893         // accessibility of memory pages and, if reducing the size, decommitting
    894         // them.
    895         if (partitionReallocDirectMappedInPlace(root, page, newSize))
    896             return ptr;
    897     }
    898 
    899     size_t actualNewSize = partitionAllocActualSize(root, newSize);
    900     size_t actualOldSize = partitionAllocGetSize(ptr);
    901 
    902     // TODO: note that tcmalloc will "ignore" a downsizing realloc() unless the
    903     // new size is a significant percentage smaller. We could do the same if we
    904     // determine it is a win.
    905     if (actualNewSize == actualOldSize) {
    906         // Trying to allocate a block of size newSize would give us a block of
    907         // the same size as the one we've already got, so no point in doing
    908         // anything here.
    909         return ptr;
    910     }
    911 
    912     // This realloc cannot be resized in-place. Sadness.
    913     void* ret = partitionAllocGeneric(root, newSize);
    914     size_t copySize = actualOldSize;
    915     if (newSize < copySize)
    916         copySize = newSize;
    917 
    918     memcpy(ret, ptr, copySize);
    919     partitionFreeGeneric(root, ptr);
    920     return ret;
    921 #endif
    922 }
    923 
    924 #ifndef NDEBUG
    925 
    926 void partitionDumpStats(const PartitionRoot& root)
    927 {
    928     size_t i;
    929     size_t totalLive = 0;
    930     size_t totalResident = 0;
    931     size_t totalFreeable = 0;
    932     for (i = 0; i < root.numBuckets; ++i) {
    933         const PartitionBucket& bucket = root.buckets()[i];
    934         if (bucket.activePagesHead == &PartitionRootGeneric::gSeedPage && !bucket.freePagesHead && !bucket.numFullPages) {
    935             // Empty bucket with no freelist or full pages. Skip reporting it.
    936             continue;
    937         }
    938         size_t numFreePages = 0;
    939         PartitionPage* freePages = bucket.freePagesHead;
    940         while (freePages) {
    941             ++numFreePages;
    942             freePages = freePages->nextPage;
    943         }
    944         size_t bucketSlotSize = bucket.slotSize;
    945         size_t bucketNumSlots = partitionBucketSlots(&bucket);
    946         size_t bucketUsefulStorage = bucketSlotSize * bucketNumSlots;
    947         size_t bucketPageSize = bucket.numSystemPagesPerSlotSpan * kSystemPageSize;
    948         size_t bucketWaste = bucketPageSize - bucketUsefulStorage;
    949         size_t numActiveBytes = bucket.numFullPages * bucketUsefulStorage;
    950         size_t numResidentBytes = bucket.numFullPages * bucketPageSize;
    951         size_t numFreeableBytes = 0;
    952         size_t numActivePages = 0;
    953         const PartitionPage* page = bucket.activePagesHead;
    954         while (page) {
    955             ASSERT(page != &PartitionRootGeneric::gSeedPage);
    956             // A page may be on the active list but freed and not yet swept.
    957             if (!page->freelistHead && !page->numUnprovisionedSlots && !page->numAllocatedSlots) {
    958                 ++numFreePages;
    959             } else {
    960                 ++numActivePages;
    961                 numActiveBytes += (page->numAllocatedSlots * bucketSlotSize);
    962                 size_t pageBytesResident = (bucketNumSlots - page->numUnprovisionedSlots) * bucketSlotSize;
    963                 // Round up to system page size.
    964                 pageBytesResident = (pageBytesResident + kSystemPageOffsetMask) & kSystemPageBaseMask;
    965                 numResidentBytes += pageBytesResident;
    966                 if (!page->numAllocatedSlots)
    967                     numFreeableBytes += pageBytesResident;
    968             }
    969             page = page->nextPage;
    970         }
    971         totalLive += numActiveBytes;
    972         totalResident += numResidentBytes;
    973         totalFreeable += numFreeableBytes;
    974         printf("bucket size %zu (pageSize %zu waste %zu): %zu alloc/%zu commit/%zu freeable bytes, %zu/%zu/%zu full/active/free pages\n", bucketSlotSize, bucketPageSize, bucketWaste, numActiveBytes, numResidentBytes, numFreeableBytes, static_cast<size_t>(bucket.numFullPages), numActivePages, numFreePages);
    975     }
    976     printf("total live: %zu bytes\n", totalLive);
    977     printf("total resident: %zu bytes\n", totalResident);
    978     printf("total freeable: %zu bytes\n", totalFreeable);
    979     fflush(stdout);
    980 }
    981 
    982 #endif // !NDEBUG
    983 
    984 } // namespace WTF
    985 
    986