/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file arena.h
*
* @brief Arena memory manager
*        The arena is a convenient, fast way to manage allocations that are
*        associated with an operation and can all be freed together once that
*        operation has completed. Allocations are cheap: most of the time an
*        allocation is simply an increment of an offset, and individual
*        allocations never need to be freed. All of the arena memory is
*        released at once.
*
******************************************************************************/
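//
// Illustrative usage sketch (not part of this header); DRAW_STATE and the
// sizes below are hypothetical placeholders for whatever the caller allocates:
//
//     CachingArena arena;                                   // defined at the end of this file
//     auto* pDS   = (DRAW_STATE*)arena.Alloc(sizeof(DRAW_STATE));
//     void* pTemp = arena.AllocAligned(4096, 64);           // align must be <= ARENA_BLOCK_ALIGN
//     ...                                                   // use the memory for the operation
//     arena.Reset();                                        // release everything at once
//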
#pragma once

#include <mutex>
#include <algorithm>
#include <atomic>
#include "core/utils.h"

static const size_t ARENA_BLOCK_ALIGN = 64;

struct ArenaBlock
{
    size_t      blockSize = 0;
    ArenaBlock* pNext = nullptr;
};
static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN,
    "Increase ARENA_BLOCK_ALIGN size");

// Default allocator: a thin wrapper over AlignedMalloc/AlignedFree that
// constructs an ArenaBlock header at the start of each allocation.
class DefaultAllocator
{
public:
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));

        ArenaBlock* p = new (AlignedMalloc(size, align)) ArenaBlock();
        p->blockSize = size;
        return p;
    }

    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            // A huge blockSize (0xdd... is a common debug-heap fill pattern)
            // indicates the header was already freed or overwritten.
            SWR_ASSUME_ASSERT(pMem->blockSize < size_t(0xdddddddd));
            AlignedFree(pMem);
        }
    }
};

// Caching allocator for Arena: freed blocks are kept in size-bucketed lists
// and reused by later allocations; FreeOldBlocks() ages the cache and trims
// blocks that have gone unused.
template<uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
struct CachingAllocatorT : DefaultAllocator
{
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
        SWR_ASSUME_ASSERT(size <= uint32_t(-1));

        uint32_t bucket = GetBucketId(size);

        {
            // search cached blocks
            std::lock_guard<std::mutex> l(m_mutex);
            ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
            ArenaBlock* pBlock = SearchBlocks(pPrevBlock, size, align);

            if (pBlock)
            {
                m_cachedSize -= pBlock->blockSize;
                if (pBlock == m_pLastCachedBlocks[bucket])
                {
                    m_pLastCachedBlocks[bucket] = pPrevBlock;
                }
            }
            else
            {
                pPrevBlock = &m_oldCachedBlocks[bucket];
                pBlock = SearchBlocks(pPrevBlock, size, align);

                if (pBlock)
                {
                    m_oldCachedSize -= pBlock->blockSize;
                    if (pBlock == m_pOldLastCachedBlocks[bucket])
                    {
                        m_pOldLastCachedBlocks[bucket] = pPrevBlock;
                    }
                }
            }

            if (pBlock)
            {
                SWR_ASSUME_ASSERT(pPrevBlock && pPrevBlock->pNext == pBlock);
                pPrevBlock->pNext = pBlock->pNext;
                pBlock->pNext = nullptr;

                return pBlock;
            }

            m_totalAllocated += size;

#if 0
            {
                static uint32_t count = 0;
                char buf[128];
                sprintf_s(buf, "Arena Alloc %d 0x%llx bytes - 0x%llx total\n", ++count, uint64_t(size), uint64_t(m_totalAllocated));
                OutputDebugStringA(buf);
            }
#endif
        }

        if (bucket && bucket < (CACHE_NUM_BUCKETS - 1))
        {
            // Make all blocks in this bucket the same size
            size = size_t(1) << (bucket + 1 + CACHE_START_BUCKET_BIT);
        }

        return this->DefaultAllocator::AllocateAligned(size, align);
    }

    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            std::lock_guard<std::mutex> l(m_mutex);
            InsertCachedBlock(GetBucketId(pMem->blockSize), pMem);
        }
    }

    // Age the cache: free the "old" generation if it has grown past
    // MAX_UNUSED_SIZE, then demote the currently cached blocks to the old lists.
    void FreeOldBlocks()
    {
        if (!m_cachedSize) { return; }
        std::lock_guard<std::mutex> l(m_mutex);

        bool doFree = (m_oldCachedSize > MAX_UNUSED_SIZE);

        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            if (doFree)
            {
                ArenaBlock* pBlock = m_oldCachedBlocks[i].pNext;
                while (pBlock)
                {
                    ArenaBlock* pNext = pBlock->pNext;
                    m_oldCachedSize -= pBlock->blockSize;
                    m_totalAllocated -= pBlock->blockSize;
                    this->DefaultAllocator::Free(pBlock);
                    pBlock = pNext;
                }
                m_oldCachedBlocks[i].pNext = nullptr;
                m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
            }

            if (m_pLastCachedBlocks[i] != &m_cachedBlocks[i])
            {
                if (i && i < (CACHE_NUM_BUCKETS - 1))
                {
                    // We know that all blocks are the same size.
                    // Just move the list over.
                    m_pLastCachedBlocks[i]->pNext = m_oldCachedBlocks[i].pNext;
                    m_oldCachedBlocks[i].pNext = m_cachedBlocks[i].pNext;
                    m_cachedBlocks[i].pNext = nullptr;
                    if (m_pOldLastCachedBlocks[i]->pNext)
                    {
                        m_pOldLastCachedBlocks[i] = m_pLastCachedBlocks[i];
                    }
                    m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
                }
                else
                {
                    // The end buckets can have variable sized lists.
                    // Insert each block based on size
                    ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
                    while (pBlock)
                    {
                        ArenaBlock* pNext = pBlock->pNext;
                        pBlock->pNext = nullptr;
                        m_cachedSize -= pBlock->blockSize;
                        InsertCachedBlock<true>(i, pBlock);
                        pBlock = pNext;
                    }

                    m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
                    m_cachedBlocks[i].pNext = nullptr;
                }
            }
        }

        m_oldCachedSize += m_cachedSize;
        m_cachedSize = 0;
    }

    CachingAllocatorT()
    {
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
            m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
        }
    }

    ~CachingAllocatorT()
    {
        // Free all cached blocks
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }
            pBlock = m_oldCachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }
        }
    }

private:
    static uint32_t GetBucketId(size_t blockSize)
    {
        uint32_t bucketId = 0;

#if defined(BitScanReverseSizeT)
        BitScanReverseSizeT((unsigned long*)&bucketId, (blockSize - 1) >> CACHE_START_BUCKET_BIT);
        bucketId = std::min<uint32_t>(bucketId, CACHE_NUM_BUCKETS - 1);
#endif
        // Note: if BitScanReverseSizeT is unavailable, every size stays in bucket 0.

        return bucketId;
    }

    template <bool OldBlockT = false>
    void InsertCachedBlock(uint32_t bucketId, ArenaBlock* pNewBlock)
    {
        SWR_ASSUME_ASSERT(bucketId < CACHE_NUM_BUCKETS);

        ArenaBlock* pPrevBlock = OldBlockT ? &m_oldCachedBlocks[bucketId] : &m_cachedBlocks[bucketId];
        ArenaBlock* pBlock = pPrevBlock->pNext;

        while (pBlock)
        {
            if (pNewBlock->blockSize >= pBlock->blockSize)
            {
                // Insert here
                break;
            }
            pPrevBlock = pBlock;
            pBlock = pBlock->pNext;
        }

        // Insert into list
        SWR_ASSUME_ASSERT(pPrevBlock);
        pPrevBlock->pNext = pNewBlock;
        pNewBlock->pNext = pBlock;

        if (OldBlockT)
        {
            if (m_pOldLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pOldLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_oldCachedSize += pNewBlock->blockSize;
        }
        else
        {
            if (m_pLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_cachedSize += pNewBlock->blockSize;
        }
    }

    static ArenaBlock* SearchBlocks(ArenaBlock*& pPrevBlock, size_t blockSize, size_t align)
    {
        ArenaBlock* pBlock = pPrevBlock->pNext;
        ArenaBlock* pPotentialBlock = nullptr;
        ArenaBlock* pPotentialPrev = nullptr;

        while (pBlock)
        {
            if (pBlock->blockSize >= blockSize)
            {
                if (pBlock == AlignUp(pBlock, align))
                {
                    if (pBlock->blockSize == blockSize)
                    {
                        // Won't find a better match
                        break;
                    }

                    // We could use this as it is larger than we wanted, but
                    // continue to search for a better match
                    pPotentialBlock = pBlock;
                    pPotentialPrev = pPrevBlock;
                }
            }
            else
            {
                // Blocks are sorted by size (biggest first)
                // So, if we get here, there are no blocks
                // large enough, fall through to allocation.
                pBlock = nullptr;
                break;
            }

            pPrevBlock = pBlock;
            pBlock = pBlock->pNext;
        }

        if (!pBlock)
        {
            // Couldn't find an exact match, use next biggest size
            pBlock = pPotentialBlock;
            pPrevBlock = pPotentialPrev;
        }

        return pBlock;
    }

    // buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ...
    static const uint32_t   CACHE_NUM_BUCKETS       = NumBucketsT;
    static const uint32_t   CACHE_START_BUCKET_BIT  = StartBucketBitT;
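    // With the default parameters (NumBucketsT = 8, StartBucketBitT = 12):
    // bucket 0 caches blocks up to 8KB, buckets 1..6 cache fixed block sizes of
    // 16KB, 32KB, ... 512KB (AllocateAligned rounds those requests up to the
    // bucket size), and the last bucket holds anything larger.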
    static const size_t     MAX_UNUSED_SIZE         = sizeof(MEGABYTE);

    ArenaBlock              m_cachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock*             m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock              m_oldCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock*             m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];
    std::mutex              m_mutex;

    size_t                  m_totalAllocated = 0;

    size_t                  m_cachedSize = 0;
    size_t                  m_oldCachedSize = 0;
};
typedef CachingAllocatorT<> CachingAllocator;

template<typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
class TArena
{
public:
    TArena(T& in_allocator)  : m_allocator(in_allocator) {}
    TArena()                 : m_allocator(m_defAllocator) {}
    ~TArena()
    {
        Reset(true);
    }

    void* AllocAligned(size_t size, size_t align)
    {
        if (0 == size)
        {
            return nullptr;
        }

        SWR_ASSERT(align <= ARENA_BLOCK_ALIGN);

        if (m_pCurBlock)
        {
            ArenaBlock* pCurBlock = m_pCurBlock;
            size_t offset = AlignUp(m_offset, align);

            if ((offset + size) <= pCurBlock->blockSize)
            {
                void* pMem = PtrAdd(pCurBlock, offset);
                m_offset = offset + size;
                return pMem;
            }

            // Not enough memory in this block, fall through to allocate
            // a new block
        }

        static const size_t ArenaBlockSize = BlockSizeT;
        size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);

        // Add in one BLOCK_ALIGN unit to store ArenaBlock in.
        blockSize = AlignUp(blockSize, ARENA_BLOCK_ALIGN);

        ArenaBlock* pNewBlock = m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN);    // Arena blocks are always simd byte aligned.
        SWR_ASSERT(pNewBlock != nullptr);

        if (pNewBlock != nullptr)
        {
            m_offset = ARENA_BLOCK_ALIGN;
            pNewBlock->pNext = m_pCurBlock;

            m_pCurBlock = pNewBlock;
        }

        return AllocAligned(size, align);
    }

    void* Alloc(size_t size)
    {
        return AllocAligned(size, 1);
    }

    void* AllocAlignedSync(size_t size, size_t align)
    {
        std::lock_guard<std::mutex> l(m_mutex);
        return AllocAligned(size, align);
    }

    void* AllocSync(size_t size)
    {
        std::lock_guard<std::mutex> l(m_mutex);
        return Alloc(size);
    }

    void Reset(bool removeAll = false)
    {
        m_offset = ARENA_BLOCK_ALIGN;

        if (m_pCurBlock)
        {
            ArenaBlock* pUsedBlocks = m_pCurBlock->pNext;
            m_pCurBlock->pNext = nullptr;
            while (pUsedBlocks)
            {
                ArenaBlock* pBlock = pUsedBlocks;
                pUsedBlocks = pBlock->pNext;

                m_allocator.Free(pBlock);
            }

            if (removeAll)
            {
                m_allocator.Free(m_pCurBlock);
                m_pCurBlock = nullptr;
            }
        }
    }

    bool IsEmpty()
    {
        return (m_pCurBlock == nullptr) || (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
    }

private:

    ArenaBlock*         m_pCurBlock = nullptr;
    size_t              m_offset    = ARENA_BLOCK_ALIGN;

    /// @note Mutex is only used by sync allocation functions.
    std::mutex          m_mutex;

    DefaultAllocator    m_defAllocator;
    T&                  m_allocator;
};

using StdArena      = TArena<DefaultAllocator>;
using CachingArena  = TArena<CachingAllocator>;
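
// Illustrative sketch (not part of this header): several arenas can share one
// CachingAllocator so that blocks released by Reset() are recycled across all
// of them; the allocator must outlive every arena that references it. The
// names below are hypothetical.
//
//     static CachingAllocator g_cachingAllocator;
//
//     CachingArena arena0(g_cachingAllocator);
//     CachingArena arena1(g_cachingAllocator);
//     ...
//     arena0.Reset(true);                    // blocks go back into the cache
//     g_cachingAllocator.FreeOldBlocks();    // periodically trim unused blocks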