Home | History | Annotate | Download | only in src
      1 // Copyright (c) 2008, Google Inc.
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are
      6 // met:
      7 //
      8 //     * Redistributions of source code must retain the above copyright
      9 // notice, this list of conditions and the following disclaimer.
     10 //     * Redistributions in binary form must reproduce the above
     11 // copyright notice, this list of conditions and the following disclaimer
     12 // in the documentation and/or other materials provided with the
     13 // distribution.
     14 //     * Neither the name of Google Inc. nor the names of its
     15 // contributors may be used to endorse or promote products derived from
     16 // this software without specific prior written permission.
     17 //
     18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30 // ---
     31 // Author: Sanjay Ghemawat <opensource (at) google.com>
     32 //
     33 // Common definitions for tcmalloc code.
     34 
     35 #ifndef TCMALLOC_COMMON_H_
     36 #define TCMALLOC_COMMON_H_
     37 
     38 #include "config.h"
     39 #include <stddef.h>                     // for size_t
     40 #ifdef HAVE_STDINT_H
     41 #include <stdint.h>                     // for uintptr_t, uint64_t
     42 #endif
     43 #include "free_list.h"  // for SIZE_CLASS macros
     44 #include "internal_logging.h"  // for ASSERT, etc
     45 
     46 // Type that can hold a page number
     47 typedef uintptr_t PageID;
     48 
     49 // Type that can hold the length of a run of pages
     50 typedef uintptr_t Length;
     51 
     52 //-------------------------------------------------------------------
     53 // Configuration
     54 //-------------------------------------------------------------------
     55 
     56 // Using large pages speeds up the execution at a cost of larger memory use.
     57 // Deallocation may speed up by a factor as the page map gets 8x smaller, so
     58 // lookups in the page map result in fewer L2 cache misses, which translates to
     59 // speedup for application/platform combinations with high L2 cache pressure.
     60 // As the number of size classes increases with large pages, we increase
     61 // the thread cache allowance to avoid passing more free ranges to and from
     62 // central lists.  Also, larger pages are less likely to get freed.
     63 // These two factors cause a bounded increase in memory use.
     64 
     65 static const size_t kAlignment  = 8;
     66 
     67 // Constants dependant on tcmalloc configuration and archetecture.  Chromium
     68 // tunes these constants.
     69 // We need to guarantee the smallest class size is big enough to hold the
     70 // pointers that form the free list.
     71 static const size_t kNumFreeListPointers =
     72   (tcmalloc::kSupportsDoublyLinkedList ? 2 : 1);
     73 static const size_t kLinkSize = kNumFreeListPointers * sizeof(void *);
     74 static const size_t kMinClassSize =
     75   (kLinkSize > kAlignment ? kLinkSize : kAlignment);
     76 static const size_t kSkippedClasses = (kAlignment < kMinClassSize ? 1 : 0);
     77 
     78 #if defined(TCMALLOC_LARGE_PAGES)
     79 static const size_t kPageShift  = 15;
     80 static const size_t kNumClasses = 78 - kSkippedClasses;
     81 #else
     82 // Original TCMalloc code used kPageShift == 13.  In Chromium, we changed
     83 // this to 12 (as was done in prior versions of TCMalloc).
     84 static const size_t kPageShift  = 12;
     85 static const size_t kNumClasses = 54 - kSkippedClasses;
     86 #endif
     87 static const size_t kMaxThreadCacheSize = 4 << 20;
     88 
     89 static const size_t kPageSize   = 1 << kPageShift;
     90 // Original TCMalloc code used kMaxSize == 256 * 1024.  In Chromium, we
     91 // changed this to 32K, and represent it in terms of page size (as was done
     92 // in prior versions of TCMalloc).
     93 static const size_t kMaxSize    = 8u * kPageSize;
     94 // For all span-lengths < kMaxPages we keep an exact-size list.
     95 static const size_t kMaxPages = 1 << (20 - kPageShift);
     96 
     97 // Default bound on the total amount of thread caches.
     98 #ifdef TCMALLOC_SMALL_BUT_SLOW
     99 // Make the overall thread cache no bigger than that of a single thread
    100 // for the small memory footprint case.
    101 static const size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize;
    102 #else
    103 static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize;
    104 #endif
    105 
    106 // Lower bound on the per-thread cache sizes
    107 static const size_t kMinThreadCacheSize = kMaxSize * 2;
    108 
    109 // The number of bytes one ThreadCache will steal from another when
    110 // the first ThreadCache is forced to Scavenge(), delaying the
    111 // next call to Scavenge for this thread.
    112 static const size_t kStealAmount = 1 << 16;
    113 
    114 // The number of times that a deallocation can cause a freelist to
    115 // go over its max_length() before shrinking max_length().
    116 static const int kMaxOverages = 3;
    117 
    118 // Maximum length we allow a per-thread free-list to have before we
    119 // move objects from it into the corresponding central free-list.  We
    120 // want this big to avoid locking the central free-list too often.  It
    121 // should not hurt to make this list somewhat big because the
    122 // scavenging code will shrink it down when its contents are not in use.
    123 static const int kMaxDynamicFreeListLength = 8192;
    124 
    125 static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;
    126 
    127 #if defined __x86_64__
    128 // All current and planned x86_64 processors only look at the lower 48 bits
    129 // in virtual to physical address translation.  The top 16 are thus unused.
    130 // TODO(rus): Under what operating systems can we increase it safely to 17?
    131 // This lets us use smaller page maps.  On first allocation, a 36-bit page map
    132 // uses only 96 KB instead of the 4.5 MB used by a 52-bit page map.
    133 static const int kAddressBits = (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48);
    134 #else
    135 static const int kAddressBits = 8 * sizeof(void*);
    136 #endif
    137 
    138 namespace tcmalloc {
    139 
    140 // Convert byte size into pages.  This won't overflow, but may return
    141 // an unreasonably large value if bytes is huge enough.
    142 inline Length pages(size_t bytes) {
    143   return (bytes >> kPageShift) +
    144       ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
    145 }
    146 
    147 // For larger allocation sizes, we use larger memory alignments to
    148 // reduce the number of size classes.
    149 int AlignmentForSize(size_t size);
    150 
    151 // Size-class information + mapping
    152 class SizeMap {
    153  private:
    154   // Number of objects to move between a per-thread list and a central
    155   // list in one shot.  We want this to be not too small so we can
    156   // amortize the lock overhead for accessing the central list.  Making
    157   // it too big may temporarily cause unnecessary memory wastage in the
    158   // per-thread free list until the scavenger cleans up the list.
    159   int num_objects_to_move_[kNumClasses];
    160 
    161   //-------------------------------------------------------------------
    162   // Mapping from size to size_class and vice versa
    163   //-------------------------------------------------------------------
    164 
    165   // Sizes <= 1024 have an alignment >= 8.  So for such sizes we have an
    166   // array indexed by ceil(size/8).  Sizes > 1024 have an alignment >= 128.
    167   // So for these larger sizes we have an array indexed by ceil(size/128).
    168   //
    169   // We flatten both logical arrays into one physical array and use
    170   // arithmetic to compute an appropriate index.  The constants used by
    171   // ClassIndex() were selected to make the flattening work.
    172   //
    173   // Examples:
    174   //   Size       Expression                      Index
    175   //   -------------------------------------------------------
    176   //   0          (0 + 7) / 8                     0
    177   //   1          (1 + 7) / 8                     1
    178   //   ...
    179   //   1024       (1024 + 7) / 8                  128
    180   //   1025       (1025 + 127 + (120<<7)) / 128   129
    181   //   ...
    182   //   32768      (32768 + 127 + (120<<7)) / 128  376
    183   static const int kMaxSmallSize = 1024;
    184   static const size_t kClassArraySize =
    185       ((kMaxSize + 127 + (120 << 7)) >> 7) + 1;
    186   unsigned char class_array_[kClassArraySize];
    187 
    188   // Compute index of the class_array[] entry for a given size
    189   static inline int ClassIndex(int s) {
    190     ASSERT(0 <= s);
    191     ASSERT(s <= kMaxSize);
    192     const bool big = (s > kMaxSmallSize);
    193     const int add_amount = big ? (127 + (120<<7)) : 7;
    194     const int shift_amount = big ? 7 : 3;
    195     return (s + add_amount) >> shift_amount;
    196   }
    197 
    198   int NumMoveSize(size_t size);
    199 
    200   // Mapping from size class to max size storable in that class
    201   size_t class_to_size_[kNumClasses];
    202 
    203   // Mapping from size class to number of pages to allocate at a time
    204   size_t class_to_pages_[kNumClasses];
    205 
    206  public:
    207   // Constructor should do nothing since we rely on explicit Init()
    208   // call, which may or may not be called before the constructor runs.
    209   SizeMap() { }
    210 
    211   // Initialize the mapping arrays
    212   void Init();
    213 
    214   inline int SizeClass(int size) {
    215     return class_array_[ClassIndex(size)];
    216   }
    217 
    218   // Get the byte-size for a specified class
    219   inline size_t ByteSizeForClass(size_t cl) {
    220     return class_to_size_[cl];
    221   }
    222 
    223   // Mapping from size class to max size storable in that class
    224   inline size_t class_to_size(size_t cl) {
    225     return class_to_size_[cl];
    226   }
    227 
    228   // Mapping from size class to number of pages to allocate at a time
    229   inline size_t class_to_pages(size_t cl) {
    230     return class_to_pages_[cl];
    231   }
    232 
    233   // Number of objects to move between a per-thread list and a central
    234   // list in one shot.  We want this to be not too small so we can
    235   // amortize the lock overhead for accessing the central list.  Making
    236   // it too big may temporarily cause unnecessary memory wastage in the
    237   // per-thread free list until the scavenger cleans up the list.
    238   inline int num_objects_to_move(size_t cl) {
    239     return num_objects_to_move_[cl];
    240   }
    241 };
    242 
    243 // Allocates "bytes" worth of memory and returns it.  Increments
    244 // metadata_system_bytes appropriately.  May return NULL if allocation
    245 // fails.  Requires pageheap_lock is held.
    246 void* MetaDataAlloc(size_t bytes);
    247 
    248 // Returns the total number of bytes allocated from the system.
    249 // Requires pageheap_lock is held.
    250 uint64_t metadata_system_bytes();
    251 uint64_t metadata_unmapped_bytes();
    252 
    253 // Adjust metadata_system_bytes to indicate that bytes are actually committed.
    254 // Requires pageheap_lock is held.
    255 void update_metadata_system_bytes(int diff);
    256 void update_metadata_unmapped_bytes(int diff);
    257 
    258 // size/depth are made the same size as a pointer so that some generic
    259 // code below can conveniently cast them back and forth to void*.
    260 static const int kMaxStackDepth = 31;
    261 struct StackTrace {
    262   uintptr_t size;          // Size of object
    263   uintptr_t depth;         // Number of PC values stored in array below
    264   void*     stack[kMaxStackDepth];
    265 };
    266 
    267 }  // namespace tcmalloc
    268 
    269 #endif  // TCMALLOC_COMMON_H_
    270