1 // Copyright (c) 2008, Google Inc. 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are 6 // met: 7 // 8 // * Redistributions of source code must retain the above copyright 9 // notice, this list of conditions and the following disclaimer. 10 // * Redistributions in binary form must reproduce the above 11 // copyright notice, this list of conditions and the following disclaimer 12 // in the documentation and/or other materials provided with the 13 // distribution. 14 // * Neither the name of Google Inc. nor the names of its 15 // contributors may be used to endorse or promote products derived from 16 // this software without specific prior written permission. 17 // 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 // --- 31 // Author: Sanjay Ghemawat <opensource (at) google.com> 32 // 33 // Common definitions for tcmalloc code. 34 35 #ifndef TCMALLOC_COMMON_H_ 36 #define TCMALLOC_COMMON_H_ 37 38 #include "config.h" 39 #include <stddef.h> // for size_t 40 #ifdef HAVE_STDINT_H 41 #include <stdint.h> // for uintptr_t, uint64_t 42 #endif 43 #include "free_list.h" // for SIZE_CLASS macros 44 #include "internal_logging.h" // for ASSERT, etc 45 46 // Type that can hold a page number 47 typedef uintptr_t PageID; 48 49 // Type that can hold the length of a run of pages 50 typedef uintptr_t Length; 51 52 //------------------------------------------------------------------- 53 // Configuration 54 //------------------------------------------------------------------- 55 56 // Using large pages speeds up the execution at a cost of larger memory use. 57 // Deallocation may speed up by a factor as the page map gets 8x smaller, so 58 // lookups in the page map result in fewer L2 cache misses, which translates to 59 // speedup for application/platform combinations with high L2 cache pressure. 60 // As the number of size classes increases with large pages, we increase 61 // the thread cache allowance to avoid passing more free ranges to and from 62 // central lists. Also, larger pages are less likely to get freed. 63 // These two factors cause a bounded increase in memory use. 64 65 static const size_t kAlignment = 8; 66 67 // Constants dependant on tcmalloc configuration and archetecture. Chromium 68 // tunes these constants. 69 // We need to guarantee the smallest class size is big enough to hold the 70 // pointers that form the free list. 71 static const size_t kNumFreeListPointers = 72 (tcmalloc::kSupportsDoublyLinkedList ? 2 : 1); 73 static const size_t kLinkSize = kNumFreeListPointers * sizeof(void *); 74 static const size_t kMinClassSize = 75 (kLinkSize > kAlignment ? kLinkSize : kAlignment); 76 static const size_t kSkippedClasses = (kAlignment < kMinClassSize ? 1 : 0); 77 78 #if defined(TCMALLOC_LARGE_PAGES) 79 static const size_t kPageShift = 15; 80 static const size_t kNumClasses = 78 - kSkippedClasses; 81 #else 82 // Original TCMalloc code used kPageShift == 13. In Chromium, we changed 83 // this to 12 (as was done in prior versions of TCMalloc). 84 static const size_t kPageShift = 12; 85 static const size_t kNumClasses = 54 - kSkippedClasses; 86 #endif 87 static const size_t kMaxThreadCacheSize = 4 << 20; 88 89 static const size_t kPageSize = 1 << kPageShift; 90 // Original TCMalloc code used kMaxSize == 256 * 1024. In Chromium, we 91 // changed this to 32K, and represent it in terms of page size (as was done 92 // in prior versions of TCMalloc). 93 static const size_t kMaxSize = 8u * kPageSize; 94 // For all span-lengths < kMaxPages we keep an exact-size list. 95 static const size_t kMaxPages = 1 << (20 - kPageShift); 96 97 // Default bound on the total amount of thread caches. 98 #ifdef TCMALLOC_SMALL_BUT_SLOW 99 // Make the overall thread cache no bigger than that of a single thread 100 // for the small memory footprint case. 101 static const size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize; 102 #else 103 static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize; 104 #endif 105 106 // Lower bound on the per-thread cache sizes 107 static const size_t kMinThreadCacheSize = kMaxSize * 2; 108 109 // The number of bytes one ThreadCache will steal from another when 110 // the first ThreadCache is forced to Scavenge(), delaying the 111 // next call to Scavenge for this thread. 112 static const size_t kStealAmount = 1 << 16; 113 114 // The number of times that a deallocation can cause a freelist to 115 // go over its max_length() before shrinking max_length(). 116 static const int kMaxOverages = 3; 117 118 // Maximum length we allow a per-thread free-list to have before we 119 // move objects from it into the corresponding central free-list. We 120 // want this big to avoid locking the central free-list too often. It 121 // should not hurt to make this list somewhat big because the 122 // scavenging code will shrink it down when its contents are not in use. 123 static const int kMaxDynamicFreeListLength = 8192; 124 125 static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift; 126 127 #if defined __x86_64__ 128 // All current and planned x86_64 processors only look at the lower 48 bits 129 // in virtual to physical address translation. The top 16 are thus unused. 130 // TODO(rus): Under what operating systems can we increase it safely to 17? 131 // This lets us use smaller page maps. On first allocation, a 36-bit page map 132 // uses only 96 KB instead of the 4.5 MB used by a 52-bit page map. 133 static const int kAddressBits = (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48); 134 #else 135 static const int kAddressBits = 8 * sizeof(void*); 136 #endif 137 138 namespace tcmalloc { 139 140 // Convert byte size into pages. This won't overflow, but may return 141 // an unreasonably large value if bytes is huge enough. 142 inline Length pages(size_t bytes) { 143 return (bytes >> kPageShift) + 144 ((bytes & (kPageSize - 1)) > 0 ? 1 : 0); 145 } 146 147 // For larger allocation sizes, we use larger memory alignments to 148 // reduce the number of size classes. 149 int AlignmentForSize(size_t size); 150 151 // Size-class information + mapping 152 class SizeMap { 153 private: 154 // Number of objects to move between a per-thread list and a central 155 // list in one shot. We want this to be not too small so we can 156 // amortize the lock overhead for accessing the central list. Making 157 // it too big may temporarily cause unnecessary memory wastage in the 158 // per-thread free list until the scavenger cleans up the list. 159 int num_objects_to_move_[kNumClasses]; 160 161 //------------------------------------------------------------------- 162 // Mapping from size to size_class and vice versa 163 //------------------------------------------------------------------- 164 165 // Sizes <= 1024 have an alignment >= 8. So for such sizes we have an 166 // array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128. 167 // So for these larger sizes we have an array indexed by ceil(size/128). 168 // 169 // We flatten both logical arrays into one physical array and use 170 // arithmetic to compute an appropriate index. The constants used by 171 // ClassIndex() were selected to make the flattening work. 172 // 173 // Examples: 174 // Size Expression Index 175 // ------------------------------------------------------- 176 // 0 (0 + 7) / 8 0 177 // 1 (1 + 7) / 8 1 178 // ... 179 // 1024 (1024 + 7) / 8 128 180 // 1025 (1025 + 127 + (120<<7)) / 128 129 181 // ... 182 // 32768 (32768 + 127 + (120<<7)) / 128 376 183 static const int kMaxSmallSize = 1024; 184 static const size_t kClassArraySize = 185 ((kMaxSize + 127 + (120 << 7)) >> 7) + 1; 186 unsigned char class_array_[kClassArraySize]; 187 188 // Compute index of the class_array[] entry for a given size 189 static inline int ClassIndex(int s) { 190 ASSERT(0 <= s); 191 ASSERT(s <= kMaxSize); 192 const bool big = (s > kMaxSmallSize); 193 const int add_amount = big ? (127 + (120<<7)) : 7; 194 const int shift_amount = big ? 7 : 3; 195 return (s + add_amount) >> shift_amount; 196 } 197 198 int NumMoveSize(size_t size); 199 200 // Mapping from size class to max size storable in that class 201 size_t class_to_size_[kNumClasses]; 202 203 // Mapping from size class to number of pages to allocate at a time 204 size_t class_to_pages_[kNumClasses]; 205 206 public: 207 // Constructor should do nothing since we rely on explicit Init() 208 // call, which may or may not be called before the constructor runs. 209 SizeMap() { } 210 211 // Initialize the mapping arrays 212 void Init(); 213 214 inline int SizeClass(int size) { 215 return class_array_[ClassIndex(size)]; 216 } 217 218 // Get the byte-size for a specified class 219 inline size_t ByteSizeForClass(size_t cl) { 220 return class_to_size_[cl]; 221 } 222 223 // Mapping from size class to max size storable in that class 224 inline size_t class_to_size(size_t cl) { 225 return class_to_size_[cl]; 226 } 227 228 // Mapping from size class to number of pages to allocate at a time 229 inline size_t class_to_pages(size_t cl) { 230 return class_to_pages_[cl]; 231 } 232 233 // Number of objects to move between a per-thread list and a central 234 // list in one shot. We want this to be not too small so we can 235 // amortize the lock overhead for accessing the central list. Making 236 // it too big may temporarily cause unnecessary memory wastage in the 237 // per-thread free list until the scavenger cleans up the list. 238 inline int num_objects_to_move(size_t cl) { 239 return num_objects_to_move_[cl]; 240 } 241 }; 242 243 // Allocates "bytes" worth of memory and returns it. Increments 244 // metadata_system_bytes appropriately. May return NULL if allocation 245 // fails. Requires pageheap_lock is held. 246 void* MetaDataAlloc(size_t bytes); 247 248 // Returns the total number of bytes allocated from the system. 249 // Requires pageheap_lock is held. 250 uint64_t metadata_system_bytes(); 251 uint64_t metadata_unmapped_bytes(); 252 253 // Adjust metadata_system_bytes to indicate that bytes are actually committed. 254 // Requires pageheap_lock is held. 255 void update_metadata_system_bytes(int diff); 256 void update_metadata_unmapped_bytes(int diff); 257 258 // size/depth are made the same size as a pointer so that some generic 259 // code below can conveniently cast them back and forth to void*. 260 static const int kMaxStackDepth = 31; 261 struct StackTrace { 262 uintptr_t size; // Size of object 263 uintptr_t depth; // Number of PC values stored in array below 264 void* stack[kMaxStackDepth]; 265 }; 266 267 } // namespace tcmalloc 268 269 #endif // TCMALLOC_COMMON_H_ 270