1 // Copyright (c) 2005, 2007, Google Inc. 2 // All rights reserved. 3 // Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011 Apple Inc. All rights reserved. 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 // --- 32 // Author: Sanjay Ghemawat <opensource (at) google.com> 33 // 34 // A malloc that uses a per-thread cache to satisfy small malloc requests. 35 // (The time for malloc/free of a small object drops from 300 ns to 50 ns.) 36 // 37 // See doc/tcmalloc.html for a high-level 38 // description of how this malloc works. 39 // 40 // SYNCHRONIZATION 41 // 1. The thread-specific lists are accessed without acquiring any locks. 42 // This is safe because each such list is only accessed by one thread. 43 // 2. We have a lock per central free-list, and hold it while manipulating 44 // the central free list for a particular size. 45 // 3. The central page allocator is protected by "pageheap_lock". 46 // 4. The pagemap (which maps from page-number to descriptor), 47 // can be read without holding any locks, and written while holding 48 // the "pageheap_lock". 49 // 5. To improve performance, a subset of the information one can get 50 // from the pagemap is cached in a data structure, pagemap_cache_, 51 // that atomically reads and writes its entries. This cache can be 52 // read and written without locking. 53 // 54 // This multi-threaded access to the pagemap is safe for fairly 55 // subtle reasons. We basically assume that when an object X is 56 // allocated by thread A and deallocated by thread B, there must 57 // have been appropriate synchronization in the handoff of object 58 // X from thread A to thread B. The same logic applies to pagemap_cache_. 59 // 60 // THE PAGEID-TO-SIZECLASS CACHE 61 // Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache 62 // returns 0 for a particular PageID then that means "no information," not that 63 // the sizeclass is 0. The cache may have stale information for pages that do 64 // not hold the beginning of any free()'able object. 
Staleness is eliminated 65 // in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and 66 // do_memalign() for all other relevant pages. 67 // 68 // TODO: Bias reclamation to larger addresses 69 // TODO: implement mallinfo/mallopt 70 // TODO: Better testing 71 // 72 // 9/28/2003 (new page-level allocator replaces ptmalloc2): 73 // * malloc/free of small objects goes from ~300 ns to ~50 ns. 74 // * allocation of a reasonably complicated struct 75 // goes from about 1100 ns to about 300 ns. 76 77 #include "config.h" 78 #include "wtf/FastMalloc.h" 79 80 #include "wtf/Assertions.h" 81 #include "wtf/CPU.h" 82 #include "wtf/StdLibExtras.h" 83 84 #if OS(MACOSX) 85 #include <AvailabilityMacros.h> 86 #endif 87 88 #include <limits> 89 #if OS(WIN) 90 #include <windows.h> 91 #else 92 #include <pthread.h> 93 #endif 94 #include <stdlib.h> 95 #include <string.h> 96 97 #ifndef NO_TCMALLOC_SAMPLES 98 #define NO_TCMALLOC_SAMPLES 99 #endif 100 101 #if !USE(SYSTEM_MALLOC) && defined(NDEBUG) 102 #define FORCE_SYSTEM_MALLOC 0 103 #else 104 #define FORCE_SYSTEM_MALLOC 1 105 #endif 106 107 // Harden the pointers stored in the TCMalloc linked lists 108 #if COMPILER(GCC) 109 #define ENABLE_TCMALLOC_HARDENING 1 110 #endif 111 112 // Use a background thread to periodically scavenge memory to release back to the system 113 #define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1 114 115 #ifndef NDEBUG 116 namespace WTF { 117 118 #if OS(WIN) 119 120 // TLS_OUT_OF_INDEXES is not defined on WinCE. 121 #ifndef TLS_OUT_OF_INDEXES 122 #define TLS_OUT_OF_INDEXES 0xffffffff 123 #endif 124 125 static DWORD isForibiddenTlsIndex = TLS_OUT_OF_INDEXES; 126 static const LPVOID kTlsAllowValue = reinterpret_cast<LPVOID>(0); // Must be zero. 127 static const LPVOID kTlsForbiddenValue = reinterpret_cast<LPVOID>(1); 128 129 #if !ASSERT_DISABLED 130 static bool isForbidden() 131 { 132 // By default, fastMalloc is allowed so we don't allocate the 133 // tls index unless we're asked to make it forbidden. If TlsSetValue 134 // has not been called on a thread, the value returned by TlsGetValue is 0. 
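    // (kTlsAllowValue is deliberately zero, so a thread that has never called
    // fastMallocForbid()/fastMallocAllow() reads "allowed" here without any
    // per-thread initialization.)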
135 return (isForibiddenTlsIndex != TLS_OUT_OF_INDEXES) && (TlsGetValue(isForibiddenTlsIndex) == kTlsForbiddenValue); 136 } 137 #endif 138 139 void fastMallocForbid() 140 { 141 if (isForibiddenTlsIndex == TLS_OUT_OF_INDEXES) 142 isForibiddenTlsIndex = TlsAlloc(); // a little racey, but close enough for debug only 143 TlsSetValue(isForibiddenTlsIndex, kTlsForbiddenValue); 144 } 145 146 void fastMallocAllow() 147 { 148 if (isForibiddenTlsIndex == TLS_OUT_OF_INDEXES) 149 return; 150 TlsSetValue(isForibiddenTlsIndex, kTlsAllowValue); 151 } 152 153 #else // !OS(WIN) 154 155 static pthread_key_t isForbiddenKey; 156 static pthread_once_t isForbiddenKeyOnce = PTHREAD_ONCE_INIT; 157 static void initializeIsForbiddenKey() 158 { 159 pthread_key_create(&isForbiddenKey, 0); 160 } 161 162 #if !ASSERT_DISABLED 163 static bool isForbidden() 164 { 165 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey); 166 return !!pthread_getspecific(isForbiddenKey); 167 } 168 #endif 169 170 void fastMallocForbid() 171 { 172 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey); 173 pthread_setspecific(isForbiddenKey, &isForbiddenKey); 174 } 175 176 void fastMallocAllow() 177 { 178 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey); 179 pthread_setspecific(isForbiddenKey, 0); 180 } 181 #endif // OS(WIN) 182 183 } // namespace WTF 184 #endif // NDEBUG 185 186 namespace WTF { 187 188 void* fastZeroedMalloc(size_t n) 189 { 190 void* result = fastMalloc(n); 191 memset(result, 0, n); 192 return result; 193 } 194 195 char* fastStrDup(const char* src) 196 { 197 size_t len = strlen(src) + 1; 198 char* dup = static_cast<char*>(fastMalloc(len)); 199 memcpy(dup, src, len); 200 return dup; 201 } 202 203 } // namespace WTF 204 205 #if FORCE_SYSTEM_MALLOC 206 207 #if OS(MACOSX) 208 #include <malloc/malloc.h> 209 #elif OS(WIN) 210 #include <malloc.h> 211 #endif 212 213 namespace WTF { 214 215 void* fastMalloc(size_t n) 216 { 217 ASSERT(!isForbidden()); 218 219 void* result = malloc(n); 220 ASSERT(result); // We expect tcmalloc underneath, which would crash instead of getting here. 221 222 return result; 223 } 224 225 void* fastCalloc(size_t n_elements, size_t element_size) 226 { 227 ASSERT(!isForbidden()); 228 229 void* result = calloc(n_elements, element_size); 230 ASSERT(result); // We expect tcmalloc underneath, which would crash instead of getting here. 231 232 return result; 233 } 234 235 void fastFree(void* p) 236 { 237 ASSERT(!isForbidden()); 238 239 free(p); 240 } 241 242 void* fastRealloc(void* p, size_t n) 243 { 244 ASSERT(!isForbidden()); 245 246 void* result = realloc(p, n); 247 ASSERT(result); // We expect tcmalloc underneath, which would crash instead of getting here. 248 249 return result; 250 } 251 252 void releaseFastMallocFreeMemory() { } 253 254 FastMallocStatistics fastMallocStatistics() 255 { 256 FastMallocStatistics statistics = { 0, 0, 0 }; 257 return statistics; 258 } 259 260 } // namespace WTF 261 262 #if OS(MACOSX) 263 // This symbol is present in the JavaScriptCore exports file even when FastMalloc is disabled. 264 // It will never be used in this case, so it's type and value are less interesting than its presence. 
265 extern "C" const int jscore_fastmalloc_introspection = 0; 266 #endif 267 268 #else // FORCE_SYSTEM_MALLOC 269 270 #include "Compiler.h" 271 #include "TCPackedCache.h" 272 #include "TCPageMap.h" 273 #include "TCSpinLock.h" 274 #include "TCSystemAlloc.h" 275 #include <algorithm> 276 #include <errno.h> 277 #include <pthread.h> 278 #include <stdarg.h> 279 #include <stddef.h> 280 #if OS(POSIX) 281 #include <unistd.h> 282 #endif 283 #if OS(WIN) 284 #ifndef WIN32_LEAN_AND_MEAN 285 #define WIN32_LEAN_AND_MEAN 286 #endif 287 #include <windows.h> 288 #endif 289 290 #if OS(MACOSX) 291 #include "MallocZoneSupport.h" 292 #include "wtf/HashSet.h" 293 #include "wtf/Vector.h" 294 #else 295 #include "wtf/CurrentTime.h" 296 #endif 297 298 #if HAVE(DISPATCH_H) 299 #include <dispatch/dispatch.h> 300 #endif 301 302 #ifndef PRIuS 303 #define PRIuS "zu" 304 #endif 305 306 // Calling pthread_getspecific through a global function pointer is faster than a normal 307 // call to the function on Mac OS X, and it's used in performance-critical code. So we 308 // use a function pointer. But that's not necessarily faster on other platforms, and we had 309 // problems with this technique on Windows, so we'll do this only on Mac OS X. 310 #if OS(MACOSX) 311 static void* (*pthread_getspecific_function_pointer)(pthread_key_t) = pthread_getspecific; 312 #define pthread_getspecific(key) pthread_getspecific_function_pointer(key) 313 #endif 314 315 #define DEFINE_VARIABLE(type, name, value, meaning) \ 316 namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \ 317 type FLAGS_##name(value); \ 318 char FLAGS_no##name; \ 319 } \ 320 using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name 321 322 #define DEFINE_int64(name, value, meaning) \ 323 DEFINE_VARIABLE(int64_t, name, value, meaning) 324 325 #define DEFINE_double(name, value, meaning) \ 326 DEFINE_VARIABLE(double, name, value, meaning) 327 328 namespace WTF { 329 330 #define malloc fastMalloc 331 #define calloc fastCalloc 332 #define free fastFree 333 #define realloc fastRealloc 334 335 #define MESSAGE WTF_LOG_ERROR 336 #define CHECK_CONDITION ASSERT 337 338 #if !OS(MACOSX) 339 static const char kLLHardeningMask = 0; 340 #endif 341 342 template <unsigned> struct EntropySource; 343 template <> struct EntropySource<4> { 344 static uint32_t value() 345 { 346 #if OS(MACOSX) 347 return arc4random(); 348 #else 349 return static_cast<uint32_t>(static_cast<uintptr_t>(currentTime() * 10000) ^ reinterpret_cast<uintptr_t>(&kLLHardeningMask)); 350 #endif 351 } 352 }; 353 354 template <> struct EntropySource<8> { 355 static uint64_t value() 356 { 357 return EntropySource<4>::value() | (static_cast<uint64_t>(EntropySource<4>::value()) << 32); 358 } 359 }; 360 361 #if ENABLE(TCMALLOC_HARDENING) 362 /* 363 * To make it harder to exploit use-after free style exploits 364 * we mask the addresses we put into our linked lists with the 365 * address of kLLHardeningMask. Due to ASLR the address of 366 * kLLHardeningMask should be sufficiently randomized to make direct 367 * freelist manipulation much more difficult. 
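 *
 * Concretely, the macros defined right after this comment store a
 * next-pointer that lives at node address "key" as
 *
 *     stored = raw ^ (rotate_right(key, MaskKeyShift) ^ entropy)
 *
 * and apply the same XOR again when reading it back, so the mask differs
 * per node and per process. (This is a prose sketch of
 * XOR_MASK_PTR_WITH_KEY/ROTATE_VALUE below, not an additional mechanism.)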
368 */ 369 enum { 370 MaskKeyShift = 13 371 }; 372 373 static ALWAYS_INLINE uintptr_t internalEntropyValue() 374 { 375 static uintptr_t value = EntropySource<sizeof(uintptr_t)>::value() | 1; 376 ASSERT(value); 377 return value; 378 } 379 380 #define HARDENING_ENTROPY internalEntropyValue() 381 #define ROTATE_VALUE(value, amount) (((value) >> (amount)) | ((value) << (sizeof(value) * 8 - (amount)))) 382 #define XOR_MASK_PTR_WITH_KEY(ptr, key, entropy) (reinterpret_cast<typeof(ptr)>(reinterpret_cast<uintptr_t>(ptr)^(ROTATE_VALUE(reinterpret_cast<uintptr_t>(key), MaskKeyShift)^entropy))) 383 384 385 static ALWAYS_INLINE uint32_t freedObjectStartPoison() 386 { 387 static uint32_t value = EntropySource<sizeof(uint32_t)>::value() | 1; 388 ASSERT(value); 389 return value; 390 } 391 392 static ALWAYS_INLINE uint32_t freedObjectEndPoison() 393 { 394 static uint32_t value = EntropySource<sizeof(uint32_t)>::value() | 1; 395 ASSERT(value); 396 return value; 397 } 398 399 #define PTR_TO_UINT32(ptr) static_cast<uint32_t>(reinterpret_cast<uintptr_t>(ptr)) 400 #define END_POISON_INDEX(allocationSize) (((allocationSize) - sizeof(uint32_t)) / sizeof(uint32_t)) 401 #define POISON_ALLOCATION(allocation, allocationSize) do { \ 402 ASSERT((allocationSize) >= 2 * sizeof(uint32_t)); \ 403 reinterpret_cast<uint32_t*>(allocation)[0] = 0xbadbeef1; \ 404 reinterpret_cast<uint32_t*>(allocation)[1] = 0xbadbeef3; \ 405 if ((allocationSize) < 4 * sizeof(uint32_t)) \ 406 break; \ 407 reinterpret_cast<uint32_t*>(allocation)[2] = 0xbadbeef5; \ 408 reinterpret_cast<uint32_t*>(allocation)[END_POISON_INDEX(allocationSize)] = 0xbadbeef7; \ 409 } while (false); 410 411 #define POISON_DEALLOCATION_EXPLICIT(allocation, allocationSize, startPoison, endPoison) do { \ 412 ASSERT((allocationSize) >= 2 * sizeof(uint32_t)); \ 413 reinterpret_cast<uint32_t*>(allocation)[0] = 0xbadbeef9; \ 414 reinterpret_cast<uint32_t*>(allocation)[1] = 0xbadbeefb; \ 415 if ((allocationSize) < 4 * sizeof(uint32_t)) \ 416 break; \ 417 reinterpret_cast<uint32_t*>(allocation)[2] = (startPoison) ^ PTR_TO_UINT32(allocation); \ 418 reinterpret_cast<uint32_t*>(allocation)[END_POISON_INDEX(allocationSize)] = (endPoison) ^ PTR_TO_UINT32(allocation); \ 419 } while (false) 420 421 #define POISON_DEALLOCATION(allocation, allocationSize) \ 422 POISON_DEALLOCATION_EXPLICIT(allocation, (allocationSize), freedObjectStartPoison(), freedObjectEndPoison()) 423 424 #define MAY_BE_POISONED(allocation, allocationSize) (((allocationSize) >= 4 * sizeof(uint32_t)) && ( \ 425 (reinterpret_cast<uint32_t*>(allocation)[2] == (freedObjectStartPoison() ^ PTR_TO_UINT32(allocation))) || \ 426 (reinterpret_cast<uint32_t*>(allocation)[END_POISON_INDEX(allocationSize)] == (freedObjectEndPoison() ^ PTR_TO_UINT32(allocation))) \ 427 )) 428 429 #define IS_DEFINITELY_POISONED(allocation, allocationSize) (((allocationSize) < 4 * sizeof(uint32_t)) || ( \ 430 (reinterpret_cast<uint32_t*>(allocation)[2] == (freedObjectStartPoison() ^ PTR_TO_UINT32(allocation))) && \ 431 (reinterpret_cast<uint32_t*>(allocation)[END_POISON_INDEX(allocationSize)] == (freedObjectEndPoison() ^ PTR_TO_UINT32(allocation))) \ 432 )) 433 434 #else 435 436 #define POISON_ALLOCATION(allocation, allocationSize) 437 #define POISON_DEALLOCATION(allocation, allocationSize) 438 #define POISON_DEALLOCATION_EXPLICIT(allocation, allocationSize, startPoison, endPoison) 439 #define MAY_BE_POISONED(allocation, allocationSize) (false) 440 #define IS_DEFINITELY_POISONED(allocation, allocationSize) (true) 441 #define 
XOR_MASK_PTR_WITH_KEY(ptr, key, entropy) (((void)entropy), ((void)key), ptr)

#define HARDENING_ENTROPY 0

#endif

//-------------------------------------------------------------------
// Configuration
//-------------------------------------------------------------------

// Not all possible combinations of the following parameters make
// sense. In particular, if kMaxSize increases, you may have to
// increase kNumClasses as well.
static const size_t kPageShift = 12;
static const size_t kPageSize = 1 << kPageShift;
static const size_t kMaxSize = 8u * kPageSize;
static const size_t kAlignShift = 3;
static const size_t kAlignment = 1 << kAlignShift;
static const size_t kNumClasses = 68;

// Allocates a big block of memory for the pagemap once we reach more than
// 128MB
static const size_t kPageMapBigAllocationThreshold = 128 << 20;

// Minimum number of pages to fetch from system at a time. Must be
// significantly bigger than kPageSize to amortize system-call
// overhead, and also to reduce external fragmentation. Also, we
// should keep this value big because various incarnations of Linux
// have small limits on the number of mmap() regions per
// address-space.
static const size_t kMinSystemAlloc = 1 << (20 - kPageShift);

// Number of objects to move between a per-thread list and a central
// list in one shot. We want this to be not too small so we can
// amortize the lock overhead for accessing the central list. Making
// it too big may temporarily cause unnecessary memory wastage in the
// per-thread free list until the scavenger cleans up the list.
static int num_objects_to_move[kNumClasses];

// Maximum length we allow a per-thread free-list to have before we
// move objects from it into the corresponding central free-list. We
// want this big to avoid locking the central free-list too often. It
// should not hurt to make this list somewhat big because the
// scavenging code will shrink it down when its contents are not in use.
static const int kMaxFreeListLength = 256;

// Lower and upper bounds on the per-thread cache sizes
static const size_t kMinThreadCacheSize = kMaxSize * 2;
static const size_t kMaxThreadCacheSize = 2 << 20;

// Default bound on the total amount of thread caches
static const size_t kDefaultOverallThreadCacheSize = 16 << 20;

// For all span-lengths < kMaxPages we keep an exact-size list.
// REQUIRED: kMaxPages >= kMinSystemAlloc;
static const size_t kMaxPages = kMinSystemAlloc;

/* The smallest prime > 2^n */
static int primes_list[] = {
    // Small values might cause high rates of sampling
    // and hence are commented out.
    // 2, 5, 11, 17, 37, 67, 131, 257,
    // 521, 1031, 2053, 4099, 8209, 16411,
    32771, 65537, 131101, 262147, 524309, 1048583,
    2097169, 4194319, 8388617, 16777259, 33554467 };

// Twice the approximate gap between sampling actions.
// I.e., we take one sample approximately once every
//      tcmalloc_sample_parameter/2
// bytes of allocation, i.e., ~ once every 128KB.
// Must be a prime number.
#ifdef NO_TCMALLOC_SAMPLES
DEFINE_int64(tcmalloc_sample_parameter, 0,
             "Unused: code is compiled with NO_TCMALLOC_SAMPLES");
static size_t sample_period = 0;
#else
DEFINE_int64(tcmalloc_sample_parameter, 262147,
             "Twice the approximate gap between sampling actions."
519 " Must be a prime number. Otherwise will be rounded up to a " 520 " larger prime number"); 521 static size_t sample_period = 262147; 522 #endif 523 524 // Protects sample_period above 525 static SpinLock sample_period_lock = SPINLOCK_INITIALIZER; 526 527 // Parameters for controlling how fast memory is returned to the OS. 528 529 DEFINE_double(tcmalloc_release_rate, 1, 530 "Rate at which we release unused memory to the system. " 531 "Zero means we never release memory back to the system. " 532 "Increase this flag to return memory faster; decrease it " 533 "to return memory slower. Reasonable rates are in the " 534 "range [0,10]"); 535 536 //------------------------------------------------------------------- 537 // Mapping from size to size_class and vice versa 538 //------------------------------------------------------------------- 539 540 // Sizes <= 1024 have an alignment >= 8. So for such sizes we have an 541 // array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128. 542 // So for these larger sizes we have an array indexed by ceil(size/128). 543 // 544 // We flatten both logical arrays into one physical array and use 545 // arithmetic to compute an appropriate index. The constants used by 546 // ClassIndex() were selected to make the flattening work. 547 // 548 // Examples: 549 // Size Expression Index 550 // ------------------------------------------------------- 551 // 0 (0 + 7) / 8 0 552 // 1 (1 + 7) / 8 1 553 // ... 554 // 1024 (1024 + 7) / 8 128 555 // 1025 (1025 + 127 + (120<<7)) / 128 129 556 // ... 557 // 32768 (32768 + 127 + (120<<7)) / 128 376 558 static const size_t kMaxSmallSize = 1024; 559 static const int shift_amount[2] = { 3, 7 }; // For divides by 8 or 128 560 static const int add_amount[2] = { 7, 127 + (120 << 7) }; 561 static unsigned char class_array[377]; 562 563 // Compute index of the class_array[] entry for a given size 564 static inline int ClassIndex(size_t s) { 565 const int i = (s > kMaxSmallSize); 566 return static_cast<int>((s + add_amount[i]) >> shift_amount[i]); 567 } 568 569 // Mapping from size class to max size storable in that class 570 static size_t class_to_size[kNumClasses]; 571 572 // Mapping from size class to number of pages to allocate at a time 573 static size_t class_to_pages[kNumClasses]; 574 575 // Hardened singly linked list. We make this a class to allow compiler to 576 // statically prevent mismatching hardened and non-hardened list 577 class HardenedSLL { 578 public: 579 static ALWAYS_INLINE HardenedSLL create(void* value) 580 { 581 HardenedSLL result; 582 result.m_value = value; 583 return result; 584 } 585 586 static ALWAYS_INLINE HardenedSLL null() 587 { 588 HardenedSLL result; 589 result.m_value = 0; 590 return result; 591 } 592 593 ALWAYS_INLINE void setValue(void* value) { m_value = value; } 594 ALWAYS_INLINE void* value() const { return m_value; } 595 ALWAYS_INLINE bool operator!() const { return !m_value; } 596 typedef void* (HardenedSLL::*UnspecifiedBoolType); 597 ALWAYS_INLINE operator UnspecifiedBoolType() const { return m_value ? &HardenedSLL::m_value : 0; } 598 599 bool operator!=(const HardenedSLL& other) const { return m_value != other.m_value; } 600 bool operator==(const HardenedSLL& other) const { return m_value == other.m_value; } 601 602 private: 603 void* m_value; 604 }; 605 606 // TransferCache is used to cache transfers of num_objects_to_move[size_class] 607 // back and forth between thread caches and the central cache for a given size 608 // class. 
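// For a sense of scale (these figures simply evaluate NumMoveSize(), defined
// further below, with the constants used in this file): a 256-byte size class
// moves min(64KB / 256, 32) = 32 objects per transfer, while an 8KB size class
// moves 64KB / 8KB = 8 objects per transfer.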
609 struct TCEntry { 610 HardenedSLL head; // Head of chain of objects. 611 HardenedSLL tail; // Tail of chain of objects. 612 }; 613 // A central cache freelist can have anywhere from 0 to kNumTransferEntries 614 // slots to put link list chains into. To keep memory usage bounded the total 615 // number of TCEntries across size classes is fixed. Currently each size 616 // class is initially given one TCEntry which also means that the maximum any 617 // one class can have is kNumClasses. 618 static const int kNumTransferEntries = kNumClasses; 619 620 // Note: the following only works for "n"s that fit in 32-bits, but 621 // that is fine since we only use it for small sizes. 622 static inline int LgFloor(size_t n) { 623 int log = 0; 624 for (int i = 4; i >= 0; --i) { 625 int shift = (1 << i); 626 size_t x = n >> shift; 627 if (x != 0) { 628 n = x; 629 log += shift; 630 } 631 } 632 ASSERT(n == 1); 633 return log; 634 } 635 636 // Functions for using our simple hardened singly linked list 637 static ALWAYS_INLINE HardenedSLL SLL_Next(HardenedSLL t, uintptr_t entropy) { 638 return HardenedSLL::create(XOR_MASK_PTR_WITH_KEY(*(reinterpret_cast<void**>(t.value())), t.value(), entropy)); 639 } 640 641 static ALWAYS_INLINE void SLL_SetNext(HardenedSLL t, HardenedSLL n, uintptr_t entropy) { 642 *(reinterpret_cast<void**>(t.value())) = XOR_MASK_PTR_WITH_KEY(n.value(), t.value(), entropy); 643 } 644 645 static ALWAYS_INLINE void SLL_Push(HardenedSLL* list, HardenedSLL element, uintptr_t entropy) { 646 SLL_SetNext(element, *list, entropy); 647 *list = element; 648 } 649 650 static ALWAYS_INLINE HardenedSLL SLL_Pop(HardenedSLL *list, uintptr_t entropy) { 651 HardenedSLL result = *list; 652 *list = SLL_Next(*list, entropy); 653 return result; 654 } 655 656 // Remove N elements from a linked list to which head points. head will be 657 // modified to point to the new head. start and end will point to the first 658 // and last nodes of the range. Note that end will point to NULL after this 659 // function is called. 660 661 static ALWAYS_INLINE void SLL_PopRange(HardenedSLL* head, int N, HardenedSLL *start, HardenedSLL *end, uintptr_t entropy) { 662 if (N == 0) { 663 *start = HardenedSLL::null(); 664 *end = HardenedSLL::null(); 665 return; 666 } 667 668 HardenedSLL tmp = *head; 669 for (int i = 1; i < N; ++i) { 670 tmp = SLL_Next(tmp, entropy); 671 } 672 673 *start = *head; 674 *end = tmp; 675 *head = SLL_Next(tmp, entropy); 676 // Unlink range from list. 677 SLL_SetNext(tmp, HardenedSLL::null(), entropy); 678 } 679 680 static ALWAYS_INLINE void SLL_PushRange(HardenedSLL *head, HardenedSLL start, HardenedSLL end, uintptr_t entropy) { 681 if (!start) return; 682 SLL_SetNext(end, *head, entropy); 683 *head = start; 684 } 685 686 // Setup helper functions. 687 688 static ALWAYS_INLINE size_t SizeClass(size_t size) { 689 return class_array[ClassIndex(size)]; 690 } 691 692 // Get the byte-size for a specified class 693 static ALWAYS_INLINE size_t ByteSizeForClass(size_t cl) { 694 return class_to_size[cl]; 695 } 696 static int NumMoveSize(size_t size) { 697 if (size == 0) return 0; 698 // Use approx 64k transfers between thread and central caches. 699 int num = static_cast<int>(64.0 * 1024.0 / size); 700 if (num < 2) num = 2; 701 // Clamp well below kMaxFreeListLength to avoid ping pong between central 702 // and thread caches. 
703 if (num > static_cast<int>(0.8 * kMaxFreeListLength)) 704 num = static_cast<int>(0.8 * kMaxFreeListLength); 705 706 // Also, avoid bringing in too many objects into small object free 707 // lists. There are lots of such lists, and if we allow each one to 708 // fetch too many at a time, we end up having to scavenge too often 709 // (especially when there are lots of threads and each thread gets a 710 // small allowance for its thread cache). 711 // 712 // TODO: Make thread cache free list sizes dynamic so that we do not 713 // have to equally divide a fixed resource amongst lots of threads. 714 if (num > 32) num = 32; 715 716 return num; 717 } 718 719 // Initialize the mapping arrays 720 static void InitSizeClasses() { 721 // Do some sanity checking on add_amount[]/shift_amount[]/class_array[] 722 if (ClassIndex(0) < 0) { 723 MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0)); 724 CRASH(); 725 } 726 if (static_cast<size_t>(ClassIndex(kMaxSize)) >= sizeof(class_array)) { 727 MESSAGE("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize)); 728 CRASH(); 729 } 730 731 // Compute the size classes we want to use 732 size_t sc = 1; // Next size class to assign 733 unsigned char alignshift = kAlignShift; 734 int last_lg = -1; 735 for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) { 736 int lg = LgFloor(size); 737 if (lg > last_lg) { 738 // Increase alignment every so often. 739 // 740 // Since we double the alignment every time size doubles and 741 // size >= 128, this means that space wasted due to alignment is 742 // at most 16/128 i.e., 12.5%. Plus we cap the alignment at 256 743 // bytes, so the space wasted as a percentage starts falling for 744 // sizes > 2K. 745 if ((lg >= 7) && (alignshift < 8)) { 746 alignshift++; 747 } 748 last_lg = lg; 749 } 750 751 // Allocate enough pages so leftover is less than 1/8 of total. 752 // This bounds wasted space to at most 12.5%. 753 size_t psize = kPageSize; 754 while ((psize % size) > (psize >> 3)) { 755 psize += kPageSize; 756 } 757 const size_t my_pages = psize >> kPageShift; 758 759 if (sc > 1 && my_pages == class_to_pages[sc-1]) { 760 // See if we can merge this into the previous class without 761 // increasing the fragmentation of the previous class. 
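      // (The merge below is taken only when both candidate sizes use the same
      // number of pages per span and pack the same number of objects into that
      // span; folding them into the larger size then leaves the per-span
      // leftover unchanged.)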
762 const size_t my_objects = (my_pages << kPageShift) / size; 763 const size_t prev_objects = (class_to_pages[sc-1] << kPageShift) 764 / class_to_size[sc-1]; 765 if (my_objects == prev_objects) { 766 // Adjust last class to include this size 767 class_to_size[sc-1] = size; 768 continue; 769 } 770 } 771 772 // Add new class 773 class_to_pages[sc] = my_pages; 774 class_to_size[sc] = size; 775 sc++; 776 } 777 if (sc != kNumClasses) { 778 MESSAGE("wrong number of size classes: found %" PRIuS " instead of %d\n", 779 sc, int(kNumClasses)); 780 CRASH(); 781 } 782 783 // Initialize the mapping arrays 784 int next_size = 0; 785 for (unsigned char c = 1; c < kNumClasses; c++) { 786 const size_t max_size_in_class = class_to_size[c]; 787 for (size_t s = next_size; s <= max_size_in_class; s += kAlignment) { 788 class_array[ClassIndex(s)] = c; 789 } 790 next_size = static_cast<int>(max_size_in_class + kAlignment); 791 } 792 793 // Double-check sizes just to be safe 794 for (size_t size = 0; size <= kMaxSize; size++) { 795 const size_t sc = SizeClass(size); 796 if (sc == 0) { 797 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size); 798 CRASH(); 799 } 800 if (sc > 1 && size <= class_to_size[sc-1]) { 801 MESSAGE("Allocating unnecessarily large class %" PRIuS " for %" PRIuS 802 "\n", sc, size); 803 CRASH(); 804 } 805 if (sc >= kNumClasses) { 806 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size); 807 CRASH(); 808 } 809 const size_t s = class_to_size[sc]; 810 if (size > s) { 811 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc); 812 CRASH(); 813 } 814 if (s == 0) { 815 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc); 816 CRASH(); 817 } 818 } 819 820 // Initialize the num_objects_to_move array. 821 for (size_t cl = 1; cl < kNumClasses; ++cl) { 822 num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl)); 823 } 824 } 825 826 // ------------------------------------------------------------------------- 827 // Simple allocator for objects of a specified type. External locking 828 // is required before accessing one of these objects. 
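// (Clients later in this file include span_allocator and stacktrace_allocator;
// in practice they are used with pageheap_lock held.)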
829 // ------------------------------------------------------------------------- 830 831 // Metadata allocator -- keeps stats about how many bytes allocated 832 static uint64_t metadata_system_bytes = 0; 833 static void* MetaDataAlloc(size_t bytes) { 834 void* result = TCMalloc_SystemAlloc(bytes, 0); 835 if (result != NULL) { 836 metadata_system_bytes += bytes; 837 } 838 return result; 839 } 840 841 template <class T> 842 class PageHeapAllocator { 843 private: 844 // How much to allocate from system at a time 845 static const size_t kAllocIncrement = 32 << 10; 846 847 // Aligned size of T 848 static const size_t kAlignedSize 849 = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment); 850 851 // Free area from which to carve new objects 852 char* free_area_; 853 size_t free_avail_; 854 855 // Linked list of all regions allocated by this allocator 856 HardenedSLL allocated_regions_; 857 858 // Free list of already carved objects 859 HardenedSLL free_list_; 860 861 // Number of allocated but unfreed objects 862 int inuse_; 863 uintptr_t entropy_; 864 865 public: 866 void Init(uintptr_t entropy) { 867 ASSERT(kAlignedSize <= kAllocIncrement); 868 inuse_ = 0; 869 allocated_regions_ = HardenedSLL::null(); 870 free_area_ = NULL; 871 free_avail_ = 0; 872 free_list_.setValue(NULL); 873 entropy_ = entropy; 874 } 875 876 T* New() { 877 // Consult free list 878 void* result; 879 if (free_list_) { 880 result = free_list_.value(); 881 free_list_ = SLL_Next(free_list_, entropy_); 882 } else { 883 if (free_avail_ < kAlignedSize) { 884 // Need more room 885 char* new_allocation = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement)); 886 if (!new_allocation) 887 CRASH(); 888 889 HardenedSLL new_head = HardenedSLL::create(new_allocation); 890 SLL_SetNext(new_head, allocated_regions_, entropy_); 891 allocated_regions_ = new_head; 892 free_area_ = new_allocation + kAlignedSize; 893 free_avail_ = kAllocIncrement - kAlignedSize; 894 } 895 result = free_area_; 896 free_area_ += kAlignedSize; 897 free_avail_ -= kAlignedSize; 898 } 899 inuse_++; 900 return reinterpret_cast<T*>(result); 901 } 902 903 void Delete(T* p) { 904 HardenedSLL new_head = HardenedSLL::create(p); 905 SLL_SetNext(new_head, free_list_, entropy_); 906 free_list_ = new_head; 907 inuse_--; 908 } 909 910 int inuse() const { return inuse_; } 911 912 #if OS(MACOSX) 913 template <class Recorder> 914 void recordAdministrativeRegions(Recorder& recorder, const RemoteMemoryReader& reader) 915 { 916 for (HardenedSLL adminAllocation = allocated_regions_; adminAllocation; adminAllocation.setValue(reader.nextEntryInHardenedLinkedList(reinterpret_cast<void**>(adminAllocation.value()), entropy_))) 917 recorder.recordRegion(reinterpret_cast<vm_address_t>(adminAllocation.value()), kAllocIncrement); 918 } 919 #endif 920 }; 921 922 // ------------------------------------------------------------------------- 923 // Span - a contiguous run of pages 924 // ------------------------------------------------------------------------- 925 926 // Type that can hold a page number 927 typedef uintptr_t PageID; 928 929 // Type that can hold the length of a run of pages 930 typedef uintptr_t Length; 931 932 static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift; 933 934 // Convert byte size into pages. This won't overflow, but may return 935 // an unreasonably large value if bytes is huge enough. 936 static inline Length pages(size_t bytes) { 937 return (bytes >> kPageShift) + 938 ((bytes & (kPageSize - 1)) > 0 ? 
1 : 0); 939 } 940 941 // Convert a user size into the number of bytes that will actually be 942 // allocated 943 static size_t AllocationSize(size_t bytes) { 944 if (bytes > kMaxSize) { 945 // Large object: we allocate an integral number of pages 946 ASSERT(bytes <= (kMaxValidPages << kPageShift)); 947 return pages(bytes) << kPageShift; 948 } else { 949 // Small object: find the size class to which it belongs 950 return ByteSizeForClass(SizeClass(bytes)); 951 } 952 } 953 954 enum { 955 kSpanCookieBits = 10, 956 kSpanCookieMask = (1 << 10) - 1, 957 kSpanThisShift = 7 958 }; 959 960 static uint32_t spanValidationCookie; 961 static uint32_t spanInitializerCookie() 962 { 963 static uint32_t value = EntropySource<sizeof(uint32_t)>::value() & kSpanCookieMask; 964 spanValidationCookie = value; 965 return value; 966 } 967 968 // Information kept for a span (a contiguous run of pages). 969 struct Span { 970 PageID start; // Starting page number 971 Length length; // Number of pages in span 972 Span* next(uintptr_t entropy) const { return XOR_MASK_PTR_WITH_KEY(m_next, this, entropy); } 973 Span* remoteNext(const Span* remoteSpanPointer, uintptr_t entropy) const { return XOR_MASK_PTR_WITH_KEY(m_next, remoteSpanPointer, entropy); } 974 Span* prev(uintptr_t entropy) const { return XOR_MASK_PTR_WITH_KEY(m_prev, this, entropy); } 975 void setNext(Span* next, uintptr_t entropy) { m_next = XOR_MASK_PTR_WITH_KEY(next, this, entropy); } 976 void setPrev(Span* prev, uintptr_t entropy) { m_prev = XOR_MASK_PTR_WITH_KEY(prev, this, entropy); } 977 978 private: 979 Span* m_next; // Used when in link list 980 Span* m_prev; // Used when in link list 981 public: 982 HardenedSLL objects; // Linked list of free objects 983 unsigned int free : 1; // Is the span free 984 #ifndef NO_TCMALLOC_SAMPLES 985 unsigned int sample : 1; // Sampled object? 
986 #endif 987 unsigned int sizeclass : 8; // Size-class for small objects (or 0) 988 unsigned int refcount : 11; // Number of non-free objects 989 bool decommitted : 1; 990 void initCookie() 991 { 992 m_cookie = ((reinterpret_cast<uintptr_t>(this) >> kSpanThisShift) & kSpanCookieMask) ^ spanInitializerCookie(); 993 } 994 void clearCookie() { m_cookie = 0; } 995 bool isValid() const 996 { 997 return (((reinterpret_cast<uintptr_t>(this) >> kSpanThisShift) & kSpanCookieMask) ^ m_cookie) == spanValidationCookie; 998 } 999 private: 1000 uint32_t m_cookie : kSpanCookieBits; 1001 1002 #undef SPAN_HISTORY 1003 #ifdef SPAN_HISTORY 1004 // For debugging, we can keep a log events per span 1005 int nexthistory; 1006 char history[64]; 1007 int value[64]; 1008 #endif 1009 }; 1010 1011 #define ASSERT_SPAN_COMMITTED(span) ASSERT(!span->decommitted) 1012 1013 #ifdef SPAN_HISTORY 1014 void Event(Span* span, char op, int v = 0) { 1015 span->history[span->nexthistory] = op; 1016 span->value[span->nexthistory] = v; 1017 span->nexthistory++; 1018 if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0; 1019 } 1020 #else 1021 #define Event(s,o,v) ((void) 0) 1022 #endif 1023 1024 // Allocator/deallocator for spans 1025 static PageHeapAllocator<Span> span_allocator; 1026 static Span* NewSpan(PageID p, Length len) { 1027 Span* result = span_allocator.New(); 1028 memset(result, 0, sizeof(*result)); 1029 result->start = p; 1030 result->length = len; 1031 result->initCookie(); 1032 #ifdef SPAN_HISTORY 1033 result->nexthistory = 0; 1034 #endif 1035 return result; 1036 } 1037 1038 static inline void DeleteSpan(Span* span) { 1039 RELEASE_ASSERT(span->isValid()); 1040 #ifndef NDEBUG 1041 // In debug mode, trash the contents of deleted Spans 1042 memset(span, 0x3f, sizeof(*span)); 1043 #endif 1044 span->clearCookie(); 1045 span_allocator.Delete(span); 1046 } 1047 1048 // ------------------------------------------------------------------------- 1049 // Doubly linked list of spans. 
1050 // ------------------------------------------------------------------------- 1051 1052 static inline void DLL_Init(Span* list, uintptr_t entropy) { 1053 list->setNext(list, entropy); 1054 list->setPrev(list, entropy); 1055 } 1056 1057 static inline void DLL_Remove(Span* span, uintptr_t entropy) { 1058 span->prev(entropy)->setNext(span->next(entropy), entropy); 1059 span->next(entropy)->setPrev(span->prev(entropy), entropy); 1060 span->setPrev(NULL, entropy); 1061 span->setNext(NULL, entropy); 1062 } 1063 1064 static ALWAYS_INLINE bool DLL_IsEmpty(const Span* list, uintptr_t entropy) { 1065 return list->next(entropy) == list; 1066 } 1067 1068 static int DLL_Length(const Span* list, uintptr_t entropy) { 1069 int result = 0; 1070 for (Span* s = list->next(entropy); s != list; s = s->next(entropy)) { 1071 result++; 1072 } 1073 return result; 1074 } 1075 1076 #if 0 /* Not needed at the moment -- causes compiler warnings if not used */ 1077 static void DLL_Print(const char* label, const Span* list) { 1078 MESSAGE("%-10s %p:", label, list); 1079 for (const Span* s = list->next; s != list; s = s->next) { 1080 MESSAGE(" <%p,%u,%u>", s, s->start, s->length); 1081 } 1082 MESSAGE("\n"); 1083 } 1084 #endif 1085 1086 static inline void DLL_Prepend(Span* list, Span* span, uintptr_t entropy) { 1087 span->setNext(list->next(entropy), entropy); 1088 span->setPrev(list, entropy); 1089 list->next(entropy)->setPrev(span, entropy); 1090 list->setNext(span, entropy); 1091 } 1092 1093 //------------------------------------------------------------------- 1094 // Data kept per size-class in central cache 1095 //------------------------------------------------------------------- 1096 1097 class TCMalloc_Central_FreeList { 1098 public: 1099 void Init(size_t cl, uintptr_t entropy); 1100 1101 // These methods all do internal locking. 1102 1103 // Insert the specified range into the central freelist. N is the number of 1104 // elements in the range. 1105 void InsertRange(HardenedSLL start, HardenedSLL end, int N); 1106 1107 // Returns the actual number of fetched elements into N. 1108 void RemoveRange(HardenedSLL* start, HardenedSLL* end, int *N); 1109 1110 // Returns the number of free objects in cache. 1111 size_t length() { 1112 SpinLockHolder h(&lock_); 1113 return counter_; 1114 } 1115 1116 // Returns the number of free objects in the transfer cache. 1117 int tc_length() { 1118 SpinLockHolder h(&lock_); 1119 return used_slots_ * num_objects_to_move[size_class_]; 1120 } 1121 1122 template <class Finder, class Reader> 1123 void enumerateFreeObjects(Finder& finder, const Reader& reader, TCMalloc_Central_FreeList* remoteCentralFreeList) 1124 { 1125 { 1126 static const ptrdiff_t emptyOffset = reinterpret_cast<const char*>(&empty_) - reinterpret_cast<const char*>(this); 1127 Span* remoteEmpty = reinterpret_cast<Span*>(reinterpret_cast<char*>(remoteCentralFreeList) + emptyOffset); 1128 Span* remoteSpan = nonempty_.remoteNext(remoteEmpty, entropy_); 1129 for (Span* span = reader(remoteEmpty); span && span != &empty_; remoteSpan = span->remoteNext(remoteSpan, entropy_), span = (remoteSpan ? 
reader(remoteSpan) : 0))
                ASSERT(!span->objects);
        }

        ASSERT(!nonempty_.objects);
        static const ptrdiff_t nonemptyOffset = reinterpret_cast<const char*>(&nonempty_) - reinterpret_cast<const char*>(this);

        Span* remoteNonempty = reinterpret_cast<Span*>(reinterpret_cast<char*>(remoteCentralFreeList) + nonemptyOffset);
        Span* remoteSpan = nonempty_.remoteNext(remoteNonempty, entropy_);

        for (Span* span = reader(remoteSpan); span && remoteSpan != remoteNonempty; remoteSpan = span->remoteNext(remoteSpan, entropy_), span = (remoteSpan ? reader(remoteSpan) : 0)) {
            for (HardenedSLL nextObject = span->objects; nextObject; nextObject.setValue(reader.nextEntryInHardenedLinkedList(reinterpret_cast<void**>(nextObject.value()), entropy_))) {
                finder.visit(nextObject.value());
            }
        }
    }

    uintptr_t entropy() const { return entropy_; }
private:
    // REQUIRES: lock_ is held
    // Remove object from cache and return.
    // Return NULL if no free entries in cache.
    HardenedSLL FetchFromSpans();

    // REQUIRES: lock_ is held
    // Remove object from cache and return. Fetches
    // from pageheap if cache is empty. Only returns
    // NULL on allocation failure.
    HardenedSLL FetchFromSpansSafe();

    // REQUIRES: lock_ is held
    // Release a linked list of objects to spans.
    // May temporarily release lock_.
    void ReleaseListToSpans(HardenedSLL start);

    // REQUIRES: lock_ is held
    // Release an object to spans.
    // May temporarily release lock_.
    ALWAYS_INLINE void ReleaseToSpans(HardenedSLL object);

    // REQUIRES: lock_ is held
    // Populate cache by fetching from the page heap.
    // May temporarily release lock_.
    ALWAYS_INLINE void Populate();

    // REQUIRES: lock_ is held.
    // Tries to make room for a TCEntry. If the cache is full it will try to
    // expand it at the cost of some other cache size. Return false if there is
    // no space.
    bool MakeCacheSpace();

    // REQUIRES: lock_ for locked_size_class is held.
    // Picks a "random" size class to steal a TCEntry slot from. In reality it
    // just iterates over the sizeclasses but does so without taking a lock.
    // Returns true on success.
    // May temporarily lock a "random" size class.
    static ALWAYS_INLINE bool EvictRandomSizeClass(size_t locked_size_class, bool force);

    // REQUIRES: lock_ is *not* held.
    // Tries to shrink the Cache. If force is true it will release objects to
    // spans if that allows it to shrink the cache. Return false if it failed to
    // shrink the cache. Decrements cache_size_ on success.
    // May temporarily take lock_. If it takes lock_, the locked_size_class
    // lock is released to keep the thread from holding two size class locks
    // concurrently, which could lead to a deadlock.
    bool ShrinkCache(int locked_size_class, bool force);

    // This lock protects all the data members. cached_entries and cache_size_
    // may be looked at without holding the lock.
    SpinLock lock_;

    // We keep linked lists of empty and non-empty spans.
    size_t size_class_;     // My size class
    Span empty_;            // Dummy header for list of empty spans
    Span nonempty_;         // Dummy header for list of non-empty spans
    size_t counter_;        // Number of free objects in cache entry

    // Here we reserve space for TCEntry cache slots.
Since one size class can 1207 // end up getting all the TCEntries quota in the system we just preallocate 1208 // sufficient number of entries here. 1209 TCEntry tc_slots_[kNumTransferEntries]; 1210 1211 // Number of currently used cached entries in tc_slots_. This variable is 1212 // updated under a lock but can be read without one. 1213 int32_t used_slots_; 1214 // The current number of slots for this size class. This is an 1215 // adaptive value that is increased if there is lots of traffic 1216 // on a given size class. 1217 int32_t cache_size_; 1218 uintptr_t entropy_; 1219 }; 1220 1221 #if COMPILER(CLANG) && defined(__has_warning) 1222 #pragma clang diagnostic push 1223 #if __has_warning("-Wunused-private-field") 1224 #pragma clang diagnostic ignored "-Wunused-private-field" 1225 #endif 1226 #endif 1227 1228 // Pad each CentralCache object to multiple of 64 bytes 1229 template <size_t SizeToPad> 1230 class TCMalloc_Central_FreeListPadded_Template : public TCMalloc_Central_FreeList { 1231 private: 1232 char pad[64 - SizeToPad]; 1233 }; 1234 1235 // Zero-size specialization to avoid compiler error when TCMalloc_Central_FreeList happens 1236 // to be exactly 64 bytes. 1237 template <> class TCMalloc_Central_FreeListPadded_Template<0> : public TCMalloc_Central_FreeList { 1238 }; 1239 1240 typedef TCMalloc_Central_FreeListPadded_Template<sizeof(TCMalloc_Central_FreeList) % 64> TCMalloc_Central_FreeListPadded; 1241 1242 #if COMPILER(CLANG) && defined(__has_warning) 1243 #pragma clang diagnostic pop 1244 #endif 1245 1246 #if OS(MACOSX) 1247 struct Span; 1248 class TCMalloc_PageHeap; 1249 class TCMalloc_ThreadCache; 1250 template <typename T> class PageHeapAllocator; 1251 1252 class FastMallocZone { 1253 public: 1254 static void init(); 1255 1256 static kern_return_t enumerate(task_t, void*, unsigned typeMmask, vm_address_t zoneAddress, memory_reader_t, vm_range_recorder_t); 1257 static size_t goodSize(malloc_zone_t*, size_t size) { return size; } 1258 static boolean_t check(malloc_zone_t*) { return true; } 1259 static void print(malloc_zone_t*, boolean_t) { } 1260 static void log(malloc_zone_t*, void*) { } 1261 static void forceLock(malloc_zone_t*) { } 1262 static void forceUnlock(malloc_zone_t*) { } 1263 static void statistics(malloc_zone_t*, malloc_statistics_t* stats) { memset(stats, 0, sizeof(malloc_statistics_t)); } 1264 1265 private: 1266 FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*, PageHeapAllocator<Span>*, PageHeapAllocator<TCMalloc_ThreadCache>*); 1267 static size_t size(malloc_zone_t*, const void*); 1268 static void* zoneMalloc(malloc_zone_t*, size_t); 1269 static void* zoneCalloc(malloc_zone_t*, size_t numItems, size_t size); 1270 static void zoneFree(malloc_zone_t*, void*); 1271 static void* zoneRealloc(malloc_zone_t*, void*, size_t); 1272 static void* zoneValloc(malloc_zone_t*, size_t) { WTF_LOG_ERROR("valloc is not supported"); return 0; } 1273 static void zoneDestroy(malloc_zone_t*) { } 1274 1275 malloc_zone_t m_zone; 1276 TCMalloc_PageHeap* m_pageHeap; 1277 TCMalloc_ThreadCache** m_threadHeaps; 1278 TCMalloc_Central_FreeListPadded* m_centralCaches; 1279 PageHeapAllocator<Span>* m_spanAllocator; 1280 PageHeapAllocator<TCMalloc_ThreadCache>* m_pageHeapAllocator; 1281 }; 1282 1283 #endif 1284 1285 // Even if we have support for thread-local storage in the compiler 1286 // and linker, the OS may not support it. We need to check that at 1287 // runtime. Right now, we have to keep a manual set of "bad" OSes. 
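// An illustrative compile-time check, kept out of the build in the same way as
// the "#if 0" DLL_Print helper above: the padded central free-list type defined
// earlier is expected to occupy a whole number of 64-byte units, presumably so
// that neighbouring entries in an array of central caches do not share cache
// lines. (The stated intent is our reading of the padding template, not
// something the original code asserts.)
#if 0
COMPILE_ASSERT(!(sizeof(TCMalloc_Central_FreeListPadded) % 64), central_freelist_padded_to_64_byte_multiple);
#endif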
1288 #if defined(HAVE_TLS) 1289 static bool kernel_supports_tls = false; // be conservative 1290 static inline bool KernelSupportsTLS() { 1291 return kernel_supports_tls; 1292 } 1293 # if !HAVE_DECL_UNAME // if too old for uname, probably too old for TLS 1294 static void CheckIfKernelSupportsTLS() { 1295 kernel_supports_tls = false; 1296 } 1297 # else 1298 # include <sys/utsname.h> // DECL_UNAME checked for <sys/utsname.h> too 1299 static void CheckIfKernelSupportsTLS() { 1300 struct utsname buf; 1301 if (uname(&buf) != 0) { // should be impossible 1302 MESSAGE("uname failed assuming no TLS support (errno=%d)\n", errno); 1303 kernel_supports_tls = false; 1304 } else if (strcasecmp(buf.sysname, "linux") == 0) { 1305 // The linux case: the first kernel to support TLS was 2.6.0 1306 if (buf.release[0] < '2' && buf.release[1] == '.') // 0.x or 1.x 1307 kernel_supports_tls = false; 1308 else if (buf.release[0] == '2' && buf.release[1] == '.' && 1309 buf.release[2] >= '0' && buf.release[2] < '6' && 1310 buf.release[3] == '.') // 2.0 - 2.5 1311 kernel_supports_tls = false; 1312 else 1313 kernel_supports_tls = true; 1314 } else { // some other kernel, we'll be optimisitic 1315 kernel_supports_tls = true; 1316 } 1317 // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG 1318 } 1319 # endif // HAVE_DECL_UNAME 1320 #endif // HAVE_TLS 1321 1322 // __THROW is defined in glibc systems. It means, counter-intuitively, 1323 // "This function will never throw an exception." It's an optional 1324 // optimization tool, but we may need to use it to match glibc prototypes. 1325 #ifndef __THROW // I guess we're not on a glibc system 1326 # define __THROW // __THROW is just an optimization, so ok to make it "" 1327 #endif 1328 1329 // ------------------------------------------------------------------------- 1330 // Stack traces kept for sampled allocations 1331 // The following state is protected by pageheap_lock_. 1332 // ------------------------------------------------------------------------- 1333 1334 // size/depth are made the same size as a pointer so that some generic 1335 // code below can conveniently cast them back and forth to void*. 1336 static const int kMaxStackDepth = 31; 1337 struct StackTrace { 1338 uintptr_t size; // Size of object 1339 uintptr_t depth; // Number of PC values stored in array below 1340 void* stack[kMaxStackDepth]; 1341 }; 1342 static PageHeapAllocator<StackTrace> stacktrace_allocator; 1343 static Span sampled_objects; 1344 1345 // ------------------------------------------------------------------------- 1346 // Map from page-id to per-page data 1347 // ------------------------------------------------------------------------- 1348 1349 // We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines. 1350 // We also use a simple one-level cache for hot PageID-to-sizeclass mappings, 1351 // because sometimes the sizeclass is all the information we need. 1352 1353 // Selector class -- general selector uses 3-level map 1354 template <int BITS> class MapSelector { 1355 public: 1356 typedef TCMalloc_PageMap3<BITS-kPageShift> Type; 1357 typedef PackedCache<BITS, uint64_t> CacheType; 1358 }; 1359 1360 #if CPU(X86_64) 1361 // On all known X86-64 platforms, the upper 16 bits are always unused and therefore 1362 // can be excluded from the PageMap key. 
// See http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details

static const size_t kBitsUnusedOn64Bit = 16;
#else
static const size_t kBitsUnusedOn64Bit = 0;
#endif

// A three-level map for 64-bit machines
template <> class MapSelector<64> {
public:
    typedef TCMalloc_PageMap3<64 - kPageShift - kBitsUnusedOn64Bit> Type;
    typedef PackedCache<64, uint64_t> CacheType;
};

// A two-level map for 32-bit machines
template <> class MapSelector<32> {
public:
    typedef TCMalloc_PageMap2<32 - kPageShift> Type;
    typedef PackedCache<32 - kPageShift, uint16_t> CacheType;
};

// -------------------------------------------------------------------------
// Page-level allocator
//  * Eager coalescing
//
// Heap for page-level allocation. We allow allocating and freeing
// contiguous runs of pages (called a "span").
// -------------------------------------------------------------------------

#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
// The page heap maintains a free list for spans that are no longer in use by
// the central cache or any thread caches. We use a background thread to
// periodically scan the free list and release a percentage of it back to the OS.

// If free_committed_pages_ exceeds kMinimumFreeCommittedPageCount, the
// background thread:
// - wakes up
// - pauses for kScavengeDelayInSeconds
// - returns to the OS a percentage of the memory that remained unused during
//   that pause (kScavengePercentage * min_free_committed_pages_since_last_scavenge_)
// The goal of this strategy is to reduce memory pressure in a timely fashion
// while avoiding thrashing the OS allocator.

// Time delay before the page heap scavenger will consider returning pages to
// the OS.
static const int kScavengeDelayInSeconds = 2;

// Approximate percentage of free committed pages to return to the OS in one
// scavenge.
static const float kScavengePercentage = .5f;

// Number of span lists to keep spans in when memory is returned.
static const int kMinSpanListsWithSpans = 32;

// Number of free committed pages that we want to keep around. This is the
// minimum number of pages used when there is 1 span in each of the first
// kMinSpanListsWithSpans span lists. Currently 528 pages.
static const size_t kMinimumFreeCommittedPageCount = kMinSpanListsWithSpans * ((1.0f+kMinSpanListsWithSpans) / 2.0f);

#endif

static SpinLock pageheap_lock = SPINLOCK_INITIALIZER;

class TCMalloc_PageHeap {
public:
    void init();

    // Allocate a run of "n" pages. Returns zero if out of memory.
    Span* New(Length n);

    // Delete the span "[p, p+n-1]".
    // REQUIRES: span was returned by earlier call to New() and
    //           has not yet been deleted.
    void Delete(Span* span);

    // Mark an allocated span as being used for small objects of the
    // specified size-class.
    // REQUIRES: span was returned by an earlier call to New()
    //           and has not yet been deleted.
    void RegisterSizeClass(Span* span, size_t sc);

    // Split an allocated span into two spans: one of length "n" pages
    // followed by another span of length "span->length - n" pages.
    // Modifies "*span" to point to the first span of length "n" pages.
    // Returns a pointer to the second span.
1447 // 1448 // REQUIRES: "0 < n < span->length" 1449 // REQUIRES: !span->free 1450 // REQUIRES: span->sizeclass == 0 1451 Span* Split(Span* span, Length n); 1452 1453 // Return the descriptor for the specified page. 1454 inline Span* GetDescriptor(PageID p) const { 1455 return reinterpret_cast<Span*>(pagemap_.get(p)); 1456 } 1457 1458 inline Span* GetDescriptorEnsureSafe(PageID p) 1459 { 1460 pagemap_.Ensure(p, 1); 1461 return GetDescriptor(p); 1462 } 1463 1464 size_t ReturnedBytes() const; 1465 1466 // Return number of bytes allocated from system 1467 inline uint64_t SystemBytes() const { return system_bytes_; } 1468 1469 // Return number of free bytes in heap 1470 uint64_t FreeBytes() const { 1471 return (static_cast<uint64_t>(free_pages_) << kPageShift); 1472 } 1473 1474 bool Check(); 1475 size_t CheckList(Span* list, Length min_pages, Length max_pages, bool decommitted); 1476 1477 // Release all pages on the free list for reuse by the OS: 1478 void ReleaseFreePages(); 1479 void ReleaseFreeList(Span*, Span*); 1480 1481 // Return 0 if we have no information, or else the correct sizeclass for p. 1482 // Reads and writes to pagemap_cache_ do not require locking. 1483 // The entries are 64 bits on 64-bit hardware and 16 bits on 1484 // 32-bit hardware, and we don't mind raciness as long as each read of 1485 // an entry yields a valid entry, not a partially updated entry. 1486 size_t GetSizeClassIfCached(PageID p) const { 1487 return pagemap_cache_.GetOrDefault(p, 0); 1488 } 1489 void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); } 1490 1491 private: 1492 // Pick the appropriate map and cache types based on pointer size 1493 typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap; 1494 typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache; 1495 PageMap pagemap_; 1496 mutable PageMapCache pagemap_cache_; 1497 1498 // We segregate spans of a given size into two circular linked 1499 // lists: one for normal spans, and one for spans whose memory 1500 // has been returned to the system. 1501 struct SpanList { 1502 Span normal; 1503 Span returned; 1504 }; 1505 1506 // List of free spans of length >= kMaxPages 1507 SpanList large_; 1508 1509 // Array mapping from span length to a doubly linked list of free spans 1510 SpanList free_[kMaxPages]; 1511 1512 // Number of pages kept in free lists 1513 uintptr_t free_pages_; 1514 1515 // Used for hardening 1516 uintptr_t entropy_; 1517 1518 // Bytes allocated from system 1519 uint64_t system_bytes_; 1520 1521 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1522 // Number of pages kept in free lists that are still committed. 1523 Length free_committed_pages_; 1524 1525 // Minimum number of free committed pages since last scavenge. (Can be 0 if 1526 // we've committed new pages since the last scavenge.) 1527 Length min_free_committed_pages_since_last_scavenge_; 1528 #endif 1529 1530 bool GrowHeap(Length n); 1531 1532 // REQUIRES span->length >= n 1533 // Remove span from its free list, and move any leftover part of 1534 // span into appropriate free lists. Also update "span" to have 1535 // length exactly "n" and mark it as non-free so it can be returned 1536 // to the client. 1537 // 1538 // "released" is true iff "span" was found on a "returned" list. 
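  // (For example, carving 3 pages out of an 8-page free span hands the caller
  // a 3-page span and re-inserts the remaining 5 pages as a fresh span on the
  // appropriate free list.)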
1539 void Carve(Span* span, Length n, bool released); 1540 1541 void RecordSpan(Span* span) { 1542 pagemap_.set(span->start, span); 1543 if (span->length > 1) { 1544 pagemap_.set(span->start + span->length - 1, span); 1545 } 1546 } 1547 1548 // Allocate a large span of length == n. If successful, returns a 1549 // span of exactly the specified length. Else, returns NULL. 1550 Span* AllocLarge(Length n); 1551 1552 #if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1553 // Incrementally release some memory to the system. 1554 // IncrementalScavenge(n) is called whenever n pages are freed. 1555 void IncrementalScavenge(Length n); 1556 #endif 1557 1558 // Number of pages to deallocate before doing more scavenging 1559 int64_t scavenge_counter_; 1560 1561 // Index of last free list we scavenged 1562 size_t scavenge_index_; 1563 1564 #if OS(MACOSX) 1565 friend class FastMallocZone; 1566 #endif 1567 1568 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1569 void initializeScavenger(); 1570 ALWAYS_INLINE void signalScavenger(); 1571 void scavenge(); 1572 ALWAYS_INLINE bool shouldScavenge() const; 1573 1574 #if HAVE(DISPATCH_H) || OS(WIN) 1575 void periodicScavenge(); 1576 ALWAYS_INLINE bool isScavengerSuspended(); 1577 ALWAYS_INLINE void scheduleScavenger(); 1578 ALWAYS_INLINE void rescheduleScavenger(); 1579 ALWAYS_INLINE void suspendScavenger(); 1580 #endif 1581 1582 #if HAVE(DISPATCH_H) 1583 dispatch_queue_t m_scavengeQueue; 1584 dispatch_source_t m_scavengeTimer; 1585 bool m_scavengingSuspended; 1586 #elif OS(WIN) 1587 static void CALLBACK scavengerTimerFired(void*, BOOLEAN); 1588 HANDLE m_scavengeQueueTimer; 1589 #else 1590 static NO_RETURN_WITH_VALUE void* runScavengerThread(void*); 1591 NO_RETURN void scavengerThread(); 1592 1593 // Keeps track of whether the background thread is actively scavenging memory every kScavengeDelayInSeconds, or 1594 // it's blocked waiting for more pages to be deleted. 
1595 bool m_scavengeThreadActive; 1596 1597 pthread_mutex_t m_scavengeMutex; 1598 pthread_cond_t m_scavengeCondition; 1599 #endif 1600 1601 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1602 }; 1603 1604 void TCMalloc_PageHeap::init() 1605 { 1606 pagemap_.init(MetaDataAlloc); 1607 pagemap_cache_ = PageMapCache(0); 1608 free_pages_ = 0; 1609 system_bytes_ = 0; 1610 entropy_ = HARDENING_ENTROPY; 1611 1612 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1613 free_committed_pages_ = 0; 1614 min_free_committed_pages_since_last_scavenge_ = 0; 1615 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1616 1617 scavenge_counter_ = 0; 1618 // Start scavenging at kMaxPages list 1619 scavenge_index_ = kMaxPages-1; 1620 COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits); 1621 DLL_Init(&large_.normal, entropy_); 1622 DLL_Init(&large_.returned, entropy_); 1623 for (size_t i = 0; i < kMaxPages; i++) { 1624 DLL_Init(&free_[i].normal, entropy_); 1625 DLL_Init(&free_[i].returned, entropy_); 1626 } 1627 1628 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1629 initializeScavenger(); 1630 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1631 } 1632 1633 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1634 1635 #if HAVE(DISPATCH_H) 1636 1637 void TCMalloc_PageHeap::initializeScavenger() 1638 { 1639 m_scavengeQueue = dispatch_queue_create("com.apple.JavaScriptCore.FastMallocSavenger", NULL); 1640 m_scavengeTimer = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, m_scavengeQueue); 1641 uint64_t scavengeDelayInNanoseconds = kScavengeDelayInSeconds * NSEC_PER_SEC; 1642 dispatch_time_t startTime = dispatch_time(DISPATCH_TIME_NOW, scavengeDelayInNanoseconds); 1643 dispatch_source_set_timer(m_scavengeTimer, startTime, scavengeDelayInNanoseconds, scavengeDelayInNanoseconds / 10); 1644 dispatch_source_set_event_handler(m_scavengeTimer, ^{ periodicScavenge(); }); 1645 m_scavengingSuspended = true; 1646 } 1647 1648 ALWAYS_INLINE bool TCMalloc_PageHeap::isScavengerSuspended() 1649 { 1650 ASSERT(pageheap_lock.IsHeld()); 1651 return m_scavengingSuspended; 1652 } 1653 1654 ALWAYS_INLINE void TCMalloc_PageHeap::scheduleScavenger() 1655 { 1656 ASSERT(pageheap_lock.IsHeld()); 1657 m_scavengingSuspended = false; 1658 dispatch_resume(m_scavengeTimer); 1659 } 1660 1661 ALWAYS_INLINE void TCMalloc_PageHeap::rescheduleScavenger() 1662 { 1663 // Nothing to do here for libdispatch. 1664 } 1665 1666 ALWAYS_INLINE void TCMalloc_PageHeap::suspendScavenger() 1667 { 1668 ASSERT(pageheap_lock.IsHeld()); 1669 m_scavengingSuspended = true; 1670 dispatch_suspend(m_scavengeTimer); 1671 } 1672 1673 #elif OS(WIN) 1674 1675 void TCMalloc_PageHeap::scavengerTimerFired(void* context, BOOLEAN) 1676 { 1677 static_cast<TCMalloc_PageHeap*>(context)->periodicScavenge(); 1678 } 1679 1680 void TCMalloc_PageHeap::initializeScavenger() 1681 { 1682 m_scavengeQueueTimer = 0; 1683 } 1684 1685 ALWAYS_INLINE bool TCMalloc_PageHeap::isScavengerSuspended() 1686 { 1687 ASSERT(pageheap_lock.IsHeld()); 1688 return !m_scavengeQueueTimer; 1689 } 1690 1691 ALWAYS_INLINE void TCMalloc_PageHeap::scheduleScavenger() 1692 { 1693 // We need to use WT_EXECUTEONLYONCE here and reschedule the timer, because 1694 // Windows will fire the timer event even when the function is already running. 
1695 ASSERT(pageheap_lock.IsHeld());
1696 CreateTimerQueueTimer(&m_scavengeQueueTimer, 0, scavengerTimerFired, this, kScavengeDelayInSeconds * 1000, 0, WT_EXECUTEONLYONCE);
1697 }
1698
1699 ALWAYS_INLINE void TCMalloc_PageHeap::rescheduleScavenger()
1700 {
1701 // We must delete the timer and create it again, because it is not possible to retrigger a timer on Windows.
1702 suspendScavenger();
1703 scheduleScavenger();
1704 }
1705
1706 ALWAYS_INLINE void TCMalloc_PageHeap::suspendScavenger()
1707 {
1708 ASSERT(pageheap_lock.IsHeld());
1709 HANDLE scavengeQueueTimer = m_scavengeQueueTimer;
1710 m_scavengeQueueTimer = 0;
1711 DeleteTimerQueueTimer(0, scavengeQueueTimer, 0);
1712 }
1713
1714 #else
1715
1716 void TCMalloc_PageHeap::initializeScavenger()
1717 {
1718 // Create a non-recursive mutex.
1719 #if !defined(PTHREAD_MUTEX_NORMAL) || PTHREAD_MUTEX_NORMAL == PTHREAD_MUTEX_DEFAULT
1720 pthread_mutex_init(&m_scavengeMutex, 0);
1721 #else
1722 pthread_mutexattr_t attr;
1723 pthread_mutexattr_init(&attr);
1724 pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL);
1725
1726 pthread_mutex_init(&m_scavengeMutex, &attr);
1727
1728 pthread_mutexattr_destroy(&attr);
1729 #endif
1730
1731 pthread_cond_init(&m_scavengeCondition, 0);
1732 m_scavengeThreadActive = true;
1733 pthread_t thread;
1734 pthread_create(&thread, 0, runScavengerThread, this);
1735 }
1736
1737 void* TCMalloc_PageHeap::runScavengerThread(void* context)
1738 {
1739 static_cast<TCMalloc_PageHeap*>(context)->scavengerThread();
1740 #if COMPILER(MSVC)
1741 // Without this, Visual Studio will complain that this method does not return a value.
1742 return 0;
1743 #endif
1744 }
1745
1746 ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger()
1747 {
1748 // shouldScavenge() may be called only while the pageheap_lock spinlock is held; in addition,
1749 // m_scavengeThreadActive is only set to false while pageheap_lock is held. The caller must therefore
1750 // acquire pageheap_lock before calling this method. If the scavenger thread is sleeping and
1751 // shouldScavenge() indicates there is memory to free, the scavenger thread is signalled to start.
1752 ASSERT(pageheap_lock.IsHeld());
1753 if (!m_scavengeThreadActive && shouldScavenge())
1754 pthread_cond_signal(&m_scavengeCondition);
1755 }
1756
1757 #endif
1758
1759 void TCMalloc_PageHeap::scavenge()
1760 {
1761 size_t pagesToRelease = min_free_committed_pages_since_last_scavenge_ * kScavengePercentage;
1762 size_t targetPageCount = std::max<size_t>(kMinimumFreeCommittedPageCount, free_committed_pages_ - pagesToRelease);
1763
1764 Length lastFreeCommittedPages = free_committed_pages_;
1765 while (free_committed_pages_ > targetPageCount) {
1766 ASSERT(Check());
1767 for (int i = kMaxPages; i > 0 && free_committed_pages_ >= targetPageCount; i--) {
1768 SpanList* slist = (static_cast<size_t>(i) == kMaxPages) ? &large_ : &free_[i];
1769 // If the span size is bigger than kMinSpanListsWithSpans pages, return all the spans in the list;
1770 // otherwise return only 50% of the spanlist at a time so spans of size 1 are not the only ones left.
1771 size_t length = DLL_Length(&slist->normal, entropy_);
1772 size_t numSpansToReturn = (i > kMinSpanListsWithSpans) ?
length : length / 2;
1773 for (int j = 0; static_cast<size_t>(j) < numSpansToReturn && !DLL_IsEmpty(&slist->normal, entropy_) && free_committed_pages_ > targetPageCount; j++) {
1774 Span* s = slist->normal.prev(entropy_);
1775 DLL_Remove(s, entropy_);
1776 ASSERT(!s->decommitted);
1777 if (!s->decommitted) {
1778 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1779 static_cast<size_t>(s->length << kPageShift));
1780 ASSERT(free_committed_pages_ >= s->length);
1781 free_committed_pages_ -= s->length;
1782 s->decommitted = true;
1783 }
1784 DLL_Prepend(&slist->returned, s, entropy_);
1785 }
1786 }
1787
1788 if (lastFreeCommittedPages == free_committed_pages_)
1789 break;
1790 lastFreeCommittedPages = free_committed_pages_;
1791 }
1792
1793 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
1794 }
1795
1796 ALWAYS_INLINE bool TCMalloc_PageHeap::shouldScavenge() const
1797 {
1798 return free_committed_pages_ > kMinimumFreeCommittedPageCount;
1799 }
1800
1801 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1802
1803 inline Span* TCMalloc_PageHeap::New(Length n) {
1804 ASSERT(Check());
1805 ASSERT(n > 0);
1806
1807 // Find first size >= n that has a non-empty list
1808 for (Length s = n; s < kMaxPages; s++) {
1809 Span* ll = NULL;
1810 bool released = false;
1811 if (!DLL_IsEmpty(&free_[s].normal, entropy_)) {
1812 // Found normal span
1813 ll = &free_[s].normal;
1814 } else if (!DLL_IsEmpty(&free_[s].returned, entropy_)) {
1815 // Found returned span; reallocate it
1816 ll = &free_[s].returned;
1817 released = true;
1818 } else {
1819 // Keep looking in larger classes
1820 continue;
1821 }
1822
1823 Span* result = ll->next(entropy_);
1824 Carve(result, n, released);
1825 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1826 // The newly allocated memory is from a span that's in the normal span list (already committed). Update the
1827 // free committed pages count.
1828 ASSERT(free_committed_pages_ >= n);
1829 free_committed_pages_ -= n;
1830 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_)
1831 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
1832 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1833 ASSERT(Check());
1834 free_pages_ -= n;
1835 return result;
1836 }
1837
1838 Span* result = AllocLarge(n);
1839 if (result != NULL) {
1840 ASSERT_SPAN_COMMITTED(result);
1841 return result;
1842 }
1843
1844 // Grow the heap and try again
1845 if (!GrowHeap(n)) {
1846 ASSERT(Check());
1847 return NULL;
1848 }
1849
1850 return New(n);
1851 }
1852
1853 Span* TCMalloc_PageHeap::AllocLarge(Length n) {
1854 // Find the best span (closest to n in size).
1855 // The following loops implement address-ordered best-fit.
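// Illustrative example of the best-fit rule below (numbers assumed for
// exposition): given free large spans of lengths {12, 9, 9} pages starting
// at pages {500, 700, 300}, a request for n = 8 selects the length-9 span
// starting at page 300, i.e. the smallest length >= n, with ties broken by
// the lower start address.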
1856 bool from_released = false; 1857 Span *best = NULL; 1858 1859 // Search through normal list 1860 for (Span* span = large_.normal.next(entropy_); 1861 span != &large_.normal; 1862 span = span->next(entropy_)) { 1863 if (span->length >= n) { 1864 if ((best == NULL) 1865 || (span->length < best->length) 1866 || ((span->length == best->length) && (span->start < best->start))) { 1867 best = span; 1868 from_released = false; 1869 } 1870 } 1871 } 1872 1873 // Search through released list in case it has a better fit 1874 for (Span* span = large_.returned.next(entropy_); 1875 span != &large_.returned; 1876 span = span->next(entropy_)) { 1877 if (span->length >= n) { 1878 if ((best == NULL) 1879 || (span->length < best->length) 1880 || ((span->length == best->length) && (span->start < best->start))) { 1881 best = span; 1882 from_released = true; 1883 } 1884 } 1885 } 1886 1887 if (best != NULL) { 1888 Carve(best, n, from_released); 1889 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1890 // The newly allocated memory is from a span that's in the normal span list (already committed). Update the 1891 // free committed pages count. 1892 ASSERT(free_committed_pages_ >= n); 1893 free_committed_pages_ -= n; 1894 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_) 1895 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; 1896 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1897 ASSERT(Check()); 1898 free_pages_ -= n; 1899 return best; 1900 } 1901 return NULL; 1902 } 1903 1904 Span* TCMalloc_PageHeap::Split(Span* span, Length n) { 1905 ASSERT(0 < n); 1906 ASSERT(n < span->length); 1907 ASSERT(!span->free); 1908 ASSERT(span->sizeclass == 0); 1909 Event(span, 'T', n); 1910 1911 const Length extra = span->length - n; 1912 Span* leftover = NewSpan(span->start + n, extra); 1913 Event(leftover, 'U', extra); 1914 RecordSpan(leftover); 1915 pagemap_.set(span->start + n - 1, span); // Update map from pageid to span 1916 span->length = n; 1917 1918 return leftover; 1919 } 1920 1921 inline void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) { 1922 ASSERT(n > 0); 1923 DLL_Remove(span, entropy_); 1924 span->free = 0; 1925 Event(span, 'A', n); 1926 1927 if (released) { 1928 // If the span chosen to carve from is decommited, commit the entire span at once to avoid committing spans 1 page at a time. 1929 ASSERT(span->decommitted); 1930 TCMalloc_SystemCommit(reinterpret_cast<void*>(span->start << kPageShift), static_cast<size_t>(span->length << kPageShift)); 1931 span->decommitted = false; 1932 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1933 free_committed_pages_ += span->length; 1934 #endif 1935 } 1936 1937 const int extra = static_cast<int>(span->length - n); 1938 ASSERT(extra >= 0); 1939 if (extra > 0) { 1940 Span* leftover = NewSpan(span->start + n, extra); 1941 leftover->free = 1; 1942 leftover->decommitted = false; 1943 Event(leftover, 'S', extra); 1944 RecordSpan(leftover); 1945 1946 // Place leftover span on appropriate free list 1947 SpanList* listpair = (static_cast<size_t>(extra) < kMaxPages) ? 
&free_[extra] : &large_; 1948 Span* dst = &listpair->normal; 1949 DLL_Prepend(dst, leftover, entropy_); 1950 1951 span->length = n; 1952 pagemap_.set(span->start + n - 1, span); 1953 } 1954 } 1955 1956 static ALWAYS_INLINE void mergeDecommittedStates(Span* destination, Span* other) 1957 { 1958 if (destination->decommitted && !other->decommitted) { 1959 TCMalloc_SystemRelease(reinterpret_cast<void*>(other->start << kPageShift), 1960 static_cast<size_t>(other->length << kPageShift)); 1961 } else if (other->decommitted && !destination->decommitted) { 1962 TCMalloc_SystemRelease(reinterpret_cast<void*>(destination->start << kPageShift), 1963 static_cast<size_t>(destination->length << kPageShift)); 1964 destination->decommitted = true; 1965 } 1966 } 1967 1968 inline void TCMalloc_PageHeap::Delete(Span* span) { 1969 ASSERT(Check()); 1970 ASSERT(!span->free); 1971 ASSERT(span->length > 0); 1972 ASSERT(GetDescriptor(span->start) == span); 1973 ASSERT(GetDescriptor(span->start + span->length - 1) == span); 1974 span->sizeclass = 0; 1975 #ifndef NO_TCMALLOC_SAMPLES 1976 span->sample = 0; 1977 #endif 1978 1979 // Coalesce -- we guarantee that "p" != 0, so no bounds checking 1980 // necessary. We do not bother resetting the stale pagemap 1981 // entries for the pieces we are merging together because we only 1982 // care about the pagemap entries for the boundaries. 1983 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1984 // Track the total size of the neighboring free spans that are committed. 1985 Length neighboringCommittedSpansLength = 0; 1986 #endif 1987 const PageID p = span->start; 1988 const Length n = span->length; 1989 Span* prev = GetDescriptor(p-1); 1990 if (prev != NULL && prev->free) { 1991 // Merge preceding span into this span 1992 ASSERT(prev->start + prev->length == p); 1993 const Length len = prev->length; 1994 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1995 if (!prev->decommitted) 1996 neighboringCommittedSpansLength += len; 1997 #endif 1998 mergeDecommittedStates(span, prev); 1999 DLL_Remove(prev, entropy_); 2000 DeleteSpan(prev); 2001 span->start -= len; 2002 span->length += len; 2003 pagemap_.set(span->start, span); 2004 Event(span, 'L', len); 2005 } 2006 Span* next = GetDescriptor(p+n); 2007 if (next != NULL && next->free) { 2008 // Merge next span into this span 2009 ASSERT(next->start == p+n); 2010 const Length len = next->length; 2011 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2012 if (!next->decommitted) 2013 neighboringCommittedSpansLength += len; 2014 #endif 2015 mergeDecommittedStates(span, next); 2016 DLL_Remove(next, entropy_); 2017 DeleteSpan(next); 2018 span->length += len; 2019 pagemap_.set(span->start + span->length - 1, span); 2020 Event(span, 'R', len); 2021 } 2022 2023 Event(span, 'D', span->length); 2024 span->free = 1; 2025 if (span->decommitted) { 2026 if (span->length < kMaxPages) 2027 DLL_Prepend(&free_[span->length].returned, span, entropy_); 2028 else 2029 DLL_Prepend(&large_.returned, span, entropy_); 2030 } else { 2031 if (span->length < kMaxPages) 2032 DLL_Prepend(&free_[span->length].normal, span, entropy_); 2033 else 2034 DLL_Prepend(&large_.normal, span, entropy_); 2035 } 2036 free_pages_ += n; 2037 2038 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2039 if (span->decommitted) { 2040 // If the merged span is decommitted, that means we decommitted any neighboring spans that were 2041 // committed. Update the free committed pages count. 
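// Illustrative example (page counts assumed): if a freed 4-page committed
// span merges with a 6-page committed neighbor and a 3-page decommitted
// neighbor, the resulting 13-page span is decommitted. The neighbor's 6
// pages, which used to be counted as free-and-committed, are subtracted
// here; the freed span's own 4 pages are never added, since they too were
// just released by mergeDecommittedStates().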
2042 free_committed_pages_ -= neighboringCommittedSpansLength;
2043 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_)
2044 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
2045 } else {
2046 // If the merged span remains committed, add the deleted span's size to the free committed pages count.
2047 free_committed_pages_ += n;
2048 }
2049
2050 // Make sure the scavenge thread becomes active if we have enough freed pages to release some back to the system.
2051 signalScavenger();
2052 #else
2053 IncrementalScavenge(n);
2054 #endif
2055
2056 ASSERT(Check());
2057 }
2058
2059 #if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
2060 void TCMalloc_PageHeap::IncrementalScavenge(Length n) {
2061 // Fast path; not yet time to release memory
2062 scavenge_counter_ -= n;
2063 if (scavenge_counter_ >= 0) return; // Not yet time to scavenge
2064
2065 // If there is nothing to release, wait for so many pages before
2066 // scavenging again. With 4K pages, this comes to 1MB of memory.
2067 static const size_t kDefaultReleaseDelay = 1 << 8;
2068
2069 // Find index of free list to scavenge
2070 size_t index = scavenge_index_ + 1;
2071 uintptr_t entropy = entropy_;
2072 for (size_t i = 0; i < kMaxPages+1; i++) {
2073 if (index > kMaxPages) index = 0;
2074 SpanList* slist = (index == kMaxPages) ? &large_ : &free_[index];
2075 if (!DLL_IsEmpty(&slist->normal, entropy)) {
2076 // Release the last span on the normal portion of this list
2077 Span* s = slist->normal.prev(entropy);
2078 DLL_Remove(s, entropy_);
2079 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
2080 static_cast<size_t>(s->length << kPageShift));
2081 s->decommitted = true;
2082 DLL_Prepend(&slist->returned, s, entropy);
2083
2084 scavenge_counter_ = std::max<size_t>(64UL, std::min<size_t>(kDefaultReleaseDelay, kDefaultReleaseDelay - (free_pages_ / kDefaultReleaseDelay)));
2085
2086 if (index == kMaxPages && !DLL_IsEmpty(&slist->normal, entropy))
2087 scavenge_index_ = index - 1;
2088 else
2089 scavenge_index_ = index;
2090 return;
2091 }
2092 index++;
2093 }
2094
2095 // Nothing to scavenge, delay for a while
2096 scavenge_counter_ = kDefaultReleaseDelay;
2097 }
2098 #endif
2099
2100 void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) {
2101 // Associate span object with all interior pages as well
2102 ASSERT(!span->free);
2103 ASSERT(GetDescriptor(span->start) == span);
2104 ASSERT(GetDescriptor(span->start+span->length-1) == span);
2105 Event(span, 'C', sc);
2106 span->sizeclass = static_cast<unsigned int>(sc);
2107 for (Length i = 1; i < span->length-1; i++) {
2108 pagemap_.set(span->start+i, span);
2109 }
2110 }
2111
2112 size_t TCMalloc_PageHeap::ReturnedBytes() const {
2113 size_t result = 0;
2114 for (unsigned s = 0; s < kMaxPages; s++) {
2115 const int r_length = DLL_Length(&free_[s].returned, entropy_);
2116 unsigned r_pages = s * r_length;
2117 result += r_pages << kPageShift;
2118 }
2119
2120 for (Span* s = large_.returned.next(entropy_); s != &large_.returned; s = s->next(entropy_))
2121 result += s->length << kPageShift;
2122 return result;
2123 }
2124
2125 bool TCMalloc_PageHeap::GrowHeap(Length n) {
2126 ASSERT(kMaxPages >= kMinSystemAlloc);
2127 if (n > kMaxValidPages) return false;
2128 Length ask = (n>kMinSystemAlloc) ?
n : static_cast<Length>(kMinSystemAlloc); 2129 size_t actual_size; 2130 void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize); 2131 if (ptr == NULL) { 2132 if (n < ask) { 2133 // Try growing just "n" pages 2134 ask = n; 2135 ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize); 2136 } 2137 if (ptr == NULL) return false; 2138 } 2139 ask = actual_size >> kPageShift; 2140 2141 uint64_t old_system_bytes = system_bytes_; 2142 system_bytes_ += (ask << kPageShift); 2143 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; 2144 ASSERT(p > 0); 2145 2146 // If we have already a lot of pages allocated, just pre allocate a bunch of 2147 // memory for the page map. This prevents fragmentation by pagemap metadata 2148 // when a program keeps allocating and freeing large blocks. 2149 2150 if (old_system_bytes < kPageMapBigAllocationThreshold 2151 && system_bytes_ >= kPageMapBigAllocationThreshold) { 2152 pagemap_.PreallocateMoreMemory(); 2153 } 2154 2155 // Make sure pagemap_ has entries for all of the new pages. 2156 // Plus ensure one before and one after so coalescing code 2157 // does not need bounds-checking. 2158 if (pagemap_.Ensure(p-1, ask+2)) { 2159 // Pretend the new area is allocated and then Delete() it to 2160 // cause any necessary coalescing to occur. 2161 // 2162 // We do not adjust free_pages_ here since Delete() will do it for us. 2163 Span* span = NewSpan(p, ask); 2164 RecordSpan(span); 2165 Delete(span); 2166 ASSERT(Check()); 2167 return true; 2168 } else { 2169 // We could not allocate memory within "pagemap_" 2170 // TODO: Once we can return memory to the system, return the new span 2171 return false; 2172 } 2173 } 2174 2175 bool TCMalloc_PageHeap::Check() { 2176 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2177 size_t totalFreeCommitted = 0; 2178 #endif 2179 ASSERT(free_[0].normal.next(entropy_) == &free_[0].normal); 2180 ASSERT(free_[0].returned.next(entropy_) == &free_[0].returned); 2181 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2182 totalFreeCommitted = CheckList(&large_.normal, kMaxPages, 1000000000, false); 2183 #else 2184 CheckList(&large_.normal, kMaxPages, 1000000000, false); 2185 #endif 2186 CheckList(&large_.returned, kMaxPages, 1000000000, true); 2187 for (Length s = 1; s < kMaxPages; s++) { 2188 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2189 totalFreeCommitted += CheckList(&free_[s].normal, s, s, false); 2190 #else 2191 CheckList(&free_[s].normal, s, s, false); 2192 #endif 2193 CheckList(&free_[s].returned, s, s, true); 2194 } 2195 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2196 ASSERT(totalFreeCommitted == free_committed_pages_); 2197 #endif 2198 return true; 2199 } 2200 2201 #if ASSERT_DISABLED 2202 size_t TCMalloc_PageHeap::CheckList(Span*, Length, Length, bool) { 2203 return 0; 2204 } 2205 #else 2206 size_t TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages, bool decommitted) { 2207 size_t freeCount = 0; 2208 for (Span* s = list->next(entropy_); s != list; s = s->next(entropy_)) { 2209 CHECK_CONDITION(s->free); 2210 CHECK_CONDITION(s->length >= min_pages); 2211 CHECK_CONDITION(s->length <= max_pages); 2212 CHECK_CONDITION(GetDescriptor(s->start) == s); 2213 CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s); 2214 CHECK_CONDITION(s->decommitted == decommitted); 2215 freeCount += s->length; 2216 } 2217 return freeCount; 2218 } 2219 #endif 2220 2221 void TCMalloc_PageHeap::ReleaseFreeList(Span* list, Span* returned) { 2222 // Walk backwards through list so that when we push 
these 2223 // spans on the "returned" list, we preserve the order. 2224 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2225 size_t freePageReduction = 0; 2226 #endif 2227 2228 while (!DLL_IsEmpty(list, entropy_)) { 2229 Span* s = list->prev(entropy_); 2230 2231 DLL_Remove(s, entropy_); 2232 s->decommitted = true; 2233 DLL_Prepend(returned, s, entropy_); 2234 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), 2235 static_cast<size_t>(s->length << kPageShift)); 2236 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2237 freePageReduction += s->length; 2238 #endif 2239 } 2240 2241 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2242 free_committed_pages_ -= freePageReduction; 2243 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_) 2244 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; 2245 #endif 2246 } 2247 2248 void TCMalloc_PageHeap::ReleaseFreePages() { 2249 for (Length s = 0; s < kMaxPages; s++) { 2250 ReleaseFreeList(&free_[s].normal, &free_[s].returned); 2251 } 2252 ReleaseFreeList(&large_.normal, &large_.returned); 2253 ASSERT(Check()); 2254 } 2255 2256 //------------------------------------------------------------------- 2257 // Free list 2258 //------------------------------------------------------------------- 2259 2260 class TCMalloc_ThreadCache_FreeList { 2261 private: 2262 HardenedSLL list_; // Linked list of nodes 2263 uint16_t length_; // Current length 2264 uint16_t lowater_; // Low water mark for list length 2265 uintptr_t entropy_; // Entropy source for hardening 2266 2267 public: 2268 void Init(uintptr_t entropy) { 2269 list_.setValue(NULL); 2270 length_ = 0; 2271 lowater_ = 0; 2272 entropy_ = entropy; 2273 #if ENABLE(TCMALLOC_HARDENING) 2274 ASSERT(entropy_); 2275 #endif 2276 } 2277 2278 // Return current length of list 2279 int length() const { 2280 return length_; 2281 } 2282 2283 // Is list empty? 
2284 bool empty() const { 2285 return !list_; 2286 } 2287 2288 // Low-water mark management 2289 int lowwatermark() const { return lowater_; } 2290 void clear_lowwatermark() { lowater_ = length_; } 2291 2292 ALWAYS_INLINE void Push(HardenedSLL ptr) { 2293 SLL_Push(&list_, ptr, entropy_); 2294 length_++; 2295 } 2296 2297 void PushRange(int N, HardenedSLL start, HardenedSLL end) { 2298 SLL_PushRange(&list_, start, end, entropy_); 2299 length_ = length_ + static_cast<uint16_t>(N); 2300 } 2301 2302 void PopRange(int N, HardenedSLL* start, HardenedSLL* end) { 2303 SLL_PopRange(&list_, N, start, end, entropy_); 2304 ASSERT(length_ >= N); 2305 length_ = length_ - static_cast<uint16_t>(N); 2306 if (length_ < lowater_) lowater_ = length_; 2307 } 2308 2309 ALWAYS_INLINE void* Pop() { 2310 ASSERT(list_); 2311 length_--; 2312 if (length_ < lowater_) lowater_ = length_; 2313 return SLL_Pop(&list_, entropy_).value(); 2314 } 2315 2316 // Runs through the linked list to ensure that 2317 // we can do that, and ensures that 'missing' 2318 // is not present 2319 NEVER_INLINE void Validate(HardenedSLL missing, size_t size) { 2320 HardenedSLL node = list_; 2321 while (node) { 2322 RELEASE_ASSERT(node != missing); 2323 RELEASE_ASSERT(IS_DEFINITELY_POISONED(node.value(), size)); 2324 node = SLL_Next(node, entropy_); 2325 } 2326 } 2327 2328 template <class Finder, class Reader> 2329 void enumerateFreeObjects(Finder& finder, const Reader& reader) 2330 { 2331 for (HardenedSLL nextObject = list_; nextObject; nextObject.setValue(reader.nextEntryInHardenedLinkedList(reinterpret_cast<void**>(nextObject.value()), entropy_))) 2332 finder.visit(nextObject.value()); 2333 } 2334 }; 2335 2336 //------------------------------------------------------------------- 2337 // Data kept per thread 2338 //------------------------------------------------------------------- 2339 2340 class TCMalloc_ThreadCache { 2341 private: 2342 typedef TCMalloc_ThreadCache_FreeList FreeList; 2343 #if OS(WIN) 2344 typedef DWORD ThreadIdentifier; 2345 #else 2346 typedef pthread_t ThreadIdentifier; 2347 #endif 2348 2349 size_t size_; // Combined size of data 2350 ThreadIdentifier tid_; // Which thread owns it 2351 bool in_setspecific_; // Called pthread_setspecific? 2352 FreeList list_[kNumClasses]; // Array indexed by size-class 2353 2354 // We sample allocations, biased by the size of the allocation 2355 uint32_t rnd_; // Cheap random number generator 2356 size_t bytes_until_sample_; // Bytes until we sample next 2357 2358 uintptr_t entropy_; // Entropy value used for hardening 2359 2360 // Allocate a new heap. REQUIRES: pageheap_lock is held. 2361 static inline TCMalloc_ThreadCache* NewHeap(ThreadIdentifier tid, uintptr_t entropy); 2362 2363 // Use only as pthread thread-specific destructor function. 
2364 static void DestroyThreadCache(void* ptr); 2365 public: 2366 // All ThreadCache objects are kept in a linked list (for stats collection) 2367 TCMalloc_ThreadCache* next_; 2368 TCMalloc_ThreadCache* prev_; 2369 2370 void Init(ThreadIdentifier tid, uintptr_t entropy); 2371 void Cleanup(); 2372 2373 // Accessors (mostly just for printing stats) 2374 int freelist_length(size_t cl) const { return list_[cl].length(); } 2375 2376 // Total byte size in cache 2377 size_t Size() const { return size_; } 2378 2379 ALWAYS_INLINE void* Allocate(size_t size); 2380 void Deallocate(HardenedSLL ptr, size_t size_class); 2381 2382 ALWAYS_INLINE void FetchFromCentralCache(size_t cl, size_t allocationSize); 2383 void ReleaseToCentralCache(size_t cl, int N); 2384 void Scavenge(); 2385 void Print() const; 2386 2387 // Record allocation of "k" bytes. Return true iff allocation 2388 // should be sampled 2389 bool SampleAllocation(size_t k); 2390 2391 // Pick next sampling point 2392 void PickNextSample(size_t k); 2393 2394 static void InitModule(); 2395 static void InitTSD(); 2396 static TCMalloc_ThreadCache* GetThreadHeap(); 2397 static TCMalloc_ThreadCache* GetCache(); 2398 static TCMalloc_ThreadCache* GetCacheIfPresent(); 2399 static TCMalloc_ThreadCache* CreateCacheIfNecessary(); 2400 static void DeleteCache(TCMalloc_ThreadCache* heap); 2401 static void BecomeIdle(); 2402 static void RecomputeThreadCacheSize(); 2403 2404 template <class Finder, class Reader> 2405 void enumerateFreeObjects(Finder& finder, const Reader& reader) 2406 { 2407 for (unsigned sizeClass = 0; sizeClass < kNumClasses; sizeClass++) 2408 list_[sizeClass].enumerateFreeObjects(finder, reader); 2409 } 2410 }; 2411 2412 //------------------------------------------------------------------- 2413 // Global variables 2414 //------------------------------------------------------------------- 2415 2416 // Central cache -- a collection of free-lists, one per size-class. 2417 // We have a separate lock per free-list to reduce contention. 2418 static TCMalloc_Central_FreeListPadded central_cache[kNumClasses]; 2419 2420 // Page-level allocator 2421 static AllocAlignmentInteger pageheap_memory[(sizeof(TCMalloc_PageHeap) + sizeof(AllocAlignmentInteger) - 1) / sizeof(AllocAlignmentInteger)]; 2422 static bool phinited = false; 2423 2424 // Avoid extra level of indirection by making "pageheap" be just an alias 2425 // of pageheap_memory. 
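// The union below type-puns the statically allocated pageheap_memory buffer
// into a TCMalloc_PageHeap*, presumably to avoid writing a reinterpret_cast
// (and triggering aliasing warnings on some compilers) at every use of the
// "pageheap" macro.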
2426 typedef union { 2427 void* m_memory; 2428 TCMalloc_PageHeap* m_pageHeap; 2429 } PageHeapUnion; 2430 2431 static inline TCMalloc_PageHeap* getPageHeap() 2432 { 2433 PageHeapUnion u = { &pageheap_memory[0] }; 2434 return u.m_pageHeap; 2435 } 2436 2437 #define pageheap getPageHeap() 2438 2439 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2440 2441 #if HAVE(DISPATCH_H) || OS(WIN) 2442 2443 void TCMalloc_PageHeap::periodicScavenge() 2444 { 2445 SpinLockHolder h(&pageheap_lock); 2446 pageheap->scavenge(); 2447 2448 if (shouldScavenge()) { 2449 rescheduleScavenger(); 2450 return; 2451 } 2452 2453 suspendScavenger(); 2454 } 2455 2456 ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger() 2457 { 2458 ASSERT(pageheap_lock.IsHeld()); 2459 if (isScavengerSuspended() && shouldScavenge()) 2460 scheduleScavenger(); 2461 } 2462 2463 #else 2464 2465 void TCMalloc_PageHeap::scavengerThread() 2466 { 2467 #if HAVE(PTHREAD_SETNAME_NP) 2468 pthread_setname_np("JavaScriptCore: FastMalloc scavenger"); 2469 #endif 2470 2471 while (1) { 2472 pageheap_lock.Lock(); 2473 if (!shouldScavenge()) { 2474 // Set to false so that signalScavenger() will check whether we need to be siganlled. 2475 m_scavengeThreadActive = false; 2476 2477 // We need to unlock now, as this thread will block on the condvar until scavenging is required. 2478 pageheap_lock.Unlock(); 2479 2480 // Block until there are enough free committed pages to release back to the system. 2481 pthread_mutex_lock(&m_scavengeMutex); 2482 pthread_cond_wait(&m_scavengeCondition, &m_scavengeMutex); 2483 // After exiting the pthread_cond_wait, we hold the lock on m_scavengeMutex. Unlock it to prevent 2484 // deadlock next time round the loop. 2485 pthread_mutex_unlock(&m_scavengeMutex); 2486 2487 // Set to true to prevent unnecessary signalling of the condvar. 2488 m_scavengeThreadActive = true; 2489 } else 2490 pageheap_lock.Unlock(); 2491 2492 // Wait for a while to calculate how much memory remains unused during this pause. 2493 sleep(kScavengeDelayInSeconds); 2494 2495 { 2496 SpinLockHolder h(&pageheap_lock); 2497 pageheap->scavenge(); 2498 } 2499 } 2500 } 2501 2502 #endif 2503 2504 #endif 2505 2506 // If TLS is available, we also store a copy 2507 // of the per-thread object in a __thread variable 2508 // since __thread variables are faster to read 2509 // than pthread_getspecific(). We still need 2510 // pthread_setspecific() because __thread 2511 // variables provide no way to run cleanup 2512 // code when a thread is destroyed. 2513 #ifdef HAVE_TLS 2514 static __thread TCMalloc_ThreadCache *threadlocal_heap; 2515 #endif 2516 // Thread-specific key. Initialization here is somewhat tricky 2517 // because some Linux startup code invokes malloc() before it 2518 // is in a good enough state to handle pthread_keycreate(). 2519 // Therefore, we use TSD keys only after tsd_inited is set to true. 2520 // Until then, we use a slow path to get the heap object. 2521 static bool tsd_inited = false; 2522 static pthread_key_t heap_key; 2523 #if OS(WIN) 2524 DWORD tlsIndex = TLS_OUT_OF_INDEXES; 2525 #endif 2526 2527 static ALWAYS_INLINE void setThreadHeap(TCMalloc_ThreadCache* heap) 2528 { 2529 // Still do pthread_setspecific even if there's an alternate form 2530 // of thread-local storage in use, to benefit from the delete callback. 
2531 pthread_setspecific(heap_key, heap);
2532
2533 #if OS(WIN)
2534 TlsSetValue(tlsIndex, heap);
2535 #endif
2536 }
2537
2538 // Allocator for thread heaps
2539 static PageHeapAllocator<TCMalloc_ThreadCache> threadheap_allocator;
2540
2541 // Linked list of heap objects. Protected by pageheap_lock.
2542 static TCMalloc_ThreadCache* thread_heaps = NULL;
2543 static int thread_heap_count = 0;
2544
2545 // Overall thread cache size. Protected by pageheap_lock.
2546 static size_t overall_thread_cache_size = kDefaultOverallThreadCacheSize;
2547
2548 // Global per-thread cache size. Writes are protected by
2549 // pageheap_lock. Reads are done without any locking, which should be
2550 // fine as long as size_t can be written atomically and we don't place
2551 // invariants between this variable and other pieces of state.
2552 static volatile size_t per_thread_cache_size = kMaxThreadCacheSize;
2553
2554 //-------------------------------------------------------------------
2555 // Central cache implementation
2556 //-------------------------------------------------------------------
2557
2558 void TCMalloc_Central_FreeList::Init(size_t cl, uintptr_t entropy) {
2559 lock_.Init();
2560 size_class_ = cl;
2561 entropy_ = entropy;
2562 #if ENABLE(TCMALLOC_HARDENING)
2563 ASSERT(entropy_);
2564 #endif
2565 DLL_Init(&empty_, entropy_);
2566 DLL_Init(&nonempty_, entropy_);
2567 counter_ = 0;
2568
2569 cache_size_ = 1;
2570 used_slots_ = 0;
2571 ASSERT(cache_size_ <= kNumTransferEntries);
2572 }
2573
2574 void TCMalloc_Central_FreeList::ReleaseListToSpans(HardenedSLL start) {
2575 while (start) {
2576 HardenedSLL next = SLL_Next(start, entropy_);
2577 ReleaseToSpans(start);
2578 start = next;
2579 }
2580 }
2581
2582 ALWAYS_INLINE void TCMalloc_Central_FreeList::ReleaseToSpans(HardenedSLL object) {
2583 const PageID p = reinterpret_cast<uintptr_t>(object.value()) >> kPageShift;
2584 Span* span = pageheap->GetDescriptor(p);
2585 ASSERT(span != NULL);
2586 ASSERT(span->refcount > 0);
2587
2588 // If span is empty, move it to non-empty list
2589 if (!span->objects) {
2590 DLL_Remove(span, entropy_);
2591 DLL_Prepend(&nonempty_, span, entropy_);
2592 Event(span, 'N', 0);
2593 }
2594
2595 // The following check is expensive, so it is disabled by default
2596 if (false) {
2597 // Check that object does not occur in list
2598 unsigned got = 0;
2599 for (HardenedSLL p = span->objects; p; p = SLL_Next(p, entropy_)) {
2600 ASSERT(p.value() != object.value());
2601 got++;
2602 }
2603 ASSERT(got + span->refcount ==
2604 (span->length<<kPageShift)/ByteSizeForClass(span->sizeclass));
2605 }
2606
2607 counter_++;
2608 span->refcount--;
2609 if (span->refcount == 0) {
2610 Event(span, '#', 0);
2611 counter_ -= (span->length<<kPageShift) / ByteSizeForClass(span->sizeclass);
2612 DLL_Remove(span, entropy_);
2613
2614 // Release central list lock while operating on pageheap
2615 lock_.Unlock();
2616 {
2617 SpinLockHolder h(&pageheap_lock);
2618 pageheap->Delete(span);
2619 }
2620 lock_.Lock();
2621 } else {
2622 SLL_SetNext(object, span->objects, entropy_);
2623 span->objects.setValue(object.value());
2624 }
2625 }
2626
2627 ALWAYS_INLINE bool TCMalloc_Central_FreeList::EvictRandomSizeClass(
2628 size_t locked_size_class, bool force) {
2629 static int race_counter = 0;
2630 int t = race_counter++; // Updated without a lock, but who cares.
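// The loop below reduces t modulo kNumClasses by repeated subtraction.
// Because race_counter is incremented without synchronization, the choice
// of victim size class is only approximately round-robin, which is
// acceptable for cache eviction.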
2631 if (t >= static_cast<int>(kNumClasses)) { 2632 while (t >= static_cast<int>(kNumClasses)) { 2633 t -= kNumClasses; 2634 } 2635 race_counter = t; 2636 } 2637 ASSERT(t >= 0); 2638 ASSERT(t < static_cast<int>(kNumClasses)); 2639 if (t == static_cast<int>(locked_size_class)) return false; 2640 return central_cache[t].ShrinkCache(static_cast<int>(locked_size_class), force); 2641 } 2642 2643 bool TCMalloc_Central_FreeList::MakeCacheSpace() { 2644 // Is there room in the cache? 2645 if (used_slots_ < cache_size_) return true; 2646 // Check if we can expand this cache? 2647 if (cache_size_ == kNumTransferEntries) return false; 2648 // Ok, we'll try to grab an entry from some other size class. 2649 if (EvictRandomSizeClass(size_class_, false) || 2650 EvictRandomSizeClass(size_class_, true)) { 2651 // Succeeded in evicting, we're going to make our cache larger. 2652 cache_size_++; 2653 return true; 2654 } 2655 return false; 2656 } 2657 2658 2659 namespace { 2660 class LockInverter { 2661 private: 2662 SpinLock *held_, *temp_; 2663 public: 2664 inline explicit LockInverter(SpinLock* held, SpinLock *temp) 2665 : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); } 2666 inline ~LockInverter() { temp_->Unlock(); held_->Lock(); } 2667 }; 2668 } 2669 2670 bool TCMalloc_Central_FreeList::ShrinkCache(int locked_size_class, bool force) { 2671 // Start with a quick check without taking a lock. 2672 if (cache_size_ == 0) return false; 2673 // We don't evict from a full cache unless we are 'forcing'. 2674 if (force == false && used_slots_ == cache_size_) return false; 2675 2676 // Grab lock, but first release the other lock held by this thread. We use 2677 // the lock inverter to ensure that we never hold two size class locks 2678 // concurrently. That can create a deadlock because there is no well 2679 // defined nesting order. 2680 LockInverter li(¢ral_cache[locked_size_class].lock_, &lock_); 2681 ASSERT(used_slots_ <= cache_size_); 2682 ASSERT(0 <= cache_size_); 2683 if (cache_size_ == 0) return false; 2684 if (used_slots_ == cache_size_) { 2685 if (force == false) return false; 2686 // ReleaseListToSpans releases the lock, so we have to make all the 2687 // updates to the central list before calling it. 2688 cache_size_--; 2689 used_slots_--; 2690 ReleaseListToSpans(tc_slots_[used_slots_].head); 2691 return true; 2692 } 2693 cache_size_--; 2694 return true; 2695 } 2696 2697 void TCMalloc_Central_FreeList::InsertRange(HardenedSLL start, HardenedSLL end, int N) { 2698 SpinLockHolder h(&lock_); 2699 if (N == num_objects_to_move[size_class_] && 2700 MakeCacheSpace()) { 2701 int slot = used_slots_++; 2702 ASSERT(slot >=0); 2703 ASSERT(slot < kNumTransferEntries); 2704 TCEntry *entry = &tc_slots_[slot]; 2705 entry->head = start; 2706 entry->tail = end; 2707 return; 2708 } 2709 ReleaseListToSpans(start); 2710 } 2711 2712 void TCMalloc_Central_FreeList::RemoveRange(HardenedSLL* start, HardenedSLL* end, int *N) { 2713 int num = *N; 2714 ASSERT(num > 0); 2715 2716 SpinLockHolder h(&lock_); 2717 if (num == num_objects_to_move[size_class_] && used_slots_ > 0) { 2718 int slot = --used_slots_; 2719 ASSERT(slot >= 0); 2720 TCEntry *entry = &tc_slots_[slot]; 2721 *start = entry->head; 2722 *end = entry->tail; 2723 return; 2724 } 2725 2726 // TODO: Prefetch multiple TCEntries? 2727 HardenedSLL tail = FetchFromSpansSafe(); 2728 if (!tail) { 2729 // We are completely out of memory. 
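// Hand back an empty range and set *N to 0 so the caller can tell that
// nothing was fetched.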
2730 *start = *end = HardenedSLL::null(); 2731 *N = 0; 2732 return; 2733 } 2734 2735 SLL_SetNext(tail, HardenedSLL::null(), entropy_); 2736 HardenedSLL head = tail; 2737 int count = 1; 2738 while (count < num) { 2739 HardenedSLL t = FetchFromSpans(); 2740 if (!t) break; 2741 SLL_Push(&head, t, entropy_); 2742 count++; 2743 } 2744 *start = head; 2745 *end = tail; 2746 *N = count; 2747 } 2748 2749 2750 HardenedSLL TCMalloc_Central_FreeList::FetchFromSpansSafe() { 2751 HardenedSLL t = FetchFromSpans(); 2752 if (!t) { 2753 Populate(); 2754 t = FetchFromSpans(); 2755 } 2756 return t; 2757 } 2758 2759 HardenedSLL TCMalloc_Central_FreeList::FetchFromSpans() { 2760 if (DLL_IsEmpty(&nonempty_, entropy_)) return HardenedSLL::null(); 2761 Span* span = nonempty_.next(entropy_); 2762 2763 ASSERT(span->objects); 2764 ASSERT_SPAN_COMMITTED(span); 2765 span->refcount++; 2766 HardenedSLL result = span->objects; 2767 span->objects = SLL_Next(result, entropy_); 2768 if (!span->objects) { 2769 // Move to empty list 2770 DLL_Remove(span, entropy_); 2771 DLL_Prepend(&empty_, span, entropy_); 2772 Event(span, 'E', 0); 2773 } 2774 counter_--; 2775 return result; 2776 } 2777 2778 // Fetch memory from the system and add to the central cache freelist. 2779 ALWAYS_INLINE void TCMalloc_Central_FreeList::Populate() { 2780 // Release central list lock while operating on pageheap 2781 lock_.Unlock(); 2782 const size_t npages = class_to_pages[size_class_]; 2783 2784 Span* span; 2785 { 2786 SpinLockHolder h(&pageheap_lock); 2787 span = pageheap->New(npages); 2788 if (span) pageheap->RegisterSizeClass(span, size_class_); 2789 } 2790 if (span == NULL) { 2791 #if OS(WIN) 2792 MESSAGE("allocation failed: %d\n", ::GetLastError()); 2793 #else 2794 MESSAGE("allocation failed: %d\n", errno); 2795 #endif 2796 lock_.Lock(); 2797 return; 2798 } 2799 ASSERT_SPAN_COMMITTED(span); 2800 ASSERT(span->length == npages); 2801 // Cache sizeclass info eagerly. Locking is not necessary. 2802 // (Instead of being eager, we could just replace any stale info 2803 // about this span, but that seems to be no better in practice.) 2804 for (size_t i = 0; i < npages; i++) { 2805 pageheap->CacheSizeClass(span->start + i, size_class_); 2806 } 2807 2808 // Split the block into pieces and add to the free-list 2809 // TODO: coloring of objects to avoid cache conflicts? 
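// Illustrative example (sizes assumed): for a 96-byte size class on a
// single 4096-byte page, the loop below starts at start + 4032 (the span
// size rounded down to a multiple of 96) and walks backwards, creating 42
// free objects; the trailing 64 bytes are left unused. Because nodes are
// pushed while walking backwards, the finished free list is ordered by
// increasing address, with 'head' at 'start'.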
2810 HardenedSLL head = HardenedSLL::null(); 2811 char* start = reinterpret_cast<char*>(span->start << kPageShift); 2812 const size_t size = ByteSizeForClass(size_class_); 2813 char* ptr = start + (npages << kPageShift) - ((npages << kPageShift) % size); 2814 int num = 0; 2815 #if ENABLE(TCMALLOC_HARDENING) 2816 uint32_t startPoison = freedObjectStartPoison(); 2817 uint32_t endPoison = freedObjectEndPoison(); 2818 #endif 2819 2820 while (ptr > start) { 2821 ptr -= size; 2822 HardenedSLL node = HardenedSLL::create(ptr); 2823 POISON_DEALLOCATION_EXPLICIT(ptr, size, startPoison, endPoison); 2824 SLL_SetNext(node, head, entropy_); 2825 head = node; 2826 num++; 2827 } 2828 ASSERT(ptr == start); 2829 ASSERT(ptr == head.value()); 2830 #ifndef NDEBUG 2831 { 2832 HardenedSLL node = head; 2833 while (node) { 2834 ASSERT(IS_DEFINITELY_POISONED(node.value(), size)); 2835 node = SLL_Next(node, entropy_); 2836 } 2837 } 2838 #endif 2839 span->objects = head; 2840 ASSERT(span->objects.value() == head.value()); 2841 span->refcount = 0; // No sub-object in use yet 2842 2843 // Add span to list of non-empty spans 2844 lock_.Lock(); 2845 DLL_Prepend(&nonempty_, span, entropy_); 2846 counter_ += num; 2847 } 2848 2849 //------------------------------------------------------------------- 2850 // TCMalloc_ThreadCache implementation 2851 //------------------------------------------------------------------- 2852 2853 inline bool TCMalloc_ThreadCache::SampleAllocation(size_t k) { 2854 if (bytes_until_sample_ < k) { 2855 PickNextSample(k); 2856 return true; 2857 } else { 2858 bytes_until_sample_ -= k; 2859 return false; 2860 } 2861 } 2862 2863 void TCMalloc_ThreadCache::Init(ThreadIdentifier tid, uintptr_t entropy) { 2864 size_ = 0; 2865 next_ = NULL; 2866 prev_ = NULL; 2867 tid_ = tid; 2868 in_setspecific_ = false; 2869 entropy_ = entropy; 2870 #if ENABLE(TCMALLOC_HARDENING) 2871 ASSERT(entropy_); 2872 #endif 2873 for (size_t cl = 0; cl < kNumClasses; ++cl) { 2874 list_[cl].Init(entropy_); 2875 } 2876 2877 // Initialize RNG -- run it for a bit to get to good values 2878 bytes_until_sample_ = 0; 2879 rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this)); 2880 for (int i = 0; i < 100; i++) { 2881 PickNextSample(static_cast<size_t>(FLAGS_tcmalloc_sample_parameter * 2)); 2882 } 2883 } 2884 2885 void TCMalloc_ThreadCache::Cleanup() { 2886 // Put unused memory back into central cache 2887 for (size_t cl = 0; cl < kNumClasses; ++cl) { 2888 if (list_[cl].length() > 0) { 2889 ReleaseToCentralCache(cl, list_[cl].length()); 2890 } 2891 } 2892 } 2893 2894 ALWAYS_INLINE void* TCMalloc_ThreadCache::Allocate(size_t size) { 2895 ASSERT(size <= kMaxSize); 2896 const size_t cl = SizeClass(size); 2897 FreeList* list = &list_[cl]; 2898 size_t allocationSize = ByteSizeForClass(cl); 2899 if (list->empty()) { 2900 FetchFromCentralCache(cl, allocationSize); 2901 if (list->empty()) return NULL; 2902 } 2903 size_ -= allocationSize; 2904 void* result = list->Pop(); 2905 if (!result) 2906 return 0; 2907 RELEASE_ASSERT(IS_DEFINITELY_POISONED(result, allocationSize)); 2908 POISON_ALLOCATION(result, allocationSize); 2909 return result; 2910 } 2911 2912 inline void TCMalloc_ThreadCache::Deallocate(HardenedSLL ptr, size_t cl) { 2913 size_t allocationSize = ByteSizeForClass(cl); 2914 size_ += allocationSize; 2915 FreeList* list = &list_[cl]; 2916 if (MAY_BE_POISONED(ptr.value(), allocationSize)) 2917 list->Validate(ptr, allocationSize); 2918 2919 POISON_DEALLOCATION(ptr.value(), allocationSize); 2920 list->Push(ptr); 2921 // If enough data 
is free, put back into central cache 2922 if (list->length() > kMaxFreeListLength) { 2923 ReleaseToCentralCache(cl, num_objects_to_move[cl]); 2924 } 2925 if (size_ >= per_thread_cache_size) Scavenge(); 2926 } 2927 2928 // Remove some objects of class "cl" from central cache and add to thread heap 2929 ALWAYS_INLINE void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl, size_t allocationSize) { 2930 int fetch_count = num_objects_to_move[cl]; 2931 HardenedSLL start, end; 2932 central_cache[cl].RemoveRange(&start, &end, &fetch_count); 2933 list_[cl].PushRange(fetch_count, start, end); 2934 size_ += allocationSize * fetch_count; 2935 } 2936 2937 // Remove some objects of class "cl" from thread heap and add to central cache 2938 inline void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) { 2939 ASSERT(N > 0); 2940 FreeList* src = &list_[cl]; 2941 if (N > src->length()) N = src->length(); 2942 size_ -= N*ByteSizeForClass(cl); 2943 2944 // We return prepackaged chains of the correct size to the central cache. 2945 // TODO: Use the same format internally in the thread caches? 2946 int batch_size = num_objects_to_move[cl]; 2947 while (N > batch_size) { 2948 HardenedSLL tail, head; 2949 src->PopRange(batch_size, &head, &tail); 2950 central_cache[cl].InsertRange(head, tail, batch_size); 2951 N -= batch_size; 2952 } 2953 HardenedSLL tail, head; 2954 src->PopRange(N, &head, &tail); 2955 central_cache[cl].InsertRange(head, tail, N); 2956 } 2957 2958 // Release idle memory to the central cache 2959 inline void TCMalloc_ThreadCache::Scavenge() { 2960 // If the low-water mark for the free list is L, it means we would 2961 // not have had to allocate anything from the central cache even if 2962 // we had reduced the free list size by L. We aim to get closer to 2963 // that situation by dropping L/2 nodes from the free list. This 2964 // may not release much memory, but if so we will call scavenge again 2965 // pretty soon and the low-water marks will be high on that call. 2966 //int64 start = CycleClock::Now(); 2967 2968 for (size_t cl = 0; cl < kNumClasses; cl++) { 2969 FreeList* list = &list_[cl]; 2970 const int lowmark = list->lowwatermark(); 2971 if (lowmark > 0) { 2972 const int drop = (lowmark > 1) ? lowmark/2 : 1; 2973 ReleaseToCentralCache(cl, drop); 2974 } 2975 list->clear_lowwatermark(); 2976 } 2977 2978 //int64 finish = CycleClock::Now(); 2979 //CycleTimer ct; 2980 //MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0); 2981 } 2982 2983 void TCMalloc_ThreadCache::PickNextSample(size_t k) { 2984 // Make next "random" number 2985 // x^32+x^22+x^2+x^1+1 is a primitive polynomial for random numbers 2986 static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0); 2987 uint32_t r = rnd_; 2988 rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly); 2989 2990 // Next point is "rnd_ % (sample_period)". I.e., average 2991 // increment is "sample_period/2". 
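// The code below rounds the sampling parameter up to an entry in
// primes_list (the smallest one that is >= the requested value); a prime
// sample_period is presumably used so that sample points do not fall into
// lockstep with regular allocation-size patterns.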
2992 const int flag_value = static_cast<int>(FLAGS_tcmalloc_sample_parameter); 2993 static int last_flag_value = -1; 2994 2995 if (flag_value != last_flag_value) { 2996 SpinLockHolder h(&sample_period_lock); 2997 int i; 2998 for (i = 0; i < (static_cast<int>(sizeof(primes_list)/sizeof(primes_list[0])) - 1); i++) { 2999 if (primes_list[i] >= flag_value) { 3000 break; 3001 } 3002 } 3003 sample_period = primes_list[i]; 3004 last_flag_value = flag_value; 3005 } 3006 3007 bytes_until_sample_ += rnd_ % sample_period; 3008 3009 if (k > (static_cast<size_t>(-1) >> 2)) { 3010 // If the user has asked for a huge allocation then it is possible 3011 // for the code below to loop infinitely. Just return (note that 3012 // this throws off the sampling accuracy somewhat, but a user who 3013 // is allocating more than 1G of memory at a time can live with a 3014 // minor inaccuracy in profiling of small allocations, and also 3015 // would rather not wait for the loop below to terminate). 3016 return; 3017 } 3018 3019 while (bytes_until_sample_ < k) { 3020 // Increase bytes_until_sample_ by enough average sampling periods 3021 // (sample_period >> 1) to allow us to sample past the current 3022 // allocation. 3023 bytes_until_sample_ += (sample_period >> 1); 3024 } 3025 3026 bytes_until_sample_ -= k; 3027 } 3028 3029 void TCMalloc_ThreadCache::InitModule() { 3030 // There is a slight potential race here because of double-checked 3031 // locking idiom. However, as long as the program does a small 3032 // allocation before switching to multi-threaded mode, we will be 3033 // fine. We increase the chances of doing such a small allocation 3034 // by doing one in the constructor of the module_enter_exit_hook 3035 // object declared below. 3036 SpinLockHolder h(&pageheap_lock); 3037 if (!phinited) { 3038 uintptr_t entropy = HARDENING_ENTROPY; 3039 InitTSD(); 3040 InitSizeClasses(); 3041 threadheap_allocator.Init(entropy); 3042 span_allocator.Init(entropy); 3043 span_allocator.New(); // Reduce cache conflicts 3044 span_allocator.New(); // Reduce cache conflicts 3045 stacktrace_allocator.Init(entropy); 3046 DLL_Init(&sampled_objects, entropy); 3047 for (size_t i = 0; i < kNumClasses; ++i) { 3048 central_cache[i].Init(i, entropy); 3049 } 3050 pageheap->init(); 3051 phinited = 1; 3052 #if OS(MACOSX) 3053 FastMallocZone::init(); 3054 #endif 3055 } 3056 } 3057 3058 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::NewHeap(ThreadIdentifier tid, uintptr_t entropy) { 3059 // Create the heap and add it to the linked list 3060 TCMalloc_ThreadCache *heap = threadheap_allocator.New(); 3061 heap->Init(tid, entropy); 3062 heap->next_ = thread_heaps; 3063 heap->prev_ = NULL; 3064 if (thread_heaps != NULL) thread_heaps->prev_ = heap; 3065 thread_heaps = heap; 3066 thread_heap_count++; 3067 RecomputeThreadCacheSize(); 3068 return heap; 3069 } 3070 3071 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetThreadHeap() { 3072 #ifdef HAVE_TLS 3073 // __thread is faster, but only when the kernel supports it 3074 if (KernelSupportsTLS()) 3075 return threadlocal_heap; 3076 #elif OS(WIN) 3077 return static_cast<TCMalloc_ThreadCache*>(TlsGetValue(tlsIndex)); 3078 #else 3079 return static_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key)); 3080 #endif 3081 } 3082 3083 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() { 3084 TCMalloc_ThreadCache* ptr = NULL; 3085 if (!tsd_inited) { 3086 InitModule(); 3087 } else { 3088 ptr = GetThreadHeap(); 3089 } 3090 if (ptr == NULL) ptr = CreateCacheIfNecessary(); 3091 return ptr; 
3092 } 3093 3094 // In deletion paths, we do not try to create a thread-cache. This is 3095 // because we may be in the thread destruction code and may have 3096 // already cleaned up the cache for this thread. 3097 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() { 3098 if (!tsd_inited) return NULL; 3099 void* const p = GetThreadHeap(); 3100 return reinterpret_cast<TCMalloc_ThreadCache*>(p); 3101 } 3102 3103 void TCMalloc_ThreadCache::InitTSD() { 3104 ASSERT(!tsd_inited); 3105 pthread_key_create(&heap_key, DestroyThreadCache); 3106 #if OS(WIN) 3107 tlsIndex = TlsAlloc(); 3108 #endif 3109 tsd_inited = true; 3110 3111 #if !OS(WIN) 3112 // We may have used a fake pthread_t for the main thread. Fix it. 3113 pthread_t zero; 3114 memset(&zero, 0, sizeof(zero)); 3115 #endif 3116 ASSERT(pageheap_lock.IsHeld()); 3117 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { 3118 #if OS(WIN) 3119 if (h->tid_ == 0) { 3120 h->tid_ = GetCurrentThreadId(); 3121 } 3122 #else 3123 if (pthread_equal(h->tid_, zero)) { 3124 h->tid_ = pthread_self(); 3125 } 3126 #endif 3127 } 3128 } 3129 3130 TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() { 3131 // Initialize per-thread data if necessary 3132 TCMalloc_ThreadCache* heap = NULL; 3133 { 3134 SpinLockHolder h(&pageheap_lock); 3135 3136 #if OS(WIN) 3137 DWORD me; 3138 if (!tsd_inited) { 3139 me = 0; 3140 } else { 3141 me = GetCurrentThreadId(); 3142 } 3143 #else 3144 // Early on in glibc's life, we cannot even call pthread_self() 3145 pthread_t me; 3146 if (!tsd_inited) { 3147 memset(&me, 0, sizeof(me)); 3148 } else { 3149 me = pthread_self(); 3150 } 3151 #endif 3152 3153 // This may be a recursive malloc call from pthread_setspecific() 3154 // In that case, the heap for this thread has already been created 3155 // and added to the linked list. So we search for that first. 3156 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { 3157 #if OS(WIN) 3158 if (h->tid_ == me) { 3159 #else 3160 if (pthread_equal(h->tid_, me)) { 3161 #endif 3162 heap = h; 3163 break; 3164 } 3165 } 3166 3167 if (heap == NULL) heap = NewHeap(me, HARDENING_ENTROPY); 3168 } 3169 3170 // We call pthread_setspecific() outside the lock because it may 3171 // call malloc() recursively. The recursive call will never get 3172 // here again because it will find the already allocated heap in the 3173 // linked list of heaps. 3174 if (!heap->in_setspecific_ && tsd_inited) { 3175 heap->in_setspecific_ = true; 3176 setThreadHeap(heap); 3177 } 3178 return heap; 3179 } 3180 3181 void TCMalloc_ThreadCache::BecomeIdle() { 3182 if (!tsd_inited) return; // No caches yet 3183 TCMalloc_ThreadCache* heap = GetThreadHeap(); 3184 if (heap == NULL) return; // No thread cache to remove 3185 if (heap->in_setspecific_) return; // Do not disturb the active caller 3186 3187 heap->in_setspecific_ = true; 3188 setThreadHeap(NULL); 3189 #ifdef HAVE_TLS 3190 // Also update the copy in __thread 3191 threadlocal_heap = NULL; 3192 #endif 3193 heap->in_setspecific_ = false; 3194 if (GetThreadHeap() == heap) { 3195 // Somehow heap got reinstated by a recursive call to malloc 3196 // from pthread_setspecific. We give up in this case. 3197 return; 3198 } 3199 3200 // We can now get rid of the heap 3201 DeleteCache(heap); 3202 } 3203 3204 void TCMalloc_ThreadCache::DestroyThreadCache(void* ptr) { 3205 // Note that "ptr" cannot be NULL since pthread promises not 3206 // to invoke the destructor on NULL values, but for safety, 3207 // we check anyway. 
  if (ptr == NULL) return;
#ifdef HAVE_TLS
  // Prevent fast path of GetThreadHeap() from returning heap.
  threadlocal_heap = NULL;
#endif
  DeleteCache(reinterpret_cast<TCMalloc_ThreadCache*>(ptr));
}

void TCMalloc_ThreadCache::DeleteCache(TCMalloc_ThreadCache* heap) {
  // Remove all memory from heap
  heap->Cleanup();

  // Remove from linked list
  SpinLockHolder h(&pageheap_lock);
  if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_;
  if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_;
  if (thread_heaps == heap) thread_heaps = heap->next_;
  thread_heap_count--;
  RecomputeThreadCacheSize();

  threadheap_allocator.Delete(heap);
}

void TCMalloc_ThreadCache::RecomputeThreadCacheSize() {
  // Divide available space across threads
  int n = thread_heap_count > 0 ? thread_heap_count : 1;
  size_t space = overall_thread_cache_size / n;

  // Limit to allowed range
  if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
  if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;

  per_thread_cache_size = space;
}

void TCMalloc_ThreadCache::Print() const {
  for (size_t cl = 0; cl < kNumClasses; ++cl) {
    MESSAGE(" %5" PRIuS " : %4d len; %4d lo\n",
            ByteSizeForClass(cl),
            list_[cl].length(),
            list_[cl].lowwatermark());
  }
}

// Extract interesting stats
struct TCMallocStats {
  uint64_t system_bytes;   // Bytes allocated from the system
  uint64_t thread_bytes;   // Bytes in thread caches
  uint64_t central_bytes;  // Bytes in central cache
  uint64_t transfer_bytes; // Bytes in central transfer cache
  uint64_t pageheap_bytes; // Bytes in page heap
  uint64_t metadata_bytes; // Bytes allocated for metadata
};

// The constructor allocates an object to ensure that initialization
// runs before main(), and therefore we do not have a chance to become
// multi-threaded before initialization. We also create the TSD key
// here. Presumably by the time this constructor runs, glibc is in
// good enough shape to handle pthread_key_create().
//
// The constructor also takes the opportunity to tell STL to use
// tcmalloc. We want to do this early, before construct time, so
// all user STL allocations go through tcmalloc (which works really
// well for STL).
//
// The destructor prints stats when the program exits.
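// A sketch of the intended use (the exact declaration may live elsewhere or be
// omitted in this port): a single file-scope static instance such as
//     static TCMallocGuard module_enter_exit_hook;
// runs the constructor during static initialization, i.e. before main() and
// before the process can become multi-threaded. Note that the class as defined
// below has no destructor, so no stats are printed at exit in this copy.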
class TCMallocGuard {
public:

  TCMallocGuard() {
#ifdef HAVE_TLS // this is true if the cc/ld/libc combo supports TLS
    // Check whether the kernel also supports TLS (needs to happen at runtime)
    CheckIfKernelSupportsTLS();
#endif
    free(malloc(1));
    TCMalloc_ThreadCache::InitTSD();
    free(malloc(1));
  }
};

//-------------------------------------------------------------------
// Helpers for the exported routines below
//-------------------------------------------------------------------

#if !ASSERT_DISABLED
static inline bool CheckCachedSizeClass(void *ptr) {
  PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
  size_t cached_value = pageheap->GetSizeClassIfCached(p);
  return cached_value == 0 ||
      cached_value == pageheap->GetDescriptor(p)->sizeclass;
}
#endif

static inline void* CheckedMallocResult(void *result)
{
  ASSERT(result == 0 || CheckCachedSizeClass(result));
  return result;
}

static inline void* SpanToMallocResult(Span *span) {
  ASSERT_SPAN_COMMITTED(span);
  pageheap->CacheSizeClass(span->start, 0);
  void* result = reinterpret_cast<void*>(span->start << kPageShift);
  POISON_ALLOCATION(result, span->length << kPageShift);
  return CheckedMallocResult(result);
}

static ALWAYS_INLINE void* do_malloc(size_t size) {
  void* ret = 0;

  ASSERT(!isForbidden());

  // The following call forces module initialization
  TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
  if (size > kMaxSize) {
    // Use page-level allocator
    SpinLockHolder h(&pageheap_lock);
    Span* span = pageheap->New(pages(size));
    if (span)
      ret = SpanToMallocResult(span);
  } else {
    // The common case, and also the simplest. This just pops the
    // size-appropriate freelist, after replenishing it if it's empty.
    ret = CheckedMallocResult(heap->Allocate(size));
  }
  // This is the out-of-memory crash line.
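  // RELEASE_ASSERT() remains active in release builds, so a failed allocation
  // crashes here deterministically rather than handing callers a NULL pointer
  // they might not check.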
  RELEASE_ASSERT(ret);
  return ret;
}

static ALWAYS_INLINE void do_free(void* ptr) {
  if (ptr == NULL) return;
  ASSERT(pageheap != NULL); // Should not call free() before malloc()
  const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
  Span* span = NULL;
  size_t cl = pageheap->GetSizeClassIfCached(p);

  if (cl == 0) {
    span = pageheap->GetDescriptor(p);
    RELEASE_ASSERT(span->isValid());
    cl = span->sizeclass;
    pageheap->CacheSizeClass(p, cl);
  }
  if (cl != 0) {
#ifndef NO_TCMALLOC_SAMPLES
    ASSERT(!pageheap->GetDescriptor(p)->sample);
#endif
    TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCacheIfPresent();
    if (heap != NULL) {
      heap->Deallocate(HardenedSLL::create(ptr), cl);
    } else {
      // Delete directly into central cache
      POISON_DEALLOCATION(ptr, ByteSizeForClass(cl));
      SLL_SetNext(HardenedSLL::create(ptr), HardenedSLL::null(), central_cache[cl].entropy());
      central_cache[cl].InsertRange(HardenedSLL::create(ptr), HardenedSLL::create(ptr), 1);
    }
  } else {
    SpinLockHolder h(&pageheap_lock);
    ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
    ASSERT(span != NULL && span->start == p);
#ifndef NO_TCMALLOC_SAMPLES
    if (span->sample) {
      DLL_Remove(span);
      stacktrace_allocator.Delete(reinterpret_cast<StackTrace*>(span->objects));
      span->objects = NULL;
    }
#endif

    POISON_DEALLOCATION(ptr, span->length << kPageShift);
    pageheap->Delete(span);
  }
}

// Helpers for use by exported routines below:

#ifdef HAVE_STRUCT_MALLINFO // mallinfo isn't defined on freebsd, for instance
static inline struct mallinfo do_mallinfo() {
  TCMallocStats stats;
  ExtractStats(&stats, NULL);

  // Only some of the fields are filled in.
  struct mallinfo info;
  memset(&info, 0, sizeof(info));

  // Unfortunately, the struct contains "int" fields, so some of the
  // size values will be truncated.
  info.arena = static_cast<int>(stats.system_bytes);
  info.fsmblks = static_cast<int>(stats.thread_bytes
                                  + stats.central_bytes
                                  + stats.transfer_bytes);
  info.fordblks = static_cast<int>(stats.pageheap_bytes);
  info.uordblks = static_cast<int>(stats.system_bytes
                                   - stats.thread_bytes
                                   - stats.central_bytes
                                   - stats.transfer_bytes
                                   - stats.pageheap_bytes);

  return info;
}
#endif

//-------------------------------------------------------------------
// Exported routines
//-------------------------------------------------------------------

// CAVEAT: The code structure below ensures that MallocHook methods are always
//         called from the stack frame of the invoked allocation function.
//         heap-checker.cc depends on this to start a stack trace from
//         the call to the (de)allocation function.
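
// Illustrative caller-side sketch (not compiled here); the routines below are
// malloc-style entry points exported from namespace WTF:
//
//     void* p = WTF::fastMalloc(128);
//     p = WTF::fastRealloc(p, 256);
//     WTF::fastFree(p);
//
//     // fastCalloc(n, size) zero-fills, and returns 0 if n * size overflows.
//     void* table = WTF::fastCalloc(16, sizeof(int));
//     WTF::fastFree(table);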

void* fastMalloc(size_t size)
{
    return do_malloc(size);
}

void fastFree(void* ptr)
{
    do_free(ptr);
}

void* fastCalloc(size_t n, size_t elem_size)
{
    size_t totalBytes = n * elem_size;

    // Protect against overflow
    if (n > 1 && elem_size && (totalBytes / elem_size) != n)
        return 0;

    void* result = do_malloc(totalBytes);
    memset(result, 0, totalBytes);

    return result;
}

void* fastRealloc(void* old_ptr, size_t new_size)
{
    if (old_ptr == NULL) {
        return do_malloc(new_size);
    }
    if (new_size == 0) {
        free(old_ptr);
        return NULL;
    }

    // Get the size of the old entry
    const PageID p = reinterpret_cast<uintptr_t>(old_ptr) >> kPageShift;
    size_t cl = pageheap->GetSizeClassIfCached(p);
    Span *span = NULL;
    size_t old_size;
    if (cl == 0) {
        span = pageheap->GetDescriptor(p);
        cl = span->sizeclass;
        pageheap->CacheSizeClass(p, cl);
    }
    if (cl != 0) {
        old_size = ByteSizeForClass(cl);
    } else {
        ASSERT(span != NULL);
        old_size = span->length << kPageShift;
    }

    // Reallocate if the new size is larger than the old size,
    // or if the new size is significantly smaller than the old size.
    if ((new_size > old_size) || (AllocationSize(new_size) < old_size)) {
        // Need to reallocate
        void* new_ptr = do_malloc(new_size);
        memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size));
        // We could use a variant of do_free() that leverages the fact
        // that we already know the sizeclass of old_ptr. The benefit
        // would be small, so don't bother.
        do_free(old_ptr);
        return new_ptr;
    } else {
        return old_ptr;
    }
}

void releaseFastMallocFreeMemory()
{
    // Flush free pages in the current thread cache back to the page heap.
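    // Typically invoked under memory pressure: the current thread's free lists
    // are drained back into the central caches first, and the page heap is then
    // asked to return its completely free spans to the system.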
    if (TCMalloc_ThreadCache* threadCache = TCMalloc_ThreadCache::GetCacheIfPresent())
        threadCache->Cleanup();

    SpinLockHolder h(&pageheap_lock);
    pageheap->ReleaseFreePages();
}

FastMallocStatistics fastMallocStatistics()
{
    FastMallocStatistics statistics;

    SpinLockHolder lockHolder(&pageheap_lock);
    statistics.reservedVMBytes = static_cast<size_t>(pageheap->SystemBytes());
    statistics.committedVMBytes = statistics.reservedVMBytes - pageheap->ReturnedBytes();

    statistics.freeListBytes = 0;
    for (unsigned cl = 0; cl < kNumClasses; ++cl) {
        const int length = central_cache[cl].length();
        const int tc_length = central_cache[cl].tc_length();

        statistics.freeListBytes += ByteSizeForClass(cl) * (length + tc_length);
    }
    for (TCMalloc_ThreadCache* threadCache = thread_heaps; threadCache; threadCache = threadCache->next_)
        statistics.freeListBytes += threadCache->Size();

    return statistics;
}

#if OS(MACOSX)

template <typename T>
T* RemoteMemoryReader::nextEntryInHardenedLinkedList(T** remoteAddress, uintptr_t entropy) const
{
    T** localAddress = (*this)(remoteAddress);
    if (!localAddress)
        return 0;
    T* hardenedNext = *localAddress;
    if (!hardenedNext || hardenedNext == (void*)entropy)
        return 0;
    return XOR_MASK_PTR_WITH_KEY(hardenedNext, remoteAddress, entropy);
}

class FreeObjectFinder {
    const RemoteMemoryReader& m_reader;
    HashSet<void*> m_freeObjects;

public:
    FreeObjectFinder(const RemoteMemoryReader& reader) : m_reader(reader) { }

    void visit(void* ptr) { m_freeObjects.add(ptr); }
    bool isFreeObject(void* ptr) const { return m_freeObjects.contains(ptr); }
    bool isFreeObject(vm_address_t ptr) const { return isFreeObject(reinterpret_cast<void*>(ptr)); }
    size_t freeObjectCount() const { return m_freeObjects.size(); }

    void findFreeObjects(TCMalloc_ThreadCache* threadCache)
    {
        for (; threadCache; threadCache = (threadCache->next_ ?
                m_reader(threadCache->next_) : 0))
            threadCache->enumerateFreeObjects(*this, m_reader);
    }

    void findFreeObjects(TCMalloc_Central_FreeListPadded* centralFreeList, size_t numSizes, TCMalloc_Central_FreeListPadded* remoteCentralFreeList)
    {
        for (unsigned i = 0; i < numSizes; i++)
            centralFreeList[i].enumerateFreeObjects(*this, m_reader, remoteCentralFreeList + i);
    }
};

class PageMapFreeObjectFinder {
    const RemoteMemoryReader& m_reader;
    FreeObjectFinder& m_freeObjectFinder;
    uintptr_t m_entropy;

public:
    PageMapFreeObjectFinder(const RemoteMemoryReader& reader, FreeObjectFinder& freeObjectFinder, uintptr_t entropy)
        : m_reader(reader)
        , m_freeObjectFinder(freeObjectFinder)
        , m_entropy(entropy)
    {
#if ENABLE(TCMALLOC_HARDENING)
        ASSERT(m_entropy);
#endif
    }

    int visit(void* ptr) const
    {
        if (!ptr)
            return 1;

        Span* span = m_reader(reinterpret_cast<Span*>(ptr));
        if (!span)
            return 1;

        if (span->free) {
            void* ptr = reinterpret_cast<void*>(span->start << kPageShift);
            m_freeObjectFinder.visit(ptr);
        } else if (span->sizeclass) {
            // Walk the free list of the small-object span, keeping track of each object seen
            for (HardenedSLL nextObject = span->objects; nextObject; nextObject.setValue(m_reader.nextEntryInHardenedLinkedList(reinterpret_cast<void**>(nextObject.value()), m_entropy)))
                m_freeObjectFinder.visit(nextObject.value());
        }
        return span->length;
    }
};

class PageMapMemoryUsageRecorder {
    task_t m_task;
    void* m_context;
    unsigned m_typeMask;
    vm_range_recorder_t* m_recorder;
    const RemoteMemoryReader& m_reader;
    const FreeObjectFinder& m_freeObjectFinder;

    HashSet<void*> m_seenPointers;
    Vector<Span*> m_coalescedSpans;

public:
    PageMapMemoryUsageRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader, const FreeObjectFinder& freeObjectFinder)
        : m_task(task)
        , m_context(context)
        , m_typeMask(typeMask)
        , m_recorder(recorder)
        , m_reader(reader)
        , m_freeObjectFinder(freeObjectFinder)
    { }

    ~PageMapMemoryUsageRecorder()
    {
        ASSERT(!m_coalescedSpans.size());
    }

    void recordPendingRegions()
    {
        if (!(m_typeMask & (MALLOC_PTR_IN_USE_RANGE_TYPE | MALLOC_PTR_REGION_RANGE_TYPE))) {
            m_coalescedSpans.clear();
            return;
        }

        Vector<vm_range_t, 1024> allocatedPointers;
        for (size_t i = 0; i < m_coalescedSpans.size(); ++i) {
            Span *theSpan = m_coalescedSpans[i];
            if (theSpan->free)
                continue;

            vm_address_t spanStartAddress = theSpan->start << kPageShift;
            vm_size_t spanSizeInBytes = theSpan->length * kPageSize;

            if (!theSpan->sizeclass) {
                // If it's an allocated large object span, mark it as in use
                if (!m_freeObjectFinder.isFreeObject(spanStartAddress))
                    allocatedPointers.append((vm_range_t){spanStartAddress, spanSizeInBytes});
            } else {
                const size_t objectSize = ByteSizeForClass(theSpan->sizeclass);

                // Mark each allocated small object within the span as in use
                const vm_address_t endOfSpan = spanStartAddress + spanSizeInBytes;
                for (vm_address_t object = spanStartAddress; object + objectSize <= endOfSpan; object += objectSize) {
                    if (!m_freeObjectFinder.isFreeObject(object))
                        allocatedPointers.append((vm_range_t){object, objectSize});
                }
            }
        }

        (*m_recorder)(m_task, m_context, m_typeMask & (MALLOC_PTR_IN_USE_RANGE_TYPE | MALLOC_PTR_REGION_RANGE_TYPE), allocatedPointers.data(), allocatedPointers.size());

        m_coalescedSpans.clear();
    }

    int visit(void* ptr)
    {
        if (!ptr)
            return 1;

        Span* span = m_reader(reinterpret_cast<Span*>(ptr));
        if (!span || !span->start)
            return 1;

        if (!m_seenPointers.add(ptr).isNewEntry)
            return span->length;

        if (!m_coalescedSpans.size()) {
            m_coalescedSpans.append(span);
            return span->length;
        }

        Span* previousSpan = m_coalescedSpans[m_coalescedSpans.size() - 1];
        vm_address_t previousSpanStartAddress = previousSpan->start << kPageShift;
        vm_size_t previousSpanSizeInBytes = previousSpan->length * kPageSize;

        // If the new span is adjacent to the previous span, add it to the pending
        // coalesced range and defer recording.
        vm_address_t spanStartAddress = span->start << kPageShift;
        if (spanStartAddress == previousSpanStartAddress + previousSpanSizeInBytes) {
            m_coalescedSpans.append(span);
            return span->length;
        }

        // New span is not adjacent to the previous span, so record the spans coalesced so far.
        recordPendingRegions();
        m_coalescedSpans.append(span);

        return span->length;
    }
};

class AdminRegionRecorder {
    task_t m_task;
    void* m_context;
    unsigned m_typeMask;
    vm_range_recorder_t* m_recorder;

    Vector<vm_range_t, 1024> m_pendingRegions;

public:
    AdminRegionRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder)
        : m_task(task)
        , m_context(context)
        , m_typeMask(typeMask)
        , m_recorder(recorder)
    { }

    void recordRegion(vm_address_t ptr, size_t size)
    {
        if (m_typeMask & MALLOC_ADMIN_REGION_RANGE_TYPE)
            m_pendingRegions.append((vm_range_t){ ptr, size });
    }

    void visit(void *ptr, size_t size)
    {
        recordRegion(reinterpret_cast<vm_address_t>(ptr), size);
    }

    void recordPendingRegions()
    {
        if (m_pendingRegions.size()) {
            (*m_recorder)(m_task, m_context, MALLOC_ADMIN_REGION_RANGE_TYPE, m_pendingRegions.data(), m_pendingRegions.size());
            m_pendingRegions.clear();
        }
    }

    ~AdminRegionRecorder()
    {
        ASSERT(!m_pendingRegions.size());
    }
};

kern_return_t FastMallocZone::enumerate(task_t task, void* context, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t reader, vm_range_recorder_t recorder)
{
    RemoteMemoryReader memoryReader(task, reader);

    InitSizeClasses();

    FastMallocZone* mzone = memoryReader(reinterpret_cast<FastMallocZone*>(zoneAddress));
    TCMalloc_PageHeap* pageHeap = memoryReader(mzone->m_pageHeap);
    TCMalloc_ThreadCache** threadHeapsPointer = memoryReader(mzone->m_threadHeaps);
    TCMalloc_ThreadCache* threadHeaps = memoryReader(*threadHeapsPointer);

    TCMalloc_Central_FreeListPadded* centralCaches = memoryReader(mzone->m_centralCaches, sizeof(TCMalloc_Central_FreeListPadded) * kNumClasses);

    FreeObjectFinder finder(memoryReader);
    finder.findFreeObjects(threadHeaps);
    finder.findFreeObjects(centralCaches, kNumClasses, mzone->m_centralCaches);

    TCMalloc_PageHeap::PageMap* pageMap = &pageHeap->pagemap_;
    PageMapFreeObjectFinder pageMapFinder(memoryReader, finder, pageHeap->entropy_);
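    // The remote heap is walked in passes: the page-map pass below records every
    // object that sits on a free list, PageMapMemoryUsageRecorder then reports
    // the remaining (allocated) ranges, and AdminRegionRecorder finally reports
    // the allocator's own metadata regions.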
    pageMap->visitValues(pageMapFinder, memoryReader);

    PageMapMemoryUsageRecorder usageRecorder(task, context, typeMask, recorder, memoryReader, finder);
    pageMap->visitValues(usageRecorder, memoryReader);
    usageRecorder.recordPendingRegions();

    AdminRegionRecorder adminRegionRecorder(task, context, typeMask, recorder);
    pageMap->visitAllocations(adminRegionRecorder, memoryReader);

    PageHeapAllocator<Span>* spanAllocator = memoryReader(mzone->m_spanAllocator);
    PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator = memoryReader(mzone->m_pageHeapAllocator);

    spanAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader);
    pageHeapAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader);

    adminRegionRecorder.recordPendingRegions();

    return 0;
}

size_t FastMallocZone::size(malloc_zone_t*, const void*)
{
    return 0;
}

void* FastMallocZone::zoneMalloc(malloc_zone_t*, size_t)
{
    return 0;
}

void* FastMallocZone::zoneCalloc(malloc_zone_t*, size_t, size_t)
{
    return 0;
}

void FastMallocZone::zoneFree(malloc_zone_t*, void* ptr)
{
    // Due to <rdar://problem/5671357> zoneFree may be called by the system free even if the pointer
    // is not in this zone. When this happens, the pointer being freed was not allocated by any
    // zone so we need to print a useful error for the application developer.
    malloc_printf("*** error for object %p: pointer being freed was not allocated\n", ptr);
}

void* FastMallocZone::zoneRealloc(malloc_zone_t*, void*, size_t)
{
    return 0;
}


#undef malloc
#undef free
#undef realloc
#undef calloc

extern "C" {
malloc_introspection_t jscore_fastmalloc_introspection = { &FastMallocZone::enumerate, &FastMallocZone::goodSize, &FastMallocZone::check, &FastMallocZone::print,
    &FastMallocZone::log, &FastMallocZone::forceLock, &FastMallocZone::forceUnlock, &FastMallocZone::statistics

#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 1060
    , 0 // zone_locked will not be called on the zone unless it advertises itself as version five or higher.
#endif
#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 1070
    , 0, 0, 0, 0 // These members will not be used unless the zone advertises itself as version seven or higher.
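    // The constructor below registers the zone with version = 4, so these
    // version-gated slots are placeholders that the system will ignore.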
#endif

};
}

FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache** threadHeaps, TCMalloc_Central_FreeListPadded* centralCaches, PageHeapAllocator<Span>* spanAllocator, PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator)
    : m_pageHeap(pageHeap)
    , m_threadHeaps(threadHeaps)
    , m_centralCaches(centralCaches)
    , m_spanAllocator(spanAllocator)
    , m_pageHeapAllocator(pageHeapAllocator)
{
    memset(&m_zone, 0, sizeof(m_zone));
    m_zone.version = 4;
    m_zone.zone_name = "JavaScriptCore FastMalloc";
    m_zone.size = &FastMallocZone::size;
    m_zone.malloc = &FastMallocZone::zoneMalloc;
    m_zone.calloc = &FastMallocZone::zoneCalloc;
    m_zone.realloc = &FastMallocZone::zoneRealloc;
    m_zone.free = &FastMallocZone::zoneFree;
    m_zone.valloc = &FastMallocZone::zoneValloc;
    m_zone.destroy = &FastMallocZone::zoneDestroy;
    m_zone.introspect = &jscore_fastmalloc_introspection;
    malloc_zone_register(&m_zone);
}


void FastMallocZone::init()
{
    static FastMallocZone zone(pageheap, &thread_heaps, static_cast<TCMalloc_Central_FreeListPadded*>(central_cache), &span_allocator, &threadheap_allocator);
}

#endif // OS(MACOSX)

} // namespace WTF

#endif // FORCE_SYSTEM_MALLOC