1 // Copyright (c) 2005, 2007, Google Inc. 2 // All rights reserved. 3 // Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011 Apple Inc. All rights reserved. 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 // --- 32 // Author: Sanjay Ghemawat <opensource (at) google.com> 33 // 34 // A malloc that uses a per-thread cache to satisfy small malloc requests. 35 // (The time for malloc/free of a small object drops from 300 ns to 50 ns.) 36 // 37 // See doc/tcmalloc.html for a high-level 38 // description of how this malloc works. 39 // 40 // SYNCHRONIZATION 41 // 1. The thread-specific lists are accessed without acquiring any locks. 42 // This is safe because each such list is only accessed by one thread. 43 // 2. We have a lock per central free-list, and hold it while manipulating 44 // the central free list for a particular size. 45 // 3. The central page allocator is protected by "pageheap_lock". 46 // 4. The pagemap (which maps from page-number to descriptor), 47 // can be read without holding any locks, and written while holding 48 // the "pageheap_lock". 49 // 5. To improve performance, a subset of the information one can get 50 // from the pagemap is cached in a data structure, pagemap_cache_, 51 // that atomically reads and writes its entries. This cache can be 52 // read and written without locking. 53 // 54 // This multi-threaded access to the pagemap is safe for fairly 55 // subtle reasons. We basically assume that when an object X is 56 // allocated by thread A and deallocated by thread B, there must 57 // have been appropriate synchronization in the handoff of object 58 // X from thread A to thread B. The same logic applies to pagemap_cache_. 59 // 60 // THE PAGEID-TO-SIZECLASS CACHE 61 // Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache 62 // returns 0 for a particular PageID then that means "no information," not that 63 // the sizeclass is 0. The cache may have stale information for pages that do 64 // not hold the beginning of any free()'able object. 
Staleness is eliminated 65 // in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and 66 // do_memalign() for all other relevant pages. 67 // 68 // TODO: Bias reclamation to larger addresses 69 // TODO: implement mallinfo/mallopt 70 // TODO: Better testing 71 // 72 // 9/28/2003 (new page-level allocator replaces ptmalloc2): 73 // * malloc/free of small objects goes from ~300 ns to ~50 ns. 74 // * allocation of a reasonably complicated struct 75 // goes from about 1100 ns to about 300 ns. 76 77 #include "config.h" 78 #include "wtf/FastMalloc.h" 79 80 #include "wtf/Assertions.h" 81 #include "wtf/CPU.h" 82 #include "wtf/StdLibExtras.h" 83 #include "wtf/UnusedParam.h" 84 85 #if OS(DARWIN) 86 #include <AvailabilityMacros.h> 87 #endif 88 89 #include <limits> 90 #if OS(WINDOWS) 91 #include <windows.h> 92 #else 93 #include <pthread.h> 94 #endif 95 #include <stdlib.h> 96 #include <string.h> 97 98 #ifndef NO_TCMALLOC_SAMPLES 99 #define NO_TCMALLOC_SAMPLES 100 #endif 101 102 #if !USE(SYSTEM_MALLOC) && defined(NDEBUG) 103 #define FORCE_SYSTEM_MALLOC 0 104 #else 105 #define FORCE_SYSTEM_MALLOC 1 106 #endif 107 108 // Harden the pointers stored in the TCMalloc linked lists 109 #if COMPILER(GCC) 110 #define ENABLE_TCMALLOC_HARDENING 1 111 #endif 112 113 // Use a background thread to periodically scavenge memory to release back to the system 114 #define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1 115 116 #ifndef NDEBUG 117 namespace WTF { 118 119 #if OS(WINDOWS) 120 121 // TLS_OUT_OF_INDEXES is not defined on WinCE. 122 #ifndef TLS_OUT_OF_INDEXES 123 #define TLS_OUT_OF_INDEXES 0xffffffff 124 #endif 125 126 static DWORD isForibiddenTlsIndex = TLS_OUT_OF_INDEXES; 127 static const LPVOID kTlsAllowValue = reinterpret_cast<LPVOID>(0); // Must be zero. 128 static const LPVOID kTlsForbiddenValue = reinterpret_cast<LPVOID>(1); 129 130 #if !ASSERT_DISABLED 131 static bool isForbidden() 132 { 133 // By default, fastMalloc is allowed so we don't allocate the 134 // tls index unless we're asked to make it forbidden. If TlsSetValue 135 // has not been called on a thread, the value returned by TlsGetValue is 0. 
136 return (isForibiddenTlsIndex != TLS_OUT_OF_INDEXES) && (TlsGetValue(isForibiddenTlsIndex) == kTlsForbiddenValue); 137 } 138 #endif 139 140 void fastMallocForbid() 141 { 142 if (isForibiddenTlsIndex == TLS_OUT_OF_INDEXES) 143 isForibiddenTlsIndex = TlsAlloc(); // a little racey, but close enough for debug only 144 TlsSetValue(isForibiddenTlsIndex, kTlsForbiddenValue); 145 } 146 147 void fastMallocAllow() 148 { 149 if (isForibiddenTlsIndex == TLS_OUT_OF_INDEXES) 150 return; 151 TlsSetValue(isForibiddenTlsIndex, kTlsAllowValue); 152 } 153 154 #else // !OS(WINDOWS) 155 156 static pthread_key_t isForbiddenKey; 157 static pthread_once_t isForbiddenKeyOnce = PTHREAD_ONCE_INIT; 158 static void initializeIsForbiddenKey() 159 { 160 pthread_key_create(&isForbiddenKey, 0); 161 } 162 163 #if !ASSERT_DISABLED 164 static bool isForbidden() 165 { 166 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey); 167 return !!pthread_getspecific(isForbiddenKey); 168 } 169 #endif 170 171 void fastMallocForbid() 172 { 173 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey); 174 pthread_setspecific(isForbiddenKey, &isForbiddenKey); 175 } 176 177 void fastMallocAllow() 178 { 179 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey); 180 pthread_setspecific(isForbiddenKey, 0); 181 } 182 #endif // OS(WINDOWS) 183 184 } // namespace WTF 185 #endif // NDEBUG 186 187 namespace WTF { 188 189 void* fastZeroedMalloc(size_t n) 190 { 191 void* result = fastMalloc(n); 192 memset(result, 0, n); 193 return result; 194 } 195 196 char* fastStrDup(const char* src) 197 { 198 size_t len = strlen(src) + 1; 199 char* dup = static_cast<char*>(fastMalloc(len)); 200 memcpy(dup, src, len); 201 return dup; 202 } 203 204 } // namespace WTF 205 206 #if FORCE_SYSTEM_MALLOC 207 208 #if OS(DARWIN) 209 #include <malloc/malloc.h> 210 #elif OS(WINDOWS) 211 #include <malloc.h> 212 #endif 213 214 namespace WTF { 215 216 size_t fastMallocGoodSize(size_t bytes) 217 { 218 #if OS(DARWIN) 219 return malloc_good_size(bytes); 220 #else 221 return bytes; 222 #endif 223 } 224 225 void* fastMalloc(size_t n) 226 { 227 ASSERT(!isForbidden()); 228 229 void* result = malloc(n); 230 ASSERT(result); // We expect tcmalloc underneath, which would crash instead of getting here. 231 232 return result; 233 } 234 235 void* fastCalloc(size_t n_elements, size_t element_size) 236 { 237 ASSERT(!isForbidden()); 238 239 void* result = calloc(n_elements, element_size); 240 ASSERT(result); // We expect tcmalloc underneath, which would crash instead of getting here. 241 242 return result; 243 } 244 245 void fastFree(void* p) 246 { 247 ASSERT(!isForbidden()); 248 249 free(p); 250 } 251 252 void* fastRealloc(void* p, size_t n) 253 { 254 ASSERT(!isForbidden()); 255 256 void* result = realloc(p, n); 257 ASSERT(result); // We expect tcmalloc underneath, which would crash instead of getting here. 258 259 return result; 260 } 261 262 void releaseFastMallocFreeMemory() { } 263 264 FastMallocStatistics fastMallocStatistics() 265 { 266 FastMallocStatistics statistics = { 0, 0, 0 }; 267 return statistics; 268 } 269 270 } // namespace WTF 271 272 #if OS(DARWIN) 273 // This symbol is present in the JavaScriptCore exports file even when FastMalloc is disabled. 274 // It will never be used in this case, so it's type and value are less interesting than its presence. 
275 extern "C" const int jscore_fastmalloc_introspection = 0; 276 #endif 277 278 #else // FORCE_SYSTEM_MALLOC 279 280 #include "Compiler.h" 281 #include "TCPackedCache.h" 282 #include "TCPageMap.h" 283 #include "TCSpinLock.h" 284 #include "TCSystemAlloc.h" 285 #include <algorithm> 286 #include <errno.h> 287 #include <pthread.h> 288 #include <stdarg.h> 289 #include <stddef.h> 290 #include <stdio.h> 291 #if OS(UNIX) 292 #include <unistd.h> 293 #endif 294 #if OS(WINDOWS) 295 #ifndef WIN32_LEAN_AND_MEAN 296 #define WIN32_LEAN_AND_MEAN 297 #endif 298 #include <windows.h> 299 #endif 300 301 #if OS(DARWIN) 302 #include "MallocZoneSupport.h" 303 #include "wtf/HashSet.h" 304 #include "wtf/Vector.h" 305 #else 306 #include "wtf/CurrentTime.h" 307 #endif 308 309 #if HAVE(DISPATCH_H) 310 #include <dispatch/dispatch.h> 311 #endif 312 313 #ifdef __has_include 314 #if __has_include(<System/pthread_machdep.h>) 315 316 #include <System/pthread_machdep.h> 317 318 #endif 319 #endif 320 321 #ifndef PRIuS 322 #define PRIuS "zu" 323 #endif 324 325 // Calling pthread_getspecific through a global function pointer is faster than a normal 326 // call to the function on Mac OS X, and it's used in performance-critical code. So we 327 // use a function pointer. But that's not necessarily faster on other platforms, and we had 328 // problems with this technique on Windows, so we'll do this only on Mac OS X. 329 #if OS(DARWIN) 330 #if !USE(PTHREAD_GETSPECIFIC_DIRECT) 331 static void* (*pthread_getspecific_function_pointer)(pthread_key_t) = pthread_getspecific; 332 #define pthread_getspecific(key) pthread_getspecific_function_pointer(key) 333 #else 334 #define pthread_getspecific(key) _pthread_getspecific_direct(key) 335 #define pthread_setspecific(key, val) _pthread_setspecific_direct(key, (val)) 336 #endif 337 #endif 338 339 #define DEFINE_VARIABLE(type, name, value, meaning) \ 340 namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \ 341 type FLAGS_##name(value); \ 342 char FLAGS_no##name; \ 343 } \ 344 using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name 345 346 #define DEFINE_int64(name, value, meaning) \ 347 DEFINE_VARIABLE(int64_t, name, value, meaning) 348 349 #define DEFINE_double(name, value, meaning) \ 350 DEFINE_VARIABLE(double, name, value, meaning) 351 352 namespace WTF { 353 354 #define malloc fastMalloc 355 #define calloc fastCalloc 356 #define free fastFree 357 #define realloc fastRealloc 358 359 #define MESSAGE LOG_ERROR 360 #define CHECK_CONDITION ASSERT 361 362 static const char kLLHardeningMask = 0; 363 template <unsigned> struct EntropySource; 364 template <> struct EntropySource<4> { 365 static uint32_t value() 366 { 367 #if OS(DARWIN) 368 return arc4random(); 369 #else 370 return static_cast<uint32_t>(static_cast<uintptr_t>(currentTime() * 10000) ^ reinterpret_cast<uintptr_t>(&kLLHardeningMask)); 371 #endif 372 } 373 }; 374 375 template <> struct EntropySource<8> { 376 static uint64_t value() 377 { 378 return EntropySource<4>::value() | (static_cast<uint64_t>(EntropySource<4>::value()) << 32); 379 } 380 }; 381 382 #if ENABLE(TCMALLOC_HARDENING) 383 /* 384 * To make it harder to exploit use-after free style exploits 385 * we mask the addresses we put into our linked lists with the 386 * address of kLLHardeningMask. Due to ASLR the address of 387 * kLLHardeningMask should be sufficiently randomized to make direct 388 * freelist manipulation much more difficult. 
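 *
 * Concretely, a next pointer P stored in a list node at address A is kept
 * as P ^ (ROTATE_VALUE(A, MaskKeyShift) ^ entropy). Because XOR is its own
 * inverse, XOR_MASK_PTR_WITH_KEY() below performs both the masking on store
 * and the unmasking on load (see SLL_SetNext() and SLL_Next()).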
389 */ 390 enum { 391 MaskKeyShift = 13 392 }; 393 394 static ALWAYS_INLINE uintptr_t internalEntropyValue() 395 { 396 static uintptr_t value = EntropySource<sizeof(uintptr_t)>::value() | 1; 397 ASSERT(value); 398 return value; 399 } 400 401 #define HARDENING_ENTROPY internalEntropyValue() 402 #define ROTATE_VALUE(value, amount) (((value) >> (amount)) | ((value) << (sizeof(value) * 8 - (amount)))) 403 #define XOR_MASK_PTR_WITH_KEY(ptr, key, entropy) (reinterpret_cast<typeof(ptr)>(reinterpret_cast<uintptr_t>(ptr)^(ROTATE_VALUE(reinterpret_cast<uintptr_t>(key), MaskKeyShift)^entropy))) 404 405 406 static ALWAYS_INLINE uint32_t freedObjectStartPoison() 407 { 408 static uint32_t value = EntropySource<sizeof(uint32_t)>::value() | 1; 409 ASSERT(value); 410 return value; 411 } 412 413 static ALWAYS_INLINE uint32_t freedObjectEndPoison() 414 { 415 static uint32_t value = EntropySource<sizeof(uint32_t)>::value() | 1; 416 ASSERT(value); 417 return value; 418 } 419 420 #define PTR_TO_UINT32(ptr) static_cast<uint32_t>(reinterpret_cast<uintptr_t>(ptr)) 421 #define END_POISON_INDEX(allocationSize) (((allocationSize) - sizeof(uint32_t)) / sizeof(uint32_t)) 422 #define POISON_ALLOCATION(allocation, allocationSize) do { \ 423 ASSERT((allocationSize) >= 2 * sizeof(uint32_t)); \ 424 reinterpret_cast<uint32_t*>(allocation)[0] = 0xbadbeef1; \ 425 reinterpret_cast<uint32_t*>(allocation)[1] = 0xbadbeef3; \ 426 if ((allocationSize) < 4 * sizeof(uint32_t)) \ 427 break; \ 428 reinterpret_cast<uint32_t*>(allocation)[2] = 0xbadbeef5; \ 429 reinterpret_cast<uint32_t*>(allocation)[END_POISON_INDEX(allocationSize)] = 0xbadbeef7; \ 430 } while (false); 431 432 #define POISON_DEALLOCATION_EXPLICIT(allocation, allocationSize, startPoison, endPoison) do { \ 433 ASSERT((allocationSize) >= 2 * sizeof(uint32_t)); \ 434 reinterpret_cast<uint32_t*>(allocation)[0] = 0xbadbeef9; \ 435 reinterpret_cast<uint32_t*>(allocation)[1] = 0xbadbeefb; \ 436 if ((allocationSize) < 4 * sizeof(uint32_t)) \ 437 break; \ 438 reinterpret_cast<uint32_t*>(allocation)[2] = (startPoison) ^ PTR_TO_UINT32(allocation); \ 439 reinterpret_cast<uint32_t*>(allocation)[END_POISON_INDEX(allocationSize)] = (endPoison) ^ PTR_TO_UINT32(allocation); \ 440 } while (false) 441 442 #define POISON_DEALLOCATION(allocation, allocationSize) \ 443 POISON_DEALLOCATION_EXPLICIT(allocation, (allocationSize), freedObjectStartPoison(), freedObjectEndPoison()) 444 445 #define MAY_BE_POISONED(allocation, allocationSize) (((allocationSize) >= 4 * sizeof(uint32_t)) && ( \ 446 (reinterpret_cast<uint32_t*>(allocation)[2] == (freedObjectStartPoison() ^ PTR_TO_UINT32(allocation))) || \ 447 (reinterpret_cast<uint32_t*>(allocation)[END_POISON_INDEX(allocationSize)] == (freedObjectEndPoison() ^ PTR_TO_UINT32(allocation))) \ 448 )) 449 450 #define IS_DEFINITELY_POISONED(allocation, allocationSize) (((allocationSize) < 4 * sizeof(uint32_t)) || ( \ 451 (reinterpret_cast<uint32_t*>(allocation)[2] == (freedObjectStartPoison() ^ PTR_TO_UINT32(allocation))) && \ 452 (reinterpret_cast<uint32_t*>(allocation)[END_POISON_INDEX(allocationSize)] == (freedObjectEndPoison() ^ PTR_TO_UINT32(allocation))) \ 453 )) 454 455 #else 456 457 #define POISON_ALLOCATION(allocation, allocationSize) 458 #define POISON_DEALLOCATION(allocation, allocationSize) 459 #define POISON_DEALLOCATION_EXPLICIT(allocation, allocationSize, startPoison, endPoison) 460 #define MAY_BE_POISONED(allocation, allocationSize) (false) 461 #define IS_DEFINITELY_POISONED(allocation, allocationSize) (true) 462 #define 
XOR_MASK_PTR_WITH_KEY(ptr, key, entropy) (((void)entropy), ((void)key), ptr) 463 464 #define HARDENING_ENTROPY 0 465 466 #endif 467 468 //------------------------------------------------------------------- 469 // Configuration 470 //------------------------------------------------------------------- 471 472 // Not all possible combinations of the following parameters make 473 // sense. In particular, if kMaxSize increases, you may have to 474 // increase kNumClasses as well. 475 static const size_t kPageShift = 12; 476 static const size_t kPageSize = 1 << kPageShift; 477 static const size_t kMaxSize = 8u * kPageSize; 478 static const size_t kAlignShift = 3; 479 static const size_t kAlignment = 1 << kAlignShift; 480 static const size_t kNumClasses = 68; 481 482 // Allocates a big block of memory for the pagemap once we reach more than 483 // 128MB 484 static const size_t kPageMapBigAllocationThreshold = 128 << 20; 485 486 // Minimum number of pages to fetch from system at a time. Must be 487 // significantly bigger than kPageSize to amortize system-call 488 // overhead, and also to reduce external fragementation. Also, we 489 // should keep this value big because various incarnations of Linux 490 // have small limits on the number of mmap() regions per 491 // address-space. 492 static const size_t kMinSystemAlloc = 1 << (20 - kPageShift); 493 494 // Number of objects to move between a per-thread list and a central 495 // list in one shot. We want this to be not too small so we can 496 // amortize the lock overhead for accessing the central list. Making 497 // it too big may temporarily cause unnecessary memory wastage in the 498 // per-thread free list until the scavenger cleans up the list. 499 static int num_objects_to_move[kNumClasses]; 500 501 // Maximum length we allow a per-thread free-list to have before we 502 // move objects from it into the corresponding central free-list. We 503 // want this big to avoid locking the central free-list too often. It 504 // should not hurt to make this list somewhat big because the 505 // scavenging code will shrink it down when its contents are not in use. 506 static const int kMaxFreeListLength = 256; 507 508 // Lower and upper bounds on the per-thread cache sizes 509 static const size_t kMinThreadCacheSize = kMaxSize * 2; 510 static const size_t kMaxThreadCacheSize = 2 << 20; 511 512 // Default bound on the total amount of thread caches 513 static const size_t kDefaultOverallThreadCacheSize = 16 << 20; 514 515 // For all span-lengths < kMaxPages we keep an exact-size list. 516 // REQUIRED: kMaxPages >= kMinSystemAlloc; 517 static const size_t kMaxPages = kMinSystemAlloc; 518 519 /* The smallest prime > 2^n */ 520 static int primes_list[] = { 521 // Small values might cause high rates of sampling 522 // and hence commented out. 523 // 2, 5, 11, 17, 37, 67, 131, 257, 524 // 521, 1031, 2053, 4099, 8209, 16411, 525 32771, 65537, 131101, 262147, 524309, 1048583, 526 2097169, 4194319, 8388617, 16777259, 33554467 }; 527 528 // Twice the approximate gap between sampling actions. 529 // I.e., we take one sample approximately once every 530 // tcmalloc_sample_parameter/2 531 // bytes of allocation, i.e., ~ once every 128KB. 532 // Must be a prime number. 533 #ifdef NO_TCMALLOC_SAMPLES 534 DEFINE_int64(tcmalloc_sample_parameter, 0, 535 "Unused: code is compiled with NO_TCMALLOC_SAMPLES"); 536 static size_t sample_period = 0; 537 #else 538 DEFINE_int64(tcmalloc_sample_parameter, 262147, 539 "Twice the approximate gap between sampling actions." 
540 " Must be a prime number. Otherwise will be rounded up to a " 541 " larger prime number"); 542 static size_t sample_period = 262147; 543 #endif 544 545 // Protects sample_period above 546 static SpinLock sample_period_lock = SPINLOCK_INITIALIZER; 547 548 // Parameters for controlling how fast memory is returned to the OS. 549 550 DEFINE_double(tcmalloc_release_rate, 1, 551 "Rate at which we release unused memory to the system. " 552 "Zero means we never release memory back to the system. " 553 "Increase this flag to return memory faster; decrease it " 554 "to return memory slower. Reasonable rates are in the " 555 "range [0,10]"); 556 557 //------------------------------------------------------------------- 558 // Mapping from size to size_class and vice versa 559 //------------------------------------------------------------------- 560 561 // Sizes <= 1024 have an alignment >= 8. So for such sizes we have an 562 // array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128. 563 // So for these larger sizes we have an array indexed by ceil(size/128). 564 // 565 // We flatten both logical arrays into one physical array and use 566 // arithmetic to compute an appropriate index. The constants used by 567 // ClassIndex() were selected to make the flattening work. 568 // 569 // Examples: 570 // Size Expression Index 571 // ------------------------------------------------------- 572 // 0 (0 + 7) / 8 0 573 // 1 (1 + 7) / 8 1 574 // ... 575 // 1024 (1024 + 7) / 8 128 576 // 1025 (1025 + 127 + (120<<7)) / 128 129 577 // ... 578 // 32768 (32768 + 127 + (120<<7)) / 128 376 579 static const size_t kMaxSmallSize = 1024; 580 static const int shift_amount[2] = { 3, 7 }; // For divides by 8 or 128 581 static const int add_amount[2] = { 7, 127 + (120 << 7) }; 582 static unsigned char class_array[377]; 583 584 // Compute index of the class_array[] entry for a given size 585 static inline int ClassIndex(size_t s) { 586 const int i = (s > kMaxSmallSize); 587 return static_cast<int>((s + add_amount[i]) >> shift_amount[i]); 588 } 589 590 // Mapping from size class to max size storable in that class 591 static size_t class_to_size[kNumClasses]; 592 593 // Mapping from size class to number of pages to allocate at a time 594 static size_t class_to_pages[kNumClasses]; 595 596 // Hardened singly linked list. We make this a class to allow compiler to 597 // statically prevent mismatching hardened and non-hardened list 598 class HardenedSLL { 599 public: 600 static ALWAYS_INLINE HardenedSLL create(void* value) 601 { 602 HardenedSLL result; 603 result.m_value = value; 604 return result; 605 } 606 607 static ALWAYS_INLINE HardenedSLL null() 608 { 609 HardenedSLL result; 610 result.m_value = 0; 611 return result; 612 } 613 614 ALWAYS_INLINE void setValue(void* value) { m_value = value; } 615 ALWAYS_INLINE void* value() const { return m_value; } 616 ALWAYS_INLINE bool operator!() const { return !m_value; } 617 typedef void* (HardenedSLL::*UnspecifiedBoolType); 618 ALWAYS_INLINE operator UnspecifiedBoolType() const { return m_value ? &HardenedSLL::m_value : 0; } 619 620 bool operator!=(const HardenedSLL& other) const { return m_value != other.m_value; } 621 bool operator==(const HardenedSLL& other) const { return m_value == other.m_value; } 622 623 private: 624 void* m_value; 625 }; 626 627 // TransferCache is used to cache transfers of num_objects_to_move[size_class] 628 // back and forth between thread caches and the central cache for a given size 629 // class. 
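// Each TCEntry slot holds one detached chain of exactly
// num_objects_to_move[size_class] objects (see tc_length() below), so a whole
// chain can be handed between a thread cache and the central cache without
// walking it.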
630 struct TCEntry { 631 HardenedSLL head; // Head of chain of objects. 632 HardenedSLL tail; // Tail of chain of objects. 633 }; 634 // A central cache freelist can have anywhere from 0 to kNumTransferEntries 635 // slots to put link list chains into. To keep memory usage bounded the total 636 // number of TCEntries across size classes is fixed. Currently each size 637 // class is initially given one TCEntry which also means that the maximum any 638 // one class can have is kNumClasses. 639 static const int kNumTransferEntries = kNumClasses; 640 641 // Note: the following only works for "n"s that fit in 32-bits, but 642 // that is fine since we only use it for small sizes. 643 static inline int LgFloor(size_t n) { 644 int log = 0; 645 for (int i = 4; i >= 0; --i) { 646 int shift = (1 << i); 647 size_t x = n >> shift; 648 if (x != 0) { 649 n = x; 650 log += shift; 651 } 652 } 653 ASSERT(n == 1); 654 return log; 655 } 656 657 // Functions for using our simple hardened singly linked list 658 static ALWAYS_INLINE HardenedSLL SLL_Next(HardenedSLL t, uintptr_t entropy) { 659 return HardenedSLL::create(XOR_MASK_PTR_WITH_KEY(*(reinterpret_cast<void**>(t.value())), t.value(), entropy)); 660 } 661 662 static ALWAYS_INLINE void SLL_SetNext(HardenedSLL t, HardenedSLL n, uintptr_t entropy) { 663 *(reinterpret_cast<void**>(t.value())) = XOR_MASK_PTR_WITH_KEY(n.value(), t.value(), entropy); 664 } 665 666 static ALWAYS_INLINE void SLL_Push(HardenedSLL* list, HardenedSLL element, uintptr_t entropy) { 667 SLL_SetNext(element, *list, entropy); 668 *list = element; 669 } 670 671 static ALWAYS_INLINE HardenedSLL SLL_Pop(HardenedSLL *list, uintptr_t entropy) { 672 HardenedSLL result = *list; 673 *list = SLL_Next(*list, entropy); 674 return result; 675 } 676 677 // Remove N elements from a linked list to which head points. head will be 678 // modified to point to the new head. start and end will point to the first 679 // and last nodes of the range. Note that end will point to NULL after this 680 // function is called. 681 682 static ALWAYS_INLINE void SLL_PopRange(HardenedSLL* head, int N, HardenedSLL *start, HardenedSLL *end, uintptr_t entropy) { 683 if (N == 0) { 684 *start = HardenedSLL::null(); 685 *end = HardenedSLL::null(); 686 return; 687 } 688 689 HardenedSLL tmp = *head; 690 for (int i = 1; i < N; ++i) { 691 tmp = SLL_Next(tmp, entropy); 692 } 693 694 *start = *head; 695 *end = tmp; 696 *head = SLL_Next(tmp, entropy); 697 // Unlink range from list. 698 SLL_SetNext(tmp, HardenedSLL::null(), entropy); 699 } 700 701 static ALWAYS_INLINE void SLL_PushRange(HardenedSLL *head, HardenedSLL start, HardenedSLL end, uintptr_t entropy) { 702 if (!start) return; 703 SLL_SetNext(end, *head, entropy); 704 *head = start; 705 } 706 707 static ALWAYS_INLINE size_t SLL_Size(HardenedSLL head, uintptr_t entropy) { 708 int count = 0; 709 while (head) { 710 count++; 711 head = SLL_Next(head, entropy); 712 } 713 return count; 714 } 715 716 // Setup helper functions. 717 718 static ALWAYS_INLINE size_t SizeClass(size_t size) { 719 return class_array[ClassIndex(size)]; 720 } 721 722 // Get the byte-size for a specified class 723 static ALWAYS_INLINE size_t ByteSizeForClass(size_t cl) { 724 return class_to_size[cl]; 725 } 726 static int NumMoveSize(size_t size) { 727 if (size == 0) return 0; 728 // Use approx 64k transfers between thread and central caches. 
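  // For example, a 2KB size class gets 64KB / 2KB = 32 objects per transfer,
  // which coincides with the final clamp applied below.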
729 int num = static_cast<int>(64.0 * 1024.0 / size); 730 if (num < 2) num = 2; 731 // Clamp well below kMaxFreeListLength to avoid ping pong between central 732 // and thread caches. 733 if (num > static_cast<int>(0.8 * kMaxFreeListLength)) 734 num = static_cast<int>(0.8 * kMaxFreeListLength); 735 736 // Also, avoid bringing in too many objects into small object free 737 // lists. There are lots of such lists, and if we allow each one to 738 // fetch too many at a time, we end up having to scavenge too often 739 // (especially when there are lots of threads and each thread gets a 740 // small allowance for its thread cache). 741 // 742 // TODO: Make thread cache free list sizes dynamic so that we do not 743 // have to equally divide a fixed resource amongst lots of threads. 744 if (num > 32) num = 32; 745 746 return num; 747 } 748 749 // Initialize the mapping arrays 750 static void InitSizeClasses() { 751 // Do some sanity checking on add_amount[]/shift_amount[]/class_array[] 752 if (ClassIndex(0) < 0) { 753 MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0)); 754 CRASH(); 755 } 756 if (static_cast<size_t>(ClassIndex(kMaxSize)) >= sizeof(class_array)) { 757 MESSAGE("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize)); 758 CRASH(); 759 } 760 761 // Compute the size classes we want to use 762 size_t sc = 1; // Next size class to assign 763 unsigned char alignshift = kAlignShift; 764 int last_lg = -1; 765 for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) { 766 int lg = LgFloor(size); 767 if (lg > last_lg) { 768 // Increase alignment every so often. 769 // 770 // Since we double the alignment every time size doubles and 771 // size >= 128, this means that space wasted due to alignment is 772 // at most 16/128 i.e., 12.5%. Plus we cap the alignment at 256 773 // bytes, so the space wasted as a percentage starts falling for 774 // sizes > 2K. 775 if ((lg >= 7) && (alignshift < 8)) { 776 alignshift++; 777 } 778 last_lg = lg; 779 } 780 781 // Allocate enough pages so leftover is less than 1/8 of total. 782 // This bounds wasted space to at most 12.5%. 783 size_t psize = kPageSize; 784 while ((psize % size) > (psize >> 3)) { 785 psize += kPageSize; 786 } 787 const size_t my_pages = psize >> kPageShift; 788 789 if (sc > 1 && my_pages == class_to_pages[sc-1]) { 790 // See if we can merge this into the previous class without 791 // increasing the fragmentation of the previous class. 
792 const size_t my_objects = (my_pages << kPageShift) / size; 793 const size_t prev_objects = (class_to_pages[sc-1] << kPageShift) 794 / class_to_size[sc-1]; 795 if (my_objects == prev_objects) { 796 // Adjust last class to include this size 797 class_to_size[sc-1] = size; 798 continue; 799 } 800 } 801 802 // Add new class 803 class_to_pages[sc] = my_pages; 804 class_to_size[sc] = size; 805 sc++; 806 } 807 if (sc != kNumClasses) { 808 MESSAGE("wrong number of size classes: found %" PRIuS " instead of %d\n", 809 sc, int(kNumClasses)); 810 CRASH(); 811 } 812 813 // Initialize the mapping arrays 814 int next_size = 0; 815 for (unsigned char c = 1; c < kNumClasses; c++) { 816 const size_t max_size_in_class = class_to_size[c]; 817 for (size_t s = next_size; s <= max_size_in_class; s += kAlignment) { 818 class_array[ClassIndex(s)] = c; 819 } 820 next_size = static_cast<int>(max_size_in_class + kAlignment); 821 } 822 823 // Double-check sizes just to be safe 824 for (size_t size = 0; size <= kMaxSize; size++) { 825 const size_t sc = SizeClass(size); 826 if (sc == 0) { 827 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size); 828 CRASH(); 829 } 830 if (sc > 1 && size <= class_to_size[sc-1]) { 831 MESSAGE("Allocating unnecessarily large class %" PRIuS " for %" PRIuS 832 "\n", sc, size); 833 CRASH(); 834 } 835 if (sc >= kNumClasses) { 836 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size); 837 CRASH(); 838 } 839 const size_t s = class_to_size[sc]; 840 if (size > s) { 841 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc); 842 CRASH(); 843 } 844 if (s == 0) { 845 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc); 846 CRASH(); 847 } 848 } 849 850 // Initialize the num_objects_to_move array. 851 for (size_t cl = 1; cl < kNumClasses; ++cl) { 852 num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl)); 853 } 854 } 855 856 // ------------------------------------------------------------------------- 857 // Simple allocator for objects of a specified type. External locking 858 // is required before accessing one of these objects. 
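// (The instances below - span_allocator, stacktrace_allocator and the
// thread-cache allocator - are expected to be used with pageheap_lock held.)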
859 // ------------------------------------------------------------------------- 860 861 // Metadata allocator -- keeps stats about how many bytes allocated 862 static uint64_t metadata_system_bytes = 0; 863 static void* MetaDataAlloc(size_t bytes) { 864 void* result = TCMalloc_SystemAlloc(bytes, 0); 865 if (result != NULL) { 866 metadata_system_bytes += bytes; 867 } 868 return result; 869 } 870 871 template <class T> 872 class PageHeapAllocator { 873 private: 874 // How much to allocate from system at a time 875 static const size_t kAllocIncrement = 32 << 10; 876 877 // Aligned size of T 878 static const size_t kAlignedSize 879 = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment); 880 881 // Free area from which to carve new objects 882 char* free_area_; 883 size_t free_avail_; 884 885 // Linked list of all regions allocated by this allocator 886 HardenedSLL allocated_regions_; 887 888 // Free list of already carved objects 889 HardenedSLL free_list_; 890 891 // Number of allocated but unfreed objects 892 int inuse_; 893 uintptr_t entropy_; 894 895 public: 896 void Init(uintptr_t entropy) { 897 ASSERT(kAlignedSize <= kAllocIncrement); 898 inuse_ = 0; 899 allocated_regions_ = HardenedSLL::null(); 900 free_area_ = NULL; 901 free_avail_ = 0; 902 free_list_.setValue(NULL); 903 entropy_ = entropy; 904 } 905 906 T* New() { 907 // Consult free list 908 void* result; 909 if (free_list_) { 910 result = free_list_.value(); 911 free_list_ = SLL_Next(free_list_, entropy_); 912 } else { 913 if (free_avail_ < kAlignedSize) { 914 // Need more room 915 char* new_allocation = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement)); 916 if (!new_allocation) 917 CRASH(); 918 919 HardenedSLL new_head = HardenedSLL::create(new_allocation); 920 SLL_SetNext(new_head, allocated_regions_, entropy_); 921 allocated_regions_ = new_head; 922 free_area_ = new_allocation + kAlignedSize; 923 free_avail_ = kAllocIncrement - kAlignedSize; 924 } 925 result = free_area_; 926 free_area_ += kAlignedSize; 927 free_avail_ -= kAlignedSize; 928 } 929 inuse_++; 930 return reinterpret_cast<T*>(result); 931 } 932 933 void Delete(T* p) { 934 HardenedSLL new_head = HardenedSLL::create(p); 935 SLL_SetNext(new_head, free_list_, entropy_); 936 free_list_ = new_head; 937 inuse_--; 938 } 939 940 int inuse() const { return inuse_; } 941 942 #if OS(DARWIN) 943 template <class Recorder> 944 void recordAdministrativeRegions(Recorder& recorder, const RemoteMemoryReader& reader) 945 { 946 for (HardenedSLL adminAllocation = allocated_regions_; adminAllocation; adminAllocation.setValue(reader.nextEntryInHardenedLinkedList(reinterpret_cast<void**>(adminAllocation.value()), entropy_))) 947 recorder.recordRegion(reinterpret_cast<vm_address_t>(adminAllocation.value()), kAllocIncrement); 948 } 949 #endif 950 }; 951 952 // ------------------------------------------------------------------------- 953 // Span - a contiguous run of pages 954 // ------------------------------------------------------------------------- 955 956 // Type that can hold a page number 957 typedef uintptr_t PageID; 958 959 // Type that can hold the length of a run of pages 960 typedef uintptr_t Length; 961 962 static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift; 963 964 // Convert byte size into pages. This won't overflow, but may return 965 // an unreasonably large value if bytes is huge enough. 966 static inline Length pages(size_t bytes) { 967 return (bytes >> kPageShift) + 968 ((bytes & (kPageSize - 1)) > 0 ? 
1 : 0); 969 } 970 971 // Convert a user size into the number of bytes that will actually be 972 // allocated 973 static size_t AllocationSize(size_t bytes) { 974 if (bytes > kMaxSize) { 975 // Large object: we allocate an integral number of pages 976 ASSERT(bytes <= (kMaxValidPages << kPageShift)); 977 return pages(bytes) << kPageShift; 978 } else { 979 // Small object: find the size class to which it belongs 980 return ByteSizeForClass(SizeClass(bytes)); 981 } 982 } 983 984 enum { 985 kSpanCookieBits = 10, 986 kSpanCookieMask = (1 << 10) - 1, 987 kSpanThisShift = 7 988 }; 989 990 static uint32_t spanValidationCookie; 991 static uint32_t spanInitializerCookie() 992 { 993 static uint32_t value = EntropySource<sizeof(uint32_t)>::value() & kSpanCookieMask; 994 spanValidationCookie = value; 995 return value; 996 } 997 998 // Information kept for a span (a contiguous run of pages). 999 struct Span { 1000 PageID start; // Starting page number 1001 Length length; // Number of pages in span 1002 Span* next(uintptr_t entropy) const { return XOR_MASK_PTR_WITH_KEY(m_next, this, entropy); } 1003 Span* remoteNext(const Span* remoteSpanPointer, uintptr_t entropy) const { return XOR_MASK_PTR_WITH_KEY(m_next, remoteSpanPointer, entropy); } 1004 Span* prev(uintptr_t entropy) const { return XOR_MASK_PTR_WITH_KEY(m_prev, this, entropy); } 1005 void setNext(Span* next, uintptr_t entropy) { m_next = XOR_MASK_PTR_WITH_KEY(next, this, entropy); } 1006 void setPrev(Span* prev, uintptr_t entropy) { m_prev = XOR_MASK_PTR_WITH_KEY(prev, this, entropy); } 1007 1008 private: 1009 Span* m_next; // Used when in link list 1010 Span* m_prev; // Used when in link list 1011 public: 1012 HardenedSLL objects; // Linked list of free objects 1013 unsigned int free : 1; // Is the span free 1014 #ifndef NO_TCMALLOC_SAMPLES 1015 unsigned int sample : 1; // Sampled object? 
1016 #endif 1017 unsigned int sizeclass : 8; // Size-class for small objects (or 0) 1018 unsigned int refcount : 11; // Number of non-free objects 1019 bool decommitted : 1; 1020 void initCookie() 1021 { 1022 m_cookie = ((reinterpret_cast<uintptr_t>(this) >> kSpanThisShift) & kSpanCookieMask) ^ spanInitializerCookie(); 1023 } 1024 void clearCookie() { m_cookie = 0; } 1025 bool isValid() const 1026 { 1027 return (((reinterpret_cast<uintptr_t>(this) >> kSpanThisShift) & kSpanCookieMask) ^ m_cookie) == spanValidationCookie; 1028 } 1029 private: 1030 uint32_t m_cookie : kSpanCookieBits; 1031 1032 #undef SPAN_HISTORY 1033 #ifdef SPAN_HISTORY 1034 // For debugging, we can keep a log events per span 1035 int nexthistory; 1036 char history[64]; 1037 int value[64]; 1038 #endif 1039 }; 1040 1041 #define ASSERT_SPAN_COMMITTED(span) ASSERT(!span->decommitted) 1042 1043 #ifdef SPAN_HISTORY 1044 void Event(Span* span, char op, int v = 0) { 1045 span->history[span->nexthistory] = op; 1046 span->value[span->nexthistory] = v; 1047 span->nexthistory++; 1048 if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0; 1049 } 1050 #else 1051 #define Event(s,o,v) ((void) 0) 1052 #endif 1053 1054 // Allocator/deallocator for spans 1055 static PageHeapAllocator<Span> span_allocator; 1056 static Span* NewSpan(PageID p, Length len) { 1057 Span* result = span_allocator.New(); 1058 memset(result, 0, sizeof(*result)); 1059 result->start = p; 1060 result->length = len; 1061 result->initCookie(); 1062 #ifdef SPAN_HISTORY 1063 result->nexthistory = 0; 1064 #endif 1065 return result; 1066 } 1067 1068 static inline void DeleteSpan(Span* span) { 1069 RELEASE_ASSERT(span->isValid()); 1070 #ifndef NDEBUG 1071 // In debug mode, trash the contents of deleted Spans 1072 memset(span, 0x3f, sizeof(*span)); 1073 #endif 1074 span->clearCookie(); 1075 span_allocator.Delete(span); 1076 } 1077 1078 // ------------------------------------------------------------------------- 1079 // Doubly linked list of spans. 
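// The next/prev pointers inside Span are stored XOR-masked (see Span::setNext()
// and Span::setPrev() above), so every operation below takes the owning list's
// entropy value.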
1080 // ------------------------------------------------------------------------- 1081 1082 static inline void DLL_Init(Span* list, uintptr_t entropy) { 1083 list->setNext(list, entropy); 1084 list->setPrev(list, entropy); 1085 } 1086 1087 static inline void DLL_Remove(Span* span, uintptr_t entropy) { 1088 span->prev(entropy)->setNext(span->next(entropy), entropy); 1089 span->next(entropy)->setPrev(span->prev(entropy), entropy); 1090 span->setPrev(NULL, entropy); 1091 span->setNext(NULL, entropy); 1092 } 1093 1094 static ALWAYS_INLINE bool DLL_IsEmpty(const Span* list, uintptr_t entropy) { 1095 return list->next(entropy) == list; 1096 } 1097 1098 static int DLL_Length(const Span* list, uintptr_t entropy) { 1099 int result = 0; 1100 for (Span* s = list->next(entropy); s != list; s = s->next(entropy)) { 1101 result++; 1102 } 1103 return result; 1104 } 1105 1106 #if 0 /* Not needed at the moment -- causes compiler warnings if not used */ 1107 static void DLL_Print(const char* label, const Span* list) { 1108 MESSAGE("%-10s %p:", label, list); 1109 for (const Span* s = list->next; s != list; s = s->next) { 1110 MESSAGE(" <%p,%u,%u>", s, s->start, s->length); 1111 } 1112 MESSAGE("\n"); 1113 } 1114 #endif 1115 1116 static inline void DLL_Prepend(Span* list, Span* span, uintptr_t entropy) { 1117 span->setNext(list->next(entropy), entropy); 1118 span->setPrev(list, entropy); 1119 list->next(entropy)->setPrev(span, entropy); 1120 list->setNext(span, entropy); 1121 } 1122 1123 //------------------------------------------------------------------- 1124 // Data kept per size-class in central cache 1125 //------------------------------------------------------------------- 1126 1127 class TCMalloc_Central_FreeList { 1128 public: 1129 void Init(size_t cl, uintptr_t entropy); 1130 1131 // These methods all do internal locking. 1132 1133 // Insert the specified range into the central freelist. N is the number of 1134 // elements in the range. 1135 void InsertRange(HardenedSLL start, HardenedSLL end, int N); 1136 1137 // Returns the actual number of fetched elements into N. 1138 void RemoveRange(HardenedSLL* start, HardenedSLL* end, int *N); 1139 1140 // Returns the number of free objects in cache. 1141 size_t length() { 1142 SpinLockHolder h(&lock_); 1143 return counter_; 1144 } 1145 1146 // Returns the number of free objects in the transfer cache. 1147 int tc_length() { 1148 SpinLockHolder h(&lock_); 1149 return used_slots_ * num_objects_to_move[size_class_]; 1150 } 1151 1152 template <class Finder, class Reader> 1153 void enumerateFreeObjects(Finder& finder, const Reader& reader, TCMalloc_Central_FreeList* remoteCentralFreeList) 1154 { 1155 { 1156 static const ptrdiff_t emptyOffset = reinterpret_cast<const char*>(&empty_) - reinterpret_cast<const char*>(this); 1157 Span* remoteEmpty = reinterpret_cast<Span*>(reinterpret_cast<char*>(remoteCentralFreeList) + emptyOffset); 1158 Span* remoteSpan = nonempty_.remoteNext(remoteEmpty, entropy_); 1159 for (Span* span = reader(remoteEmpty); span && span != &empty_; remoteSpan = span->remoteNext(remoteSpan, entropy_), span = (remoteSpan ? 
reader(remoteSpan) : 0))
        ASSERT(!span->objects);
    }

    ASSERT(!nonempty_.objects);
    static const ptrdiff_t nonemptyOffset = reinterpret_cast<const char*>(&nonempty_) - reinterpret_cast<const char*>(this);

    Span* remoteNonempty = reinterpret_cast<Span*>(reinterpret_cast<char*>(remoteCentralFreeList) + nonemptyOffset);
    Span* remoteSpan = nonempty_.remoteNext(remoteNonempty, entropy_);

    for (Span* span = reader(remoteSpan); span && remoteSpan != remoteNonempty; remoteSpan = span->remoteNext(remoteSpan, entropy_), span = (remoteSpan ? reader(remoteSpan) : 0)) {
      for (HardenedSLL nextObject = span->objects; nextObject; nextObject.setValue(reader.nextEntryInHardenedLinkedList(reinterpret_cast<void**>(nextObject.value()), entropy_))) {
        finder.visit(nextObject.value());
      }
    }
  }

  uintptr_t entropy() const { return entropy_; }
 private:
  // REQUIRES: lock_ is held
  // Remove object from cache and return.
  // Return NULL if no free entries in cache.
  HardenedSLL FetchFromSpans();

  // REQUIRES: lock_ is held
  // Remove object from cache and return. Fetches
  // from pageheap if cache is empty. Only returns
  // NULL on allocation failure.
  HardenedSLL FetchFromSpansSafe();

  // REQUIRES: lock_ is held
  // Release a linked list of objects to spans.
  // May temporarily release lock_.
  void ReleaseListToSpans(HardenedSLL start);

  // REQUIRES: lock_ is held
  // Release an object to spans.
  // May temporarily release lock_.
  ALWAYS_INLINE void ReleaseToSpans(HardenedSLL object);

  // REQUIRES: lock_ is held
  // Populate cache by fetching from the page heap.
  // May temporarily release lock_.
  ALWAYS_INLINE void Populate();

  // REQUIRES: lock is held.
  // Tries to make room for a TCEntry. If the cache is full it will try to
  // expand it at the cost of some other cache size. Return false if there is
  // no space.
  bool MakeCacheSpace();

  // REQUIRES: lock_ for locked_size_class is held.
  // Picks a "random" size class to steal TCEntry slot from. In reality it
  // just iterates over the sizeclasses but does so without taking a lock.
  // Returns true on success.
  // May temporarily lock a "random" size class.
  static ALWAYS_INLINE bool EvictRandomSizeClass(size_t locked_size_class, bool force);

  // REQUIRES: lock_ is *not* held.
  // Tries to shrink the cache. If force is true it will release objects to
  // spans if that allows it to shrink the cache. Returns false if it failed to
  // shrink the cache. Decrements cache_size_ on success.
  // May temporarily take lock_. If it takes lock_, the locked_size_class
  // lock is released to keep the thread from holding two size class locks
  // concurrently, which could lead to a deadlock.
  bool ShrinkCache(int locked_size_class, bool force);

  // This lock protects all the data members. cached_entries and cache_size_
  // may be looked at without holding the lock.
  SpinLock lock_;

  // We keep linked lists of empty and non-empty spans.
  size_t size_class_;     // My size class
  Span empty_;            // Dummy header for list of empty spans
  Span nonempty_;         // Dummy header for list of non-empty spans
  size_t counter_;        // Number of free objects in cache entry

  // Here we reserve space for TCEntry cache slots.
Since one size class can 1237 // end up getting all the TCEntries quota in the system we just preallocate 1238 // sufficient number of entries here. 1239 TCEntry tc_slots_[kNumTransferEntries]; 1240 1241 // Number of currently used cached entries in tc_slots_. This variable is 1242 // updated under a lock but can be read without one. 1243 int32_t used_slots_; 1244 // The current number of slots for this size class. This is an 1245 // adaptive value that is increased if there is lots of traffic 1246 // on a given size class. 1247 int32_t cache_size_; 1248 uintptr_t entropy_; 1249 }; 1250 1251 #if COMPILER(CLANG) && defined(__has_warning) 1252 #pragma clang diagnostic push 1253 #if __has_warning("-Wunused-private-field") 1254 #pragma clang diagnostic ignored "-Wunused-private-field" 1255 #endif 1256 #endif 1257 1258 // Pad each CentralCache object to multiple of 64 bytes 1259 template <size_t SizeToPad> 1260 class TCMalloc_Central_FreeListPadded_Template : public TCMalloc_Central_FreeList { 1261 private: 1262 char pad[64 - SizeToPad]; 1263 }; 1264 1265 // Zero-size specialization to avoid compiler error when TCMalloc_Central_FreeList happens 1266 // to be exactly 64 bytes. 1267 template <> class TCMalloc_Central_FreeListPadded_Template<0> : public TCMalloc_Central_FreeList { 1268 }; 1269 1270 typedef TCMalloc_Central_FreeListPadded_Template<sizeof(TCMalloc_Central_FreeList) % 64> TCMalloc_Central_FreeListPadded; 1271 1272 #if COMPILER(CLANG) && defined(__has_warning) 1273 #pragma clang diagnostic pop 1274 #endif 1275 1276 #if OS(DARWIN) 1277 struct Span; 1278 class TCMalloc_PageHeap; 1279 class TCMalloc_ThreadCache; 1280 template <typename T> class PageHeapAllocator; 1281 1282 class FastMallocZone { 1283 public: 1284 static void init(); 1285 1286 static kern_return_t enumerate(task_t, void*, unsigned typeMmask, vm_address_t zoneAddress, memory_reader_t, vm_range_recorder_t); 1287 static size_t goodSize(malloc_zone_t*, size_t size) { return size; } 1288 static boolean_t check(malloc_zone_t*) { return true; } 1289 static void print(malloc_zone_t*, boolean_t) { } 1290 static void log(malloc_zone_t*, void*) { } 1291 static void forceLock(malloc_zone_t*) { } 1292 static void forceUnlock(malloc_zone_t*) { } 1293 static void statistics(malloc_zone_t*, malloc_statistics_t* stats) { memset(stats, 0, sizeof(malloc_statistics_t)); } 1294 1295 private: 1296 FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*, PageHeapAllocator<Span>*, PageHeapAllocator<TCMalloc_ThreadCache>*); 1297 static size_t size(malloc_zone_t*, const void*); 1298 static void* zoneMalloc(malloc_zone_t*, size_t); 1299 static void* zoneCalloc(malloc_zone_t*, size_t numItems, size_t size); 1300 static void zoneFree(malloc_zone_t*, void*); 1301 static void* zoneRealloc(malloc_zone_t*, void*, size_t); 1302 static void* zoneValloc(malloc_zone_t*, size_t) { LOG_ERROR("valloc is not supported"); return 0; } 1303 static void zoneDestroy(malloc_zone_t*) { } 1304 1305 malloc_zone_t m_zone; 1306 TCMalloc_PageHeap* m_pageHeap; 1307 TCMalloc_ThreadCache** m_threadHeaps; 1308 TCMalloc_Central_FreeListPadded* m_centralCaches; 1309 PageHeapAllocator<Span>* m_spanAllocator; 1310 PageHeapAllocator<TCMalloc_ThreadCache>* m_pageHeapAllocator; 1311 }; 1312 1313 #endif 1314 1315 // Even if we have support for thread-local storage in the compiler 1316 // and linker, the OS may not support it. We need to check that at 1317 // runtime. Right now, we have to keep a manual set of "bad" OSes. 
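// (Even when the compiler and linker support __thread, KernelSupportsTLS()
// below must return true before thread-local storage is actually relied on at
// runtime.)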
1318 #if defined(HAVE_TLS) 1319 static bool kernel_supports_tls = false; // be conservative 1320 static inline bool KernelSupportsTLS() { 1321 return kernel_supports_tls; 1322 } 1323 # if !HAVE_DECL_UNAME // if too old for uname, probably too old for TLS 1324 static void CheckIfKernelSupportsTLS() { 1325 kernel_supports_tls = false; 1326 } 1327 # else 1328 # include <sys/utsname.h> // DECL_UNAME checked for <sys/utsname.h> too 1329 static void CheckIfKernelSupportsTLS() { 1330 struct utsname buf; 1331 if (uname(&buf) != 0) { // should be impossible 1332 MESSAGE("uname failed assuming no TLS support (errno=%d)\n", errno); 1333 kernel_supports_tls = false; 1334 } else if (strcasecmp(buf.sysname, "linux") == 0) { 1335 // The linux case: the first kernel to support TLS was 2.6.0 1336 if (buf.release[0] < '2' && buf.release[1] == '.') // 0.x or 1.x 1337 kernel_supports_tls = false; 1338 else if (buf.release[0] == '2' && buf.release[1] == '.' && 1339 buf.release[2] >= '0' && buf.release[2] < '6' && 1340 buf.release[3] == '.') // 2.0 - 2.5 1341 kernel_supports_tls = false; 1342 else 1343 kernel_supports_tls = true; 1344 } else { // some other kernel, we'll be optimisitic 1345 kernel_supports_tls = true; 1346 } 1347 // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG 1348 } 1349 # endif // HAVE_DECL_UNAME 1350 #endif // HAVE_TLS 1351 1352 // __THROW is defined in glibc systems. It means, counter-intuitively, 1353 // "This function will never throw an exception." It's an optional 1354 // optimization tool, but we may need to use it to match glibc prototypes. 1355 #ifndef __THROW // I guess we're not on a glibc system 1356 # define __THROW // __THROW is just an optimization, so ok to make it "" 1357 #endif 1358 1359 // ------------------------------------------------------------------------- 1360 // Stack traces kept for sampled allocations 1361 // The following state is protected by pageheap_lock_. 1362 // ------------------------------------------------------------------------- 1363 1364 // size/depth are made the same size as a pointer so that some generic 1365 // code below can conveniently cast them back and forth to void*. 1366 static const int kMaxStackDepth = 31; 1367 struct StackTrace { 1368 uintptr_t size; // Size of object 1369 uintptr_t depth; // Number of PC values stored in array below 1370 void* stack[kMaxStackDepth]; 1371 }; 1372 static PageHeapAllocator<StackTrace> stacktrace_allocator; 1373 static Span sampled_objects; 1374 1375 // ------------------------------------------------------------------------- 1376 // Map from page-id to per-page data 1377 // ------------------------------------------------------------------------- 1378 1379 // We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines. 1380 // We also use a simple one-level cache for hot PageID-to-sizeclass mappings, 1381 // because sometimes the sizeclass is all the information we need. 1382 1383 // Selector class -- general selector uses 3-level map 1384 template <int BITS> class MapSelector { 1385 public: 1386 typedef TCMalloc_PageMap3<BITS-kPageShift> Type; 1387 typedef PackedCache<BITS, uint64_t> CacheType; 1388 }; 1389 1390 #if CPU(X86_64) 1391 // On all known X86-64 platforms, the upper 16 bits are always unused and therefore 1392 // can be excluded from the PageMap key. 
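// (Canonical x86-64 addresses sign-extend bit 47, so bits 48-63 duplicate bit 47
// and carry no extra information.)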
// See http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details

static const size_t kBitsUnusedOn64Bit = 16;
#else
static const size_t kBitsUnusedOn64Bit = 0;
#endif

// A three-level map for 64-bit machines
template <> class MapSelector<64> {
 public:
  typedef TCMalloc_PageMap3<64 - kPageShift - kBitsUnusedOn64Bit> Type;
  typedef PackedCache<64, uint64_t> CacheType;
};

// A two-level map for 32-bit machines
template <> class MapSelector<32> {
 public:
  typedef TCMalloc_PageMap2<32 - kPageShift> Type;
  typedef PackedCache<32 - kPageShift, uint16_t> CacheType;
};

// -------------------------------------------------------------------------
// Page-level allocator
//  * Eager coalescing
//
// Heap for page-level allocation. We allow allocating and freeing
// contiguous runs of pages (each called a "span").
// -------------------------------------------------------------------------

#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
// The page heap maintains a free list for spans that are no longer in use by
// the central cache or any thread caches. We use a background thread to
// periodically scan the free list and release a percentage of it back to the OS.

// If free_committed_pages_ exceeds kMinimumFreeCommittedPageCount, the
// background thread:
// - wakes up
// - pauses for kScavengeDelayInSeconds
// - returns to the OS a percentage of the memory that remained unused during
//   that pause (kScavengePercentage * min_free_committed_pages_since_last_scavenge_)
// The goal of this strategy is to reduce memory pressure in a timely fashion
// while avoiding thrashing the OS allocator.

// Time delay before the page heap scavenger will consider returning pages to
// the OS.
static const int kScavengeDelayInSeconds = 2;

// Approximate percentage of free committed pages to return to the OS in one
// scavenge.
static const float kScavengePercentage = .5f;

// Number of span lists to keep spans in when memory is returned.
static const int kMinSpanListsWithSpans = 32;

// Number of free committed pages that we want to keep around. The minimum number of pages used when there
// is 1 span in each of the first kMinSpanListsWithSpans spanlists. Currently 528 pages.
static const size_t kMinimumFreeCommittedPageCount = kMinSpanListsWithSpans * ((1.0f+kMinSpanListsWithSpans) / 2.0f);

#endif

static SpinLock pageheap_lock = SPINLOCK_INITIALIZER;

class TCMalloc_PageHeap {
 public:
  void init();

  // Allocate a run of "n" pages. Returns zero if out of memory.
  Span* New(Length n);

  // Delete the span "[p, p+n-1]".
  // REQUIRES: span was returned by earlier call to New() and
  //           has not yet been deleted.
  void Delete(Span* span);

  // Mark an allocated span as being used for small objects of the
  // specified size-class.
  // REQUIRES: span was returned by an earlier call to New()
  //           and has not yet been deleted.
  void RegisterSizeClass(Span* span, size_t sc);

  // Split an allocated span into two spans: one of length "n" pages
  // followed by another span of length "span->length - n" pages.
  // Modifies "*span" to point to the first span of length "n" pages.
  // Returns a pointer to the second span.
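  // For example, splitting a 10-page span with n == 2 leaves "span" describing
  // the first 2 pages and returns a new 8-page span covering the remainder.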
1477 // 1478 // REQUIRES: "0 < n < span->length" 1479 // REQUIRES: !span->free 1480 // REQUIRES: span->sizeclass == 0 1481 Span* Split(Span* span, Length n); 1482 1483 // Return the descriptor for the specified page. 1484 inline Span* GetDescriptor(PageID p) const { 1485 return reinterpret_cast<Span*>(pagemap_.get(p)); 1486 } 1487 1488 inline Span* GetDescriptorEnsureSafe(PageID p) 1489 { 1490 pagemap_.Ensure(p, 1); 1491 return GetDescriptor(p); 1492 } 1493 1494 size_t ReturnedBytes() const; 1495 1496 // Return number of bytes allocated from system 1497 inline uint64_t SystemBytes() const { return system_bytes_; } 1498 1499 // Return number of free bytes in heap 1500 uint64_t FreeBytes() const { 1501 return (static_cast<uint64_t>(free_pages_) << kPageShift); 1502 } 1503 1504 bool Check(); 1505 size_t CheckList(Span* list, Length min_pages, Length max_pages, bool decommitted); 1506 1507 // Release all pages on the free list for reuse by the OS: 1508 void ReleaseFreePages(); 1509 void ReleaseFreeList(Span*, Span*); 1510 1511 // Return 0 if we have no information, or else the correct sizeclass for p. 1512 // Reads and writes to pagemap_cache_ do not require locking. 1513 // The entries are 64 bits on 64-bit hardware and 16 bits on 1514 // 32-bit hardware, and we don't mind raciness as long as each read of 1515 // an entry yields a valid entry, not a partially updated entry. 1516 size_t GetSizeClassIfCached(PageID p) const { 1517 return pagemap_cache_.GetOrDefault(p, 0); 1518 } 1519 void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); } 1520 1521 private: 1522 // Pick the appropriate map and cache types based on pointer size 1523 typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap; 1524 typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache; 1525 PageMap pagemap_; 1526 mutable PageMapCache pagemap_cache_; 1527 1528 // We segregate spans of a given size into two circular linked 1529 // lists: one for normal spans, and one for spans whose memory 1530 // has been returned to the system. 1531 struct SpanList { 1532 Span normal; 1533 Span returned; 1534 }; 1535 1536 // List of free spans of length >= kMaxPages 1537 SpanList large_; 1538 1539 // Array mapping from span length to a doubly linked list of free spans 1540 SpanList free_[kMaxPages]; 1541 1542 // Number of pages kept in free lists 1543 uintptr_t free_pages_; 1544 1545 // Used for hardening 1546 uintptr_t entropy_; 1547 1548 // Bytes allocated from system 1549 uint64_t system_bytes_; 1550 1551 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1552 // Number of pages kept in free lists that are still committed. 1553 Length free_committed_pages_; 1554 1555 // Minimum number of free committed pages since last scavenge. (Can be 0 if 1556 // we've committed new pages since the last scavenge.) 1557 Length min_free_committed_pages_since_last_scavenge_; 1558 #endif 1559 1560 bool GrowHeap(Length n); 1561 1562 // REQUIRES span->length >= n 1563 // Remove span from its free list, and move any leftover part of 1564 // span into appropriate free lists. Also update "span" to have 1565 // length exactly "n" and mark it as non-free so it can be returned 1566 // to the client. 1567 // 1568 // "released" is true iff "span" was found on a "returned" list. 
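  // For example, carving 3 pages out of an 8-page free span re-lists the
  // 5-page remainder on the appropriate free list and leaves "span" with
  // length 3.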
1569 void Carve(Span* span, Length n, bool released);
1570 
1571 void RecordSpan(Span* span) {
1572 pagemap_.set(span->start, span);
1573 if (span->length > 1) {
1574 pagemap_.set(span->start + span->length - 1, span);
1575 }
1576 }
1577 
1578 // Allocate a large span of length == n. If successful, returns a
1579 // span of exactly the specified length. Else, returns NULL.
1580 Span* AllocLarge(Length n);
1581 
1582 #if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1583 // Incrementally release some memory to the system.
1584 // IncrementalScavenge(n) is called whenever n pages are freed.
1585 void IncrementalScavenge(Length n);
1586 #endif
1587 
1588 // Number of pages to deallocate before doing more scavenging
1589 int64_t scavenge_counter_;
1590 
1591 // Index of last free list we scavenged
1592 size_t scavenge_index_;
1593 
1594 #if OS(DARWIN)
1595 friend class FastMallocZone;
1596 #endif
1597 
1598 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1599 void initializeScavenger();
1600 ALWAYS_INLINE void signalScavenger();
1601 void scavenge();
1602 ALWAYS_INLINE bool shouldScavenge() const;
1603 
1604 #if HAVE(DISPATCH_H) || OS(WINDOWS)
1605 void periodicScavenge();
1606 ALWAYS_INLINE bool isScavengerSuspended();
1607 ALWAYS_INLINE void scheduleScavenger();
1608 ALWAYS_INLINE void rescheduleScavenger();
1609 ALWAYS_INLINE void suspendScavenger();
1610 #endif
1611 
1612 #if HAVE(DISPATCH_H)
1613 dispatch_queue_t m_scavengeQueue;
1614 dispatch_source_t m_scavengeTimer;
1615 bool m_scavengingSuspended;
1616 #elif OS(WINDOWS)
1617 static void CALLBACK scavengerTimerFired(void*, BOOLEAN);
1618 HANDLE m_scavengeQueueTimer;
1619 #else
1620 static NO_RETURN_WITH_VALUE void* runScavengerThread(void*);
1621 NO_RETURN void scavengerThread();
1622 
1623 // Keeps track of whether the background thread is actively scavenging memory every kScavengeDelayInSeconds, or
1624 // whether it is blocked waiting for more pages to be deleted.
1625 bool m_scavengeThreadActive; 1626 1627 pthread_mutex_t m_scavengeMutex; 1628 pthread_cond_t m_scavengeCondition; 1629 #endif 1630 1631 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1632 }; 1633 1634 void TCMalloc_PageHeap::init() 1635 { 1636 pagemap_.init(MetaDataAlloc); 1637 pagemap_cache_ = PageMapCache(0); 1638 free_pages_ = 0; 1639 system_bytes_ = 0; 1640 entropy_ = HARDENING_ENTROPY; 1641 1642 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1643 free_committed_pages_ = 0; 1644 min_free_committed_pages_since_last_scavenge_ = 0; 1645 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1646 1647 scavenge_counter_ = 0; 1648 // Start scavenging at kMaxPages list 1649 scavenge_index_ = kMaxPages-1; 1650 COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits); 1651 DLL_Init(&large_.normal, entropy_); 1652 DLL_Init(&large_.returned, entropy_); 1653 for (size_t i = 0; i < kMaxPages; i++) { 1654 DLL_Init(&free_[i].normal, entropy_); 1655 DLL_Init(&free_[i].returned, entropy_); 1656 } 1657 1658 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1659 initializeScavenger(); 1660 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1661 } 1662 1663 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1664 1665 #if HAVE(DISPATCH_H) 1666 1667 void TCMalloc_PageHeap::initializeScavenger() 1668 { 1669 m_scavengeQueue = dispatch_queue_create("com.apple.JavaScriptCore.FastMallocSavenger", NULL); 1670 m_scavengeTimer = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, m_scavengeQueue); 1671 uint64_t scavengeDelayInNanoseconds = kScavengeDelayInSeconds * NSEC_PER_SEC; 1672 dispatch_time_t startTime = dispatch_time(DISPATCH_TIME_NOW, scavengeDelayInNanoseconds); 1673 dispatch_source_set_timer(m_scavengeTimer, startTime, scavengeDelayInNanoseconds, scavengeDelayInNanoseconds / 10); 1674 dispatch_source_set_event_handler(m_scavengeTimer, ^{ periodicScavenge(); }); 1675 m_scavengingSuspended = true; 1676 } 1677 1678 ALWAYS_INLINE bool TCMalloc_PageHeap::isScavengerSuspended() 1679 { 1680 ASSERT(pageheap_lock.IsHeld()); 1681 return m_scavengingSuspended; 1682 } 1683 1684 ALWAYS_INLINE void TCMalloc_PageHeap::scheduleScavenger() 1685 { 1686 ASSERT(pageheap_lock.IsHeld()); 1687 m_scavengingSuspended = false; 1688 dispatch_resume(m_scavengeTimer); 1689 } 1690 1691 ALWAYS_INLINE void TCMalloc_PageHeap::rescheduleScavenger() 1692 { 1693 // Nothing to do here for libdispatch. 1694 } 1695 1696 ALWAYS_INLINE void TCMalloc_PageHeap::suspendScavenger() 1697 { 1698 ASSERT(pageheap_lock.IsHeld()); 1699 m_scavengingSuspended = true; 1700 dispatch_suspend(m_scavengeTimer); 1701 } 1702 1703 #elif OS(WINDOWS) 1704 1705 void TCMalloc_PageHeap::scavengerTimerFired(void* context, BOOLEAN) 1706 { 1707 static_cast<TCMalloc_PageHeap*>(context)->periodicScavenge(); 1708 } 1709 1710 void TCMalloc_PageHeap::initializeScavenger() 1711 { 1712 m_scavengeQueueTimer = 0; 1713 } 1714 1715 ALWAYS_INLINE bool TCMalloc_PageHeap::isScavengerSuspended() 1716 { 1717 ASSERT(pageheap_lock.IsHeld()); 1718 return !m_scavengeQueueTimer; 1719 } 1720 1721 ALWAYS_INLINE void TCMalloc_PageHeap::scheduleScavenger() 1722 { 1723 // We need to use WT_EXECUTEONLYONCE here and reschedule the timer, because 1724 // Windows will fire the timer event even when the function is already running. 
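// Illustrative sketch of the resulting cycle on Windows (this only restates the
// code in this file, it is not additional behavior): scheduleScavenger() arms a
// one-shot timer that fires after kScavengeDelayInSeconds * 1000 ms; the timer
// callback scavengerTimerFired() forwards to periodicScavenge(), which scavenges
// and then either calls rescheduleScavenger() (delete the timer and arm a new
// one) while shouldScavenge() is still true, or suspendScavenger() once enough
// pages have been released.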
1725 ASSERT(pageheap_lock.IsHeld());
1726 CreateTimerQueueTimer(&m_scavengeQueueTimer, 0, scavengerTimerFired, this, kScavengeDelayInSeconds * 1000, 0, WT_EXECUTEONLYONCE);
1727 }
1728 
1729 ALWAYS_INLINE void TCMalloc_PageHeap::rescheduleScavenger()
1730 {
1731 // We must delete the timer and create it again, because it is not possible to retrigger a timer on Windows.
1732 suspendScavenger();
1733 scheduleScavenger();
1734 }
1735 
1736 ALWAYS_INLINE void TCMalloc_PageHeap::suspendScavenger()
1737 {
1738 ASSERT(pageheap_lock.IsHeld());
1739 HANDLE scavengeQueueTimer = m_scavengeQueueTimer;
1740 m_scavengeQueueTimer = 0;
1741 DeleteTimerQueueTimer(0, scavengeQueueTimer, 0);
1742 }
1743 
1744 #else
1745 
1746 void TCMalloc_PageHeap::initializeScavenger()
1747 {
1748 // Create a non-recursive mutex.
1749 #if !defined(PTHREAD_MUTEX_NORMAL) || PTHREAD_MUTEX_NORMAL == PTHREAD_MUTEX_DEFAULT
1750 pthread_mutex_init(&m_scavengeMutex, 0);
1751 #else
1752 pthread_mutexattr_t attr;
1753 pthread_mutexattr_init(&attr);
1754 pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL);
1755 
1756 pthread_mutex_init(&m_scavengeMutex, &attr);
1757 
1758 pthread_mutexattr_destroy(&attr);
1759 #endif
1760 
1761 pthread_cond_init(&m_scavengeCondition, 0);
1762 m_scavengeThreadActive = true;
1763 pthread_t thread;
1764 pthread_create(&thread, 0, runScavengerThread, this);
1765 }
1766 
1767 void* TCMalloc_PageHeap::runScavengerThread(void* context)
1768 {
1769 static_cast<TCMalloc_PageHeap*>(context)->scavengerThread();
1770 #if COMPILER(MSVC)
1771 // Without this, Visual Studio will complain that this method does not return a value.
1772 return 0;
1773 #endif
1774 }
1775 
1776 ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger()
1777 {
1778 // shouldScavenge() must be called only while the pageheap_lock spinlock is held; in addition,
1779 // m_scavengeThreadActive is only set to false while pageheap_lock is held. The caller must therefore
1780 // hold pageheap_lock before calling this method. If the scavenger thread is sleeping and
1781 // shouldScavenge() indicates there is memory to free, the scavenger thread is signalled to start.
1782 ASSERT(pageheap_lock.IsHeld());
1783 if (!m_scavengeThreadActive && shouldScavenge())
1784 pthread_cond_signal(&m_scavengeCondition);
1785 }
1786 
1787 #endif
1788 
1789 void TCMalloc_PageHeap::scavenge()
1790 {
1791 size_t pagesToRelease = min_free_committed_pages_since_last_scavenge_ * kScavengePercentage;
1792 size_t targetPageCount = std::max<size_t>(kMinimumFreeCommittedPageCount, free_committed_pages_ - pagesToRelease);
1793 
1794 Length lastFreeCommittedPages = free_committed_pages_;
1795 while (free_committed_pages_ > targetPageCount) {
1796 ASSERT(Check());
1797 for (int i = kMaxPages; i > 0 && free_committed_pages_ >= targetPageCount; i--) {
1798 SpanList* slist = (static_cast<size_t>(i) == kMaxPages) ? &large_ : &free_[i];
1799 // If the span size is bigger than kMinSpanListsWithSpans pages, return all the spans in the list;
1800 // otherwise return only half of the list at a time so spans of size 1 are not the only ones left.
1801 size_t length = DLL_Length(&slist->normal, entropy_);
1802 size_t numSpansToReturn = (i > kMinSpanListsWithSpans) ?
length : length / 2; 1803 for (int j = 0; static_cast<size_t>(j) < numSpansToReturn && !DLL_IsEmpty(&slist->normal, entropy_) && free_committed_pages_ > targetPageCount; j++) { 1804 Span* s = slist->normal.prev(entropy_); 1805 DLL_Remove(s, entropy_); 1806 ASSERT(!s->decommitted); 1807 if (!s->decommitted) { 1808 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), 1809 static_cast<size_t>(s->length << kPageShift)); 1810 ASSERT(free_committed_pages_ >= s->length); 1811 free_committed_pages_ -= s->length; 1812 s->decommitted = true; 1813 } 1814 DLL_Prepend(&slist->returned, s, entropy_); 1815 } 1816 } 1817 1818 if (lastFreeCommittedPages == free_committed_pages_) 1819 break; 1820 lastFreeCommittedPages = free_committed_pages_; 1821 } 1822 1823 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; 1824 } 1825 1826 ALWAYS_INLINE bool TCMalloc_PageHeap::shouldScavenge() const 1827 { 1828 return free_committed_pages_ > kMinimumFreeCommittedPageCount; 1829 } 1830 1831 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1832 1833 inline Span* TCMalloc_PageHeap::New(Length n) { 1834 ASSERT(Check()); 1835 ASSERT(n > 0); 1836 1837 // Find first size >= n that has a non-empty list 1838 for (Length s = n; s < kMaxPages; s++) { 1839 Span* ll = NULL; 1840 bool released = false; 1841 if (!DLL_IsEmpty(&free_[s].normal, entropy_)) { 1842 // Found normal span 1843 ll = &free_[s].normal; 1844 } else if (!DLL_IsEmpty(&free_[s].returned, entropy_)) { 1845 // Found returned span; reallocate it 1846 ll = &free_[s].returned; 1847 released = true; 1848 } else { 1849 // Keep looking in larger classes 1850 continue; 1851 } 1852 1853 Span* result = ll->next(entropy_); 1854 Carve(result, n, released); 1855 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1856 // The newly allocated memory is from a span that's in the normal span list (already committed). Update the 1857 // free committed pages count. 1858 ASSERT(free_committed_pages_ >= n); 1859 free_committed_pages_ -= n; 1860 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_) 1861 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; 1862 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1863 ASSERT(Check()); 1864 free_pages_ -= n; 1865 return result; 1866 } 1867 1868 Span* result = AllocLarge(n); 1869 if (result != NULL) { 1870 ASSERT_SPAN_COMMITTED(result); 1871 return result; 1872 } 1873 1874 // Grow the heap and try again 1875 if (!GrowHeap(n)) { 1876 ASSERT(Check()); 1877 return NULL; 1878 } 1879 1880 return New(n); 1881 } 1882 1883 Span* TCMalloc_PageHeap::AllocLarge(Length n) { 1884 // find the best span (closest to n in size). 1885 // The following loops implements address-ordered best-fit. 
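// A minimal sketch of the selection rule used by the two loops below (this
// restates the code, it is not a separate algorithm): a candidate "span"
// replaces the current "best" when
//
//     span->length >= n
//         && (best == NULL
//             || span->length < best->length
//             || (span->length == best->length && span->start < best->start))
//
// i.e. prefer the smallest span that still fits, breaking ties by picking the
// span at the lowest address.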
1886 bool from_released = false;
1887 Span *best = NULL;
1888 
1889 // Search through normal list
1890 for (Span* span = large_.normal.next(entropy_);
1891 span != &large_.normal;
1892 span = span->next(entropy_)) {
1893 if (span->length >= n) {
1894 if ((best == NULL)
1895 || (span->length < best->length)
1896 || ((span->length == best->length) && (span->start < best->start))) {
1897 best = span;
1898 from_released = false;
1899 }
1900 }
1901 }
1902 
1903 // Search through released list in case it has a better fit
1904 for (Span* span = large_.returned.next(entropy_);
1905 span != &large_.returned;
1906 span = span->next(entropy_)) {
1907 if (span->length >= n) {
1908 if ((best == NULL)
1909 || (span->length < best->length)
1910 || ((span->length == best->length) && (span->start < best->start))) {
1911 best = span;
1912 from_released = true;
1913 }
1914 }
1915 }
1916 
1917 if (best != NULL) {
1918 Carve(best, n, from_released);
1919 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1920 // The newly allocated memory is from a span that's in the normal span list (already committed). Update the
1921 // free committed pages count.
1922 ASSERT(free_committed_pages_ >= n);
1923 free_committed_pages_ -= n;
1924 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_)
1925 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
1926 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1927 ASSERT(Check());
1928 free_pages_ -= n;
1929 return best;
1930 }
1931 return NULL;
1932 }
1933 
1934 Span* TCMalloc_PageHeap::Split(Span* span, Length n) {
1935 ASSERT(0 < n);
1936 ASSERT(n < span->length);
1937 ASSERT(!span->free);
1938 ASSERT(span->sizeclass == 0);
1939 Event(span, 'T', n);
1940 
1941 const Length extra = span->length - n;
1942 Span* leftover = NewSpan(span->start + n, extra);
1943 Event(leftover, 'U', extra);
1944 RecordSpan(leftover);
1945 pagemap_.set(span->start + n - 1, span); // Update map from pageid to span
1946 span->length = n;
1947 
1948 return leftover;
1949 }
1950 
1951 inline void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) {
1952 ASSERT(n > 0);
1953 DLL_Remove(span, entropy_);
1954 span->free = 0;
1955 Event(span, 'A', n);
1956 
1957 if (released) {
1958 // If the span chosen to carve from is decommitted, commit the entire span at once to avoid committing it one page at a time.
1959 ASSERT(span->decommitted);
1960 TCMalloc_SystemCommit(reinterpret_cast<void*>(span->start << kPageShift), static_cast<size_t>(span->length << kPageShift));
1961 span->decommitted = false;
1962 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1963 free_committed_pages_ += span->length;
1964 #endif
1965 }
1966 
1967 const int extra = static_cast<int>(span->length - n);
1968 ASSERT(extra >= 0);
1969 if (extra > 0) {
1970 Span* leftover = NewSpan(span->start + n, extra);
1971 leftover->free = 1;
1972 leftover->decommitted = false;
1973 Event(leftover, 'S', extra);
1974 RecordSpan(leftover);
1975 
1976 // Place leftover span on appropriate free list
1977 SpanList* listpair = (static_cast<size_t>(extra) < kMaxPages) ?
&free_[extra] : &large_; 1978 Span* dst = &listpair->normal; 1979 DLL_Prepend(dst, leftover, entropy_); 1980 1981 span->length = n; 1982 pagemap_.set(span->start + n - 1, span); 1983 } 1984 } 1985 1986 static ALWAYS_INLINE void mergeDecommittedStates(Span* destination, Span* other) 1987 { 1988 if (destination->decommitted && !other->decommitted) { 1989 TCMalloc_SystemRelease(reinterpret_cast<void*>(other->start << kPageShift), 1990 static_cast<size_t>(other->length << kPageShift)); 1991 } else if (other->decommitted && !destination->decommitted) { 1992 TCMalloc_SystemRelease(reinterpret_cast<void*>(destination->start << kPageShift), 1993 static_cast<size_t>(destination->length << kPageShift)); 1994 destination->decommitted = true; 1995 } 1996 } 1997 1998 inline void TCMalloc_PageHeap::Delete(Span* span) { 1999 ASSERT(Check()); 2000 ASSERT(!span->free); 2001 ASSERT(span->length > 0); 2002 ASSERT(GetDescriptor(span->start) == span); 2003 ASSERT(GetDescriptor(span->start + span->length - 1) == span); 2004 span->sizeclass = 0; 2005 #ifndef NO_TCMALLOC_SAMPLES 2006 span->sample = 0; 2007 #endif 2008 2009 // Coalesce -- we guarantee that "p" != 0, so no bounds checking 2010 // necessary. We do not bother resetting the stale pagemap 2011 // entries for the pieces we are merging together because we only 2012 // care about the pagemap entries for the boundaries. 2013 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2014 // Track the total size of the neighboring free spans that are committed. 2015 Length neighboringCommittedSpansLength = 0; 2016 #endif 2017 const PageID p = span->start; 2018 const Length n = span->length; 2019 Span* prev = GetDescriptor(p-1); 2020 if (prev != NULL && prev->free) { 2021 // Merge preceding span into this span 2022 ASSERT(prev->start + prev->length == p); 2023 const Length len = prev->length; 2024 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2025 if (!prev->decommitted) 2026 neighboringCommittedSpansLength += len; 2027 #endif 2028 mergeDecommittedStates(span, prev); 2029 DLL_Remove(prev, entropy_); 2030 DeleteSpan(prev); 2031 span->start -= len; 2032 span->length += len; 2033 pagemap_.set(span->start, span); 2034 Event(span, 'L', len); 2035 } 2036 Span* next = GetDescriptor(p+n); 2037 if (next != NULL && next->free) { 2038 // Merge next span into this span 2039 ASSERT(next->start == p+n); 2040 const Length len = next->length; 2041 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2042 if (!next->decommitted) 2043 neighboringCommittedSpansLength += len; 2044 #endif 2045 mergeDecommittedStates(span, next); 2046 DLL_Remove(next, entropy_); 2047 DeleteSpan(next); 2048 span->length += len; 2049 pagemap_.set(span->start + span->length - 1, span); 2050 Event(span, 'R', len); 2051 } 2052 2053 Event(span, 'D', span->length); 2054 span->free = 1; 2055 if (span->decommitted) { 2056 if (span->length < kMaxPages) 2057 DLL_Prepend(&free_[span->length].returned, span, entropy_); 2058 else 2059 DLL_Prepend(&large_.returned, span, entropy_); 2060 } else { 2061 if (span->length < kMaxPages) 2062 DLL_Prepend(&free_[span->length].normal, span, entropy_); 2063 else 2064 DLL_Prepend(&large_.normal, span, entropy_); 2065 } 2066 free_pages_ += n; 2067 2068 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2069 if (span->decommitted) { 2070 // If the merged span is decommitted, that means we decommitted any neighboring spans that were 2071 // committed. Update the free committed pages count. 
2072 free_committed_pages_ -= neighboringCommittedSpansLength;
2073 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_)
2074 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
2075 } else {
2076 // If the merged span remains committed, add the deleted span's size to the free committed pages count.
2077 free_committed_pages_ += n;
2078 }
2079 
2080 // Make sure the scavenge thread becomes active if we have enough freed pages to release some back to the system.
2081 signalScavenger();
2082 #else
2083 IncrementalScavenge(n);
2084 #endif
2085 
2086 ASSERT(Check());
2087 }
2088 
2089 #if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
2090 void TCMalloc_PageHeap::IncrementalScavenge(Length n) {
2091 // Fast path; not yet time to release memory
2092 scavenge_counter_ -= n;
2093 if (scavenge_counter_ >= 0) return; // Not yet time to scavenge
2094 
2095 // If there is nothing to release, wait for so many pages before
2096 // scavenging again. With 4K pages, this comes to 1MB of memory.
2097 static const size_t kDefaultReleaseDelay = 1 << 8;
2098 
2099 // Find index of free list to scavenge
2100 size_t index = scavenge_index_ + 1;
2101 uintptr_t entropy = entropy_;
2102 for (size_t i = 0; i < kMaxPages+1; i++) {
2103 if (index > kMaxPages) index = 0;
2104 SpanList* slist = (index == kMaxPages) ? &large_ : &free_[index];
2105 if (!DLL_IsEmpty(&slist->normal, entropy)) {
2106 // Release the last span on the normal portion of this list
2107 Span* s = slist->normal.prev(entropy);
2108 DLL_Remove(s, entropy_);
2109 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
2110 static_cast<size_t>(s->length << kPageShift));
2111 s->decommitted = true;
2112 DLL_Prepend(&slist->returned, s, entropy);
2113 
2114 scavenge_counter_ = std::max<size_t>(64UL, std::min<size_t>(kDefaultReleaseDelay, kDefaultReleaseDelay - (free_pages_ / kDefaultReleaseDelay)));
2115 
2116 if (index == kMaxPages && !DLL_IsEmpty(&slist->normal, entropy))
2117 scavenge_index_ = index - 1;
2118 else
2119 scavenge_index_ = index;
2120 return;
2121 }
2122 index++;
2123 }
2124 
2125 // Nothing to scavenge, delay for a while
2126 scavenge_counter_ = kDefaultReleaseDelay;
2127 }
2128 #endif
2129 
2130 void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) {
2131 // Associate span object with all interior pages as well
2132 ASSERT(!span->free);
2133 ASSERT(GetDescriptor(span->start) == span);
2134 ASSERT(GetDescriptor(span->start+span->length-1) == span);
2135 Event(span, 'C', sc);
2136 span->sizeclass = static_cast<unsigned int>(sc);
2137 for (Length i = 1; i < span->length-1; i++) {
2138 pagemap_.set(span->start+i, span);
2139 }
2140 }
2141 
2142 size_t TCMalloc_PageHeap::ReturnedBytes() const {
2143 size_t result = 0;
2144 for (unsigned s = 0; s < kMaxPages; s++) {
2145 const int r_length = DLL_Length(&free_[s].returned, entropy_);
2146 unsigned r_pages = s * r_length;
2147 result += r_pages << kPageShift;
2148 }
2149 
2150 for (Span* s = large_.returned.next(entropy_); s != &large_.returned; s = s->next(entropy_))
2151 result += s->length << kPageShift;
2152 return result;
2153 }
2154 
2155 bool TCMalloc_PageHeap::GrowHeap(Length n) {
2156 ASSERT(kMaxPages >= kMinSystemAlloc);
2157 if (n > kMaxValidPages) return false;
2158 Length ask = (n>kMinSystemAlloc) ?
n : static_cast<Length>(kMinSystemAlloc); 2159 size_t actual_size; 2160 void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize); 2161 if (ptr == NULL) { 2162 if (n < ask) { 2163 // Try growing just "n" pages 2164 ask = n; 2165 ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize); 2166 } 2167 if (ptr == NULL) return false; 2168 } 2169 ask = actual_size >> kPageShift; 2170 2171 uint64_t old_system_bytes = system_bytes_; 2172 system_bytes_ += (ask << kPageShift); 2173 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; 2174 ASSERT(p > 0); 2175 2176 // If we have already a lot of pages allocated, just pre allocate a bunch of 2177 // memory for the page map. This prevents fragmentation by pagemap metadata 2178 // when a program keeps allocating and freeing large blocks. 2179 2180 if (old_system_bytes < kPageMapBigAllocationThreshold 2181 && system_bytes_ >= kPageMapBigAllocationThreshold) { 2182 pagemap_.PreallocateMoreMemory(); 2183 } 2184 2185 // Make sure pagemap_ has entries for all of the new pages. 2186 // Plus ensure one before and one after so coalescing code 2187 // does not need bounds-checking. 2188 if (pagemap_.Ensure(p-1, ask+2)) { 2189 // Pretend the new area is allocated and then Delete() it to 2190 // cause any necessary coalescing to occur. 2191 // 2192 // We do not adjust free_pages_ here since Delete() will do it for us. 2193 Span* span = NewSpan(p, ask); 2194 RecordSpan(span); 2195 Delete(span); 2196 ASSERT(Check()); 2197 return true; 2198 } else { 2199 // We could not allocate memory within "pagemap_" 2200 // TODO: Once we can return memory to the system, return the new span 2201 return false; 2202 } 2203 } 2204 2205 bool TCMalloc_PageHeap::Check() { 2206 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2207 size_t totalFreeCommitted = 0; 2208 #endif 2209 ASSERT(free_[0].normal.next(entropy_) == &free_[0].normal); 2210 ASSERT(free_[0].returned.next(entropy_) == &free_[0].returned); 2211 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2212 totalFreeCommitted = CheckList(&large_.normal, kMaxPages, 1000000000, false); 2213 #else 2214 CheckList(&large_.normal, kMaxPages, 1000000000, false); 2215 #endif 2216 CheckList(&large_.returned, kMaxPages, 1000000000, true); 2217 for (Length s = 1; s < kMaxPages; s++) { 2218 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2219 totalFreeCommitted += CheckList(&free_[s].normal, s, s, false); 2220 #else 2221 CheckList(&free_[s].normal, s, s, false); 2222 #endif 2223 CheckList(&free_[s].returned, s, s, true); 2224 } 2225 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2226 ASSERT(totalFreeCommitted == free_committed_pages_); 2227 #endif 2228 return true; 2229 } 2230 2231 #if ASSERT_DISABLED 2232 size_t TCMalloc_PageHeap::CheckList(Span*, Length, Length, bool) { 2233 return 0; 2234 } 2235 #else 2236 size_t TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages, bool decommitted) { 2237 size_t freeCount = 0; 2238 for (Span* s = list->next(entropy_); s != list; s = s->next(entropy_)) { 2239 CHECK_CONDITION(s->free); 2240 CHECK_CONDITION(s->length >= min_pages); 2241 CHECK_CONDITION(s->length <= max_pages); 2242 CHECK_CONDITION(GetDescriptor(s->start) == s); 2243 CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s); 2244 CHECK_CONDITION(s->decommitted == decommitted); 2245 freeCount += s->length; 2246 } 2247 return freeCount; 2248 } 2249 #endif 2250 2251 void TCMalloc_PageHeap::ReleaseFreeList(Span* list, Span* returned) { 2252 // Walk backwards through list so that when we push 
these 2253 // spans on the "returned" list, we preserve the order. 2254 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2255 size_t freePageReduction = 0; 2256 #endif 2257 2258 while (!DLL_IsEmpty(list, entropy_)) { 2259 Span* s = list->prev(entropy_); 2260 2261 DLL_Remove(s, entropy_); 2262 s->decommitted = true; 2263 DLL_Prepend(returned, s, entropy_); 2264 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), 2265 static_cast<size_t>(s->length << kPageShift)); 2266 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2267 freePageReduction += s->length; 2268 #endif 2269 } 2270 2271 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2272 free_committed_pages_ -= freePageReduction; 2273 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_) 2274 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; 2275 #endif 2276 } 2277 2278 void TCMalloc_PageHeap::ReleaseFreePages() { 2279 for (Length s = 0; s < kMaxPages; s++) { 2280 ReleaseFreeList(&free_[s].normal, &free_[s].returned); 2281 } 2282 ReleaseFreeList(&large_.normal, &large_.returned); 2283 ASSERT(Check()); 2284 } 2285 2286 //------------------------------------------------------------------- 2287 // Free list 2288 //------------------------------------------------------------------- 2289 2290 class TCMalloc_ThreadCache_FreeList { 2291 private: 2292 HardenedSLL list_; // Linked list of nodes 2293 uint16_t length_; // Current length 2294 uint16_t lowater_; // Low water mark for list length 2295 uintptr_t entropy_; // Entropy source for hardening 2296 2297 public: 2298 void Init(uintptr_t entropy) { 2299 list_.setValue(NULL); 2300 length_ = 0; 2301 lowater_ = 0; 2302 entropy_ = entropy; 2303 #if ENABLE(TCMALLOC_HARDENING) 2304 ASSERT(entropy_); 2305 #endif 2306 } 2307 2308 // Return current length of list 2309 int length() const { 2310 return length_; 2311 } 2312 2313 // Is list empty? 
2314 bool empty() const { 2315 return !list_; 2316 } 2317 2318 // Low-water mark management 2319 int lowwatermark() const { return lowater_; } 2320 void clear_lowwatermark() { lowater_ = length_; } 2321 2322 ALWAYS_INLINE void Push(HardenedSLL ptr) { 2323 SLL_Push(&list_, ptr, entropy_); 2324 length_++; 2325 } 2326 2327 void PushRange(int N, HardenedSLL start, HardenedSLL end) { 2328 SLL_PushRange(&list_, start, end, entropy_); 2329 length_ = length_ + static_cast<uint16_t>(N); 2330 } 2331 2332 void PopRange(int N, HardenedSLL* start, HardenedSLL* end) { 2333 SLL_PopRange(&list_, N, start, end, entropy_); 2334 ASSERT(length_ >= N); 2335 length_ = length_ - static_cast<uint16_t>(N); 2336 if (length_ < lowater_) lowater_ = length_; 2337 } 2338 2339 ALWAYS_INLINE void* Pop() { 2340 ASSERT(list_); 2341 length_--; 2342 if (length_ < lowater_) lowater_ = length_; 2343 return SLL_Pop(&list_, entropy_).value(); 2344 } 2345 2346 // Runs through the linked list to ensure that 2347 // we can do that, and ensures that 'missing' 2348 // is not present 2349 NEVER_INLINE void Validate(HardenedSLL missing, size_t size) { 2350 HardenedSLL node = list_; 2351 UNUSED_PARAM(size); 2352 while (node) { 2353 RELEASE_ASSERT(node != missing); 2354 RELEASE_ASSERT(IS_DEFINITELY_POISONED(node.value(), size)); 2355 node = SLL_Next(node, entropy_); 2356 } 2357 } 2358 2359 template <class Finder, class Reader> 2360 void enumerateFreeObjects(Finder& finder, const Reader& reader) 2361 { 2362 for (HardenedSLL nextObject = list_; nextObject; nextObject.setValue(reader.nextEntryInHardenedLinkedList(reinterpret_cast<void**>(nextObject.value()), entropy_))) 2363 finder.visit(nextObject.value()); 2364 } 2365 }; 2366 2367 //------------------------------------------------------------------- 2368 // Data kept per thread 2369 //------------------------------------------------------------------- 2370 2371 class TCMalloc_ThreadCache { 2372 private: 2373 typedef TCMalloc_ThreadCache_FreeList FreeList; 2374 #if OS(WINDOWS) 2375 typedef DWORD ThreadIdentifier; 2376 #else 2377 typedef pthread_t ThreadIdentifier; 2378 #endif 2379 2380 size_t size_; // Combined size of data 2381 ThreadIdentifier tid_; // Which thread owns it 2382 bool in_setspecific_; // Called pthread_setspecific? 2383 FreeList list_[kNumClasses]; // Array indexed by size-class 2384 2385 // We sample allocations, biased by the size of the allocation 2386 uint32_t rnd_; // Cheap random number generator 2387 size_t bytes_until_sample_; // Bytes until we sample next 2388 2389 uintptr_t entropy_; // Entropy value used for hardening 2390 2391 // Allocate a new heap. REQUIRES: pageheap_lock is held. 2392 static inline TCMalloc_ThreadCache* NewHeap(ThreadIdentifier tid, uintptr_t entropy); 2393 2394 // Use only as pthread thread-specific destructor function. 
2395 static void DestroyThreadCache(void* ptr); 2396 public: 2397 // All ThreadCache objects are kept in a linked list (for stats collection) 2398 TCMalloc_ThreadCache* next_; 2399 TCMalloc_ThreadCache* prev_; 2400 2401 void Init(ThreadIdentifier tid, uintptr_t entropy); 2402 void Cleanup(); 2403 2404 // Accessors (mostly just for printing stats) 2405 int freelist_length(size_t cl) const { return list_[cl].length(); } 2406 2407 // Total byte size in cache 2408 size_t Size() const { return size_; } 2409 2410 ALWAYS_INLINE void* Allocate(size_t size); 2411 void Deallocate(HardenedSLL ptr, size_t size_class); 2412 2413 ALWAYS_INLINE void FetchFromCentralCache(size_t cl, size_t allocationSize); 2414 void ReleaseToCentralCache(size_t cl, int N); 2415 void Scavenge(); 2416 void Print() const; 2417 2418 // Record allocation of "k" bytes. Return true iff allocation 2419 // should be sampled 2420 bool SampleAllocation(size_t k); 2421 2422 // Pick next sampling point 2423 void PickNextSample(size_t k); 2424 2425 static void InitModule(); 2426 static void InitTSD(); 2427 static TCMalloc_ThreadCache* GetThreadHeap(); 2428 static TCMalloc_ThreadCache* GetCache(); 2429 static TCMalloc_ThreadCache* GetCacheIfPresent(); 2430 static TCMalloc_ThreadCache* CreateCacheIfNecessary(); 2431 static void DeleteCache(TCMalloc_ThreadCache* heap); 2432 static void BecomeIdle(); 2433 static void RecomputeThreadCacheSize(); 2434 2435 template <class Finder, class Reader> 2436 void enumerateFreeObjects(Finder& finder, const Reader& reader) 2437 { 2438 for (unsigned sizeClass = 0; sizeClass < kNumClasses; sizeClass++) 2439 list_[sizeClass].enumerateFreeObjects(finder, reader); 2440 } 2441 }; 2442 2443 //------------------------------------------------------------------- 2444 // Global variables 2445 //------------------------------------------------------------------- 2446 2447 // Central cache -- a collection of free-lists, one per size-class. 2448 // We have a separate lock per free-list to reduce contention. 2449 static TCMalloc_Central_FreeListPadded central_cache[kNumClasses]; 2450 2451 // Page-level allocator 2452 static AllocAlignmentInteger pageheap_memory[(sizeof(TCMalloc_PageHeap) + sizeof(AllocAlignmentInteger) - 1) / sizeof(AllocAlignmentInteger)]; 2453 static bool phinited = false; 2454 2455 // Avoid extra level of indirection by making "pageheap" be just an alias 2456 // of pageheap_memory. 
2457 typedef union {
2458 void* m_memory;
2459 TCMalloc_PageHeap* m_pageHeap;
2460 } PageHeapUnion;
2461 
2462 static inline TCMalloc_PageHeap* getPageHeap()
2463 {
2464 PageHeapUnion u = { &pageheap_memory[0] };
2465 return u.m_pageHeap;
2466 }
2467 
2468 #define pageheap getPageHeap()
2469 
2470 size_t fastMallocGoodSize(size_t bytes)
2471 {
2472 if (!phinited)
2473 TCMalloc_ThreadCache::InitModule();
2474 return AllocationSize(bytes);
2475 }
2476 
2477 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
2478 
2479 #if HAVE(DISPATCH_H) || OS(WINDOWS)
2480 
2481 void TCMalloc_PageHeap::periodicScavenge()
2482 {
2483 SpinLockHolder h(&pageheap_lock);
2484 pageheap->scavenge();
2485 
2486 if (shouldScavenge()) {
2487 rescheduleScavenger();
2488 return;
2489 }
2490 
2491 suspendScavenger();
2492 }
2493 
2494 ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger()
2495 {
2496 ASSERT(pageheap_lock.IsHeld());
2497 if (isScavengerSuspended() && shouldScavenge())
2498 scheduleScavenger();
2499 }
2500 
2501 #else
2502 
2503 void TCMalloc_PageHeap::scavengerThread()
2504 {
2505 #if HAVE(PTHREAD_SETNAME_NP)
2506 pthread_setname_np("JavaScriptCore: FastMalloc scavenger");
2507 #endif
2508 
2509 while (1) {
2510 pageheap_lock.Lock();
2511 if (!shouldScavenge()) {
2512 // Set to false so that signalScavenger() will check whether we need to be signalled.
2513 m_scavengeThreadActive = false;
2514 
2515 // We need to unlock now, as this thread will block on the condvar until scavenging is required.
2516 pageheap_lock.Unlock();
2517 
2518 // Block until there are enough free committed pages to release back to the system.
2519 pthread_mutex_lock(&m_scavengeMutex);
2520 pthread_cond_wait(&m_scavengeCondition, &m_scavengeMutex);
2521 // After exiting the pthread_cond_wait, we hold the lock on m_scavengeMutex. Unlock it to prevent
2522 // deadlock next time round the loop.
2523 pthread_mutex_unlock(&m_scavengeMutex);
2524 
2525 // Set to true to prevent unnecessary signalling of the condvar.
2526 m_scavengeThreadActive = true;
2527 } else
2528 pageheap_lock.Unlock();
2529 
2530 // Wait for a while to calculate how much memory remains unused during this pause.
2531 sleep(kScavengeDelayInSeconds);
2532 
2533 {
2534 SpinLockHolder h(&pageheap_lock);
2535 pageheap->scavenge();
2536 }
2537 }
2538 }
2539 
2540 #endif
2541 
2542 #endif
2543 
2544 // If TLS is available, we also store a copy
2545 // of the per-thread object in a __thread variable
2546 // since __thread variables are faster to read
2547 // than pthread_getspecific(). We still need
2548 // pthread_setspecific() because __thread
2549 // variables provide no way to run cleanup
2550 // code when a thread is destroyed.
2551 #ifdef HAVE_TLS
2552 static __thread TCMalloc_ThreadCache *threadlocal_heap;
2553 #endif
2554 // Thread-specific key. Initialization here is somewhat tricky
2555 // because some Linux startup code invokes malloc() before it
2556 // is in a good enough state to handle pthread_key_create().
2557 // Therefore, we use TSD keys only after tsd_inited is set to true.
2558 // Until then, we use a slow path to get the heap object.
2559 static bool tsd_inited = false;
2560 static pthread_key_t heap_key;
2561 #if OS(WINDOWS)
2562 DWORD tlsIndex = TLS_OUT_OF_INDEXES;
2563 #endif
2564 
2565 static ALWAYS_INLINE void setThreadHeap(TCMalloc_ThreadCache* heap)
2566 {
2567 #if USE(PTHREAD_GETSPECIFIC_DIRECT)
2568 // Can't have two libraries both doing this in the same process,
2569 // so check and make this crash right away.
2570 if (pthread_getspecific(heap_key))
2571 CRASH();
2572 #endif
2573 
2574 // Still do pthread_setspecific even if there's an alternate form
2575 // of thread-local storage in use, to benefit from the delete callback.
2576 pthread_setspecific(heap_key, heap);
2577 
2578 #if OS(WINDOWS)
2579 TlsSetValue(tlsIndex, heap);
2580 #endif
2581 }
2582 
2583 // Allocator for thread heaps
2584 static PageHeapAllocator<TCMalloc_ThreadCache> threadheap_allocator;
2585 
2586 // Linked list of heap objects. Protected by pageheap_lock.
2587 static TCMalloc_ThreadCache* thread_heaps = NULL;
2588 static int thread_heap_count = 0;
2589 
2590 // Overall thread cache size. Protected by pageheap_lock.
2591 static size_t overall_thread_cache_size = kDefaultOverallThreadCacheSize;
2592 
2593 // Global per-thread cache size. Writes are protected by
2594 // pageheap_lock. Reads are done without any locking, which should be
2595 // fine as long as size_t can be written atomically and we don't place
2596 // invariants between this variable and other pieces of state.
2597 static volatile size_t per_thread_cache_size = kMaxThreadCacheSize;
2598 
2599 //-------------------------------------------------------------------
2600 // Central cache implementation
2601 //-------------------------------------------------------------------
2602 
2603 void TCMalloc_Central_FreeList::Init(size_t cl, uintptr_t entropy) {
2604 lock_.Init();
2605 size_class_ = cl;
2606 entropy_ = entropy;
2607 #if ENABLE(TCMALLOC_HARDENING)
2608 ASSERT(entropy_);
2609 #endif
2610 DLL_Init(&empty_, entropy_);
2611 DLL_Init(&nonempty_, entropy_);
2612 counter_ = 0;
2613 
2614 cache_size_ = 1;
2615 used_slots_ = 0;
2616 ASSERT(cache_size_ <= kNumTransferEntries);
2617 }
2618 
2619 void TCMalloc_Central_FreeList::ReleaseListToSpans(HardenedSLL start) {
2620 while (start) {
2621 HardenedSLL next = SLL_Next(start, entropy_);
2622 ReleaseToSpans(start);
2623 start = next;
2624 }
2625 }
2626 
2627 ALWAYS_INLINE void TCMalloc_Central_FreeList::ReleaseToSpans(HardenedSLL object) {
2628 const PageID p = reinterpret_cast<uintptr_t>(object.value()) >> kPageShift;
2629 Span* span = pageheap->GetDescriptor(p);
2630 ASSERT(span != NULL);
2631 ASSERT(span->refcount > 0);
2632 
2633 // If span is empty, move it to non-empty list
2634 if (!span->objects) {
2635 DLL_Remove(span, entropy_);
2636 DLL_Prepend(&nonempty_, span, entropy_);
2637 Event(span, 'N', 0);
2638 }
2639 
2640 // The following check is expensive, so it is disabled by default
2641 if (false) {
2642 // Check that object does not occur in list
2643 unsigned got = 0;
2644 for (HardenedSLL p = span->objects; p; p = SLL_Next(p, entropy_)) {
2645 ASSERT(p.value() != object.value());
2646 got++;
2647 }
2648 ASSERT(got + span->refcount ==
2649 (span->length<<kPageShift)/ByteSizeForClass(span->sizeclass));
2650 }
2651 
2652 counter_++;
2653 span->refcount--;
2654 if (span->refcount == 0) {
2655 Event(span, '#', 0);
2656 counter_ -= (span->length<<kPageShift) / ByteSizeForClass(span->sizeclass);
2657 DLL_Remove(span, entropy_);
2658 
2659 // Release central list lock while operating on pageheap
2660 lock_.Unlock();
2661 {
2662 SpinLockHolder h(&pageheap_lock);
2663 pageheap->Delete(span);
2664 }
2665 lock_.Lock();
2666 } else {
2667 SLL_SetNext(object, span->objects, entropy_);
2668 span->objects.setValue(object.value());
2669 }
2670 }
2671 
2672 ALWAYS_INLINE bool TCMalloc_Central_FreeList::EvictRandomSizeClass(
2673 size_t locked_size_class, bool force) {
2674 static int race_counter = 0;
2675 int t = race_counter++; // Updated
without a lock, but who cares.
2676 if (t >= static_cast<int>(kNumClasses)) {
2677 while (t >= static_cast<int>(kNumClasses)) {
2678 t -= kNumClasses;
2679 }
2680 race_counter = t;
2681 }
2682 ASSERT(t >= 0);
2683 ASSERT(t < static_cast<int>(kNumClasses));
2684 if (t == static_cast<int>(locked_size_class)) return false;
2685 return central_cache[t].ShrinkCache(static_cast<int>(locked_size_class), force);
2686 }
2687 
2688 bool TCMalloc_Central_FreeList::MakeCacheSpace() {
2689 // Is there room in the cache?
2690 if (used_slots_ < cache_size_) return true;
2691 // Check if we can expand this cache?
2692 if (cache_size_ == kNumTransferEntries) return false;
2693 // Ok, we'll try to grab an entry from some other size class.
2694 if (EvictRandomSizeClass(size_class_, false) ||
2695 EvictRandomSizeClass(size_class_, true)) {
2696 // Succeeded in evicting, we're going to make our cache larger.
2697 cache_size_++;
2698 return true;
2699 }
2700 return false;
2701 }
2702 
2703 
2704 namespace {
2705 class LockInverter {
2706 private:
2707 SpinLock *held_, *temp_;
2708 public:
2709 inline explicit LockInverter(SpinLock* held, SpinLock *temp)
2710 : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); }
2711 inline ~LockInverter() { temp_->Unlock(); held_->Lock(); }
2712 };
2713 }
2714 
2715 bool TCMalloc_Central_FreeList::ShrinkCache(int locked_size_class, bool force) {
2716 // Start with a quick check without taking a lock.
2717 if (cache_size_ == 0) return false;
2718 // We don't evict from a full cache unless we are 'forcing'.
2719 if (force == false && used_slots_ == cache_size_) return false;
2720 
2721 // Grab lock, but first release the other lock held by this thread. We use
2722 // the lock inverter to ensure that we never hold two size class locks
2723 // concurrently. That can create a deadlock because there is no well
2724 // defined nesting order.
2725 LockInverter li(&central_cache[locked_size_class].lock_, &lock_);
2726 ASSERT(used_slots_ <= cache_size_);
2727 ASSERT(0 <= cache_size_);
2728 if (cache_size_ == 0) return false;
2729 if (used_slots_ == cache_size_) {
2730 if (force == false) return false;
2731 // ReleaseListToSpans releases the lock, so we have to make all the
2732 // updates to the central list before calling it.
2733 cache_size_--;
2734 used_slots_--;
2735 ReleaseListToSpans(tc_slots_[used_slots_].head);
2736 return true;
2737 }
2738 cache_size_--;
2739 return true;
2740 }
2741 
2742 void TCMalloc_Central_FreeList::InsertRange(HardenedSLL start, HardenedSLL end, int N) {
2743 SpinLockHolder h(&lock_);
2744 if (N == num_objects_to_move[size_class_] &&
2745 MakeCacheSpace()) {
2746 int slot = used_slots_++;
2747 ASSERT(slot >=0);
2748 ASSERT(slot < kNumTransferEntries);
2749 TCEntry *entry = &tc_slots_[slot];
2750 entry->head = start;
2751 entry->tail = end;
2752 return;
2753 }
2754 ReleaseListToSpans(start);
2755 }
2756 
2757 void TCMalloc_Central_FreeList::RemoveRange(HardenedSLL* start, HardenedSLL* end, int *N) {
2758 int num = *N;
2759 ASSERT(num > 0);
2760 
2761 SpinLockHolder h(&lock_);
2762 if (num == num_objects_to_move[size_class_] && used_slots_ > 0) {
2763 int slot = --used_slots_;
2764 ASSERT(slot >= 0);
2765 TCEntry *entry = &tc_slots_[slot];
2766 *start = entry->head;
2767 *end = entry->tail;
2768 return;
2769 }
2770 
2771 // TODO: Prefetch multiple TCEntries?
2772 HardenedSLL tail = FetchFromSpansSafe();
2773 if (!tail) {
2774 // We are completely out of memory.
2775 *start = *end = HardenedSLL::null(); 2776 *N = 0; 2777 return; 2778 } 2779 2780 SLL_SetNext(tail, HardenedSLL::null(), entropy_); 2781 HardenedSLL head = tail; 2782 int count = 1; 2783 while (count < num) { 2784 HardenedSLL t = FetchFromSpans(); 2785 if (!t) break; 2786 SLL_Push(&head, t, entropy_); 2787 count++; 2788 } 2789 *start = head; 2790 *end = tail; 2791 *N = count; 2792 } 2793 2794 2795 HardenedSLL TCMalloc_Central_FreeList::FetchFromSpansSafe() { 2796 HardenedSLL t = FetchFromSpans(); 2797 if (!t) { 2798 Populate(); 2799 t = FetchFromSpans(); 2800 } 2801 return t; 2802 } 2803 2804 HardenedSLL TCMalloc_Central_FreeList::FetchFromSpans() { 2805 if (DLL_IsEmpty(&nonempty_, entropy_)) return HardenedSLL::null(); 2806 Span* span = nonempty_.next(entropy_); 2807 2808 ASSERT(span->objects); 2809 ASSERT_SPAN_COMMITTED(span); 2810 span->refcount++; 2811 HardenedSLL result = span->objects; 2812 span->objects = SLL_Next(result, entropy_); 2813 if (!span->objects) { 2814 // Move to empty list 2815 DLL_Remove(span, entropy_); 2816 DLL_Prepend(&empty_, span, entropy_); 2817 Event(span, 'E', 0); 2818 } 2819 counter_--; 2820 return result; 2821 } 2822 2823 // Fetch memory from the system and add to the central cache freelist. 2824 ALWAYS_INLINE void TCMalloc_Central_FreeList::Populate() { 2825 // Release central list lock while operating on pageheap 2826 lock_.Unlock(); 2827 const size_t npages = class_to_pages[size_class_]; 2828 2829 Span* span; 2830 { 2831 SpinLockHolder h(&pageheap_lock); 2832 span = pageheap->New(npages); 2833 if (span) pageheap->RegisterSizeClass(span, size_class_); 2834 } 2835 if (span == NULL) { 2836 #if OS(WINDOWS) 2837 MESSAGE("allocation failed: %d\n", ::GetLastError()); 2838 #else 2839 MESSAGE("allocation failed: %d\n", errno); 2840 #endif 2841 lock_.Lock(); 2842 return; 2843 } 2844 ASSERT_SPAN_COMMITTED(span); 2845 ASSERT(span->length == npages); 2846 // Cache sizeclass info eagerly. Locking is not necessary. 2847 // (Instead of being eager, we could just replace any stale info 2848 // about this span, but that seems to be no better in practice.) 2849 for (size_t i = 0; i < npages; i++) { 2850 pageheap->CacheSizeClass(span->start + i, size_class_); 2851 } 2852 2853 // Split the block into pieces and add to the free-list 2854 // TODO: coloring of objects to avoid cache conflicts? 
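// Worked example of the carving below (illustrative only; the numbers are an
// assumption, not a real size class): with a 4 KiB page, npages == 1 and a
// 96-byte size class, 4096 % 96 == 64, so "ptr" starts at start + 4032 and
// steps down by 96, producing 42 objects whose list head ends up at "start";
// the final 64 bytes of the span are simply left unused.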
2855 HardenedSLL head = HardenedSLL::null(); 2856 char* start = reinterpret_cast<char*>(span->start << kPageShift); 2857 const size_t size = ByteSizeForClass(size_class_); 2858 char* ptr = start + (npages << kPageShift) - ((npages << kPageShift) % size); 2859 int num = 0; 2860 #if ENABLE(TCMALLOC_HARDENING) 2861 uint32_t startPoison = freedObjectStartPoison(); 2862 uint32_t endPoison = freedObjectEndPoison(); 2863 #endif 2864 2865 while (ptr > start) { 2866 ptr -= size; 2867 HardenedSLL node = HardenedSLL::create(ptr); 2868 POISON_DEALLOCATION_EXPLICIT(ptr, size, startPoison, endPoison); 2869 SLL_SetNext(node, head, entropy_); 2870 head = node; 2871 num++; 2872 } 2873 ASSERT(ptr == start); 2874 ASSERT(ptr == head.value()); 2875 #ifndef NDEBUG 2876 { 2877 HardenedSLL node = head; 2878 while (node) { 2879 ASSERT(IS_DEFINITELY_POISONED(node.value(), size)); 2880 node = SLL_Next(node, entropy_); 2881 } 2882 } 2883 #endif 2884 span->objects = head; 2885 ASSERT(span->objects.value() == head.value()); 2886 span->refcount = 0; // No sub-object in use yet 2887 2888 // Add span to list of non-empty spans 2889 lock_.Lock(); 2890 DLL_Prepend(&nonempty_, span, entropy_); 2891 counter_ += num; 2892 } 2893 2894 //------------------------------------------------------------------- 2895 // TCMalloc_ThreadCache implementation 2896 //------------------------------------------------------------------- 2897 2898 inline bool TCMalloc_ThreadCache::SampleAllocation(size_t k) { 2899 if (bytes_until_sample_ < k) { 2900 PickNextSample(k); 2901 return true; 2902 } else { 2903 bytes_until_sample_ -= k; 2904 return false; 2905 } 2906 } 2907 2908 void TCMalloc_ThreadCache::Init(ThreadIdentifier tid, uintptr_t entropy) { 2909 size_ = 0; 2910 next_ = NULL; 2911 prev_ = NULL; 2912 tid_ = tid; 2913 in_setspecific_ = false; 2914 entropy_ = entropy; 2915 #if ENABLE(TCMALLOC_HARDENING) 2916 ASSERT(entropy_); 2917 #endif 2918 for (size_t cl = 0; cl < kNumClasses; ++cl) { 2919 list_[cl].Init(entropy_); 2920 } 2921 2922 // Initialize RNG -- run it for a bit to get to good values 2923 bytes_until_sample_ = 0; 2924 rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this)); 2925 for (int i = 0; i < 100; i++) { 2926 PickNextSample(static_cast<size_t>(FLAGS_tcmalloc_sample_parameter * 2)); 2927 } 2928 } 2929 2930 void TCMalloc_ThreadCache::Cleanup() { 2931 // Put unused memory back into central cache 2932 for (size_t cl = 0; cl < kNumClasses; ++cl) { 2933 if (list_[cl].length() > 0) { 2934 ReleaseToCentralCache(cl, list_[cl].length()); 2935 } 2936 } 2937 } 2938 2939 ALWAYS_INLINE void* TCMalloc_ThreadCache::Allocate(size_t size) { 2940 ASSERT(size <= kMaxSize); 2941 const size_t cl = SizeClass(size); 2942 FreeList* list = &list_[cl]; 2943 size_t allocationSize = ByteSizeForClass(cl); 2944 if (list->empty()) { 2945 FetchFromCentralCache(cl, allocationSize); 2946 if (list->empty()) return NULL; 2947 } 2948 size_ -= allocationSize; 2949 void* result = list->Pop(); 2950 if (!result) 2951 return 0; 2952 RELEASE_ASSERT(IS_DEFINITELY_POISONED(result, allocationSize)); 2953 POISON_ALLOCATION(result, allocationSize); 2954 return result; 2955 } 2956 2957 inline void TCMalloc_ThreadCache::Deallocate(HardenedSLL ptr, size_t cl) { 2958 size_t allocationSize = ByteSizeForClass(cl); 2959 size_ += allocationSize; 2960 FreeList* list = &list_[cl]; 2961 if (MAY_BE_POISONED(ptr.value(), allocationSize)) 2962 list->Validate(ptr, allocationSize); 2963 2964 POISON_DEALLOCATION(ptr.value(), allocationSize); 2965 list->Push(ptr); 2966 // If enough data 
is free, put back into central cache 2967 if (list->length() > kMaxFreeListLength) { 2968 ReleaseToCentralCache(cl, num_objects_to_move[cl]); 2969 } 2970 if (size_ >= per_thread_cache_size) Scavenge(); 2971 } 2972 2973 // Remove some objects of class "cl" from central cache and add to thread heap 2974 ALWAYS_INLINE void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl, size_t allocationSize) { 2975 int fetch_count = num_objects_to_move[cl]; 2976 HardenedSLL start, end; 2977 central_cache[cl].RemoveRange(&start, &end, &fetch_count); 2978 list_[cl].PushRange(fetch_count, start, end); 2979 size_ += allocationSize * fetch_count; 2980 } 2981 2982 // Remove some objects of class "cl" from thread heap and add to central cache 2983 inline void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) { 2984 ASSERT(N > 0); 2985 FreeList* src = &list_[cl]; 2986 if (N > src->length()) N = src->length(); 2987 size_ -= N*ByteSizeForClass(cl); 2988 2989 // We return prepackaged chains of the correct size to the central cache. 2990 // TODO: Use the same format internally in the thread caches? 2991 int batch_size = num_objects_to_move[cl]; 2992 while (N > batch_size) { 2993 HardenedSLL tail, head; 2994 src->PopRange(batch_size, &head, &tail); 2995 central_cache[cl].InsertRange(head, tail, batch_size); 2996 N -= batch_size; 2997 } 2998 HardenedSLL tail, head; 2999 src->PopRange(N, &head, &tail); 3000 central_cache[cl].InsertRange(head, tail, N); 3001 } 3002 3003 // Release idle memory to the central cache 3004 inline void TCMalloc_ThreadCache::Scavenge() { 3005 // If the low-water mark for the free list is L, it means we would 3006 // not have had to allocate anything from the central cache even if 3007 // we had reduced the free list size by L. We aim to get closer to 3008 // that situation by dropping L/2 nodes from the free list. This 3009 // may not release much memory, but if so we will call scavenge again 3010 // pretty soon and the low-water marks will be high on that call. 3011 //int64 start = CycleClock::Now(); 3012 3013 for (size_t cl = 0; cl < kNumClasses; cl++) { 3014 FreeList* list = &list_[cl]; 3015 const int lowmark = list->lowwatermark(); 3016 if (lowmark > 0) { 3017 const int drop = (lowmark > 1) ? lowmark/2 : 1; 3018 ReleaseToCentralCache(cl, drop); 3019 } 3020 list->clear_lowwatermark(); 3021 } 3022 3023 //int64 finish = CycleClock::Now(); 3024 //CycleTimer ct; 3025 //MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0); 3026 } 3027 3028 void TCMalloc_ThreadCache::PickNextSample(size_t k) { 3029 // Make next "random" number 3030 // x^32+x^22+x^2+x^1+1 is a primitive polynomial for random numbers 3031 static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0); 3032 uint32_t r = rnd_; 3033 rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly); 3034 3035 // Next point is "rnd_ % (sample_period)". I.e., average 3036 // increment is "sample_period/2". 
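// How the update above works (explanatory note, not new behavior): the
// expression static_cast<int32_t>(r) >> 31 is all ones exactly when the top
// bit of r is set (assuming the usual arithmetic right shift on signed
// values), so the line is a Galois-style LFSR step: shift left by one and,
// if a bit fell off the top, XOR in the feedback taps given by kPoly.
// Because rnd_ cycles through (almost) all 32-bit values, rnd_ % sample_period
// is roughly uniform, which is why the average increment quoted above is
// sample_period / 2.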
3037 const int flag_value = static_cast<int>(FLAGS_tcmalloc_sample_parameter); 3038 static int last_flag_value = -1; 3039 3040 if (flag_value != last_flag_value) { 3041 SpinLockHolder h(&sample_period_lock); 3042 int i; 3043 for (i = 0; i < (static_cast<int>(sizeof(primes_list)/sizeof(primes_list[0])) - 1); i++) { 3044 if (primes_list[i] >= flag_value) { 3045 break; 3046 } 3047 } 3048 sample_period = primes_list[i]; 3049 last_flag_value = flag_value; 3050 } 3051 3052 bytes_until_sample_ += rnd_ % sample_period; 3053 3054 if (k > (static_cast<size_t>(-1) >> 2)) { 3055 // If the user has asked for a huge allocation then it is possible 3056 // for the code below to loop infinitely. Just return (note that 3057 // this throws off the sampling accuracy somewhat, but a user who 3058 // is allocating more than 1G of memory at a time can live with a 3059 // minor inaccuracy in profiling of small allocations, and also 3060 // would rather not wait for the loop below to terminate). 3061 return; 3062 } 3063 3064 while (bytes_until_sample_ < k) { 3065 // Increase bytes_until_sample_ by enough average sampling periods 3066 // (sample_period >> 1) to allow us to sample past the current 3067 // allocation. 3068 bytes_until_sample_ += (sample_period >> 1); 3069 } 3070 3071 bytes_until_sample_ -= k; 3072 } 3073 3074 void TCMalloc_ThreadCache::InitModule() { 3075 // There is a slight potential race here because of double-checked 3076 // locking idiom. However, as long as the program does a small 3077 // allocation before switching to multi-threaded mode, we will be 3078 // fine. We increase the chances of doing such a small allocation 3079 // by doing one in the constructor of the module_enter_exit_hook 3080 // object declared below. 3081 SpinLockHolder h(&pageheap_lock); 3082 if (!phinited) { 3083 uintptr_t entropy = HARDENING_ENTROPY; 3084 InitTSD(); 3085 InitSizeClasses(); 3086 threadheap_allocator.Init(entropy); 3087 span_allocator.Init(entropy); 3088 span_allocator.New(); // Reduce cache conflicts 3089 span_allocator.New(); // Reduce cache conflicts 3090 stacktrace_allocator.Init(entropy); 3091 DLL_Init(&sampled_objects, entropy); 3092 for (size_t i = 0; i < kNumClasses; ++i) { 3093 central_cache[i].Init(i, entropy); 3094 } 3095 pageheap->init(); 3096 phinited = 1; 3097 #if OS(DARWIN) 3098 FastMallocZone::init(); 3099 #endif 3100 } 3101 } 3102 3103 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::NewHeap(ThreadIdentifier tid, uintptr_t entropy) { 3104 // Create the heap and add it to the linked list 3105 TCMalloc_ThreadCache *heap = threadheap_allocator.New(); 3106 heap->Init(tid, entropy); 3107 heap->next_ = thread_heaps; 3108 heap->prev_ = NULL; 3109 if (thread_heaps != NULL) thread_heaps->prev_ = heap; 3110 thread_heaps = heap; 3111 thread_heap_count++; 3112 RecomputeThreadCacheSize(); 3113 return heap; 3114 } 3115 3116 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetThreadHeap() { 3117 #ifdef HAVE_TLS 3118 // __thread is faster, but only when the kernel supports it 3119 if (KernelSupportsTLS()) 3120 return threadlocal_heap; 3121 #elif OS(WINDOWS) 3122 return static_cast<TCMalloc_ThreadCache*>(TlsGetValue(tlsIndex)); 3123 #else 3124 return static_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key)); 3125 #endif 3126 } 3127 3128 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() { 3129 TCMalloc_ThreadCache* ptr = NULL; 3130 if (!tsd_inited) { 3131 InitModule(); 3132 } else { 3133 ptr = GetThreadHeap(); 3134 } 3135 if (ptr == NULL) ptr = CreateCacheIfNecessary(); 3136 return 
ptr; 3137 } 3138 3139 // In deletion paths, we do not try to create a thread-cache. This is 3140 // because we may be in the thread destruction code and may have 3141 // already cleaned up the cache for this thread. 3142 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() { 3143 if (!tsd_inited) return NULL; 3144 void* const p = GetThreadHeap(); 3145 return reinterpret_cast<TCMalloc_ThreadCache*>(p); 3146 } 3147 3148 void TCMalloc_ThreadCache::InitTSD() { 3149 ASSERT(!tsd_inited); 3150 #if USE(PTHREAD_GETSPECIFIC_DIRECT) 3151 pthread_key_init_np(heap_key, DestroyThreadCache); 3152 #else 3153 pthread_key_create(&heap_key, DestroyThreadCache); 3154 #endif 3155 #if OS(WINDOWS) 3156 tlsIndex = TlsAlloc(); 3157 #endif 3158 tsd_inited = true; 3159 3160 #if !OS(WINDOWS) 3161 // We may have used a fake pthread_t for the main thread. Fix it. 3162 pthread_t zero; 3163 memset(&zero, 0, sizeof(zero)); 3164 #endif 3165 ASSERT(pageheap_lock.IsHeld()); 3166 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { 3167 #if OS(WINDOWS) 3168 if (h->tid_ == 0) { 3169 h->tid_ = GetCurrentThreadId(); 3170 } 3171 #else 3172 if (pthread_equal(h->tid_, zero)) { 3173 h->tid_ = pthread_self(); 3174 } 3175 #endif 3176 } 3177 } 3178 3179 TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() { 3180 // Initialize per-thread data if necessary 3181 TCMalloc_ThreadCache* heap = NULL; 3182 { 3183 SpinLockHolder h(&pageheap_lock); 3184 3185 #if OS(WINDOWS) 3186 DWORD me; 3187 if (!tsd_inited) { 3188 me = 0; 3189 } else { 3190 me = GetCurrentThreadId(); 3191 } 3192 #else 3193 // Early on in glibc's life, we cannot even call pthread_self() 3194 pthread_t me; 3195 if (!tsd_inited) { 3196 memset(&me, 0, sizeof(me)); 3197 } else { 3198 me = pthread_self(); 3199 } 3200 #endif 3201 3202 // This may be a recursive malloc call from pthread_setspecific() 3203 // In that case, the heap for this thread has already been created 3204 // and added to the linked list. So we search for that first. 3205 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { 3206 #if OS(WINDOWS) 3207 if (h->tid_ == me) { 3208 #else 3209 if (pthread_equal(h->tid_, me)) { 3210 #endif 3211 heap = h; 3212 break; 3213 } 3214 } 3215 3216 if (heap == NULL) heap = NewHeap(me, HARDENING_ENTROPY); 3217 } 3218 3219 // We call pthread_setspecific() outside the lock because it may 3220 // call malloc() recursively. The recursive call will never get 3221 // here again because it will find the already allocated heap in the 3222 // linked list of heaps. 3223 if (!heap->in_setspecific_ && tsd_inited) { 3224 heap->in_setspecific_ = true; 3225 setThreadHeap(heap); 3226 } 3227 return heap; 3228 } 3229 3230 void TCMalloc_ThreadCache::BecomeIdle() { 3231 if (!tsd_inited) return; // No caches yet 3232 TCMalloc_ThreadCache* heap = GetThreadHeap(); 3233 if (heap == NULL) return; // No thread cache to remove 3234 if (heap->in_setspecific_) return; // Do not disturb the active caller 3235 3236 heap->in_setspecific_ = true; 3237 setThreadHeap(NULL); 3238 #ifdef HAVE_TLS 3239 // Also update the copy in __thread 3240 threadlocal_heap = NULL; 3241 #endif 3242 heap->in_setspecific_ = false; 3243 if (GetThreadHeap() == heap) { 3244 // Somehow heap got reinstated by a recursive call to malloc 3245 // from pthread_setspecific. We give up in this case. 
3246 return; 3247 } 3248 3249 // We can now get rid of the heap 3250 DeleteCache(heap); 3251 } 3252 3253 void TCMalloc_ThreadCache::DestroyThreadCache(void* ptr) { 3254 // Note that "ptr" cannot be NULL since pthread promises not 3255 // to invoke the destructor on NULL values, but for safety, 3256 // we check anyway. 3257 if (ptr == NULL) return; 3258 #ifdef HAVE_TLS 3259 // Prevent fast path of GetThreadHeap() from returning heap. 3260 threadlocal_heap = NULL; 3261 #endif 3262 DeleteCache(reinterpret_cast<TCMalloc_ThreadCache*>(ptr)); 3263 } 3264 3265 void TCMalloc_ThreadCache::DeleteCache(TCMalloc_ThreadCache* heap) { 3266 // Remove all memory from heap 3267 heap->Cleanup(); 3268 3269 // Remove from linked list 3270 SpinLockHolder h(&pageheap_lock); 3271 if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_; 3272 if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_; 3273 if (thread_heaps == heap) thread_heaps = heap->next_; 3274 thread_heap_count--; 3275 RecomputeThreadCacheSize(); 3276 3277 threadheap_allocator.Delete(heap); 3278 } 3279 3280 void TCMalloc_ThreadCache::RecomputeThreadCacheSize() { 3281 // Divide available space across threads 3282 int n = thread_heap_count > 0 ? thread_heap_count : 1; 3283 size_t space = overall_thread_cache_size / n; 3284 3285 // Limit to allowed range 3286 if (space < kMinThreadCacheSize) space = kMinThreadCacheSize; 3287 if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize; 3288 3289 per_thread_cache_size = space; 3290 } 3291 3292 void TCMalloc_ThreadCache::Print() const { 3293 for (size_t cl = 0; cl < kNumClasses; ++cl) { 3294 MESSAGE(" %5" PRIuS " : %4d len; %4d lo\n", 3295 ByteSizeForClass(cl), 3296 list_[cl].length(), 3297 list_[cl].lowwatermark()); 3298 } 3299 } 3300 3301 // Extract interesting stats 3302 struct TCMallocStats { 3303 uint64_t system_bytes; // Bytes alloced from system 3304 uint64_t thread_bytes; // Bytes in thread caches 3305 uint64_t central_bytes; // Bytes in central cache 3306 uint64_t transfer_bytes; // Bytes in central transfer cache 3307 uint64_t pageheap_bytes; // Bytes in page heap 3308 uint64_t metadata_bytes; // Bytes alloced for metadata 3309 }; 3310 3311 // The constructor allocates an object to ensure that initialization 3312 // runs before main(), and therefore we do not have a chance to become 3313 // multi-threaded before initialization. We also create the TSD key 3314 // here. Presumably by the time this constructor runs, glibc is in 3315 // good enough shape to handle pthread_key_create(). 3316 // 3317 // The constructor also takes the opportunity to tell STL to use 3318 // tcmalloc. We want to do this early, before construct time, so 3319 // all user STL allocations go through tcmalloc (which works really 3320 // well for STL). 3321 // 3322 // The destructor prints stats when the program exits. 
3323 class TCMallocGuard { 3324 public: 3325 3326 TCMallocGuard() { 3327 #ifdef HAVE_TLS // this is true if the cc/ld/libc combo supports TLS 3328 // Check whether the kernel also supports TLS (needs to happen at runtime) 3329 CheckIfKernelSupportsTLS(); 3330 #endif 3331 free(malloc(1)); 3332 TCMalloc_ThreadCache::InitTSD(); 3333 free(malloc(1)); 3334 } 3335 }; 3336 3337 //------------------------------------------------------------------- 3338 // Helpers for the exported routines below 3339 //------------------------------------------------------------------- 3340 3341 static inline bool CheckCachedSizeClass(void *ptr) { 3342 PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; 3343 size_t cached_value = pageheap->GetSizeClassIfCached(p); 3344 return cached_value == 0 || 3345 cached_value == pageheap->GetDescriptor(p)->sizeclass; 3346 } 3347 3348 static inline void* CheckedMallocResult(void *result) 3349 { 3350 ASSERT(result == 0 || CheckCachedSizeClass(result)); 3351 return result; 3352 } 3353 3354 static inline void* SpanToMallocResult(Span *span) { 3355 ASSERT_SPAN_COMMITTED(span); 3356 pageheap->CacheSizeClass(span->start, 0); 3357 void* result = reinterpret_cast<void*>(span->start << kPageShift); 3358 POISON_ALLOCATION(result, span->length << kPageShift); 3359 return CheckedMallocResult(result); 3360 } 3361 3362 static ALWAYS_INLINE void* do_malloc(size_t size) { 3363 void* ret = 0; 3364 3365 ASSERT(!isForbidden()); 3366 3367 // The following call forces module initialization 3368 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache(); 3369 if (size > kMaxSize) { 3370 // Use page-level allocator 3371 SpinLockHolder h(&pageheap_lock); 3372 Span* span = pageheap->New(pages(size)); 3373 if (span) 3374 ret = SpanToMallocResult(span); 3375 } else { 3376 // The common case, and also the simplest. This just pops the 3377 // size-appropriate freelist, after replenishing it if it's empty.
3378 ret = CheckedMallocResult(heap->Allocate(size)); 3379 } 3380 if (!ret) 3381 CRASH(); 3382 return ret; 3383 } 3384 3385 static ALWAYS_INLINE void do_free(void* ptr) { 3386 if (ptr == NULL) return; 3387 ASSERT(pageheap != NULL); // Should not call free() before malloc() 3388 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; 3389 Span* span = NULL; 3390 size_t cl = pageheap->GetSizeClassIfCached(p); 3391 3392 if (cl == 0) { 3393 span = pageheap->GetDescriptor(p); 3394 RELEASE_ASSERT(span->isValid()); 3395 cl = span->sizeclass; 3396 pageheap->CacheSizeClass(p, cl); 3397 } 3398 if (cl != 0) { 3399 #ifndef NO_TCMALLOC_SAMPLES 3400 ASSERT(!pageheap->GetDescriptor(p)->sample); 3401 #endif 3402 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCacheIfPresent(); 3403 if (heap != NULL) { 3404 heap->Deallocate(HardenedSLL::create(ptr), cl); 3405 } else { 3406 // Delete directly into central cache 3407 POISON_DEALLOCATION(ptr, ByteSizeForClass(cl)); 3408 SLL_SetNext(HardenedSLL::create(ptr), HardenedSLL::null(), central_cache[cl].entropy()); 3409 central_cache[cl].InsertRange(HardenedSLL::create(ptr), HardenedSLL::create(ptr), 1); 3410 } 3411 } else { 3412 SpinLockHolder h(&pageheap_lock); 3413 ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); 3414 ASSERT(span != NULL && span->start == p); 3415 #ifndef NO_TCMALLOC_SAMPLES 3416 if (span->sample) { 3417 DLL_Remove(span); 3418 stacktrace_allocator.Delete(reinterpret_cast<StackTrace*>(span->objects)); 3419 span->objects = NULL; 3420 } 3421 #endif 3422 3423 POISON_DEALLOCATION(ptr, span->length << kPageShift); 3424 pageheap->Delete(span); 3425 } 3426 } 3427 3428 // Helpers for use by exported routines below: 3429 3430 static inline int do_mallopt(int, int) { 3431 return 1; // Indicates error 3432 } 3433 3434 #ifdef HAVE_STRUCT_MALLINFO // mallinfo isn't defined on freebsd, for instance 3435 static inline struct mallinfo do_mallinfo() { 3436 TCMallocStats stats; 3437 ExtractStats(&stats, NULL); 3438 3439 // Just some of the fields are filled in. 3440 struct mallinfo info; 3441 memset(&info, 0, sizeof(info)); 3442 3443 // Unfortunately, the struct contains "int" fields, so some of the 3444 // size values will be truncated. 3445 info.arena = static_cast<int>(stats.system_bytes); 3446 info.fsmblks = static_cast<int>(stats.thread_bytes 3447 + stats.central_bytes 3448 + stats.transfer_bytes); 3449 info.fordblks = static_cast<int>(stats.pageheap_bytes); 3450 info.uordblks = static_cast<int>(stats.system_bytes 3451 - stats.thread_bytes 3452 - stats.central_bytes 3453 - stats.transfer_bytes 3454 - stats.pageheap_bytes); 3455 3456 return info; 3457 } 3458 #endif 3459 3460 //------------------------------------------------------------------- 3461 // Exported routines 3462 //------------------------------------------------------------------- 3463 3464 // CAVEAT: The code structure below ensures that MallocHook methods are always 3465 // called from the stack frame of the invoked allocation function. 3466 // heap-checker.cc depends on this to start a stack trace from 3467 // the call to the (de)allocation function.
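//
// Illustrative usage sketch (comments only, not part of the allocator): the
// exported entry points below mirror the C malloc family. do_malloc() calls
// CRASH() instead of returning 0 on exhaustion, so fastMalloc() never returns
// null, while fastCalloc() can still return 0 when the n * elem_size
// multiplication would overflow. A hypothetical caller:
//
//   void* buffer = fastMalloc(64);              // never 0; crashes on OOM
//   buffer = fastRealloc(buffer, 128);          // may move the allocation
//   void* table = fastCalloc(16, sizeof(int));  // zero-filled; 0 on overflow
//   fastFree(table);
//   fastFree(buffer);
//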
3468 3469 void* fastMalloc(size_t size) 3470 { 3471 return do_malloc(size); 3472 } 3473 3474 void fastFree(void* ptr) 3475 { 3476 do_free(ptr); 3477 } 3478 3479 void* fastCalloc(size_t n, size_t elem_size) 3480 { 3481 size_t totalBytes = n * elem_size; 3482 3483 // Protect against overflow 3484 if (n > 1 && elem_size && (totalBytes / elem_size) != n) 3485 return 0; 3486 3487 void* result = do_malloc(totalBytes); 3488 memset(result, 0, totalBytes); 3489 3490 return result; 3491 } 3492 3493 void* fastRealloc(void* old_ptr, size_t new_size) 3494 { 3495 if (old_ptr == NULL) { 3496 return do_malloc(new_size); 3497 } 3498 if (new_size == 0) { 3499 free(old_ptr); 3500 return NULL; 3501 } 3502 3503 // Get the size of the old entry 3504 const PageID p = reinterpret_cast<uintptr_t>(old_ptr) >> kPageShift; 3505 size_t cl = pageheap->GetSizeClassIfCached(p); 3506 Span *span = NULL; 3507 size_t old_size; 3508 if (cl == 0) { 3509 span = pageheap->GetDescriptor(p); 3510 cl = span->sizeclass; 3511 pageheap->CacheSizeClass(p, cl); 3512 } 3513 if (cl != 0) { 3514 old_size = ByteSizeForClass(cl); 3515 } else { 3516 ASSERT(span != NULL); 3517 old_size = span->length << kPageShift; 3518 } 3519 3520 // Reallocate if the new size is larger than the old size, 3521 // or if the new size is significantly smaller than the old size. 3522 if ((new_size > old_size) || (AllocationSize(new_size) < old_size)) { 3523 // Need to reallocate 3524 void* new_ptr = do_malloc(new_size); 3525 memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size)); 3526 // We could use a variant of do_free() that leverages the fact 3527 // that we already know the sizeclass of old_ptr. The benefit 3528 // would be small, so don't bother. 3529 do_free(old_ptr); 3530 return new_ptr; 3531 } else { 3532 return old_ptr; 3533 } 3534 } 3535 3536 void releaseFastMallocFreeMemory() 3537 { 3538 // Flush free pages in the current thread cache back to the page heap. 
3539 if (TCMalloc_ThreadCache* threadCache = TCMalloc_ThreadCache::GetCacheIfPresent()) 3540 threadCache->Cleanup(); 3541 3542 SpinLockHolder h(&pageheap_lock); 3543 pageheap->ReleaseFreePages(); 3544 } 3545 3546 FastMallocStatistics fastMallocStatistics() 3547 { 3548 FastMallocStatistics statistics; 3549 3550 SpinLockHolder lockHolder(&pageheap_lock); 3551 statistics.reservedVMBytes = static_cast<size_t>(pageheap->SystemBytes()); 3552 statistics.committedVMBytes = statistics.reservedVMBytes - pageheap->ReturnedBytes(); 3553 3554 statistics.freeListBytes = 0; 3555 for (unsigned cl = 0; cl < kNumClasses; ++cl) { 3556 const int length = central_cache[cl].length(); 3557 const int tc_length = central_cache[cl].tc_length(); 3558 3559 statistics.freeListBytes += ByteSizeForClass(cl) * (length + tc_length); 3560 } 3561 for (TCMalloc_ThreadCache* threadCache = thread_heaps; threadCache ; threadCache = threadCache->next_) 3562 statistics.freeListBytes += threadCache->Size(); 3563 3564 return statistics; 3565 } 3566 3567 #if OS(DARWIN) 3568 3569 template <typename T> 3570 T* RemoteMemoryReader::nextEntryInHardenedLinkedList(T** remoteAddress, uintptr_t entropy) const 3571 { 3572 T** localAddress = (*this)(remoteAddress); 3573 if (!localAddress) 3574 return 0; 3575 T* hardenedNext = *localAddress; 3576 if (!hardenedNext || hardenedNext == (void*)entropy) 3577 return 0; 3578 return XOR_MASK_PTR_WITH_KEY(hardenedNext, remoteAddress, entropy); 3579 } 3580 3581 class FreeObjectFinder { 3582 const RemoteMemoryReader& m_reader; 3583 HashSet<void*> m_freeObjects; 3584 3585 public: 3586 FreeObjectFinder(const RemoteMemoryReader& reader) : m_reader(reader) { } 3587 3588 void visit(void* ptr) { m_freeObjects.add(ptr); } 3589 bool isFreeObject(void* ptr) const { return m_freeObjects.contains(ptr); } 3590 bool isFreeObject(vm_address_t ptr) const { return isFreeObject(reinterpret_cast<void*>(ptr)); } 3591 size_t freeObjectCount() const { return m_freeObjects.size(); } 3592 3593 void findFreeObjects(TCMalloc_ThreadCache* threadCache) 3594 { 3595 for (; threadCache; threadCache = (threadCache->next_ ? 
m_reader(threadCache->next_) : 0)) 3596 threadCache->enumerateFreeObjects(*this, m_reader); 3597 } 3598 3599 void findFreeObjects(TCMalloc_Central_FreeListPadded* centralFreeList, size_t numSizes, TCMalloc_Central_FreeListPadded* remoteCentralFreeList) 3600 { 3601 for (unsigned i = 0; i < numSizes; i++) 3602 centralFreeList[i].enumerateFreeObjects(*this, m_reader, remoteCentralFreeList + i); 3603 } 3604 }; 3605 3606 class PageMapFreeObjectFinder { 3607 const RemoteMemoryReader& m_reader; 3608 FreeObjectFinder& m_freeObjectFinder; 3609 uintptr_t m_entropy; 3610 3611 public: 3612 PageMapFreeObjectFinder(const RemoteMemoryReader& reader, FreeObjectFinder& freeObjectFinder, uintptr_t entropy) 3613 : m_reader(reader) 3614 , m_freeObjectFinder(freeObjectFinder) 3615 , m_entropy(entropy) 3616 { 3617 #if ENABLE(TCMALLOC_HARDENING) 3618 ASSERT(m_entropy); 3619 #endif 3620 } 3621 3622 int visit(void* ptr) const 3623 { 3624 if (!ptr) 3625 return 1; 3626 3627 Span* span = m_reader(reinterpret_cast<Span*>(ptr)); 3628 if (!span) 3629 return 1; 3630 3631 if (span->free) { 3632 void* ptr = reinterpret_cast<void*>(span->start << kPageShift); 3633 m_freeObjectFinder.visit(ptr); 3634 } else if (span->sizeclass) { 3635 // Walk the free list of the small-object span, keeping track of each object seen 3636 for (HardenedSLL nextObject = span->objects; nextObject; nextObject.setValue(m_reader.nextEntryInHardenedLinkedList(reinterpret_cast<void**>(nextObject.value()), m_entropy))) 3637 m_freeObjectFinder.visit(nextObject.value()); 3638 } 3639 return span->length; 3640 } 3641 }; 3642 3643 class PageMapMemoryUsageRecorder { 3644 task_t m_task; 3645 void* m_context; 3646 unsigned m_typeMask; 3647 vm_range_recorder_t* m_recorder; 3648 const RemoteMemoryReader& m_reader; 3649 const FreeObjectFinder& m_freeObjectFinder; 3650 3651 HashSet<void*> m_seenPointers; 3652 Vector<Span*> m_coalescedSpans; 3653 3654 public: 3655 PageMapMemoryUsageRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader, const FreeObjectFinder& freeObjectFinder) 3656 : m_task(task) 3657 , m_context(context) 3658 , m_typeMask(typeMask) 3659 , m_recorder(recorder) 3660 , m_reader(reader) 3661 , m_freeObjectFinder(freeObjectFinder) 3662 { } 3663 3664 ~PageMapMemoryUsageRecorder() 3665 { 3666 ASSERT(!m_coalescedSpans.size()); 3667 } 3668 3669 void recordPendingRegions() 3670 { 3671 if (!(m_typeMask & (MALLOC_PTR_IN_USE_RANGE_TYPE | MALLOC_PTR_REGION_RANGE_TYPE))) { 3672 m_coalescedSpans.clear(); 3673 return; 3674 } 3675 3676 Vector<vm_range_t, 1024> allocatedPointers; 3677 for (size_t i = 0; i < m_coalescedSpans.size(); ++i) { 3678 Span *theSpan = m_coalescedSpans[i]; 3679 if (theSpan->free) 3680 continue; 3681 3682 vm_address_t spanStartAddress = theSpan->start << kPageShift; 3683 vm_size_t spanSizeInBytes = theSpan->length * kPageSize; 3684 3685 if (!theSpan->sizeclass) { 3686 // If it's an allocated large object span, mark it as in use 3687 if (!m_freeObjectFinder.isFreeObject(spanStartAddress)) 3688 allocatedPointers.append((vm_range_t){spanStartAddress, spanSizeInBytes}); 3689 } else { 3690 const size_t objectSize = ByteSizeForClass(theSpan->sizeclass); 3691 3692 // Mark each allocated small object within the span as in use 3693 const vm_address_t endOfSpan = spanStartAddress + spanSizeInBytes; 3694 for (vm_address_t object = spanStartAddress; object + objectSize <= endOfSpan; object += objectSize) { 3695 if (!m_freeObjectFinder.isFreeObject(object)) 3696 
allocatedPointers.append((vm_range_t){object, objectSize}); 3697 } 3698 } 3699 } 3700 3701 (*m_recorder)(m_task, m_context, m_typeMask & (MALLOC_PTR_IN_USE_RANGE_TYPE | MALLOC_PTR_REGION_RANGE_TYPE), allocatedPointers.data(), allocatedPointers.size()); 3702 3703 m_coalescedSpans.clear(); 3704 } 3705 3706 int visit(void* ptr) 3707 { 3708 if (!ptr) 3709 return 1; 3710 3711 Span* span = m_reader(reinterpret_cast<Span*>(ptr)); 3712 if (!span || !span->start) 3713 return 1; 3714 3715 if (!m_seenPointers.add(ptr).isNewEntry) 3716 return span->length; 3717 3718 if (!m_coalescedSpans.size()) { 3719 m_coalescedSpans.append(span); 3720 return span->length; 3721 } 3722 3723 Span* previousSpan = m_coalescedSpans[m_coalescedSpans.size() - 1]; 3724 vm_address_t previousSpanStartAddress = previousSpan->start << kPageShift; 3725 vm_size_t previousSpanSizeInBytes = previousSpan->length * kPageSize; 3726 3727 // If the new span is adjacent to the previous span, do nothing for now. 3728 vm_address_t spanStartAddress = span->start << kPageShift; 3729 if (spanStartAddress == previousSpanStartAddress + previousSpanSizeInBytes) { 3730 m_coalescedSpans.append(span); 3731 return span->length; 3732 } 3733 3734 // New span is not adjacent to previous span, so record the spans coalesced so far. 3735 recordPendingRegions(); 3736 m_coalescedSpans.append(span); 3737 3738 return span->length; 3739 } 3740 }; 3741 3742 class AdminRegionRecorder { 3743 task_t m_task; 3744 void* m_context; 3745 unsigned m_typeMask; 3746 vm_range_recorder_t* m_recorder; 3747 3748 Vector<vm_range_t, 1024> m_pendingRegions; 3749 3750 public: 3751 AdminRegionRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder) 3752 : m_task(task) 3753 , m_context(context) 3754 , m_typeMask(typeMask) 3755 , m_recorder(recorder) 3756 { } 3757 3758 void recordRegion(vm_address_t ptr, size_t size) 3759 { 3760 if (m_typeMask & MALLOC_ADMIN_REGION_RANGE_TYPE) 3761 m_pendingRegions.append((vm_range_t){ ptr, size }); 3762 } 3763 3764 void visit(void *ptr, size_t size) 3765 { 3766 recordRegion(reinterpret_cast<vm_address_t>(ptr), size); 3767 } 3768 3769 void recordPendingRegions() 3770 { 3771 if (m_pendingRegions.size()) { 3772 (*m_recorder)(m_task, m_context, MALLOC_ADMIN_REGION_RANGE_TYPE, m_pendingRegions.data(), m_pendingRegions.size()); 3773 m_pendingRegions.clear(); 3774 } 3775 } 3776 3777 ~AdminRegionRecorder() 3778 { 3779 ASSERT(!m_pendingRegions.size()); 3780 } 3781 }; 3782 3783 kern_return_t FastMallocZone::enumerate(task_t task, void* context, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t reader, vm_range_recorder_t recorder) 3784 { 3785 RemoteMemoryReader memoryReader(task, reader); 3786 3787 InitSizeClasses(); 3788 3789 FastMallocZone* mzone = memoryReader(reinterpret_cast<FastMallocZone*>(zoneAddress)); 3790 TCMalloc_PageHeap* pageHeap = memoryReader(mzone->m_pageHeap); 3791 TCMalloc_ThreadCache** threadHeapsPointer = memoryReader(mzone->m_threadHeaps); 3792 TCMalloc_ThreadCache* threadHeaps = memoryReader(*threadHeapsPointer); 3793 3794 TCMalloc_Central_FreeListPadded* centralCaches = memoryReader(mzone->m_centralCaches, sizeof(TCMalloc_Central_FreeListPadded) * kNumClasses); 3795 3796 FreeObjectFinder finder(memoryReader); 3797 finder.findFreeObjects(threadHeaps); 3798 finder.findFreeObjects(centralCaches, kNumClasses, mzone->m_centralCaches); 3799 3800 TCMalloc_PageHeap::PageMap* pageMap = &pageHeap->pagemap_; 3801 PageMapFreeObjectFinder pageMapFinder(memoryReader, finder, pageHeap->entropy_); 3802 
pageMap->visitValues(pageMapFinder, memoryReader); 3803 3804 PageMapMemoryUsageRecorder usageRecorder(task, context, typeMask, recorder, memoryReader, finder); 3805 pageMap->visitValues(usageRecorder, memoryReader); 3806 usageRecorder.recordPendingRegions(); 3807 3808 AdminRegionRecorder adminRegionRecorder(task, context, typeMask, recorder); 3809 pageMap->visitAllocations(adminRegionRecorder, memoryReader); 3810 3811 PageHeapAllocator<Span>* spanAllocator = memoryReader(mzone->m_spanAllocator); 3812 PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator = memoryReader(mzone->m_pageHeapAllocator); 3813 3814 spanAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader); 3815 pageHeapAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader); 3816 3817 adminRegionRecorder.recordPendingRegions(); 3818 3819 return 0; 3820 } 3821 3822 size_t FastMallocZone::size(malloc_zone_t*, const void*) 3823 { 3824 return 0; 3825 } 3826 3827 void* FastMallocZone::zoneMalloc(malloc_zone_t*, size_t) 3828 { 3829 return 0; 3830 } 3831 3832 void* FastMallocZone::zoneCalloc(malloc_zone_t*, size_t, size_t) 3833 { 3834 return 0; 3835 } 3836 3837 void FastMallocZone::zoneFree(malloc_zone_t*, void* ptr) 3838 { 3839 // Due to <rdar://problem/5671357> zoneFree may be called by the system free even if the pointer 3840 // is not in this zone. When this happens, the pointer being freed was not allocated by any 3841 // zone so we need to print a useful error for the application developer. 3842 malloc_printf("*** error for object %p: pointer being freed was not allocated\n", ptr); 3843 } 3844 3845 void* FastMallocZone::zoneRealloc(malloc_zone_t*, void*, size_t) 3846 { 3847 return 0; 3848 } 3849 3850 3851 #undef malloc 3852 #undef free 3853 #undef realloc 3854 #undef calloc 3855 3856 extern "C" { 3857 malloc_introspection_t jscore_fastmalloc_introspection = { &FastMallocZone::enumerate, &FastMallocZone::goodSize, &FastMallocZone::check, &FastMallocZone::print, 3858 &FastMallocZone::log, &FastMallocZone::forceLock, &FastMallocZone::forceUnlock, &FastMallocZone::statistics 3859 3860 #if __MAC_OS_X_VERSION_MAX_ALLOWED >= 1060 3861 , 0 // zone_locked will not be called on the zone unless it advertises itself as version five or higher. 3862 #endif 3863 #if __MAC_OS_X_VERSION_MAX_ALLOWED >= 1070 3864 , 0, 0, 0, 0 // These members will not be used unless the zone advertises itself as version seven or higher. 
3865 #endif 3866 3867 }; 3868 } 3869 3870 FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache** threadHeaps, TCMalloc_Central_FreeListPadded* centralCaches, PageHeapAllocator<Span>* spanAllocator, PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator) 3871 : m_pageHeap(pageHeap) 3872 , m_threadHeaps(threadHeaps) 3873 , m_centralCaches(centralCaches) 3874 , m_spanAllocator(spanAllocator) 3875 , m_pageHeapAllocator(pageHeapAllocator) 3876 { 3877 memset(&m_zone, 0, sizeof(m_zone)); 3878 m_zone.version = 4; 3879 m_zone.zone_name = "JavaScriptCore FastMalloc"; 3880 m_zone.size = &FastMallocZone::size; 3881 m_zone.malloc = &FastMallocZone::zoneMalloc; 3882 m_zone.calloc = &FastMallocZone::zoneCalloc; 3883 m_zone.realloc = &FastMallocZone::zoneRealloc; 3884 m_zone.free = &FastMallocZone::zoneFree; 3885 m_zone.valloc = &FastMallocZone::zoneValloc; 3886 m_zone.destroy = &FastMallocZone::zoneDestroy; 3887 m_zone.introspect = &jscore_fastmalloc_introspection; 3888 malloc_zone_register(&m_zone); 3889 } 3890 3891 3892 void FastMallocZone::init() 3893 { 3894 static FastMallocZone zone(pageheap, &thread_heaps, static_cast<TCMalloc_Central_FreeListPadded*>(central_cache), &span_allocator, &threadheap_allocator); 3895 } 3896 3897 #endif // OS(DARWIN) 3898 3899 } // namespace WTF 3900 3901 #endif // FORCE_SYSTEM_MALLOC 3902
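//
// Informational notes on the exports above (comments only; no new behavior):
//
// - releaseFastMallocFreeMemory() and fastMallocStatistics() are the
//   allocator's maintenance hooks for callers, e.g.:
//
//       releaseFastMallocFreeMemory();                       // flush the current thread cache and release free pages
//       FastMallocStatistics stats = fastMallocStatistics(); // reserved/committed/free-list byte counts
//
// - On Darwin, FastMallocZone::init() registers m_zone via malloc_zone_register()
//   so that memory-analysis tools can walk FastMalloc's heap through
//   FastMallocZone::enumerate() and the jscore_fastmalloc_introspection table.
//   The malloc/calloc/realloc/size callbacks return 0, so the system allocator
//   never attributes ordinary pointers to this zone; zoneFree() exists only to
//   report the <rdar://problem/5671357> case where the system free() routes a
//   foreign pointer here anyway.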