// Copyright (c) 2005, 2007, Google Inc.
// All rights reserved.
// Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011 Apple Inc. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat <opensource (at) google.com>
//
// A malloc that uses a per-thread cache to satisfy small malloc requests.
// (The time for malloc/free of a small object drops from 300 ns to 50 ns.)
//
// See doc/tcmalloc.html for a high-level
// description of how this malloc works.
//
// SYNCHRONIZATION
//  1. The thread-specific lists are accessed without acquiring any locks.
//     This is safe because each such list is only accessed by one thread.
//  2. We have a lock per central free-list, and hold it while manipulating
//     the central free list for a particular size.
//  3. The central page allocator is protected by "pageheap_lock".
//  4. The pagemap (which maps from page-number to descriptor),
//     can be read without holding any locks, and written while holding
//     the "pageheap_lock".
//  5. To improve performance, a subset of the information one can get
//     from the pagemap is cached in a data structure, pagemap_cache_,
//     that atomically reads and writes its entries. This cache can be
//     read and written without locking.
//
//     This multi-threaded access to the pagemap is safe for fairly
//     subtle reasons. We basically assume that when an object X is
//     allocated by thread A and deallocated by thread B, there must
//     have been appropriate synchronization in the handoff of object
//     X from thread A to thread B. The same logic applies to pagemap_cache_.
//
// THE PAGEID-TO-SIZECLASS CACHE
// Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache
// returns 0 for a particular PageID then that means "no information," not that
// the sizeclass is 0. The cache may have stale information for pages that do
// not hold the beginning of any free()'able object.
Staleness is eliminated 65 // in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and 66 // do_memalign() for all other relevant pages. 67 // 68 // TODO: Bias reclamation to larger addresses 69 // TODO: implement mallinfo/mallopt 70 // TODO: Better testing 71 // 72 // 9/28/2003 (new page-level allocator replaces ptmalloc2): 73 // * malloc/free of small objects goes from ~300 ns to ~50 ns. 74 // * allocation of a reasonably complicated struct 75 // goes from about 1100 ns to about 300 ns. 76 77 #include "config.h" 78 #include "FastMalloc.h" 79 80 #include "Assertions.h" 81 #include <limits> 82 #if ENABLE(JSC_MULTIPLE_THREADS) 83 #include <pthread.h> 84 #endif 85 #include <wtf/StdLibExtras.h> 86 87 #ifndef NO_TCMALLOC_SAMPLES 88 #ifdef WTF_CHANGES 89 #define NO_TCMALLOC_SAMPLES 90 #endif 91 #endif 92 93 #if !(defined(USE_SYSTEM_MALLOC) && USE_SYSTEM_MALLOC) && defined(NDEBUG) 94 #define FORCE_SYSTEM_MALLOC 0 95 #else 96 #define FORCE_SYSTEM_MALLOC 1 97 #endif 98 99 // Use a background thread to periodically scavenge memory to release back to the system 100 // https://bugs.webkit.org/show_bug.cgi?id=27900: don't turn this on for Tiger until we have figured out why it caused a crash. 101 #if defined(BUILDING_ON_TIGER) 102 #define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 0 103 #else 104 #define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1 105 #endif 106 107 #ifndef NDEBUG 108 namespace WTF { 109 110 #if ENABLE(JSC_MULTIPLE_THREADS) 111 static pthread_key_t isForbiddenKey; 112 static pthread_once_t isForbiddenKeyOnce = PTHREAD_ONCE_INIT; 113 static void initializeIsForbiddenKey() 114 { 115 pthread_key_create(&isForbiddenKey, 0); 116 } 117 118 #if !ASSERT_DISABLED 119 static bool isForbidden() 120 { 121 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey); 122 return !!pthread_getspecific(isForbiddenKey); 123 } 124 #endif 125 126 void fastMallocForbid() 127 { 128 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey); 129 pthread_setspecific(isForbiddenKey, &isForbiddenKey); 130 } 131 132 void fastMallocAllow() 133 { 134 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey); 135 pthread_setspecific(isForbiddenKey, 0); 136 } 137 138 #else 139 140 static bool staticIsForbidden; 141 static bool isForbidden() 142 { 143 return staticIsForbidden; 144 } 145 146 void fastMallocForbid() 147 { 148 staticIsForbidden = true; 149 } 150 151 void fastMallocAllow() 152 { 153 staticIsForbidden = false; 154 } 155 #endif // ENABLE(JSC_MULTIPLE_THREADS) 156 157 } // namespace WTF 158 #endif // NDEBUG 159 160 #include <string.h> 161 162 namespace WTF { 163 164 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 165 166 namespace Internal { 167 168 void fastMallocMatchFailed(void*) 169 { 170 CRASH(); 171 } 172 173 } // namespace Internal 174 175 #endif 176 177 void* fastZeroedMalloc(size_t n) 178 { 179 void* result = fastMalloc(n); 180 memset(result, 0, n); 181 return result; 182 } 183 184 char* fastStrDup(const char* src) 185 { 186 size_t len = strlen(src) + 1; 187 char* dup = static_cast<char*>(fastMalloc(len)); 188 memcpy(dup, src, len); 189 return dup; 190 } 191 192 TryMallocReturnValue tryFastZeroedMalloc(size_t n) 193 { 194 void* result; 195 if (!tryFastMalloc(n).getValue(result)) 196 return 0; 197 memset(result, 0, n); 198 return result; 199 } 200 201 } // namespace WTF 202 203 #if FORCE_SYSTEM_MALLOC 204 205 #if PLATFORM(BREWMP) 206 #include "brew/SystemMallocBrew.h" 207 #endif 208 209 #if OS(DARWIN) 210 #include <malloc/malloc.h> 211 #elif OS(WINDOWS) 212 #include <malloc.h> 213 #endif 
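// Illustrative only: a minimal sketch of how a caller is expected to consume a
// TryMallocReturnValue, mirroring the getValue() pattern used by
// tryFastZeroedMalloc() above. tryAllocateScratchBuffer() and its use are
// hypothetical and not part of this file.
#if 0
static char* tryAllocateScratchBuffer(size_t size)
{
    void* buffer;
    if (!WTF::tryFastMalloc(size).getValue(buffer))
        return 0; // Allocation failed; the caller must cope with a null result.
    return static_cast<char*>(buffer);
}
#endif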
214 215 namespace WTF { 216 217 TryMallocReturnValue tryFastMalloc(size_t n) 218 { 219 ASSERT(!isForbidden()); 220 221 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 222 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n) // If overflow would occur... 223 return 0; 224 225 void* result = malloc(n + sizeof(AllocAlignmentInteger)); 226 if (!result) 227 return 0; 228 229 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc; 230 result = static_cast<AllocAlignmentInteger*>(result) + 1; 231 232 return result; 233 #else 234 return malloc(n); 235 #endif 236 } 237 238 void* fastMalloc(size_t n) 239 { 240 ASSERT(!isForbidden()); 241 242 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 243 TryMallocReturnValue returnValue = tryFastMalloc(n); 244 void* result; 245 if (!returnValue.getValue(result)) 246 CRASH(); 247 #else 248 void* result = malloc(n); 249 #endif 250 251 if (!result) { 252 #if PLATFORM(BREWMP) 253 // The behavior of malloc(0) is implementation defined. 254 // To make sure that fastMalloc never returns 0, retry with fastMalloc(1). 255 if (!n) 256 return fastMalloc(1); 257 #endif 258 CRASH(); 259 } 260 261 return result; 262 } 263 264 TryMallocReturnValue tryFastCalloc(size_t n_elements, size_t element_size) 265 { 266 ASSERT(!isForbidden()); 267 268 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 269 size_t totalBytes = n_elements * element_size; 270 if (n_elements > 1 && element_size && (totalBytes / element_size) != n_elements || (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes)) 271 return 0; 272 273 totalBytes += sizeof(AllocAlignmentInteger); 274 void* result = malloc(totalBytes); 275 if (!result) 276 return 0; 277 278 memset(result, 0, totalBytes); 279 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc; 280 result = static_cast<AllocAlignmentInteger*>(result) + 1; 281 return result; 282 #else 283 return calloc(n_elements, element_size); 284 #endif 285 } 286 287 void* fastCalloc(size_t n_elements, size_t element_size) 288 { 289 ASSERT(!isForbidden()); 290 291 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 292 TryMallocReturnValue returnValue = tryFastCalloc(n_elements, element_size); 293 void* result; 294 if (!returnValue.getValue(result)) 295 CRASH(); 296 #else 297 void* result = calloc(n_elements, element_size); 298 #endif 299 300 if (!result) { 301 #if PLATFORM(BREWMP) 302 // If either n_elements or element_size is 0, the behavior of calloc is implementation defined. 303 // To make sure that fastCalloc never returns 0, retry with fastCalloc(1, 1). 304 if (!n_elements || !element_size) 305 return fastCalloc(1, 1); 306 #endif 307 CRASH(); 308 } 309 310 return result; 311 } 312 313 void fastFree(void* p) 314 { 315 ASSERT(!isForbidden()); 316 317 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 318 if (!p) 319 return; 320 321 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p); 322 if (*header != Internal::AllocTypeMalloc) 323 Internal::fastMallocMatchFailed(p); 324 free(header); 325 #else 326 free(p); 327 #endif 328 } 329 330 TryMallocReturnValue tryFastRealloc(void* p, size_t n) 331 { 332 ASSERT(!isForbidden()); 333 334 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 335 if (p) { 336 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n) // If overflow would occur... 
337 return 0; 338 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p); 339 if (*header != Internal::AllocTypeMalloc) 340 Internal::fastMallocMatchFailed(p); 341 void* result = realloc(header, n + sizeof(AllocAlignmentInteger)); 342 if (!result) 343 return 0; 344 345 // This should not be needed because the value is already there: 346 // *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc; 347 result = static_cast<AllocAlignmentInteger*>(result) + 1; 348 return result; 349 } else { 350 return fastMalloc(n); 351 } 352 #else 353 return realloc(p, n); 354 #endif 355 } 356 357 void* fastRealloc(void* p, size_t n) 358 { 359 ASSERT(!isForbidden()); 360 361 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 362 TryMallocReturnValue returnValue = tryFastRealloc(p, n); 363 void* result; 364 if (!returnValue.getValue(result)) 365 CRASH(); 366 #else 367 void* result = realloc(p, n); 368 #endif 369 370 if (!result) 371 CRASH(); 372 return result; 373 } 374 375 void releaseFastMallocFreeMemory() { } 376 377 FastMallocStatistics fastMallocStatistics() 378 { 379 FastMallocStatistics statistics = { 0, 0, 0 }; 380 return statistics; 381 } 382 383 size_t fastMallocSize(const void* p) 384 { 385 #if OS(DARWIN) 386 return malloc_size(p); 387 #elif OS(WINDOWS) && !PLATFORM(BREWMP) 388 // Brew MP uses its own memory allocator, so _msize does not work on the Brew MP simulator. 389 return _msize(const_cast<void*>(p)); 390 #else 391 return 1; 392 #endif 393 } 394 395 } // namespace WTF 396 397 #if OS(DARWIN) 398 // This symbol is present in the JavaScriptCore exports file even when FastMalloc is disabled. 399 // It will never be used in this case, so it's type and value are less interesting than its presence. 400 extern "C" const int jscore_fastmalloc_introspection = 0; 401 #endif 402 403 #else // FORCE_SYSTEM_MALLOC 404 405 #if HAVE(STDINT_H) 406 #include <stdint.h> 407 #elif HAVE(INTTYPES_H) 408 #include <inttypes.h> 409 #else 410 #include <sys/types.h> 411 #endif 412 413 #include "AlwaysInline.h" 414 #include "Assertions.h" 415 #include "TCPackedCache.h" 416 #include "TCPageMap.h" 417 #include "TCSpinLock.h" 418 #include "TCSystemAlloc.h" 419 #include <algorithm> 420 #include <limits> 421 #include <pthread.h> 422 #include <stdarg.h> 423 #include <stddef.h> 424 #include <stdio.h> 425 #if HAVE(ERRNO_H) 426 #include <errno.h> 427 #endif 428 #if OS(UNIX) 429 #include <unistd.h> 430 #endif 431 #if OS(WINDOWS) 432 #ifndef WIN32_LEAN_AND_MEAN 433 #define WIN32_LEAN_AND_MEAN 434 #endif 435 #include <windows.h> 436 #endif 437 438 #ifdef WTF_CHANGES 439 440 #if OS(DARWIN) 441 #include "MallocZoneSupport.h" 442 #include <wtf/HashSet.h> 443 #include <wtf/Vector.h> 444 #endif 445 446 #if HAVE(HEADER_DETECTION_H) 447 #include "HeaderDetection.h" 448 #endif 449 450 #if HAVE(DISPATCH_H) 451 #include <dispatch/dispatch.h> 452 #endif 453 454 #if HAVE(PTHREAD_MACHDEP_H) 455 #include <System/pthread_machdep.h> 456 457 #if defined(__PTK_FRAMEWORK_JAVASCRIPTCORE_KEY0) 458 #define WTF_USE_PTHREAD_GETSPECIFIC_DIRECT 1 459 #endif 460 #endif 461 462 #ifndef PRIuS 463 #define PRIuS "zu" 464 #endif 465 466 // Calling pthread_getspecific through a global function pointer is faster than a normal 467 // call to the function on Mac OS X, and it's used in performance-critical code. So we 468 // use a function pointer. But that's not necessarily faster on other platforms, and we had 469 // problems with this technique on Windows, so we'll do this only on Mac OS X. 
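// Illustrative only: with the macros defined just below, an existing call site such as
//
//     void* heap = pthread_getspecific(heap_key); // heap_key is a hypothetical key
//
// is rewritten by the preprocessor into an indirect call through
// pthread_getspecific_function_pointer (or into _pthread_getspecific_direct(heap_key)
// when the reserved JavaScriptCore key is available), with no change to the call site itself.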
470 #if OS(DARWIN) 471 #if !USE(PTHREAD_GETSPECIFIC_DIRECT) 472 static void* (*pthread_getspecific_function_pointer)(pthread_key_t) = pthread_getspecific; 473 #define pthread_getspecific(key) pthread_getspecific_function_pointer(key) 474 #else 475 #define pthread_getspecific(key) _pthread_getspecific_direct(key) 476 #define pthread_setspecific(key, val) _pthread_setspecific_direct(key, (val)) 477 #endif 478 #endif 479 480 #define DEFINE_VARIABLE(type, name, value, meaning) \ 481 namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \ 482 type FLAGS_##name(value); \ 483 char FLAGS_no##name; \ 484 } \ 485 using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name 486 487 #define DEFINE_int64(name, value, meaning) \ 488 DEFINE_VARIABLE(int64_t, name, value, meaning) 489 490 #define DEFINE_double(name, value, meaning) \ 491 DEFINE_VARIABLE(double, name, value, meaning) 492 493 namespace WTF { 494 495 #define malloc fastMalloc 496 #define calloc fastCalloc 497 #define free fastFree 498 #define realloc fastRealloc 499 500 #define MESSAGE LOG_ERROR 501 #define CHECK_CONDITION ASSERT 502 503 #if OS(DARWIN) 504 struct Span; 505 class TCMalloc_Central_FreeListPadded; 506 class TCMalloc_PageHeap; 507 class TCMalloc_ThreadCache; 508 template <typename T> class PageHeapAllocator; 509 510 class FastMallocZone { 511 public: 512 static void init(); 513 514 static kern_return_t enumerate(task_t, void*, unsigned typeMmask, vm_address_t zoneAddress, memory_reader_t, vm_range_recorder_t); 515 static size_t goodSize(malloc_zone_t*, size_t size) { return size; } 516 static boolean_t check(malloc_zone_t*) { return true; } 517 static void print(malloc_zone_t*, boolean_t) { } 518 static void log(malloc_zone_t*, void*) { } 519 static void forceLock(malloc_zone_t*) { } 520 static void forceUnlock(malloc_zone_t*) { } 521 static void statistics(malloc_zone_t*, malloc_statistics_t* stats) { memset(stats, 0, sizeof(malloc_statistics_t)); } 522 523 private: 524 FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*, PageHeapAllocator<Span>*, PageHeapAllocator<TCMalloc_ThreadCache>*); 525 static size_t size(malloc_zone_t*, const void*); 526 static void* zoneMalloc(malloc_zone_t*, size_t); 527 static void* zoneCalloc(malloc_zone_t*, size_t numItems, size_t size); 528 static void zoneFree(malloc_zone_t*, void*); 529 static void* zoneRealloc(malloc_zone_t*, void*, size_t); 530 static void* zoneValloc(malloc_zone_t*, size_t) { LOG_ERROR("valloc is not supported"); return 0; } 531 static void zoneDestroy(malloc_zone_t*) { } 532 533 malloc_zone_t m_zone; 534 TCMalloc_PageHeap* m_pageHeap; 535 TCMalloc_ThreadCache** m_threadHeaps; 536 TCMalloc_Central_FreeListPadded* m_centralCaches; 537 PageHeapAllocator<Span>* m_spanAllocator; 538 PageHeapAllocator<TCMalloc_ThreadCache>* m_pageHeapAllocator; 539 }; 540 541 #endif 542 543 #endif 544 545 #ifndef WTF_CHANGES 546 // This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if 547 // you're porting to a system where you really can't get a stacktrace. 548 #ifdef NO_TCMALLOC_SAMPLES 549 // We use #define so code compiles even if you #include stacktrace.h somehow. 550 # define GetStackTrace(stack, depth, skip) (0) 551 #else 552 # include <google/stacktrace.h> 553 #endif 554 #endif 555 556 // Even if we have support for thread-local storage in the compiler 557 // and linker, the OS may not support it. We need to check that at 558 // runtime. Right now, we have to keep a manual set of "bad" OSes. 
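// For example (illustrative of the check below): a kernel that reports itself as
// Linux "2.4.20" is classified as lacking TLS support, while "2.6.0" and later
// releases, and any non-Linux kernel, are optimistically assumed to support it.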
#if defined(HAVE_TLS)
  static bool kernel_supports_tls = false;      // be conservative
  static inline bool KernelSupportsTLS() {
    return kernel_supports_tls;
  }
# if !HAVE_DECL_UNAME   // if too old for uname, probably too old for TLS
    static void CheckIfKernelSupportsTLS() {
      kernel_supports_tls = false;
    }
# else
#   include <sys/utsname.h>    // DECL_UNAME checked for <sys/utsname.h> too
    static void CheckIfKernelSupportsTLS() {
      struct utsname buf;
      if (uname(&buf) != 0) {   // should be impossible
        MESSAGE("uname failed assuming no TLS support (errno=%d)\n", errno);
        kernel_supports_tls = false;
      } else if (strcasecmp(buf.sysname, "linux") == 0) {
        // The linux case: the first kernel to support TLS was 2.6.0
        if (buf.release[0] < '2' && buf.release[1] == '.') // 0.x or 1.x
          kernel_supports_tls = false;
        else if (buf.release[0] == '2' && buf.release[1] == '.' &&
                 buf.release[2] >= '0' && buf.release[2] < '6' &&
                 buf.release[3] == '.')                       // 2.0 - 2.5
          kernel_supports_tls = false;
        else
          kernel_supports_tls = true;
      } else {        // some other kernel, we'll be optimistic
        kernel_supports_tls = true;
      }
      // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
    }
# endif  // HAVE_DECL_UNAME
#endif   // HAVE_TLS

// __THROW is defined in glibc systems. It means, counter-intuitively,
// "This function will never throw an exception." It's an optional
// optimization tool, but we may need to use it to match glibc prototypes.
#ifndef __THROW    // I guess we're not on a glibc system
# define __THROW   // __THROW is just an optimization, so ok to make it ""
#endif

//-------------------------------------------------------------------
// Configuration
//-------------------------------------------------------------------

// Not all possible combinations of the following parameters make
// sense. In particular, if kMaxSize increases, you may have to
// increase kNumClasses as well.
static const size_t kPageShift = 12;
static const size_t kPageSize = 1 << kPageShift;
static const size_t kMaxSize = 8u * kPageSize;
static const size_t kAlignShift = 3;
static const size_t kAlignment = 1 << kAlignShift;
static const size_t kNumClasses = 68;

// Allocates a big block of memory for the pagemap once we reach more than
// 128MB
static const size_t kPageMapBigAllocationThreshold = 128 << 20;

// Minimum number of pages to fetch from system at a time. Must be
// significantly bigger than kPageSize to amortize system-call
// overhead, and also to reduce external fragmentation. Also, we
// should keep this value big because various incarnations of Linux
// have small limits on the number of mmap() regions per
// address-space.
static const size_t kMinSystemAlloc = 1 << (20 - kPageShift);

// Number of objects to move between a per-thread list and a central
// list in one shot. We want this to be not too small so we can
// amortize the lock overhead for accessing the central list. Making
// it too big may temporarily cause unnecessary memory wastage in the
// per-thread free list until the scavenger cleans up the list.
static int num_objects_to_move[kNumClasses];

// Maximum length we allow a per-thread free-list to have before we
// move objects from it into the corresponding central free-list.
We 635 // want this big to avoid locking the central free-list too often. It 636 // should not hurt to make this list somewhat big because the 637 // scavenging code will shrink it down when its contents are not in use. 638 static const int kMaxFreeListLength = 256; 639 640 // Lower and upper bounds on the per-thread cache sizes 641 static const size_t kMinThreadCacheSize = kMaxSize * 2; 642 static const size_t kMaxThreadCacheSize = 2 << 20; 643 644 // Default bound on the total amount of thread caches 645 static const size_t kDefaultOverallThreadCacheSize = 16 << 20; 646 647 // For all span-lengths < kMaxPages we keep an exact-size list. 648 // REQUIRED: kMaxPages >= kMinSystemAlloc; 649 static const size_t kMaxPages = kMinSystemAlloc; 650 651 /* The smallest prime > 2^n */ 652 static int primes_list[] = { 653 // Small values might cause high rates of sampling 654 // and hence commented out. 655 // 2, 5, 11, 17, 37, 67, 131, 257, 656 // 521, 1031, 2053, 4099, 8209, 16411, 657 32771, 65537, 131101, 262147, 524309, 1048583, 658 2097169, 4194319, 8388617, 16777259, 33554467 }; 659 660 // Twice the approximate gap between sampling actions. 661 // I.e., we take one sample approximately once every 662 // tcmalloc_sample_parameter/2 663 // bytes of allocation, i.e., ~ once every 128KB. 664 // Must be a prime number. 665 #ifdef NO_TCMALLOC_SAMPLES 666 DEFINE_int64(tcmalloc_sample_parameter, 0, 667 "Unused: code is compiled with NO_TCMALLOC_SAMPLES"); 668 static size_t sample_period = 0; 669 #else 670 DEFINE_int64(tcmalloc_sample_parameter, 262147, 671 "Twice the approximate gap between sampling actions." 672 " Must be a prime number. Otherwise will be rounded up to a " 673 " larger prime number"); 674 static size_t sample_period = 262147; 675 #endif 676 677 // Protects sample_period above 678 static SpinLock sample_period_lock = SPINLOCK_INITIALIZER; 679 680 // Parameters for controlling how fast memory is returned to the OS. 681 682 DEFINE_double(tcmalloc_release_rate, 1, 683 "Rate at which we release unused memory to the system. " 684 "Zero means we never release memory back to the system. " 685 "Increase this flag to return memory faster; decrease it " 686 "to return memory slower. Reasonable rates are in the " 687 "range [0,10]"); 688 689 //------------------------------------------------------------------- 690 // Mapping from size to size_class and vice versa 691 //------------------------------------------------------------------- 692 693 // Sizes <= 1024 have an alignment >= 8. So for such sizes we have an 694 // array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128. 695 // So for these larger sizes we have an array indexed by ceil(size/128). 696 // 697 // We flatten both logical arrays into one physical array and use 698 // arithmetic to compute an appropriate index. The constants used by 699 // ClassIndex() were selected to make the flattening work. 700 // 701 // Examples: 702 // Size Expression Index 703 // ------------------------------------------------------- 704 // 0 (0 + 7) / 8 0 705 // 1 (1 + 7) / 8 1 706 // ... 707 // 1024 (1024 + 7) / 8 128 708 // 1025 (1025 + 127 + (120<<7)) / 128 129 709 // ... 
710 // 32768 (32768 + 127 + (120<<7)) / 128 376 711 static const size_t kMaxSmallSize = 1024; 712 static const int shift_amount[2] = { 3, 7 }; // For divides by 8 or 128 713 static const int add_amount[2] = { 7, 127 + (120 << 7) }; 714 static unsigned char class_array[377]; 715 716 // Compute index of the class_array[] entry for a given size 717 static inline int ClassIndex(size_t s) { 718 const int i = (s > kMaxSmallSize); 719 return static_cast<int>((s + add_amount[i]) >> shift_amount[i]); 720 } 721 722 // Mapping from size class to max size storable in that class 723 static size_t class_to_size[kNumClasses]; 724 725 // Mapping from size class to number of pages to allocate at a time 726 static size_t class_to_pages[kNumClasses]; 727 728 // TransferCache is used to cache transfers of num_objects_to_move[size_class] 729 // back and forth between thread caches and the central cache for a given size 730 // class. 731 struct TCEntry { 732 void *head; // Head of chain of objects. 733 void *tail; // Tail of chain of objects. 734 }; 735 // A central cache freelist can have anywhere from 0 to kNumTransferEntries 736 // slots to put link list chains into. To keep memory usage bounded the total 737 // number of TCEntries across size classes is fixed. Currently each size 738 // class is initially given one TCEntry which also means that the maximum any 739 // one class can have is kNumClasses. 740 static const int kNumTransferEntries = kNumClasses; 741 742 // Note: the following only works for "n"s that fit in 32-bits, but 743 // that is fine since we only use it for small sizes. 744 static inline int LgFloor(size_t n) { 745 int log = 0; 746 for (int i = 4; i >= 0; --i) { 747 int shift = (1 << i); 748 size_t x = n >> shift; 749 if (x != 0) { 750 n = x; 751 log += shift; 752 } 753 } 754 ASSERT(n == 1); 755 return log; 756 } 757 758 // Some very basic linked list functions for dealing with using void * as 759 // storage. 760 761 static inline void *SLL_Next(void *t) { 762 return *(reinterpret_cast<void**>(t)); 763 } 764 765 static inline void SLL_SetNext(void *t, void *n) { 766 *(reinterpret_cast<void**>(t)) = n; 767 } 768 769 static inline void SLL_Push(void **list, void *element) { 770 SLL_SetNext(element, *list); 771 *list = element; 772 } 773 774 static inline void *SLL_Pop(void **list) { 775 void *result = *list; 776 *list = SLL_Next(*list); 777 return result; 778 } 779 780 781 // Remove N elements from a linked list to which head points. head will be 782 // modified to point to the new head. start and end will point to the first 783 // and last nodes of the range. Note that end will point to NULL after this 784 // function is called. 785 static inline void SLL_PopRange(void **head, int N, void **start, void **end) { 786 if (N == 0) { 787 *start = NULL; 788 *end = NULL; 789 return; 790 } 791 792 void *tmp = *head; 793 for (int i = 1; i < N; ++i) { 794 tmp = SLL_Next(tmp); 795 } 796 797 *start = *head; 798 *end = tmp; 799 *head = SLL_Next(tmp); 800 // Unlink range from list. 801 SLL_SetNext(tmp, NULL); 802 } 803 804 static inline void SLL_PushRange(void **head, void *start, void *end) { 805 if (!start) return; 806 SLL_SetNext(end, *head); 807 *head = start; 808 } 809 810 static inline size_t SLL_Size(void *head) { 811 int count = 0; 812 while (head) { 813 count++; 814 head = SLL_Next(head); 815 } 816 return count; 817 } 818 819 // Setup helper functions. 
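// Illustrative only: a minimal sanity-check sketch (not compiled) for the
// ClassIndex() flattening described above. checkClassIndexExamples() is a
// hypothetical helper, not part of this allocator.
#if 0
static void checkClassIndexExamples()
{
    ASSERT(ClassIndex(0) == 0);
    ASSERT(ClassIndex(1) == 1);
    ASSERT(ClassIndex(kMaxSmallSize) == 128);     // 1024: last 8-byte-granularity slot
    ASSERT(ClassIndex(kMaxSmallSize + 1) == 129); // 1025: first 128-byte-granularity slot
    ASSERT(ClassIndex(kMaxSize) == 376);          // 32768: last slot of class_array[377]
}
#endif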
820 821 static ALWAYS_INLINE size_t SizeClass(size_t size) { 822 return class_array[ClassIndex(size)]; 823 } 824 825 // Get the byte-size for a specified class 826 static ALWAYS_INLINE size_t ByteSizeForClass(size_t cl) { 827 return class_to_size[cl]; 828 } 829 static int NumMoveSize(size_t size) { 830 if (size == 0) return 0; 831 // Use approx 64k transfers between thread and central caches. 832 int num = static_cast<int>(64.0 * 1024.0 / size); 833 if (num < 2) num = 2; 834 // Clamp well below kMaxFreeListLength to avoid ping pong between central 835 // and thread caches. 836 if (num > static_cast<int>(0.8 * kMaxFreeListLength)) 837 num = static_cast<int>(0.8 * kMaxFreeListLength); 838 839 // Also, avoid bringing in too many objects into small object free 840 // lists. There are lots of such lists, and if we allow each one to 841 // fetch too many at a time, we end up having to scavenge too often 842 // (especially when there are lots of threads and each thread gets a 843 // small allowance for its thread cache). 844 // 845 // TODO: Make thread cache free list sizes dynamic so that we do not 846 // have to equally divide a fixed resource amongst lots of threads. 847 if (num > 32) num = 32; 848 849 return num; 850 } 851 852 // Initialize the mapping arrays 853 static void InitSizeClasses() { 854 // Do some sanity checking on add_amount[]/shift_amount[]/class_array[] 855 if (ClassIndex(0) < 0) { 856 MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0)); 857 CRASH(); 858 } 859 if (static_cast<size_t>(ClassIndex(kMaxSize)) >= sizeof(class_array)) { 860 MESSAGE("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize)); 861 CRASH(); 862 } 863 864 // Compute the size classes we want to use 865 size_t sc = 1; // Next size class to assign 866 unsigned char alignshift = kAlignShift; 867 int last_lg = -1; 868 for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) { 869 int lg = LgFloor(size); 870 if (lg > last_lg) { 871 // Increase alignment every so often. 872 // 873 // Since we double the alignment every time size doubles and 874 // size >= 128, this means that space wasted due to alignment is 875 // at most 16/128 i.e., 12.5%. Plus we cap the alignment at 256 876 // bytes, so the space wasted as a percentage starts falling for 877 // sizes > 2K. 878 if ((lg >= 7) && (alignshift < 8)) { 879 alignshift++; 880 } 881 last_lg = lg; 882 } 883 884 // Allocate enough pages so leftover is less than 1/8 of total. 885 // This bounds wasted space to at most 12.5%. 886 size_t psize = kPageSize; 887 while ((psize % size) > (psize >> 3)) { 888 psize += kPageSize; 889 } 890 const size_t my_pages = psize >> kPageShift; 891 892 if (sc > 1 && my_pages == class_to_pages[sc-1]) { 893 // See if we can merge this into the previous class without 894 // increasing the fragmentation of the previous class. 
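// (In other words: if this size needs the same number of pages per span as the
// previous class and packs the same number of objects into such a span, fold it
// into the previous class by raising class_to_size[sc-1] instead of adding a
// new class.)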
895 const size_t my_objects = (my_pages << kPageShift) / size; 896 const size_t prev_objects = (class_to_pages[sc-1] << kPageShift) 897 / class_to_size[sc-1]; 898 if (my_objects == prev_objects) { 899 // Adjust last class to include this size 900 class_to_size[sc-1] = size; 901 continue; 902 } 903 } 904 905 // Add new class 906 class_to_pages[sc] = my_pages; 907 class_to_size[sc] = size; 908 sc++; 909 } 910 if (sc != kNumClasses) { 911 MESSAGE("wrong number of size classes: found %" PRIuS " instead of %d\n", 912 sc, int(kNumClasses)); 913 CRASH(); 914 } 915 916 // Initialize the mapping arrays 917 int next_size = 0; 918 for (unsigned char c = 1; c < kNumClasses; c++) { 919 const size_t max_size_in_class = class_to_size[c]; 920 for (size_t s = next_size; s <= max_size_in_class; s += kAlignment) { 921 class_array[ClassIndex(s)] = c; 922 } 923 next_size = static_cast<int>(max_size_in_class + kAlignment); 924 } 925 926 // Double-check sizes just to be safe 927 for (size_t size = 0; size <= kMaxSize; size++) { 928 const size_t sc = SizeClass(size); 929 if (sc == 0) { 930 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size); 931 CRASH(); 932 } 933 if (sc > 1 && size <= class_to_size[sc-1]) { 934 MESSAGE("Allocating unnecessarily large class %" PRIuS " for %" PRIuS 935 "\n", sc, size); 936 CRASH(); 937 } 938 if (sc >= kNumClasses) { 939 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size); 940 CRASH(); 941 } 942 const size_t s = class_to_size[sc]; 943 if (size > s) { 944 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc); 945 CRASH(); 946 } 947 if (s == 0) { 948 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc); 949 CRASH(); 950 } 951 } 952 953 // Initialize the num_objects_to_move array. 954 for (size_t cl = 1; cl < kNumClasses; ++cl) { 955 num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl)); 956 } 957 958 #ifndef WTF_CHANGES 959 if (false) { 960 // Dump class sizes and maximum external wastage per size class 961 for (size_t cl = 1; cl < kNumClasses; ++cl) { 962 const int alloc_size = class_to_pages[cl] << kPageShift; 963 const int alloc_objs = alloc_size / class_to_size[cl]; 964 const int min_used = (class_to_size[cl-1] + 1) * alloc_objs; 965 const int max_waste = alloc_size - min_used; 966 MESSAGE("SC %3d [ %8d .. %8d ] from %8d ; %2.0f%% maxwaste\n", 967 int(cl), 968 int(class_to_size[cl-1] + 1), 969 int(class_to_size[cl]), 970 int(class_to_pages[cl] << kPageShift), 971 max_waste * 100.0 / alloc_size 972 ); 973 } 974 } 975 #endif 976 } 977 978 // ------------------------------------------------------------------------- 979 // Simple allocator for objects of a specified type. External locking 980 // is required before accessing one of these objects. 
981 // ------------------------------------------------------------------------- 982 983 // Metadata allocator -- keeps stats about how many bytes allocated 984 static uint64_t metadata_system_bytes = 0; 985 static void* MetaDataAlloc(size_t bytes) { 986 void* result = TCMalloc_SystemAlloc(bytes, 0); 987 if (result != NULL) { 988 metadata_system_bytes += bytes; 989 } 990 return result; 991 } 992 993 template <class T> 994 class PageHeapAllocator { 995 private: 996 // How much to allocate from system at a time 997 static const size_t kAllocIncrement = 32 << 10; 998 999 // Aligned size of T 1000 static const size_t kAlignedSize 1001 = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment); 1002 1003 // Free area from which to carve new objects 1004 char* free_area_; 1005 size_t free_avail_; 1006 1007 // Linked list of all regions allocated by this allocator 1008 void* allocated_regions_; 1009 1010 // Free list of already carved objects 1011 void* free_list_; 1012 1013 // Number of allocated but unfreed objects 1014 int inuse_; 1015 1016 public: 1017 void Init() { 1018 ASSERT(kAlignedSize <= kAllocIncrement); 1019 inuse_ = 0; 1020 allocated_regions_ = 0; 1021 free_area_ = NULL; 1022 free_avail_ = 0; 1023 free_list_ = NULL; 1024 } 1025 1026 T* New() { 1027 // Consult free list 1028 void* result; 1029 if (free_list_ != NULL) { 1030 result = free_list_; 1031 free_list_ = *(reinterpret_cast<void**>(result)); 1032 } else { 1033 if (free_avail_ < kAlignedSize) { 1034 // Need more room 1035 char* new_allocation = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement)); 1036 if (!new_allocation) 1037 CRASH(); 1038 1039 *reinterpret_cast_ptr<void**>(new_allocation) = allocated_regions_; 1040 allocated_regions_ = new_allocation; 1041 free_area_ = new_allocation + kAlignedSize; 1042 free_avail_ = kAllocIncrement - kAlignedSize; 1043 } 1044 result = free_area_; 1045 free_area_ += kAlignedSize; 1046 free_avail_ -= kAlignedSize; 1047 } 1048 inuse_++; 1049 return reinterpret_cast<T*>(result); 1050 } 1051 1052 void Delete(T* p) { 1053 *(reinterpret_cast<void**>(p)) = free_list_; 1054 free_list_ = p; 1055 inuse_--; 1056 } 1057 1058 int inuse() const { return inuse_; } 1059 1060 #if defined(WTF_CHANGES) && OS(DARWIN) 1061 template <class Recorder> 1062 void recordAdministrativeRegions(Recorder& recorder, const RemoteMemoryReader& reader) 1063 { 1064 for (void* adminAllocation = allocated_regions_; adminAllocation; adminAllocation = reader.nextEntryInLinkedList(reinterpret_cast<void**>(adminAllocation))) 1065 recorder.recordRegion(reinterpret_cast<vm_address_t>(adminAllocation), kAllocIncrement); 1066 } 1067 #endif 1068 }; 1069 1070 // ------------------------------------------------------------------------- 1071 // Span - a contiguous run of pages 1072 // ------------------------------------------------------------------------- 1073 1074 // Type that can hold a page number 1075 typedef uintptr_t PageID; 1076 1077 // Type that can hold the length of a run of pages 1078 typedef uintptr_t Length; 1079 1080 static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift; 1081 1082 // Convert byte size into pages. This won't overflow, but may return 1083 // an unreasonably large value if bytes is huge enough. 1084 static inline Length pages(size_t bytes) { 1085 return (bytes >> kPageShift) + 1086 ((bytes & (kPageSize - 1)) > 0 ? 
      1 : 0);
}

// Convert a user size into the number of bytes that will actually be
// allocated
static size_t AllocationSize(size_t bytes) {
  if (bytes > kMaxSize) {
    // Large object: we allocate an integral number of pages
    ASSERT(bytes <= (kMaxValidPages << kPageShift));
    return pages(bytes) << kPageShift;
  } else {
    // Small object: find the size class to which it belongs
    return ByteSizeForClass(SizeClass(bytes));
  }
}

// Information kept for a span (a contiguous run of pages).
struct Span {
  PageID        start;          // Starting page number
  Length        length;         // Number of pages in span
  Span*         next;           // Used when in linked list
  Span*         prev;           // Used when in linked list
  void*         objects;        // Linked list of free objects
  unsigned int  free : 1;       // Is the span free
#ifndef NO_TCMALLOC_SAMPLES
  unsigned int  sample : 1;     // Sampled object?
#endif
  unsigned int  sizeclass : 8;  // Size-class for small objects (or 0)
  unsigned int  refcount : 11;  // Number of non-free objects
  bool decommitted : 1;

#undef SPAN_HISTORY
#ifdef SPAN_HISTORY
  // For debugging, we can keep a log of events per span
  int nexthistory;
  char history[64];
  int value[64];
#endif
};

#define ASSERT_SPAN_COMMITTED(span) ASSERT(!span->decommitted)

#ifdef SPAN_HISTORY
void Event(Span* span, char op, int v = 0) {
  span->history[span->nexthistory] = op;
  span->value[span->nexthistory] = v;
  span->nexthistory++;
  if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0;
}
#else
#define Event(s,o,v) ((void) 0)
#endif

// Allocator/deallocator for spans
static PageHeapAllocator<Span> span_allocator;
static Span* NewSpan(PageID p, Length len) {
  Span* result = span_allocator.New();
  memset(result, 0, sizeof(*result));
  result->start = p;
  result->length = len;
#ifdef SPAN_HISTORY
  result->nexthistory = 0;
#endif
  return result;
}

static inline void DeleteSpan(Span* span) {
#ifndef NDEBUG
  // In debug mode, trash the contents of deleted Spans
  memset(span, 0x3f, sizeof(*span));
#endif
  span_allocator.Delete(span);
}

// -------------------------------------------------------------------------
// Doubly linked list of spans.
1162 // ------------------------------------------------------------------------- 1163 1164 static inline void DLL_Init(Span* list) { 1165 list->next = list; 1166 list->prev = list; 1167 } 1168 1169 static inline void DLL_Remove(Span* span) { 1170 span->prev->next = span->next; 1171 span->next->prev = span->prev; 1172 span->prev = NULL; 1173 span->next = NULL; 1174 } 1175 1176 static ALWAYS_INLINE bool DLL_IsEmpty(const Span* list) { 1177 return list->next == list; 1178 } 1179 1180 static int DLL_Length(const Span* list) { 1181 int result = 0; 1182 for (Span* s = list->next; s != list; s = s->next) { 1183 result++; 1184 } 1185 return result; 1186 } 1187 1188 #if 0 /* Not needed at the moment -- causes compiler warnings if not used */ 1189 static void DLL_Print(const char* label, const Span* list) { 1190 MESSAGE("%-10s %p:", label, list); 1191 for (const Span* s = list->next; s != list; s = s->next) { 1192 MESSAGE(" <%p,%u,%u>", s, s->start, s->length); 1193 } 1194 MESSAGE("\n"); 1195 } 1196 #endif 1197 1198 static inline void DLL_Prepend(Span* list, Span* span) { 1199 ASSERT(span->next == NULL); 1200 ASSERT(span->prev == NULL); 1201 span->next = list->next; 1202 span->prev = list; 1203 list->next->prev = span; 1204 list->next = span; 1205 } 1206 1207 // ------------------------------------------------------------------------- 1208 // Stack traces kept for sampled allocations 1209 // The following state is protected by pageheap_lock_. 1210 // ------------------------------------------------------------------------- 1211 1212 // size/depth are made the same size as a pointer so that some generic 1213 // code below can conveniently cast them back and forth to void*. 1214 static const int kMaxStackDepth = 31; 1215 struct StackTrace { 1216 uintptr_t size; // Size of object 1217 uintptr_t depth; // Number of PC values stored in array below 1218 void* stack[kMaxStackDepth]; 1219 }; 1220 static PageHeapAllocator<StackTrace> stacktrace_allocator; 1221 static Span sampled_objects; 1222 1223 // ------------------------------------------------------------------------- 1224 // Map from page-id to per-page data 1225 // ------------------------------------------------------------------------- 1226 1227 // We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines. 1228 // We also use a simple one-level cache for hot PageID-to-sizeclass mappings, 1229 // because sometimes the sizeclass is all the information we need. 1230 1231 // Selector class -- general selector uses 3-level map 1232 template <int BITS> class MapSelector { 1233 public: 1234 typedef TCMalloc_PageMap3<BITS-kPageShift> Type; 1235 typedef PackedCache<BITS, uint64_t> CacheType; 1236 }; 1237 1238 #if defined(WTF_CHANGES) 1239 #if CPU(X86_64) 1240 // On all known X86-64 platforms, the upper 16 bits are always unused and therefore 1241 // can be excluded from the PageMap key. 
1242 // See http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details 1243 1244 static const size_t kBitsUnusedOn64Bit = 16; 1245 #else 1246 static const size_t kBitsUnusedOn64Bit = 0; 1247 #endif 1248 1249 // A three-level map for 64-bit machines 1250 template <> class MapSelector<64> { 1251 public: 1252 typedef TCMalloc_PageMap3<64 - kPageShift - kBitsUnusedOn64Bit> Type; 1253 typedef PackedCache<64, uint64_t> CacheType; 1254 }; 1255 #endif 1256 1257 // A two-level map for 32-bit machines 1258 template <> class MapSelector<32> { 1259 public: 1260 typedef TCMalloc_PageMap2<32 - kPageShift> Type; 1261 typedef PackedCache<32 - kPageShift, uint16_t> CacheType; 1262 }; 1263 1264 // ------------------------------------------------------------------------- 1265 // Page-level allocator 1266 // * Eager coalescing 1267 // 1268 // Heap for page-level allocation. We allow allocating and freeing a 1269 // contiguous runs of pages (called a "span"). 1270 // ------------------------------------------------------------------------- 1271 1272 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1273 // The page heap maintains a free list for spans that are no longer in use by 1274 // the central cache or any thread caches. We use a background thread to 1275 // periodically scan the free list and release a percentage of it back to the OS. 1276 1277 // If free_committed_pages_ exceeds kMinimumFreeCommittedPageCount, the 1278 // background thread: 1279 // - wakes up 1280 // - pauses for kScavengeDelayInSeconds 1281 // - returns to the OS a percentage of the memory that remained unused during 1282 // that pause (kScavengePercentage * min_free_committed_pages_since_last_scavenge_) 1283 // The goal of this strategy is to reduce memory pressure in a timely fashion 1284 // while avoiding thrashing the OS allocator. 1285 1286 // Time delay before the page heap scavenger will consider returning pages to 1287 // the OS. 1288 static const int kScavengeDelayInSeconds = 2; 1289 1290 // Approximate percentage of free committed pages to return to the OS in one 1291 // scavenge. 1292 static const float kScavengePercentage = .5f; 1293 1294 // number of span lists to keep spans in when memory is returned. 1295 static const int kMinSpanListsWithSpans = 32; 1296 1297 // Number of free committed pages that we want to keep around. The minimum number of pages used when there 1298 // is 1 span in each of the first kMinSpanListsWithSpans spanlists. Currently 528 pages. 1299 static const size_t kMinimumFreeCommittedPageCount = kMinSpanListsWithSpans * ((1.0f+kMinSpanListsWithSpans) / 2.0f); 1300 1301 #endif 1302 1303 class TCMalloc_PageHeap { 1304 public: 1305 void init(); 1306 1307 // Allocate a run of "n" pages. Returns zero if out of memory. 1308 Span* New(Length n); 1309 1310 // Delete the span "[p, p+n-1]". 1311 // REQUIRES: span was returned by earlier call to New() and 1312 // has not yet been deleted. 1313 void Delete(Span* span); 1314 1315 // Mark an allocated span as being used for small objects of the 1316 // specified size-class. 1317 // REQUIRES: span was returned by an earlier call to New() 1318 // and has not yet been deleted. 1319 void RegisterSizeClass(Span* span, size_t sc); 1320 1321 // Split an allocated span into two spans: one of length "n" pages 1322 // followed by another span of length "span->length - n" pages. 1323 // Modifies "*span" to point to the first span of length "n" pages. 1324 // Returns a pointer to the second span. 
1325 // 1326 // REQUIRES: "0 < n < span->length" 1327 // REQUIRES: !span->free 1328 // REQUIRES: span->sizeclass == 0 1329 Span* Split(Span* span, Length n); 1330 1331 // Return the descriptor for the specified page. 1332 inline Span* GetDescriptor(PageID p) const { 1333 return reinterpret_cast<Span*>(pagemap_.get(p)); 1334 } 1335 1336 #ifdef WTF_CHANGES 1337 inline Span* GetDescriptorEnsureSafe(PageID p) 1338 { 1339 pagemap_.Ensure(p, 1); 1340 return GetDescriptor(p); 1341 } 1342 1343 size_t ReturnedBytes() const; 1344 #endif 1345 1346 // Dump state to stderr 1347 #ifndef WTF_CHANGES 1348 void Dump(TCMalloc_Printer* out); 1349 #endif 1350 1351 // Return number of bytes allocated from system 1352 inline uint64_t SystemBytes() const { return system_bytes_; } 1353 1354 // Return number of free bytes in heap 1355 uint64_t FreeBytes() const { 1356 return (static_cast<uint64_t>(free_pages_) << kPageShift); 1357 } 1358 1359 bool Check(); 1360 bool CheckList(Span* list, Length min_pages, Length max_pages); 1361 1362 // Release all pages on the free list for reuse by the OS: 1363 void ReleaseFreePages(); 1364 1365 // Return 0 if we have no information, or else the correct sizeclass for p. 1366 // Reads and writes to pagemap_cache_ do not require locking. 1367 // The entries are 64 bits on 64-bit hardware and 16 bits on 1368 // 32-bit hardware, and we don't mind raciness as long as each read of 1369 // an entry yields a valid entry, not a partially updated entry. 1370 size_t GetSizeClassIfCached(PageID p) const { 1371 return pagemap_cache_.GetOrDefault(p, 0); 1372 } 1373 void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); } 1374 1375 private: 1376 // Pick the appropriate map and cache types based on pointer size 1377 typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap; 1378 typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache; 1379 PageMap pagemap_; 1380 mutable PageMapCache pagemap_cache_; 1381 1382 // We segregate spans of a given size into two circular linked 1383 // lists: one for normal spans, and one for spans whose memory 1384 // has been returned to the system. 1385 struct SpanList { 1386 Span normal; 1387 Span returned; 1388 }; 1389 1390 // List of free spans of length >= kMaxPages 1391 SpanList large_; 1392 1393 // Array mapping from span length to a doubly linked list of free spans 1394 SpanList free_[kMaxPages]; 1395 1396 // Number of pages kept in free lists 1397 uintptr_t free_pages_; 1398 1399 // Bytes allocated from system 1400 uint64_t system_bytes_; 1401 1402 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1403 // Number of pages kept in free lists that are still committed. 1404 Length free_committed_pages_; 1405 1406 // Minimum number of free committed pages since last scavenge. (Can be 0 if 1407 // we've committed new pages since the last scavenge.) 1408 Length min_free_committed_pages_since_last_scavenge_; 1409 #endif 1410 1411 bool GrowHeap(Length n); 1412 1413 // REQUIRES span->length >= n 1414 // Remove span from its free list, and move any leftover part of 1415 // span into appropriate free lists. Also update "span" to have 1416 // length exactly "n" and mark it as non-free so it can be returned 1417 // to the client. 1418 // 1419 // "released" is true iff "span" was found on a "returned" list. 
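// Illustrative example (numbers are hypothetical): carving n = 3 pages out of a
// 10-page free span leaves "span" with length 3 and re-inserts the remaining
// 7-page leftover on the appropriate free list (free_[7] in this case).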
1420 void Carve(Span* span, Length n, bool released); 1421 1422 void RecordSpan(Span* span) { 1423 pagemap_.set(span->start, span); 1424 if (span->length > 1) { 1425 pagemap_.set(span->start + span->length - 1, span); 1426 } 1427 } 1428 1429 // Allocate a large span of length == n. If successful, returns a 1430 // span of exactly the specified length. Else, returns NULL. 1431 Span* AllocLarge(Length n); 1432 1433 #if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1434 // Incrementally release some memory to the system. 1435 // IncrementalScavenge(n) is called whenever n pages are freed. 1436 void IncrementalScavenge(Length n); 1437 #endif 1438 1439 // Number of pages to deallocate before doing more scavenging 1440 int64_t scavenge_counter_; 1441 1442 // Index of last free list we scavenged 1443 size_t scavenge_index_; 1444 1445 #if defined(WTF_CHANGES) && OS(DARWIN) 1446 friend class FastMallocZone; 1447 #endif 1448 1449 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1450 void initializeScavenger(); 1451 ALWAYS_INLINE void signalScavenger(); 1452 void scavenge(); 1453 ALWAYS_INLINE bool shouldScavenge() const; 1454 1455 #if HAVE(DISPATCH_H) || OS(WINDOWS) 1456 void periodicScavenge(); 1457 ALWAYS_INLINE bool isScavengerSuspended(); 1458 ALWAYS_INLINE void scheduleScavenger(); 1459 ALWAYS_INLINE void rescheduleScavenger(); 1460 ALWAYS_INLINE void suspendScavenger(); 1461 #endif 1462 1463 #if HAVE(DISPATCH_H) 1464 dispatch_queue_t m_scavengeQueue; 1465 dispatch_source_t m_scavengeTimer; 1466 bool m_scavengingSuspended; 1467 #elif OS(WINDOWS) 1468 static void CALLBACK scavengerTimerFired(void*, BOOLEAN); 1469 HANDLE m_scavengeQueueTimer; 1470 #else 1471 static NO_RETURN_WITH_VALUE void* runScavengerThread(void*); 1472 NO_RETURN void scavengerThread(); 1473 1474 // Keeps track of whether the background thread is actively scavenging memory every kScavengeDelayInSeconds, or 1475 // it's blocked waiting for more pages to be deleted. 
1476 bool m_scavengeThreadActive; 1477 1478 pthread_mutex_t m_scavengeMutex; 1479 pthread_cond_t m_scavengeCondition; 1480 #endif 1481 1482 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1483 }; 1484 1485 void TCMalloc_PageHeap::init() 1486 { 1487 pagemap_.init(MetaDataAlloc); 1488 pagemap_cache_ = PageMapCache(0); 1489 free_pages_ = 0; 1490 system_bytes_ = 0; 1491 1492 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1493 free_committed_pages_ = 0; 1494 min_free_committed_pages_since_last_scavenge_ = 0; 1495 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1496 1497 scavenge_counter_ = 0; 1498 // Start scavenging at kMaxPages list 1499 scavenge_index_ = kMaxPages-1; 1500 COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits); 1501 DLL_Init(&large_.normal); 1502 DLL_Init(&large_.returned); 1503 for (size_t i = 0; i < kMaxPages; i++) { 1504 DLL_Init(&free_[i].normal); 1505 DLL_Init(&free_[i].returned); 1506 } 1507 1508 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1509 initializeScavenger(); 1510 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1511 } 1512 1513 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1514 1515 #if HAVE(DISPATCH_H) 1516 1517 void TCMalloc_PageHeap::initializeScavenger() 1518 { 1519 m_scavengeQueue = dispatch_queue_create("com.apple.JavaScriptCore.FastMallocSavenger", NULL); 1520 m_scavengeTimer = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, m_scavengeQueue); 1521 dispatch_time_t startTime = dispatch_time(DISPATCH_TIME_NOW, kScavengeDelayInSeconds * NSEC_PER_SEC); 1522 dispatch_source_set_timer(m_scavengeTimer, startTime, kScavengeDelayInSeconds * NSEC_PER_SEC, 1000 * NSEC_PER_USEC); 1523 dispatch_source_set_event_handler(m_scavengeTimer, ^{ periodicScavenge(); }); 1524 m_scavengingSuspended = true; 1525 } 1526 1527 ALWAYS_INLINE bool TCMalloc_PageHeap::isScavengerSuspended() 1528 { 1529 ASSERT(IsHeld(pageheap_lock)); 1530 return m_scavengingSuspended; 1531 } 1532 1533 ALWAYS_INLINE void TCMalloc_PageHeap::scheduleScavenger() 1534 { 1535 ASSERT(IsHeld(pageheap_lock)); 1536 m_scavengingSuspended = false; 1537 dispatch_resume(m_scavengeTimer); 1538 } 1539 1540 ALWAYS_INLINE void TCMalloc_PageHeap::rescheduleScavenger() 1541 { 1542 // Nothing to do here for libdispatch. 1543 } 1544 1545 ALWAYS_INLINE void TCMalloc_PageHeap::suspendScavenger() 1546 { 1547 ASSERT(IsHeld(pageheap_lock)); 1548 m_scavengingSuspended = true; 1549 dispatch_suspend(m_scavengeTimer); 1550 } 1551 1552 #elif OS(WINDOWS) 1553 1554 void TCMalloc_PageHeap::scavengerTimerFired(void* context, BOOLEAN) 1555 { 1556 static_cast<TCMalloc_PageHeap*>(context)->periodicScavenge(); 1557 } 1558 1559 void TCMalloc_PageHeap::initializeScavenger() 1560 { 1561 m_scavengeQueueTimer = 0; 1562 } 1563 1564 ALWAYS_INLINE bool TCMalloc_PageHeap::isScavengerSuspended() 1565 { 1566 ASSERT(IsHeld(pageheap_lock)); 1567 return !m_scavengeQueueTimer; 1568 } 1569 1570 ALWAYS_INLINE void TCMalloc_PageHeap::scheduleScavenger() 1571 { 1572 // We need to use WT_EXECUTEONLYONCE here and reschedule the timer, because 1573 // Windows will fire the timer event even when the function is already running. 1574 ASSERT(IsHeld(pageheap_lock)); 1575 CreateTimerQueueTimer(&m_scavengeQueueTimer, 0, scavengerTimerFired, this, kScavengeDelayInSeconds * 1000, 0, WT_EXECUTEONLYONCE); 1576 } 1577 1578 ALWAYS_INLINE void TCMalloc_PageHeap::rescheduleScavenger() 1579 { 1580 // We must delete the timer and create it again, because it is not possible to retrigger a timer on Windows. 
1581 suspendScavenger(); 1582 scheduleScavenger(); 1583 } 1584 1585 ALWAYS_INLINE void TCMalloc_PageHeap::suspendScavenger() 1586 { 1587 ASSERT(IsHeld(pageheap_lock)); 1588 HANDLE scavengeQueueTimer = m_scavengeQueueTimer; 1589 m_scavengeQueueTimer = 0; 1590 DeleteTimerQueueTimer(0, scavengeQueueTimer, 0); 1591 } 1592 1593 #else 1594 1595 void TCMalloc_PageHeap::initializeScavenger() 1596 { 1597 // Create a non-recursive mutex. 1598 #if !defined(PTHREAD_MUTEX_NORMAL) || PTHREAD_MUTEX_NORMAL == PTHREAD_MUTEX_DEFAULT 1599 pthread_mutex_init(&m_scavengeMutex, 0); 1600 #else 1601 pthread_mutexattr_t attr; 1602 pthread_mutexattr_init(&attr); 1603 pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL); 1604 1605 pthread_mutex_init(&m_scavengeMutex, &attr); 1606 1607 pthread_mutexattr_destroy(&attr); 1608 #endif 1609 1610 pthread_cond_init(&m_scavengeCondition, 0); 1611 m_scavengeThreadActive = true; 1612 pthread_t thread; 1613 pthread_create(&thread, 0, runScavengerThread, this); 1614 } 1615 1616 void* TCMalloc_PageHeap::runScavengerThread(void* context) 1617 { 1618 static_cast<TCMalloc_PageHeap*>(context)->scavengerThread(); 1619 #if (COMPILER(MSVC) || COMPILER(SUNCC)) 1620 // Without this, Visual Studio and Sun Studio will complain that this method does not return a value. 1621 return 0; 1622 #endif 1623 } 1624 1625 ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger() 1626 { 1627 // m_scavengeMutex should be held before accessing m_scavengeThreadActive. 1628 ASSERT(pthread_mutex_trylock(m_scavengeMutex)); 1629 if (!m_scavengeThreadActive && shouldScavenge()) 1630 pthread_cond_signal(&m_scavengeCondition); 1631 } 1632 1633 #endif 1634 1635 void TCMalloc_PageHeap::scavenge() 1636 { 1637 size_t pagesToRelease = min_free_committed_pages_since_last_scavenge_ * kScavengePercentage; 1638 size_t targetPageCount = std::max<size_t>(kMinimumFreeCommittedPageCount, free_committed_pages_ - pagesToRelease); 1639 1640 while (free_committed_pages_ > targetPageCount) { 1641 for (int i = kMaxPages; i > 0 && free_committed_pages_ >= targetPageCount; i--) { 1642 SpanList* slist = (static_cast<size_t>(i) == kMaxPages) ? &large_ : &free_[i]; 1643 // If the span size is bigger than kMinSpanListsWithSpans pages return all the spans in the list, else return all but 1 span. 1644 // Return only 50% of a spanlist at a time so spans of size 1 are not the only ones left. 1645 size_t length = DLL_Length(&slist->normal); 1646 size_t numSpansToReturn = (i > kMinSpanListsWithSpans) ? 
length : length / 2; 1647 for (int j = 0; static_cast<size_t>(j) < numSpansToReturn && !DLL_IsEmpty(&slist->normal) && free_committed_pages_ > targetPageCount; j++) { 1648 Span* s = slist->normal.prev; 1649 DLL_Remove(s); 1650 ASSERT(!s->decommitted); 1651 if (!s->decommitted) { 1652 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), 1653 static_cast<size_t>(s->length << kPageShift)); 1654 ASSERT(free_committed_pages_ >= s->length); 1655 free_committed_pages_ -= s->length; 1656 s->decommitted = true; 1657 } 1658 DLL_Prepend(&slist->returned, s); 1659 } 1660 } 1661 } 1662 1663 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; 1664 } 1665 1666 ALWAYS_INLINE bool TCMalloc_PageHeap::shouldScavenge() const 1667 { 1668 return free_committed_pages_ > kMinimumFreeCommittedPageCount; 1669 } 1670 1671 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1672 1673 inline Span* TCMalloc_PageHeap::New(Length n) { 1674 ASSERT(Check()); 1675 ASSERT(n > 0); 1676 1677 // Find first size >= n that has a non-empty list 1678 for (Length s = n; s < kMaxPages; s++) { 1679 Span* ll = NULL; 1680 bool released = false; 1681 if (!DLL_IsEmpty(&free_[s].normal)) { 1682 // Found normal span 1683 ll = &free_[s].normal; 1684 } else if (!DLL_IsEmpty(&free_[s].returned)) { 1685 // Found returned span; reallocate it 1686 ll = &free_[s].returned; 1687 released = true; 1688 } else { 1689 // Keep looking in larger classes 1690 continue; 1691 } 1692 1693 Span* result = ll->next; 1694 Carve(result, n, released); 1695 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1696 // The newly allocated memory is from a span that's in the normal span list (already committed). Update the 1697 // free committed pages count. 1698 ASSERT(free_committed_pages_ >= n); 1699 free_committed_pages_ -= n; 1700 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_) 1701 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; 1702 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1703 ASSERT(Check()); 1704 free_pages_ -= n; 1705 return result; 1706 } 1707 1708 Span* result = AllocLarge(n); 1709 if (result != NULL) { 1710 ASSERT_SPAN_COMMITTED(result); 1711 return result; 1712 } 1713 1714 // Grow the heap and try again 1715 if (!GrowHeap(n)) { 1716 ASSERT(Check()); 1717 return NULL; 1718 } 1719 1720 return AllocLarge(n); 1721 } 1722 1723 Span* TCMalloc_PageHeap::AllocLarge(Length n) { 1724 // find the best span (closest to n in size). 1725 // The following loops implements address-ordered best-fit. 
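// Illustrative example (hypothetical spans): for n = 4 and candidates of length 6
// at page 100, length 4 at page 900, and length 4 at page 300, best-fit selects
// the length-4 span at page 300: smallest sufficient length first, then lowest
// starting address as the tie-breaker.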
1726 bool from_released = false; 1727 Span *best = NULL; 1728 1729 // Search through normal list 1730 for (Span* span = large_.normal.next; 1731 span != &large_.normal; 1732 span = span->next) { 1733 if (span->length >= n) { 1734 if ((best == NULL) 1735 || (span->length < best->length) 1736 || ((span->length == best->length) && (span->start < best->start))) { 1737 best = span; 1738 from_released = false; 1739 } 1740 } 1741 } 1742 1743 // Search through released list in case it has a better fit 1744 for (Span* span = large_.returned.next; 1745 span != &large_.returned; 1746 span = span->next) { 1747 if (span->length >= n) { 1748 if ((best == NULL) 1749 || (span->length < best->length) 1750 || ((span->length == best->length) && (span->start < best->start))) { 1751 best = span; 1752 from_released = true; 1753 } 1754 } 1755 } 1756 1757 if (best != NULL) { 1758 Carve(best, n, from_released); 1759 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1760 // The newly allocated memory is from a span that's in the normal span list (already committed). Update the 1761 // free committed pages count. 1762 ASSERT(free_committed_pages_ >= n); 1763 free_committed_pages_ -= n; 1764 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_) 1765 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; 1766 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1767 ASSERT(Check()); 1768 free_pages_ -= n; 1769 return best; 1770 } 1771 return NULL; 1772 } 1773 1774 Span* TCMalloc_PageHeap::Split(Span* span, Length n) { 1775 ASSERT(0 < n); 1776 ASSERT(n < span->length); 1777 ASSERT(!span->free); 1778 ASSERT(span->sizeclass == 0); 1779 Event(span, 'T', n); 1780 1781 const Length extra = span->length - n; 1782 Span* leftover = NewSpan(span->start + n, extra); 1783 Event(leftover, 'U', extra); 1784 RecordSpan(leftover); 1785 pagemap_.set(span->start + n - 1, span); // Update map from pageid to span 1786 span->length = n; 1787 1788 return leftover; 1789 } 1790 1791 inline void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) { 1792 ASSERT(n > 0); 1793 DLL_Remove(span); 1794 span->free = 0; 1795 Event(span, 'A', n); 1796 1797 if (released) { 1798 // If the span chosen to carve from is decommited, commit the entire span at once to avoid committing spans 1 page at a time. 1799 ASSERT(span->decommitted); 1800 TCMalloc_SystemCommit(reinterpret_cast<void*>(span->start << kPageShift), static_cast<size_t>(span->length << kPageShift)); 1801 span->decommitted = false; 1802 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1803 free_committed_pages_ += span->length; 1804 #endif 1805 } 1806 1807 const int extra = static_cast<int>(span->length - n); 1808 ASSERT(extra >= 0); 1809 if (extra > 0) { 1810 Span* leftover = NewSpan(span->start + n, extra); 1811 leftover->free = 1; 1812 leftover->decommitted = false; 1813 Event(leftover, 'S', extra); 1814 RecordSpan(leftover); 1815 1816 // Place leftover span on appropriate free list 1817 SpanList* listpair = (static_cast<size_t>(extra) < kMaxPages) ? 
&free_[extra] : &large_; 1818 Span* dst = &listpair->normal; 1819 DLL_Prepend(dst, leftover); 1820 1821 span->length = n; 1822 pagemap_.set(span->start + n - 1, span); 1823 } 1824 } 1825 1826 static ALWAYS_INLINE void mergeDecommittedStates(Span* destination, Span* other) 1827 { 1828 if (destination->decommitted && !other->decommitted) { 1829 TCMalloc_SystemRelease(reinterpret_cast<void*>(other->start << kPageShift), 1830 static_cast<size_t>(other->length << kPageShift)); 1831 } else if (other->decommitted && !destination->decommitted) { 1832 TCMalloc_SystemRelease(reinterpret_cast<void*>(destination->start << kPageShift), 1833 static_cast<size_t>(destination->length << kPageShift)); 1834 destination->decommitted = true; 1835 } 1836 } 1837 1838 inline void TCMalloc_PageHeap::Delete(Span* span) { 1839 ASSERT(Check()); 1840 ASSERT(!span->free); 1841 ASSERT(span->length > 0); 1842 ASSERT(GetDescriptor(span->start) == span); 1843 ASSERT(GetDescriptor(span->start + span->length - 1) == span); 1844 span->sizeclass = 0; 1845 #ifndef NO_TCMALLOC_SAMPLES 1846 span->sample = 0; 1847 #endif 1848 1849 // Coalesce -- we guarantee that "p" != 0, so no bounds checking 1850 // necessary. We do not bother resetting the stale pagemap 1851 // entries for the pieces we are merging together because we only 1852 // care about the pagemap entries for the boundaries. 1853 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1854 // Track the total size of the neighboring free spans that are committed. 1855 Length neighboringCommittedSpansLength = 0; 1856 #endif 1857 const PageID p = span->start; 1858 const Length n = span->length; 1859 Span* prev = GetDescriptor(p-1); 1860 if (prev != NULL && prev->free) { 1861 // Merge preceding span into this span 1862 ASSERT(prev->start + prev->length == p); 1863 const Length len = prev->length; 1864 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1865 if (!prev->decommitted) 1866 neighboringCommittedSpansLength += len; 1867 #endif 1868 mergeDecommittedStates(span, prev); 1869 DLL_Remove(prev); 1870 DeleteSpan(prev); 1871 span->start -= len; 1872 span->length += len; 1873 pagemap_.set(span->start, span); 1874 Event(span, 'L', len); 1875 } 1876 Span* next = GetDescriptor(p+n); 1877 if (next != NULL && next->free) { 1878 // Merge next span into this span 1879 ASSERT(next->start == p+n); 1880 const Length len = next->length; 1881 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1882 if (!next->decommitted) 1883 neighboringCommittedSpansLength += len; 1884 #endif 1885 mergeDecommittedStates(span, next); 1886 DLL_Remove(next); 1887 DeleteSpan(next); 1888 span->length += len; 1889 pagemap_.set(span->start + span->length - 1, span); 1890 Event(span, 'R', len); 1891 } 1892 1893 Event(span, 'D', span->length); 1894 span->free = 1; 1895 if (span->decommitted) { 1896 if (span->length < kMaxPages) 1897 DLL_Prepend(&free_[span->length].returned, span); 1898 else 1899 DLL_Prepend(&large_.returned, span); 1900 } else { 1901 if (span->length < kMaxPages) 1902 DLL_Prepend(&free_[span->length].normal, span); 1903 else 1904 DLL_Prepend(&large_.normal, span); 1905 } 1906 free_pages_ += n; 1907 1908 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1909 if (span->decommitted) { 1910 // If the merged span is decommitted, that means we decommitted any neighboring spans that were 1911 // committed. Update the free committed pages count. 
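// (Those neighboring pages were counted in free_committed_pages_ while the
// neighbors sat on the normal lists; mergeDecommittedStates() has since released
// them together with this span, so their contribution is subtracted below.)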
1912 free_committed_pages_ -= neighboringCommittedSpansLength; 1913 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_) 1914 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; 1915 } else { 1916 // If the merged span remains committed, add the deleted span's size to the free committed pages count. 1917 free_committed_pages_ += n; 1918 } 1919 1920 // Make sure the scavenge thread becomes active if we have enough freed pages to release some back to the system. 1921 signalScavenger(); 1922 #else 1923 IncrementalScavenge(n); 1924 #endif 1925 1926 ASSERT(Check()); 1927 } 1928 1929 #if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1930 void TCMalloc_PageHeap::IncrementalScavenge(Length n) { 1931 // Fast path; not yet time to release memory 1932 scavenge_counter_ -= n; 1933 if (scavenge_counter_ >= 0) return; // Not yet time to scavenge 1934 1935 // If there is nothing to release, wait for so many pages before 1936 // scavenging again. With 4K pages, this comes to 16MB of memory. 1937 static const size_t kDefaultReleaseDelay = 1 << 8; 1938 1939 // Find index of free list to scavenge 1940 size_t index = scavenge_index_ + 1; 1941 for (size_t i = 0; i < kMaxPages+1; i++) { 1942 if (index > kMaxPages) index = 0; 1943 SpanList* slist = (index == kMaxPages) ? &large_ : &free_[index]; 1944 if (!DLL_IsEmpty(&slist->normal)) { 1945 // Release the last span on the normal portion of this list 1946 Span* s = slist->normal.prev; 1947 DLL_Remove(s); 1948 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), 1949 static_cast<size_t>(s->length << kPageShift)); 1950 s->decommitted = true; 1951 DLL_Prepend(&slist->returned, s); 1952 1953 scavenge_counter_ = std::max<size_t>(64UL, std::min<size_t>(kDefaultReleaseDelay, kDefaultReleaseDelay - (free_pages_ / kDefaultReleaseDelay))); 1954 1955 if (index == kMaxPages && !DLL_IsEmpty(&slist->normal)) 1956 scavenge_index_ = index - 1; 1957 else 1958 scavenge_index_ = index; 1959 return; 1960 } 1961 index++; 1962 } 1963 1964 // Nothing to scavenge, delay for a while 1965 scavenge_counter_ = kDefaultReleaseDelay; 1966 } 1967 #endif 1968 1969 void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) { 1970 // Associate span object with all interior pages as well 1971 ASSERT(!span->free); 1972 ASSERT(GetDescriptor(span->start) == span); 1973 ASSERT(GetDescriptor(span->start+span->length-1) == span); 1974 Event(span, 'C', sc); 1975 span->sizeclass = static_cast<unsigned int>(sc); 1976 for (Length i = 1; i < span->length-1; i++) { 1977 pagemap_.set(span->start+i, span); 1978 } 1979 } 1980 1981 #ifdef WTF_CHANGES 1982 size_t TCMalloc_PageHeap::ReturnedBytes() const { 1983 size_t result = 0; 1984 for (unsigned s = 0; s < kMaxPages; s++) { 1985 const int r_length = DLL_Length(&free_[s].returned); 1986 unsigned r_pages = s * r_length; 1987 result += r_pages << kPageShift; 1988 } 1989 1990 for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) 1991 result += s->length << kPageShift; 1992 return result; 1993 } 1994 #endif 1995 1996 #ifndef WTF_CHANGES 1997 static double PagesToMB(uint64_t pages) { 1998 return (pages << kPageShift) / 1048576.0; 1999 } 2000 2001 void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) { 2002 int nonempty_sizes = 0; 2003 for (int s = 0; s < kMaxPages; s++) { 2004 if (!DLL_IsEmpty(&free_[s].normal) || !DLL_IsEmpty(&free_[s].returned)) { 2005 nonempty_sizes++; 2006 } 2007 } 2008 out->printf("------------------------------------------------\n"); 2009 out->printf("PageHeap: %d 
sizes; %6.1f MB free\n", 2010 nonempty_sizes, PagesToMB(free_pages_)); 2011 out->printf("------------------------------------------------\n"); 2012 uint64_t total_normal = 0; 2013 uint64_t total_returned = 0; 2014 for (int s = 0; s < kMaxPages; s++) { 2015 const int n_length = DLL_Length(&free_[s].normal); 2016 const int r_length = DLL_Length(&free_[s].returned); 2017 if (n_length + r_length > 0) { 2018 uint64_t n_pages = s * n_length; 2019 uint64_t r_pages = s * r_length; 2020 total_normal += n_pages; 2021 total_returned += r_pages; 2022 out->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum" 2023 "; unmapped: %6.1f MB; %6.1f MB cum\n", 2024 s, 2025 (n_length + r_length), 2026 PagesToMB(n_pages + r_pages), 2027 PagesToMB(total_normal + total_returned), 2028 PagesToMB(r_pages), 2029 PagesToMB(total_returned)); 2030 } 2031 } 2032 2033 uint64_t n_pages = 0; 2034 uint64_t r_pages = 0; 2035 int n_spans = 0; 2036 int r_spans = 0; 2037 out->printf("Normal large spans:\n"); 2038 for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) { 2039 out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n", 2040 s->length, PagesToMB(s->length)); 2041 n_pages += s->length; 2042 n_spans++; 2043 } 2044 out->printf("Unmapped large spans:\n"); 2045 for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) { 2046 out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n", 2047 s->length, PagesToMB(s->length)); 2048 r_pages += s->length; 2049 r_spans++; 2050 } 2051 total_normal += n_pages; 2052 total_returned += r_pages; 2053 out->printf(">255 large * %6u spans ~ %6.1f MB; %6.1f MB cum" 2054 "; unmapped: %6.1f MB; %6.1f MB cum\n", 2055 (n_spans + r_spans), 2056 PagesToMB(n_pages + r_pages), 2057 PagesToMB(total_normal + total_returned), 2058 PagesToMB(r_pages), 2059 PagesToMB(total_returned)); 2060 } 2061 #endif 2062 2063 bool TCMalloc_PageHeap::GrowHeap(Length n) { 2064 ASSERT(kMaxPages >= kMinSystemAlloc); 2065 if (n > kMaxValidPages) return false; 2066 Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc); 2067 size_t actual_size; 2068 void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize); 2069 if (ptr == NULL) { 2070 if (n < ask) { 2071 // Try growing just "n" pages 2072 ask = n; 2073 ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize); 2074 } 2075 if (ptr == NULL) return false; 2076 } 2077 ask = actual_size >> kPageShift; 2078 2079 uint64_t old_system_bytes = system_bytes_; 2080 system_bytes_ += (ask << kPageShift); 2081 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; 2082 ASSERT(p > 0); 2083 2084 // If we have already a lot of pages allocated, just pre allocate a bunch of 2085 // memory for the page map. This prevents fragmentation by pagemap metadata 2086 // when a program keeps allocating and freeing large blocks. 2087 2088 if (old_system_bytes < kPageMapBigAllocationThreshold 2089 && system_bytes_ >= kPageMapBigAllocationThreshold) { 2090 pagemap_.PreallocateMoreMemory(); 2091 } 2092 2093 // Make sure pagemap_ has entries for all of the new pages. 2094 // Plus ensure one before and one after so coalescing code 2095 // does not need bounds-checking. 2096 if (pagemap_.Ensure(p-1, ask+2)) { 2097 // Pretend the new area is allocated and then Delete() it to 2098 // cause any necessary coalescing to occur. 2099 // 2100 // We do not adjust free_pages_ here since Delete() will do it for us. 
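// Delete() also places the span on the appropriate normal free list and, when
// the background scavenger is compiled in, credits free_committed_pages_ and may
// signal the scavenger.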
2101 Span* span = NewSpan(p, ask); 2102 RecordSpan(span); 2103 Delete(span); 2104 ASSERT(Check()); 2105 return true; 2106 } else { 2107 // We could not allocate memory within "pagemap_" 2108 // TODO: Once we can return memory to the system, return the new span 2109 return false; 2110 } 2111 } 2112 2113 bool TCMalloc_PageHeap::Check() { 2114 ASSERT(free_[0].normal.next == &free_[0].normal); 2115 ASSERT(free_[0].returned.next == &free_[0].returned); 2116 CheckList(&large_.normal, kMaxPages, 1000000000); 2117 CheckList(&large_.returned, kMaxPages, 1000000000); 2118 for (Length s = 1; s < kMaxPages; s++) { 2119 CheckList(&free_[s].normal, s, s); 2120 CheckList(&free_[s].returned, s, s); 2121 } 2122 return true; 2123 } 2124 2125 #if ASSERT_DISABLED 2126 bool TCMalloc_PageHeap::CheckList(Span*, Length, Length) { 2127 return true; 2128 } 2129 #else 2130 bool TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages) { 2131 for (Span* s = list->next; s != list; s = s->next) { 2132 CHECK_CONDITION(s->free); 2133 CHECK_CONDITION(s->length >= min_pages); 2134 CHECK_CONDITION(s->length <= max_pages); 2135 CHECK_CONDITION(GetDescriptor(s->start) == s); 2136 CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s); 2137 } 2138 return true; 2139 } 2140 #endif 2141 2142 static void ReleaseFreeList(Span* list, Span* returned) { 2143 // Walk backwards through list so that when we push these 2144 // spans on the "returned" list, we preserve the order. 2145 while (!DLL_IsEmpty(list)) { 2146 Span* s = list->prev; 2147 DLL_Remove(s); 2148 DLL_Prepend(returned, s); 2149 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), 2150 static_cast<size_t>(s->length << kPageShift)); 2151 } 2152 } 2153 2154 void TCMalloc_PageHeap::ReleaseFreePages() { 2155 for (Length s = 0; s < kMaxPages; s++) { 2156 ReleaseFreeList(&free_[s].normal, &free_[s].returned); 2157 } 2158 ReleaseFreeList(&large_.normal, &large_.returned); 2159 ASSERT(Check()); 2160 } 2161 2162 //------------------------------------------------------------------- 2163 // Free list 2164 //------------------------------------------------------------------- 2165 2166 class TCMalloc_ThreadCache_FreeList { 2167 private: 2168 void* list_; // Linked list of nodes 2169 uint16_t length_; // Current length 2170 uint16_t lowater_; // Low water mark for list length 2171 2172 public: 2173 void Init() { 2174 list_ = NULL; 2175 length_ = 0; 2176 lowater_ = 0; 2177 } 2178 2179 // Return current length of list 2180 int length() const { 2181 return length_; 2182 } 2183 2184 // Is list empty? 
2185 bool empty() const { 2186 return list_ == NULL; 2187 } 2188 2189 // Low-water mark management 2190 int lowwatermark() const { return lowater_; } 2191 void clear_lowwatermark() { lowater_ = length_; } 2192 2193 ALWAYS_INLINE void Push(void* ptr) { 2194 SLL_Push(&list_, ptr); 2195 length_++; 2196 } 2197 2198 void PushRange(int N, void *start, void *end) { 2199 SLL_PushRange(&list_, start, end); 2200 length_ = length_ + static_cast<uint16_t>(N); 2201 } 2202 2203 void PopRange(int N, void **start, void **end) { 2204 SLL_PopRange(&list_, N, start, end); 2205 ASSERT(length_ >= N); 2206 length_ = length_ - static_cast<uint16_t>(N); 2207 if (length_ < lowater_) lowater_ = length_; 2208 } 2209 2210 ALWAYS_INLINE void* Pop() { 2211 ASSERT(list_ != NULL); 2212 length_--; 2213 if (length_ < lowater_) lowater_ = length_; 2214 return SLL_Pop(&list_); 2215 } 2216 2217 #ifdef WTF_CHANGES 2218 template <class Finder, class Reader> 2219 void enumerateFreeObjects(Finder& finder, const Reader& reader) 2220 { 2221 for (void* nextObject = list_; nextObject; nextObject = reader.nextEntryInLinkedList(reinterpret_cast<void**>(nextObject))) 2222 finder.visit(nextObject); 2223 } 2224 #endif 2225 }; 2226 2227 //------------------------------------------------------------------- 2228 // Data kept per thread 2229 //------------------------------------------------------------------- 2230 2231 class TCMalloc_ThreadCache { 2232 private: 2233 typedef TCMalloc_ThreadCache_FreeList FreeList; 2234 #if OS(WINDOWS) 2235 typedef DWORD ThreadIdentifier; 2236 #else 2237 typedef pthread_t ThreadIdentifier; 2238 #endif 2239 2240 size_t size_; // Combined size of data 2241 ThreadIdentifier tid_; // Which thread owns it 2242 bool in_setspecific_; // Called pthread_setspecific? 2243 FreeList list_[kNumClasses]; // Array indexed by size-class 2244 2245 // We sample allocations, biased by the size of the allocation 2246 uint32_t rnd_; // Cheap random number generator 2247 size_t bytes_until_sample_; // Bytes until we sample next 2248 2249 // Allocate a new heap. REQUIRES: pageheap_lock is held. 2250 static inline TCMalloc_ThreadCache* NewHeap(ThreadIdentifier tid); 2251 2252 // Use only as pthread thread-specific destructor function. 2253 static void DestroyThreadCache(void* ptr); 2254 public: 2255 // All ThreadCache objects are kept in a linked list (for stats collection) 2256 TCMalloc_ThreadCache* next_; 2257 TCMalloc_ThreadCache* prev_; 2258 2259 void Init(ThreadIdentifier tid); 2260 void Cleanup(); 2261 2262 // Accessors (mostly just for printing stats) 2263 int freelist_length(size_t cl) const { return list_[cl].length(); } 2264 2265 // Total byte size in cache 2266 size_t Size() const { return size_; } 2267 2268 ALWAYS_INLINE void* Allocate(size_t size); 2269 void Deallocate(void* ptr, size_t size_class); 2270 2271 ALWAYS_INLINE void FetchFromCentralCache(size_t cl, size_t allocationSize); 2272 void ReleaseToCentralCache(size_t cl, int N); 2273 void Scavenge(); 2274 void Print() const; 2275 2276 // Record allocation of "k" bytes. 
Return true iff allocation 2277 // should be sampled 2278 bool SampleAllocation(size_t k); 2279 2280 // Pick next sampling point 2281 void PickNextSample(size_t k); 2282 2283 static void InitModule(); 2284 static void InitTSD(); 2285 static TCMalloc_ThreadCache* GetThreadHeap(); 2286 static TCMalloc_ThreadCache* GetCache(); 2287 static TCMalloc_ThreadCache* GetCacheIfPresent(); 2288 static TCMalloc_ThreadCache* CreateCacheIfNecessary(); 2289 static void DeleteCache(TCMalloc_ThreadCache* heap); 2290 static void BecomeIdle(); 2291 static void RecomputeThreadCacheSize(); 2292 2293 #ifdef WTF_CHANGES 2294 template <class Finder, class Reader> 2295 void enumerateFreeObjects(Finder& finder, const Reader& reader) 2296 { 2297 for (unsigned sizeClass = 0; sizeClass < kNumClasses; sizeClass++) 2298 list_[sizeClass].enumerateFreeObjects(finder, reader); 2299 } 2300 #endif 2301 }; 2302 2303 //------------------------------------------------------------------- 2304 // Data kept per size-class in central cache 2305 //------------------------------------------------------------------- 2306 2307 class TCMalloc_Central_FreeList { 2308 public: 2309 void Init(size_t cl); 2310 2311 // These methods all do internal locking. 2312 2313 // Insert the specified range into the central freelist. N is the number of 2314 // elements in the range. 2315 void InsertRange(void *start, void *end, int N); 2316 2317 // Returns the actual number of fetched elements into N. 2318 void RemoveRange(void **start, void **end, int *N); 2319 2320 // Returns the number of free objects in cache. 2321 size_t length() { 2322 SpinLockHolder h(&lock_); 2323 return counter_; 2324 } 2325 2326 // Returns the number of free objects in the transfer cache. 2327 int tc_length() { 2328 SpinLockHolder h(&lock_); 2329 return used_slots_ * num_objects_to_move[size_class_]; 2330 } 2331 2332 #ifdef WTF_CHANGES 2333 template <class Finder, class Reader> 2334 void enumerateFreeObjects(Finder& finder, const Reader& reader, TCMalloc_Central_FreeList* remoteCentralFreeList) 2335 { 2336 for (Span* span = &empty_; span && span != &empty_; span = (span->next ? reader(span->next) : 0)) 2337 ASSERT(!span->objects); 2338 2339 ASSERT(!nonempty_.objects); 2340 static const ptrdiff_t nonemptyOffset = reinterpret_cast<const char*>(&nonempty_) - reinterpret_cast<const char*>(this); 2341 2342 Span* remoteNonempty = reinterpret_cast<Span*>(reinterpret_cast<char*>(remoteCentralFreeList) + nonemptyOffset); 2343 Span* remoteSpan = nonempty_.next; 2344 2345 for (Span* span = reader(remoteSpan); span && remoteSpan != remoteNonempty; remoteSpan = span->next, span = (span->next ? reader(span->next) : 0)) { 2346 for (void* nextObject = span->objects; nextObject; nextObject = reader.nextEntryInLinkedList(reinterpret_cast<void**>(nextObject))) 2347 finder.visit(nextObject); 2348 } 2349 } 2350 #endif 2351 2352 private: 2353 // REQUIRES: lock_ is held 2354 // Remove object from cache and return. 2355 // Return NULL if no free entries in cache. 2356 void* FetchFromSpans(); 2357 2358 // REQUIRES: lock_ is held 2359 // Remove object from cache and return. Fetches 2360 // from pageheap if cache is empty. Only returns 2361 // NULL on allocation failure. 2362 void* FetchFromSpansSafe(); 2363 2364 // REQUIRES: lock_ is held 2365 // Release a linked list of objects to spans. 2366 // May temporarily release lock_. 2367 void ReleaseListToSpans(void *start); 2368 2369 // REQUIRES: lock_ is held 2370 // Release an object to spans. 2371 // May temporarily release lock_. 
2372 ALWAYS_INLINE void ReleaseToSpans(void* object);
2373
2374 // REQUIRES: lock_ is held
2375 // Populate cache by fetching from the page heap.
2376 // May temporarily release lock_.
2377 ALWAYS_INLINE void Populate();
2378
2379 // REQUIRES: lock_ is held.
2380 // Tries to make room for a TCEntry. If the cache is full it will try to
2381 // expand it at the cost of some other cache size. Return false if there is
2382 // no space.
2383 bool MakeCacheSpace();
2384
2385 // REQUIRES: lock_ for locked_size_class is held.
2386 // Picks a "random" size class to steal TCEntry slot from. In reality it
2387 // just iterates over the sizeclasses but does so without taking a lock.
2388 // Returns true on success.
2389 // May temporarily lock a "random" size class.
2390 static ALWAYS_INLINE bool EvictRandomSizeClass(size_t locked_size_class, bool force);
2391
2392 // REQUIRES: lock_ is *not* held.
2393 // Tries to shrink the cache. If force is true it will release objects to
2394 // spans if that allows it to shrink the cache. Return false if it failed to
2395 // shrink the cache. Decrements cache_size_ on success.
2396 // May temporarily take lock_. If it takes lock_, the locked_size_class
2397 // lock is released to keep the thread from holding two size class locks
2398 // concurrently, which could lead to a deadlock.
2399 bool ShrinkCache(int locked_size_class, bool force);
2400
2401 // This lock protects all the data members. cached_entries and cache_size_
2402 // may be looked at without holding the lock.
2403 SpinLock lock_;
2404
2405 // We keep linked lists of empty and non-empty spans.
2406 size_t size_class_; // My size class
2407 Span empty_; // Dummy header for list of empty spans
2408 Span nonempty_; // Dummy header for list of non-empty spans
2409 size_t counter_; // Number of free objects in cache entry
2410
2411 // Here we reserve space for TCEntry cache slots. Since one size class can
2412 // end up getting all the TCEntries quota in the system we just preallocate
2413 // a sufficient number of entries here.
2414 TCEntry tc_slots_[kNumTransferEntries];
2415
2416 // Number of currently used cached entries in tc_slots_. This variable is
2417 // updated under a lock but can be read without one.
2418 int32_t used_slots_;
2419 // The current number of slots for this size class. This is an
2420 // adaptive value that is increased if there is lots of traffic
2421 // on a given size class.
2422 int32_t cache_size_;
2423 };
2424
2425 // Pad each CentralCache object to multiple of 64 bytes
2426 class TCMalloc_Central_FreeListPadded : public TCMalloc_Central_FreeList {
2427 private:
2428 char pad_[(64 - (sizeof(TCMalloc_Central_FreeList) % 64)) % 64];
2429 };
2430
2431 //-------------------------------------------------------------------
2432 // Global variables
2433 //-------------------------------------------------------------------
2434
2435 // Central cache -- a collection of free-lists, one per size-class.
2436 // We have a separate lock per free-list to reduce contention.
2437 static TCMalloc_Central_FreeListPadded central_cache[kNumClasses];
2438
2439 // Page-level allocator
2440 static SpinLock pageheap_lock = SPINLOCK_INITIALIZER;
2441 static AllocAlignmentInteger pageheap_memory[(sizeof(TCMalloc_PageHeap) + sizeof(AllocAlignmentInteger) - 1) / sizeof(AllocAlignmentInteger)];
2442 static bool phinited = false;
2443
2444 // Avoid extra level of indirection by making "pageheap" be just an alias
2445 // of pageheap_memory.
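// Going through a union here, rather than a plain cast, is presumably meant to
// avoid strict-aliasing/type-punning warnings; the TCMalloc_PageHeap object that
// lives in this storage is initialized in place by pageheap->init() from
// TCMalloc_ThreadCache::InitModule().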
2446 typedef union { 2447 void* m_memory; 2448 TCMalloc_PageHeap* m_pageHeap; 2449 } PageHeapUnion; 2450 2451 static inline TCMalloc_PageHeap* getPageHeap() 2452 { 2453 PageHeapUnion u = { &pageheap_memory[0] }; 2454 return u.m_pageHeap; 2455 } 2456 2457 #define pageheap getPageHeap() 2458 2459 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2460 2461 #if HAVE(DISPATCH_H) || OS(WINDOWS) 2462 2463 void TCMalloc_PageHeap::periodicScavenge() 2464 { 2465 SpinLockHolder h(&pageheap_lock); 2466 pageheap->scavenge(); 2467 2468 if (shouldScavenge()) { 2469 rescheduleScavenger(); 2470 return; 2471 } 2472 2473 suspendScavenger(); 2474 } 2475 2476 ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger() 2477 { 2478 ASSERT(IsHeld(pageheap_lock)); 2479 if (isScavengerSuspended() && shouldScavenge()) 2480 scheduleScavenger(); 2481 } 2482 2483 #else 2484 2485 void TCMalloc_PageHeap::scavengerThread() 2486 { 2487 #if HAVE(PTHREAD_SETNAME_NP) 2488 pthread_setname_np("JavaScriptCore: FastMalloc scavenger"); 2489 #endif 2490 2491 while (1) { 2492 if (!shouldScavenge()) { 2493 pthread_mutex_lock(&m_scavengeMutex); 2494 m_scavengeThreadActive = false; 2495 // Block until there are enough free committed pages to release back to the system. 2496 pthread_cond_wait(&m_scavengeCondition, &m_scavengeMutex); 2497 m_scavengeThreadActive = true; 2498 pthread_mutex_unlock(&m_scavengeMutex); 2499 } 2500 sleep(kScavengeDelayInSeconds); 2501 { 2502 SpinLockHolder h(&pageheap_lock); 2503 pageheap->scavenge(); 2504 } 2505 } 2506 } 2507 2508 #endif 2509 2510 #endif 2511 2512 // If TLS is available, we also store a copy 2513 // of the per-thread object in a __thread variable 2514 // since __thread variables are faster to read 2515 // than pthread_getspecific(). We still need 2516 // pthread_setspecific() because __thread 2517 // variables provide no way to run cleanup 2518 // code when a thread is destroyed. 2519 #ifdef HAVE_TLS 2520 static __thread TCMalloc_ThreadCache *threadlocal_heap; 2521 #endif 2522 // Thread-specific key. Initialization here is somewhat tricky 2523 // because some Linux startup code invokes malloc() before it 2524 // is in a good enough state to handle pthread_keycreate(). 2525 // Therefore, we use TSD keys only after tsd_inited is set to true. 2526 // Until then, we use a slow path to get the heap object. 2527 static bool tsd_inited = false; 2528 #if USE(PTHREAD_GETSPECIFIC_DIRECT) 2529 static const pthread_key_t heap_key = __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY0; 2530 #else 2531 static pthread_key_t heap_key; 2532 #endif 2533 #if OS(WINDOWS) 2534 DWORD tlsIndex = TLS_OUT_OF_INDEXES; 2535 #endif 2536 2537 static ALWAYS_INLINE void setThreadHeap(TCMalloc_ThreadCache* heap) 2538 { 2539 #if USE(PTHREAD_GETSPECIFIC_DIRECT) 2540 // Can't have two libraries both doing this in the same process, 2541 // so check and make this crash right away. 2542 if (pthread_getspecific(heap_key)) 2543 CRASH(); 2544 #endif 2545 2546 // Still do pthread_setspecific even if there's an alternate form 2547 // of thread-local storage in use, to benefit from the delete callback. 2548 pthread_setspecific(heap_key, heap); 2549 2550 #if OS(WINDOWS) 2551 TlsSetValue(tlsIndex, heap); 2552 #endif 2553 } 2554 2555 // Allocator for thread heaps 2556 static PageHeapAllocator<TCMalloc_ThreadCache> threadheap_allocator; 2557 2558 // Linked list of heap objects. Protected by pageheap_lock. 2559 static TCMalloc_ThreadCache* thread_heaps = NULL; 2560 static int thread_heap_count = 0; 2561 2562 // Overall thread cache size. Protected by pageheap_lock. 
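// This total is divided evenly among live threads by RecomputeThreadCacheSize(),
// with each per-thread share clamped to [kMinThreadCacheSize, kMaxThreadCacheSize].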
2563 static size_t overall_thread_cache_size = kDefaultOverallThreadCacheSize; 2564 2565 // Global per-thread cache size. Writes are protected by 2566 // pageheap_lock. Reads are done without any locking, which should be 2567 // fine as long as size_t can be written atomically and we don't place 2568 // invariants between this variable and other pieces of state. 2569 static volatile size_t per_thread_cache_size = kMaxThreadCacheSize; 2570 2571 //------------------------------------------------------------------- 2572 // Central cache implementation 2573 //------------------------------------------------------------------- 2574 2575 void TCMalloc_Central_FreeList::Init(size_t cl) { 2576 lock_.Init(); 2577 size_class_ = cl; 2578 DLL_Init(&empty_); 2579 DLL_Init(&nonempty_); 2580 counter_ = 0; 2581 2582 cache_size_ = 1; 2583 used_slots_ = 0; 2584 ASSERT(cache_size_ <= kNumTransferEntries); 2585 } 2586 2587 void TCMalloc_Central_FreeList::ReleaseListToSpans(void* start) { 2588 while (start) { 2589 void *next = SLL_Next(start); 2590 ReleaseToSpans(start); 2591 start = next; 2592 } 2593 } 2594 2595 ALWAYS_INLINE void TCMalloc_Central_FreeList::ReleaseToSpans(void* object) { 2596 const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift; 2597 Span* span = pageheap->GetDescriptor(p); 2598 ASSERT(span != NULL); 2599 ASSERT(span->refcount > 0); 2600 2601 // If span is empty, move it to non-empty list 2602 if (span->objects == NULL) { 2603 DLL_Remove(span); 2604 DLL_Prepend(&nonempty_, span); 2605 Event(span, 'N', 0); 2606 } 2607 2608 // The following check is expensive, so it is disabled by default 2609 if (false) { 2610 // Check that object does not occur in list 2611 unsigned got = 0; 2612 for (void* p = span->objects; p != NULL; p = *((void**) p)) { 2613 ASSERT(p != object); 2614 got++; 2615 } 2616 ASSERT(got + span->refcount == 2617 (span->length<<kPageShift)/ByteSizeForClass(span->sizeclass)); 2618 } 2619 2620 counter_++; 2621 span->refcount--; 2622 if (span->refcount == 0) { 2623 Event(span, '#', 0); 2624 counter_ -= (span->length<<kPageShift) / ByteSizeForClass(span->sizeclass); 2625 DLL_Remove(span); 2626 2627 // Release central list lock while operating on pageheap 2628 lock_.Unlock(); 2629 { 2630 SpinLockHolder h(&pageheap_lock); 2631 pageheap->Delete(span); 2632 } 2633 lock_.Lock(); 2634 } else { 2635 *(reinterpret_cast<void**>(object)) = span->objects; 2636 span->objects = object; 2637 } 2638 } 2639 2640 ALWAYS_INLINE bool TCMalloc_Central_FreeList::EvictRandomSizeClass( 2641 size_t locked_size_class, bool force) { 2642 static int race_counter = 0; 2643 int t = race_counter++; // Updated without a lock, but who cares. 2644 if (t >= static_cast<int>(kNumClasses)) { 2645 while (t >= static_cast<int>(kNumClasses)) { 2646 t -= kNumClasses; 2647 } 2648 race_counter = t; 2649 } 2650 ASSERT(t >= 0); 2651 ASSERT(t < static_cast<int>(kNumClasses)); 2652 if (t == static_cast<int>(locked_size_class)) return false; 2653 return central_cache[t].ShrinkCache(static_cast<int>(locked_size_class), force); 2654 } 2655 2656 bool TCMalloc_Central_FreeList::MakeCacheSpace() { 2657 // Is there room in the cache? 2658 if (used_slots_ < cache_size_) return true; 2659 // Check if we can expand this cache? 2660 if (cache_size_ == kNumTransferEntries) return false; 2661 // Ok, we'll try to grab an entry from some other size class. 2662 if (EvictRandomSizeClass(size_class_, false) || 2663 EvictRandomSizeClass(size_class_, true)) { 2664 // Succeeded in evicting, we're going to make our cache larger. 
2665 cache_size_++;
2666 return true;
2667 }
2668 return false;
2669 }
2670
2671
2672 namespace {
2673 class LockInverter {
2674 private:
2675 SpinLock *held_, *temp_;
2676 public:
2677 inline explicit LockInverter(SpinLock* held, SpinLock *temp)
2678 : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); }
2679 inline ~LockInverter() { temp_->Unlock(); held_->Lock(); }
2680 };
2681 }
2682
2683 bool TCMalloc_Central_FreeList::ShrinkCache(int locked_size_class, bool force) {
2684 // Start with a quick check without taking a lock.
2685 if (cache_size_ == 0) return false;
2686 // We don't evict from a full cache unless we are 'forcing'.
2687 if (force == false && used_slots_ == cache_size_) return false;
2688
2689 // Grab lock, but first release the other lock held by this thread. We use
2690 // the lock inverter to ensure that we never hold two size class locks
2691 // concurrently. That can create a deadlock because there is no well
2692 // defined nesting order.
2693 LockInverter li(&central_cache[locked_size_class].lock_, &lock_);
2694 ASSERT(used_slots_ <= cache_size_);
2695 ASSERT(0 <= cache_size_);
2696 if (cache_size_ == 0) return false;
2697 if (used_slots_ == cache_size_) {
2698 if (force == false) return false;
2699 // ReleaseListToSpans releases the lock, so we have to make all the
2700 // updates to the central list before calling it.
2701 cache_size_--;
2702 used_slots_--;
2703 ReleaseListToSpans(tc_slots_[used_slots_].head);
2704 return true;
2705 }
2706 cache_size_--;
2707 return true;
2708 }
2709
2710 void TCMalloc_Central_FreeList::InsertRange(void *start, void *end, int N) {
2711 SpinLockHolder h(&lock_);
2712 if (N == num_objects_to_move[size_class_] &&
2713 MakeCacheSpace()) {
2714 int slot = used_slots_++;
2715 ASSERT(slot >=0);
2716 ASSERT(slot < kNumTransferEntries);
2717 TCEntry *entry = &tc_slots_[slot];
2718 entry->head = start;
2719 entry->tail = end;
2720 return;
2721 }
2722 ReleaseListToSpans(start);
2723 }
2724
2725 void TCMalloc_Central_FreeList::RemoveRange(void **start, void **end, int *N) {
2726 int num = *N;
2727 ASSERT(num > 0);
2728
2729 SpinLockHolder h(&lock_);
2730 if (num == num_objects_to_move[size_class_] && used_slots_ > 0) {
2731 int slot = --used_slots_;
2732 ASSERT(slot >= 0);
2733 TCEntry *entry = &tc_slots_[slot];
2734 *start = entry->head;
2735 *end = entry->tail;
2736 return;
2737 }
2738
2739 // TODO: Prefetch multiple TCEntries?
2740 void *tail = FetchFromSpansSafe();
2741 if (!tail) {
2742 // We are completely out of memory.
2743 *start = *end = NULL; 2744 *N = 0; 2745 return; 2746 } 2747 2748 SLL_SetNext(tail, NULL); 2749 void *head = tail; 2750 int count = 1; 2751 while (count < num) { 2752 void *t = FetchFromSpans(); 2753 if (!t) break; 2754 SLL_Push(&head, t); 2755 count++; 2756 } 2757 *start = head; 2758 *end = tail; 2759 *N = count; 2760 } 2761 2762 2763 void* TCMalloc_Central_FreeList::FetchFromSpansSafe() { 2764 void *t = FetchFromSpans(); 2765 if (!t) { 2766 Populate(); 2767 t = FetchFromSpans(); 2768 } 2769 return t; 2770 } 2771 2772 void* TCMalloc_Central_FreeList::FetchFromSpans() { 2773 if (DLL_IsEmpty(&nonempty_)) return NULL; 2774 Span* span = nonempty_.next; 2775 2776 ASSERT(span->objects != NULL); 2777 ASSERT_SPAN_COMMITTED(span); 2778 span->refcount++; 2779 void* result = span->objects; 2780 span->objects = *(reinterpret_cast<void**>(result)); 2781 if (span->objects == NULL) { 2782 // Move to empty list 2783 DLL_Remove(span); 2784 DLL_Prepend(&empty_, span); 2785 Event(span, 'E', 0); 2786 } 2787 counter_--; 2788 return result; 2789 } 2790 2791 // Fetch memory from the system and add to the central cache freelist. 2792 ALWAYS_INLINE void TCMalloc_Central_FreeList::Populate() { 2793 // Release central list lock while operating on pageheap 2794 lock_.Unlock(); 2795 const size_t npages = class_to_pages[size_class_]; 2796 2797 Span* span; 2798 { 2799 SpinLockHolder h(&pageheap_lock); 2800 span = pageheap->New(npages); 2801 if (span) pageheap->RegisterSizeClass(span, size_class_); 2802 } 2803 if (span == NULL) { 2804 #if HAVE(ERRNO_H) 2805 MESSAGE("allocation failed: %d\n", errno); 2806 #elif OS(WINDOWS) 2807 MESSAGE("allocation failed: %d\n", ::GetLastError()); 2808 #else 2809 MESSAGE("allocation failed\n"); 2810 #endif 2811 lock_.Lock(); 2812 return; 2813 } 2814 ASSERT_SPAN_COMMITTED(span); 2815 ASSERT(span->length == npages); 2816 // Cache sizeclass info eagerly. Locking is not necessary. 2817 // (Instead of being eager, we could just replace any stale info 2818 // about this span, but that seems to be no better in practice.) 2819 for (size_t i = 0; i < npages; i++) { 2820 pageheap->CacheSizeClass(span->start + i, size_class_); 2821 } 2822 2823 // Split the block into pieces and add to the free-list 2824 // TODO: coloring of objects to avoid cache conflicts? 
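// The free list is threaded through the objects themselves: the first word of
// each object stores a pointer to the next one, so the span yields
// (npages << kPageShift) / size objects (rounding down), linked in address order.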
2825 void** tail = &span->objects; 2826 char* ptr = reinterpret_cast<char*>(span->start << kPageShift); 2827 char* limit = ptr + (npages << kPageShift); 2828 const size_t size = ByteSizeForClass(size_class_); 2829 int num = 0; 2830 char* nptr; 2831 while ((nptr = ptr + size) <= limit) { 2832 *tail = ptr; 2833 tail = reinterpret_cast_ptr<void**>(ptr); 2834 ptr = nptr; 2835 num++; 2836 } 2837 ASSERT(ptr <= limit); 2838 *tail = NULL; 2839 span->refcount = 0; // No sub-object in use yet 2840 2841 // Add span to list of non-empty spans 2842 lock_.Lock(); 2843 DLL_Prepend(&nonempty_, span); 2844 counter_ += num; 2845 } 2846 2847 //------------------------------------------------------------------- 2848 // TCMalloc_ThreadCache implementation 2849 //------------------------------------------------------------------- 2850 2851 inline bool TCMalloc_ThreadCache::SampleAllocation(size_t k) { 2852 if (bytes_until_sample_ < k) { 2853 PickNextSample(k); 2854 return true; 2855 } else { 2856 bytes_until_sample_ -= k; 2857 return false; 2858 } 2859 } 2860 2861 void TCMalloc_ThreadCache::Init(ThreadIdentifier tid) { 2862 size_ = 0; 2863 next_ = NULL; 2864 prev_ = NULL; 2865 tid_ = tid; 2866 in_setspecific_ = false; 2867 for (size_t cl = 0; cl < kNumClasses; ++cl) { 2868 list_[cl].Init(); 2869 } 2870 2871 // Initialize RNG -- run it for a bit to get to good values 2872 bytes_until_sample_ = 0; 2873 rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this)); 2874 for (int i = 0; i < 100; i++) { 2875 PickNextSample(static_cast<size_t>(FLAGS_tcmalloc_sample_parameter * 2)); 2876 } 2877 } 2878 2879 void TCMalloc_ThreadCache::Cleanup() { 2880 // Put unused memory back into central cache 2881 for (size_t cl = 0; cl < kNumClasses; ++cl) { 2882 if (list_[cl].length() > 0) { 2883 ReleaseToCentralCache(cl, list_[cl].length()); 2884 } 2885 } 2886 } 2887 2888 ALWAYS_INLINE void* TCMalloc_ThreadCache::Allocate(size_t size) { 2889 ASSERT(size <= kMaxSize); 2890 const size_t cl = SizeClass(size); 2891 FreeList* list = &list_[cl]; 2892 size_t allocationSize = ByteSizeForClass(cl); 2893 if (list->empty()) { 2894 FetchFromCentralCache(cl, allocationSize); 2895 if (list->empty()) return NULL; 2896 } 2897 size_ -= allocationSize; 2898 return list->Pop(); 2899 } 2900 2901 inline void TCMalloc_ThreadCache::Deallocate(void* ptr, size_t cl) { 2902 size_ += ByteSizeForClass(cl); 2903 FreeList* list = &list_[cl]; 2904 list->Push(ptr); 2905 // If enough data is free, put back into central cache 2906 if (list->length() > kMaxFreeListLength) { 2907 ReleaseToCentralCache(cl, num_objects_to_move[cl]); 2908 } 2909 if (size_ >= per_thread_cache_size) Scavenge(); 2910 } 2911 2912 // Remove some objects of class "cl" from central cache and add to thread heap 2913 ALWAYS_INLINE void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl, size_t allocationSize) { 2914 int fetch_count = num_objects_to_move[cl]; 2915 void *start, *end; 2916 central_cache[cl].RemoveRange(&start, &end, &fetch_count); 2917 list_[cl].PushRange(fetch_count, start, end); 2918 size_ += allocationSize * fetch_count; 2919 } 2920 2921 // Remove some objects of class "cl" from thread heap and add to central cache 2922 inline void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) { 2923 ASSERT(N > 0); 2924 FreeList* src = &list_[cl]; 2925 if (N > src->length()) N = src->length(); 2926 size_ -= N*ByteSizeForClass(cl); 2927 2928 // We return prepackaged chains of the correct size to the central cache. 
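// Full batches of num_objects_to_move[cl] objects map directly onto the central
// cache's TCEntry transfer slots (InsertRange() only caches a chain whose length
// matches that count); any remainder smaller than one batch is handed over after
// the loop.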
2929 // TODO: Use the same format internally in the thread caches? 2930 int batch_size = num_objects_to_move[cl]; 2931 while (N > batch_size) { 2932 void *tail, *head; 2933 src->PopRange(batch_size, &head, &tail); 2934 central_cache[cl].InsertRange(head, tail, batch_size); 2935 N -= batch_size; 2936 } 2937 void *tail, *head; 2938 src->PopRange(N, &head, &tail); 2939 central_cache[cl].InsertRange(head, tail, N); 2940 } 2941 2942 // Release idle memory to the central cache 2943 inline void TCMalloc_ThreadCache::Scavenge() { 2944 // If the low-water mark for the free list is L, it means we would 2945 // not have had to allocate anything from the central cache even if 2946 // we had reduced the free list size by L. We aim to get closer to 2947 // that situation by dropping L/2 nodes from the free list. This 2948 // may not release much memory, but if so we will call scavenge again 2949 // pretty soon and the low-water marks will be high on that call. 2950 //int64 start = CycleClock::Now(); 2951 2952 for (size_t cl = 0; cl < kNumClasses; cl++) { 2953 FreeList* list = &list_[cl]; 2954 const int lowmark = list->lowwatermark(); 2955 if (lowmark > 0) { 2956 const int drop = (lowmark > 1) ? lowmark/2 : 1; 2957 ReleaseToCentralCache(cl, drop); 2958 } 2959 list->clear_lowwatermark(); 2960 } 2961 2962 //int64 finish = CycleClock::Now(); 2963 //CycleTimer ct; 2964 //MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0); 2965 } 2966 2967 void TCMalloc_ThreadCache::PickNextSample(size_t k) { 2968 // Make next "random" number 2969 // x^32+x^22+x^2+x^1+1 is a primitive polynomial for random numbers 2970 static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0); 2971 uint32_t r = rnd_; 2972 rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly); 2973 2974 // Next point is "rnd_ % (sample_period)". I.e., average 2975 // increment is "sample_period/2". 2976 const int flag_value = static_cast<int>(FLAGS_tcmalloc_sample_parameter); 2977 static int last_flag_value = -1; 2978 2979 if (flag_value != last_flag_value) { 2980 SpinLockHolder h(&sample_period_lock); 2981 int i; 2982 for (i = 0; i < (static_cast<int>(sizeof(primes_list)/sizeof(primes_list[0])) - 1); i++) { 2983 if (primes_list[i] >= flag_value) { 2984 break; 2985 } 2986 } 2987 sample_period = primes_list[i]; 2988 last_flag_value = flag_value; 2989 } 2990 2991 bytes_until_sample_ += rnd_ % sample_period; 2992 2993 if (k > (static_cast<size_t>(-1) >> 2)) { 2994 // If the user has asked for a huge allocation then it is possible 2995 // for the code below to loop infinitely. Just return (note that 2996 // this throws off the sampling accuracy somewhat, but a user who 2997 // is allocating more than 1G of memory at a time can live with a 2998 // minor inaccuracy in profiling of small allocations, and also 2999 // would rather not wait for the loop below to terminate). 3000 return; 3001 } 3002 3003 while (bytes_until_sample_ < k) { 3004 // Increase bytes_until_sample_ by enough average sampling periods 3005 // (sample_period >> 1) to allow us to sample past the current 3006 // allocation. 3007 bytes_until_sample_ += (sample_period >> 1); 3008 } 3009 3010 bytes_until_sample_ -= k; 3011 } 3012 3013 void TCMalloc_ThreadCache::InitModule() { 3014 // There is a slight potential race here because of double-checked 3015 // locking idiom. However, as long as the program does a small 3016 // allocation before switching to multi-threaded mode, we will be 3017 // fine. 
We increase the chances of doing such a small allocation 3018 // by doing one in the constructor of the module_enter_exit_hook 3019 // object declared below. 3020 SpinLockHolder h(&pageheap_lock); 3021 if (!phinited) { 3022 #ifdef WTF_CHANGES 3023 InitTSD(); 3024 #endif 3025 InitSizeClasses(); 3026 threadheap_allocator.Init(); 3027 span_allocator.Init(); 3028 span_allocator.New(); // Reduce cache conflicts 3029 span_allocator.New(); // Reduce cache conflicts 3030 stacktrace_allocator.Init(); 3031 DLL_Init(&sampled_objects); 3032 for (size_t i = 0; i < kNumClasses; ++i) { 3033 central_cache[i].Init(i); 3034 } 3035 pageheap->init(); 3036 phinited = 1; 3037 #if defined(WTF_CHANGES) && OS(DARWIN) 3038 FastMallocZone::init(); 3039 #endif 3040 } 3041 } 3042 3043 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::NewHeap(ThreadIdentifier tid) { 3044 // Create the heap and add it to the linked list 3045 TCMalloc_ThreadCache *heap = threadheap_allocator.New(); 3046 heap->Init(tid); 3047 heap->next_ = thread_heaps; 3048 heap->prev_ = NULL; 3049 if (thread_heaps != NULL) thread_heaps->prev_ = heap; 3050 thread_heaps = heap; 3051 thread_heap_count++; 3052 RecomputeThreadCacheSize(); 3053 return heap; 3054 } 3055 3056 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetThreadHeap() { 3057 #ifdef HAVE_TLS 3058 // __thread is faster, but only when the kernel supports it 3059 if (KernelSupportsTLS()) 3060 return threadlocal_heap; 3061 #elif OS(WINDOWS) 3062 return static_cast<TCMalloc_ThreadCache*>(TlsGetValue(tlsIndex)); 3063 #else 3064 return static_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key)); 3065 #endif 3066 } 3067 3068 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() { 3069 TCMalloc_ThreadCache* ptr = NULL; 3070 if (!tsd_inited) { 3071 InitModule(); 3072 } else { 3073 ptr = GetThreadHeap(); 3074 } 3075 if (ptr == NULL) ptr = CreateCacheIfNecessary(); 3076 return ptr; 3077 } 3078 3079 // In deletion paths, we do not try to create a thread-cache. This is 3080 // because we may be in the thread destruction code and may have 3081 // already cleaned up the cache for this thread. 3082 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() { 3083 if (!tsd_inited) return NULL; 3084 void* const p = GetThreadHeap(); 3085 return reinterpret_cast<TCMalloc_ThreadCache*>(p); 3086 } 3087 3088 void TCMalloc_ThreadCache::InitTSD() { 3089 ASSERT(!tsd_inited); 3090 #if USE(PTHREAD_GETSPECIFIC_DIRECT) 3091 pthread_key_init_np(heap_key, DestroyThreadCache); 3092 #else 3093 pthread_key_create(&heap_key, DestroyThreadCache); 3094 #endif 3095 #if OS(WINDOWS) 3096 tlsIndex = TlsAlloc(); 3097 #endif 3098 tsd_inited = true; 3099 3100 #if !OS(WINDOWS) 3101 // We may have used a fake pthread_t for the main thread. Fix it. 
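// Heaps created before InitTSD() ran were registered with a zeroed thread
// identifier (see CreateCacheIfNecessary()); the loop below rebinds them to the
// id of the current thread.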
3102 pthread_t zero; 3103 memset(&zero, 0, sizeof(zero)); 3104 #endif 3105 #ifndef WTF_CHANGES 3106 SpinLockHolder h(&pageheap_lock); 3107 #else 3108 ASSERT(pageheap_lock.IsHeld()); 3109 #endif 3110 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { 3111 #if OS(WINDOWS) 3112 if (h->tid_ == 0) { 3113 h->tid_ = GetCurrentThreadId(); 3114 } 3115 #else 3116 if (pthread_equal(h->tid_, zero)) { 3117 h->tid_ = pthread_self(); 3118 } 3119 #endif 3120 } 3121 } 3122 3123 TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() { 3124 // Initialize per-thread data if necessary 3125 TCMalloc_ThreadCache* heap = NULL; 3126 { 3127 SpinLockHolder h(&pageheap_lock); 3128 3129 #if OS(WINDOWS) 3130 DWORD me; 3131 if (!tsd_inited) { 3132 me = 0; 3133 } else { 3134 me = GetCurrentThreadId(); 3135 } 3136 #else 3137 // Early on in glibc's life, we cannot even call pthread_self() 3138 pthread_t me; 3139 if (!tsd_inited) { 3140 memset(&me, 0, sizeof(me)); 3141 } else { 3142 me = pthread_self(); 3143 } 3144 #endif 3145 3146 // This may be a recursive malloc call from pthread_setspecific() 3147 // In that case, the heap for this thread has already been created 3148 // and added to the linked list. So we search for that first. 3149 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { 3150 #if OS(WINDOWS) 3151 if (h->tid_ == me) { 3152 #else 3153 if (pthread_equal(h->tid_, me)) { 3154 #endif 3155 heap = h; 3156 break; 3157 } 3158 } 3159 3160 if (heap == NULL) heap = NewHeap(me); 3161 } 3162 3163 // We call pthread_setspecific() outside the lock because it may 3164 // call malloc() recursively. The recursive call will never get 3165 // here again because it will find the already allocated heap in the 3166 // linked list of heaps. 3167 if (!heap->in_setspecific_ && tsd_inited) { 3168 heap->in_setspecific_ = true; 3169 setThreadHeap(heap); 3170 } 3171 return heap; 3172 } 3173 3174 void TCMalloc_ThreadCache::BecomeIdle() { 3175 if (!tsd_inited) return; // No caches yet 3176 TCMalloc_ThreadCache* heap = GetThreadHeap(); 3177 if (heap == NULL) return; // No thread cache to remove 3178 if (heap->in_setspecific_) return; // Do not disturb the active caller 3179 3180 heap->in_setspecific_ = true; 3181 setThreadHeap(NULL); 3182 #ifdef HAVE_TLS 3183 // Also update the copy in __thread 3184 threadlocal_heap = NULL; 3185 #endif 3186 heap->in_setspecific_ = false; 3187 if (GetThreadHeap() == heap) { 3188 // Somehow heap got reinstated by a recursive call to malloc 3189 // from pthread_setspecific. We give up in this case. 3190 return; 3191 } 3192 3193 // We can now get rid of the heap 3194 DeleteCache(heap); 3195 } 3196 3197 void TCMalloc_ThreadCache::DestroyThreadCache(void* ptr) { 3198 // Note that "ptr" cannot be NULL since pthread promises not 3199 // to invoke the destructor on NULL values, but for safety, 3200 // we check anyway. 3201 if (ptr == NULL) return; 3202 #ifdef HAVE_TLS 3203 // Prevent fast path of GetThreadHeap() from returning heap. 
3204 threadlocal_heap = NULL; 3205 #endif 3206 DeleteCache(reinterpret_cast<TCMalloc_ThreadCache*>(ptr)); 3207 } 3208 3209 void TCMalloc_ThreadCache::DeleteCache(TCMalloc_ThreadCache* heap) { 3210 // Remove all memory from heap 3211 heap->Cleanup(); 3212 3213 // Remove from linked list 3214 SpinLockHolder h(&pageheap_lock); 3215 if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_; 3216 if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_; 3217 if (thread_heaps == heap) thread_heaps = heap->next_; 3218 thread_heap_count--; 3219 RecomputeThreadCacheSize(); 3220 3221 threadheap_allocator.Delete(heap); 3222 } 3223 3224 void TCMalloc_ThreadCache::RecomputeThreadCacheSize() { 3225 // Divide available space across threads 3226 int n = thread_heap_count > 0 ? thread_heap_count : 1; 3227 size_t space = overall_thread_cache_size / n; 3228 3229 // Limit to allowed range 3230 if (space < kMinThreadCacheSize) space = kMinThreadCacheSize; 3231 if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize; 3232 3233 per_thread_cache_size = space; 3234 } 3235 3236 void TCMalloc_ThreadCache::Print() const { 3237 for (size_t cl = 0; cl < kNumClasses; ++cl) { 3238 MESSAGE(" %5" PRIuS " : %4d len; %4d lo\n", 3239 ByteSizeForClass(cl), 3240 list_[cl].length(), 3241 list_[cl].lowwatermark()); 3242 } 3243 } 3244 3245 // Extract interesting stats 3246 struct TCMallocStats { 3247 uint64_t system_bytes; // Bytes alloced from system 3248 uint64_t thread_bytes; // Bytes in thread caches 3249 uint64_t central_bytes; // Bytes in central cache 3250 uint64_t transfer_bytes; // Bytes in central transfer cache 3251 uint64_t pageheap_bytes; // Bytes in page heap 3252 uint64_t metadata_bytes; // Bytes alloced for metadata 3253 }; 3254 3255 #ifndef WTF_CHANGES 3256 // Get stats into "r". Also get per-size-class counts if class_count != NULL 3257 static void ExtractStats(TCMallocStats* r, uint64_t* class_count) { 3258 r->central_bytes = 0; 3259 r->transfer_bytes = 0; 3260 for (int cl = 0; cl < kNumClasses; ++cl) { 3261 const int length = central_cache[cl].length(); 3262 const int tc_length = central_cache[cl].tc_length(); 3263 r->central_bytes += static_cast<uint64_t>(ByteSizeForClass(cl)) * length; 3264 r->transfer_bytes += 3265 static_cast<uint64_t>(ByteSizeForClass(cl)) * tc_length; 3266 if (class_count) class_count[cl] = length + tc_length; 3267 } 3268 3269 // Add stats from per-thread heaps 3270 r->thread_bytes = 0; 3271 { // scope 3272 SpinLockHolder h(&pageheap_lock); 3273 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { 3274 r->thread_bytes += h->Size(); 3275 if (class_count) { 3276 for (size_t cl = 0; cl < kNumClasses; ++cl) { 3277 class_count[cl] += h->freelist_length(cl); 3278 } 3279 } 3280 } 3281 } 3282 3283 { //scope 3284 SpinLockHolder h(&pageheap_lock); 3285 r->system_bytes = pageheap->SystemBytes(); 3286 r->metadata_bytes = metadata_system_bytes; 3287 r->pageheap_bytes = pageheap->FreeBytes(); 3288 } 3289 } 3290 #endif 3291 3292 #ifndef WTF_CHANGES 3293 // WRITE stats to "out" 3294 static void DumpStats(TCMalloc_Printer* out, int level) { 3295 TCMallocStats stats; 3296 uint64_t class_count[kNumClasses]; 3297 ExtractStats(&stats, (level >= 2 ? 
class_count : NULL)); 3298 3299 if (level >= 2) { 3300 out->printf("------------------------------------------------\n"); 3301 uint64_t cumulative = 0; 3302 for (int cl = 0; cl < kNumClasses; ++cl) { 3303 if (class_count[cl] > 0) { 3304 uint64_t class_bytes = class_count[cl] * ByteSizeForClass(cl); 3305 cumulative += class_bytes; 3306 out->printf("class %3d [ %8" PRIuS " bytes ] : " 3307 "%8" PRIu64 " objs; %5.1f MB; %5.1f cum MB\n", 3308 cl, ByteSizeForClass(cl), 3309 class_count[cl], 3310 class_bytes / 1048576.0, 3311 cumulative / 1048576.0); 3312 } 3313 } 3314 3315 SpinLockHolder h(&pageheap_lock); 3316 pageheap->Dump(out); 3317 } 3318 3319 const uint64_t bytes_in_use = stats.system_bytes 3320 - stats.pageheap_bytes 3321 - stats.central_bytes 3322 - stats.transfer_bytes 3323 - stats.thread_bytes; 3324 3325 out->printf("------------------------------------------------\n" 3326 "MALLOC: %12" PRIu64 " Heap size\n" 3327 "MALLOC: %12" PRIu64 " Bytes in use by application\n" 3328 "MALLOC: %12" PRIu64 " Bytes free in page heap\n" 3329 "MALLOC: %12" PRIu64 " Bytes free in central cache\n" 3330 "MALLOC: %12" PRIu64 " Bytes free in transfer cache\n" 3331 "MALLOC: %12" PRIu64 " Bytes free in thread caches\n" 3332 "MALLOC: %12" PRIu64 " Spans in use\n" 3333 "MALLOC: %12" PRIu64 " Thread heaps in use\n" 3334 "MALLOC: %12" PRIu64 " Metadata allocated\n" 3335 "------------------------------------------------\n", 3336 stats.system_bytes, 3337 bytes_in_use, 3338 stats.pageheap_bytes, 3339 stats.central_bytes, 3340 stats.transfer_bytes, 3341 stats.thread_bytes, 3342 uint64_t(span_allocator.inuse()), 3343 uint64_t(threadheap_allocator.inuse()), 3344 stats.metadata_bytes); 3345 } 3346 3347 static void PrintStats(int level) { 3348 const int kBufferSize = 16 << 10; 3349 char* buffer = new char[kBufferSize]; 3350 TCMalloc_Printer printer(buffer, kBufferSize); 3351 DumpStats(&printer, level); 3352 write(STDERR_FILENO, buffer, strlen(buffer)); 3353 delete[] buffer; 3354 } 3355 3356 static void** DumpStackTraces() { 3357 // Count how much space we need 3358 int needed_slots = 0; 3359 { 3360 SpinLockHolder h(&pageheap_lock); 3361 for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) { 3362 StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects); 3363 needed_slots += 3 + stack->depth; 3364 } 3365 needed_slots += 100; // Slop in case sample grows 3366 needed_slots += needed_slots/8; // An extra 12.5% slop 3367 } 3368 3369 void** result = new void*[needed_slots]; 3370 if (result == NULL) { 3371 MESSAGE("tcmalloc: could not allocate %d slots for stack traces\n", 3372 needed_slots); 3373 return NULL; 3374 } 3375 3376 SpinLockHolder h(&pageheap_lock); 3377 int used_slots = 0; 3378 for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) { 3379 ASSERT(used_slots < needed_slots); // Need to leave room for terminator 3380 StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects); 3381 if (used_slots + 3 + stack->depth >= needed_slots) { 3382 // No more room 3383 break; 3384 } 3385 3386 result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1)); 3387 result[used_slots+1] = reinterpret_cast<void*>(stack->size); 3388 result[used_slots+2] = reinterpret_cast<void*>(stack->depth); 3389 for (int d = 0; d < stack->depth; d++) { 3390 result[used_slots+3+d] = stack->stack[d]; 3391 } 3392 used_slots += 3 + stack->depth; 3393 } 3394 result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0)); 3395 return result; 3396 } 3397 #endif 3398 3399 #ifndef WTF_CHANGES 
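// The MallocExtension hooks below (GetStats, GetNumericProperty,
// ReleaseFreeMemory, ...) are only compiled into the stand-alone tcmalloc build;
// the WebKit build (WTF_CHANGES) does not expose them.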
3400 3401 // TCMalloc's support for extra malloc interfaces 3402 class TCMallocImplementation : public MallocExtension { 3403 public: 3404 virtual void GetStats(char* buffer, int buffer_length) { 3405 ASSERT(buffer_length > 0); 3406 TCMalloc_Printer printer(buffer, buffer_length); 3407 3408 // Print level one stats unless lots of space is available 3409 if (buffer_length < 10000) { 3410 DumpStats(&printer, 1); 3411 } else { 3412 DumpStats(&printer, 2); 3413 } 3414 } 3415 3416 virtual void** ReadStackTraces() { 3417 return DumpStackTraces(); 3418 } 3419 3420 virtual bool GetNumericProperty(const char* name, size_t* value) { 3421 ASSERT(name != NULL); 3422 3423 if (strcmp(name, "generic.current_allocated_bytes") == 0) { 3424 TCMallocStats stats; 3425 ExtractStats(&stats, NULL); 3426 *value = stats.system_bytes 3427 - stats.thread_bytes 3428 - stats.central_bytes 3429 - stats.pageheap_bytes; 3430 return true; 3431 } 3432 3433 if (strcmp(name, "generic.heap_size") == 0) { 3434 TCMallocStats stats; 3435 ExtractStats(&stats, NULL); 3436 *value = stats.system_bytes; 3437 return true; 3438 } 3439 3440 if (strcmp(name, "tcmalloc.slack_bytes") == 0) { 3441 // We assume that bytes in the page heap are not fragmented too 3442 // badly, and are therefore available for allocation. 3443 SpinLockHolder l(&pageheap_lock); 3444 *value = pageheap->FreeBytes(); 3445 return true; 3446 } 3447 3448 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) { 3449 SpinLockHolder l(&pageheap_lock); 3450 *value = overall_thread_cache_size; 3451 return true; 3452 } 3453 3454 if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) { 3455 TCMallocStats stats; 3456 ExtractStats(&stats, NULL); 3457 *value = stats.thread_bytes; 3458 return true; 3459 } 3460 3461 return false; 3462 } 3463 3464 virtual bool SetNumericProperty(const char* name, size_t value) { 3465 ASSERT(name != NULL); 3466 3467 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) { 3468 // Clip the value to a reasonable range 3469 if (value < kMinThreadCacheSize) value = kMinThreadCacheSize; 3470 if (value > (1<<30)) value = (1<<30); // Limit to 1GB 3471 3472 SpinLockHolder l(&pageheap_lock); 3473 overall_thread_cache_size = static_cast<size_t>(value); 3474 TCMalloc_ThreadCache::RecomputeThreadCacheSize(); 3475 return true; 3476 } 3477 3478 return false; 3479 } 3480 3481 virtual void MarkThreadIdle() { 3482 TCMalloc_ThreadCache::BecomeIdle(); 3483 } 3484 3485 virtual void ReleaseFreeMemory() { 3486 SpinLockHolder h(&pageheap_lock); 3487 pageheap->ReleaseFreePages(); 3488 } 3489 }; 3490 #endif 3491 3492 // The constructor allocates an object to ensure that initialization 3493 // runs before main(), and therefore we do not have a chance to become 3494 // multi-threaded before initialization. We also create the TSD key 3495 // here. Presumably by the time this constructor runs, glibc is in 3496 // good enough shape to handle pthread_key_create(). 3497 // 3498 // The constructor also takes the opportunity to tell STL to use 3499 // tcmalloc. We want to do this early, before construct time, so 3500 // all user STL allocations go through tcmalloc (which works really 3501 // well for STL). 3502 // 3503 // The destructor prints stats when the program exits. 
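// [Editorial note] Before the guard class declared below: the
// TCMallocImplementation extension above exposes the allocator's knobs as
// named numeric properties. A hedged sketch of how a client might read and
// clamp the total thread-cache budget; MallocExtension::instance() is the
// accessor used in the stock tcmalloc tree and, like the rest of this
// extension, is compiled out under WTF_CHANGES, so treat this purely as
// illustration.
#if 0
static void capThreadCacheBudget(size_t maxBytes) {
    size_t current = 0;
    if (MallocExtension::instance()->GetNumericProperty(
            "tcmalloc.max_total_thread_cache_bytes", &current)) {
        // SetNumericProperty() above clips the value to a sane range and then
        // calls RecomputeThreadCacheSize() to re-divide it across threads.
        if (current > maxBytes)
            MallocExtension::instance()->SetNumericProperty(
                "tcmalloc.max_total_thread_cache_bytes", maxBytes);
    }
}
#endif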
3504 class TCMallocGuard { 3505 public: 3506 3507 TCMallocGuard() { 3508 #ifdef HAVE_TLS // this is true if the cc/ld/libc combo support TLS 3509 // Check whether the kernel also supports TLS (needs to happen at runtime) 3510 CheckIfKernelSupportsTLS(); 3511 #endif 3512 #ifndef WTF_CHANGES 3513 #ifdef WIN32 // patch the windows VirtualAlloc, etc. 3514 PatchWindowsFunctions(); // defined in windows/patch_functions.cc 3515 #endif 3516 #endif 3517 free(malloc(1)); 3518 TCMalloc_ThreadCache::InitTSD(); 3519 free(malloc(1)); 3520 #ifndef WTF_CHANGES 3521 MallocExtension::Register(new TCMallocImplementation); 3522 #endif 3523 } 3524 3525 #ifndef WTF_CHANGES 3526 ~TCMallocGuard() { 3527 const char* env = getenv("MALLOCSTATS"); 3528 if (env != NULL) { 3529 int level = atoi(env); 3530 if (level < 1) level = 1; 3531 PrintStats(level); 3532 } 3533 #ifdef WIN32 3534 UnpatchWindowsFunctions(); 3535 #endif 3536 } 3537 #endif 3538 }; 3539 3540 #ifndef WTF_CHANGES 3541 static TCMallocGuard module_enter_exit_hook; 3542 #endif 3543 3544 3545 //------------------------------------------------------------------- 3546 // Helpers for the exported routines below 3547 //------------------------------------------------------------------- 3548 3549 #ifndef WTF_CHANGES 3550 3551 static Span* DoSampledAllocation(size_t size) { 3552 3553 // Grab the stack trace outside the heap lock 3554 StackTrace tmp; 3555 tmp.depth = GetStackTrace(tmp.stack, kMaxStackDepth, 1); 3556 tmp.size = size; 3557 3558 SpinLockHolder h(&pageheap_lock); 3559 // Allocate span 3560 Span *span = pageheap->New(pages(size == 0 ? 1 : size)); 3561 if (span == NULL) { 3562 return NULL; 3563 } 3564 3565 // Allocate stack trace 3566 StackTrace *stack = stacktrace_allocator.New(); 3567 if (stack == NULL) { 3568 // Sampling failed because of lack of memory 3569 return span; 3570 } 3571 3572 *stack = tmp; 3573 span->sample = 1; 3574 span->objects = stack; 3575 DLL_Prepend(&sampled_objects, span); 3576 3577 return span; 3578 } 3579 #endif 3580 3581 static inline bool CheckCachedSizeClass(void *ptr) { 3582 PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; 3583 size_t cached_value = pageheap->GetSizeClassIfCached(p); 3584 return cached_value == 0 || 3585 cached_value == pageheap->GetDescriptor(p)->sizeclass; 3586 } 3587 3588 static inline void* CheckedMallocResult(void *result) 3589 { 3590 ASSERT(result == 0 || CheckCachedSizeClass(result)); 3591 return result; 3592 } 3593 3594 static inline void* SpanToMallocResult(Span *span) { 3595 ASSERT_SPAN_COMMITTED(span); 3596 pageheap->CacheSizeClass(span->start, 0); 3597 return 3598 CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift)); 3599 } 3600 3601 #ifdef WTF_CHANGES 3602 template <bool crashOnFailure> 3603 #endif 3604 static ALWAYS_INLINE void* do_malloc(size_t size) { 3605 void* ret = NULL; 3606 3607 #ifdef WTF_CHANGES 3608 ASSERT(!isForbidden()); 3609 #endif 3610 3611 // The following call forces module initialization 3612 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache(); 3613 #ifndef WTF_CHANGES 3614 if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) { 3615 Span* span = DoSampledAllocation(size); 3616 if (span != NULL) { 3617 ret = SpanToMallocResult(span); 3618 } 3619 } else 3620 #endif 3621 if (size > kMaxSize) { 3622 // Use page-level allocator 3623 SpinLockHolder h(&pageheap_lock); 3624 Span* span = pageheap->New(pages(size)); 3625 if (span != NULL) { 3626 ret = SpanToMallocResult(span); 3627 } 3628 } else { 3629 // The common case, and 
also the simplest. This just pops the 3630 // size-appropriate freelist, after replenishing it if it's empty. 3631 ret = CheckedMallocResult(heap->Allocate(size)); 3632 } 3633 if (!ret) { 3634 #ifdef WTF_CHANGES 3635 if (crashOnFailure) // This branch should be optimized out by the compiler. 3636 CRASH(); 3637 #else 3638 errno = ENOMEM; 3639 #endif 3640 } 3641 return ret; 3642 } 3643 3644 static ALWAYS_INLINE void do_free(void* ptr) { 3645 if (ptr == NULL) return; 3646 ASSERT(pageheap != NULL); // Should not call free() before malloc() 3647 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; 3648 Span* span = NULL; 3649 size_t cl = pageheap->GetSizeClassIfCached(p); 3650 3651 if (cl == 0) { 3652 span = pageheap->GetDescriptor(p); 3653 cl = span->sizeclass; 3654 pageheap->CacheSizeClass(p, cl); 3655 } 3656 if (cl != 0) { 3657 #ifndef NO_TCMALLOC_SAMPLES 3658 ASSERT(!pageheap->GetDescriptor(p)->sample); 3659 #endif 3660 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCacheIfPresent(); 3661 if (heap != NULL) { 3662 heap->Deallocate(ptr, cl); 3663 } else { 3664 // Delete directly into central cache 3665 SLL_SetNext(ptr, NULL); 3666 central_cache[cl].InsertRange(ptr, ptr, 1); 3667 } 3668 } else { 3669 SpinLockHolder h(&pageheap_lock); 3670 ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); 3671 ASSERT(span != NULL && span->start == p); 3672 #ifndef NO_TCMALLOC_SAMPLES 3673 if (span->sample) { 3674 DLL_Remove(span); 3675 stacktrace_allocator.Delete(reinterpret_cast<StackTrace*>(span->objects)); 3676 span->objects = NULL; 3677 } 3678 #endif 3679 pageheap->Delete(span); 3680 } 3681 } 3682 3683 #ifndef WTF_CHANGES 3684 // For use by exported routines below that want specific alignments 3685 // 3686 // Note: this code can be slow, and can significantly fragment memory. 3687 // The expectation is that memalign/posix_memalign/valloc/pvalloc will 3688 // not be invoked very often. This requirement simplifies our 3689 // implementation and allows us to tune for expected allocation 3690 // patterns. 3691 static void* do_memalign(size_t align, size_t size) { 3692 ASSERT((align & (align - 1)) == 0); 3693 ASSERT(align > 0); 3694 if (pageheap == NULL) TCMalloc_ThreadCache::InitModule(); 3695 3696 // Allocate at least one byte to avoid boundary conditions below 3697 if (size == 0) size = 1; 3698 3699 if (size <= kMaxSize && align < kPageSize) { 3700 // Search through acceptable size classes looking for one with 3701 // enough alignment. This depends on the fact that 3702 // InitSizeClasses() currently produces several size classes that 3703 // are aligned at powers of two. We will waste time and space if 3704 // we miss in the size class array, but that is deemed acceptable 3705 // since memalign() should be used rarely. 3706 size_t cl = SizeClass(size); 3707 while (cl < kNumClasses && ((class_to_size[cl] & (align - 1)) != 0)) { 3708 cl++; 3709 } 3710 if (cl < kNumClasses) { 3711 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache(); 3712 return CheckedMallocResult(heap->Allocate(class_to_size[cl])); 3713 } 3714 } 3715 3716 // We will allocate directly from the page heap 3717 SpinLockHolder h(&pageheap_lock); 3718 3719 if (align <= kPageSize) { 3720 // Any page-level allocation will be fine 3721 // TODO: We could put the rest of this page in the appropriate 3722 // TODO: cache but it does not seem worth it. 3723 Span* span = pageheap->New(pages(size)); 3724 return span == NULL ?
NULL : SpanToMallocResult(span); 3725 } 3726 3727 // Allocate extra pages and carve off an aligned portion 3728 const Length alloc = pages(size + align); 3729 Span* span = pageheap->New(alloc); 3730 if (span == NULL) return NULL; 3731 3732 // Skip starting portion so that we end up aligned 3733 Length skip = 0; 3734 while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) { 3735 skip++; 3736 } 3737 ASSERT(skip < alloc); 3738 if (skip > 0) { 3739 Span* rest = pageheap->Split(span, skip); 3740 pageheap->Delete(span); 3741 span = rest; 3742 } 3743 3744 // Skip trailing portion that we do not need to return 3745 const Length needed = pages(size); 3746 ASSERT(span->length >= needed); 3747 if (span->length > needed) { 3748 Span* trailer = pageheap->Split(span, needed); 3749 pageheap->Delete(trailer); 3750 } 3751 return SpanToMallocResult(span); 3752 } 3753 #endif 3754 3755 // Helpers for use by exported routines below: 3756 3757 #ifndef WTF_CHANGES 3758 static inline void do_malloc_stats() { 3759 PrintStats(1); 3760 } 3761 #endif 3762 3763 static inline int do_mallopt(int, int) { 3764 return 1; // Indicates error 3765 } 3766 3767 #ifdef HAVE_STRUCT_MALLINFO // mallinfo isn't defined on freebsd, for instance 3768 static inline struct mallinfo do_mallinfo() { 3769 TCMallocStats stats; 3770 ExtractStats(&stats, NULL); 3771 3772 // Just some of the fields are filled in. 3773 struct mallinfo info; 3774 memset(&info, 0, sizeof(info)); 3775 3776 // Unfortunately, the struct contains "int" field, so some of the 3777 // size values will be truncated. 3778 info.arena = static_cast<int>(stats.system_bytes); 3779 info.fsmblks = static_cast<int>(stats.thread_bytes 3780 + stats.central_bytes 3781 + stats.transfer_bytes); 3782 info.fordblks = static_cast<int>(stats.pageheap_bytes); 3783 info.uordblks = static_cast<int>(stats.system_bytes 3784 - stats.thread_bytes 3785 - stats.central_bytes 3786 - stats.transfer_bytes 3787 - stats.pageheap_bytes); 3788 3789 return info; 3790 } 3791 #endif 3792 3793 //------------------------------------------------------------------- 3794 // Exported routines 3795 //------------------------------------------------------------------- 3796 3797 // CAVEAT: The code structure below ensures that MallocHook methods are always 3798 // called from the stack frame of the invoked allocation function. 3799 // heap-checker.cc depends on this to start a stack trace from 3800 // the call to the (de)allocation function. 3801 3802 #ifndef WTF_CHANGES 3803 extern "C" 3804 #else 3805 #define do_malloc do_malloc<crashOnFailure> 3806 3807 template <bool crashOnFailure> 3808 ALWAYS_INLINE void* malloc(size_t); 3809 3810 void* fastMalloc(size_t size) 3811 { 3812 return malloc<true>(size); 3813 } 3814 3815 TryMallocReturnValue tryFastMalloc(size_t size) 3816 { 3817 return malloc<false>(size); 3818 } 3819 3820 template <bool crashOnFailure> 3821 ALWAYS_INLINE 3822 #endif 3823 void* malloc(size_t size) { 3824 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 3825 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= size) // If overflow would occur... 
3826 return 0; 3827 size += sizeof(AllocAlignmentInteger); 3828 void* result = do_malloc(size); 3829 if (!result) 3830 return 0; 3831 3832 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc; 3833 result = static_cast<AllocAlignmentInteger*>(result) + 1; 3834 #else 3835 void* result = do_malloc(size); 3836 #endif 3837 3838 #ifndef WTF_CHANGES 3839 MallocHook::InvokeNewHook(result, size); 3840 #endif 3841 return result; 3842 } 3843 3844 #ifndef WTF_CHANGES 3845 extern "C" 3846 #endif 3847 void free(void* ptr) { 3848 #ifndef WTF_CHANGES 3849 MallocHook::InvokeDeleteHook(ptr); 3850 #endif 3851 3852 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 3853 if (!ptr) 3854 return; 3855 3856 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(ptr); 3857 if (*header != Internal::AllocTypeMalloc) 3858 Internal::fastMallocMatchFailed(ptr); 3859 do_free(header); 3860 #else 3861 do_free(ptr); 3862 #endif 3863 } 3864 3865 #ifndef WTF_CHANGES 3866 extern "C" 3867 #else 3868 template <bool crashOnFailure> 3869 ALWAYS_INLINE void* calloc(size_t, size_t); 3870 3871 void* fastCalloc(size_t n, size_t elem_size) 3872 { 3873 return calloc<true>(n, elem_size); 3874 } 3875 3876 TryMallocReturnValue tryFastCalloc(size_t n, size_t elem_size) 3877 { 3878 return calloc<false>(n, elem_size); 3879 } 3880 3881 template <bool crashOnFailure> 3882 ALWAYS_INLINE 3883 #endif 3884 void* calloc(size_t n, size_t elem_size) { 3885 size_t totalBytes = n * elem_size; 3886 3887 // Protect against overflow 3888 if (n > 1 && elem_size && (totalBytes / elem_size) != n) 3889 return 0; 3890 3891 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 3892 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes) // If overflow would occur... 3893 return 0; 3894 3895 totalBytes += sizeof(AllocAlignmentInteger); 3896 void* result = do_malloc(totalBytes); 3897 if (!result) 3898 return 0; 3899 3900 memset(result, 0, totalBytes); 3901 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc; 3902 result = static_cast<AllocAlignmentInteger*>(result) + 1; 3903 #else 3904 void* result = do_malloc(totalBytes); 3905 if (result != NULL) { 3906 memset(result, 0, totalBytes); 3907 } 3908 #endif 3909 3910 #ifndef WTF_CHANGES 3911 MallocHook::InvokeNewHook(result, totalBytes); 3912 #endif 3913 return result; 3914 } 3915 3916 // Since cfree isn't used anywhere, we don't compile it in. 
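// [Editorial note] An aside on calloc() above: the n * elem_size product is
// validated by dividing it back by elem_size and checking that n is
// recovered, which catches size_t wrap-around without needing a wider
// integer type. A standalone sketch of that check (the helper name and the
// example values are ours, for illustration only):
#if 0
static bool multiplicationOverflows(size_t n, size_t elemSize, size_t* total) {
    size_t product = n * elemSize;          // may wrap on overflow
    if (n > 1 && elemSize && product / elemSize != n)
        return true;                        // wrapped: division does not undo it
    *total = product;
    return false;
}
// Example: with a 32-bit size_t, 0x10000 * 0x10001 wraps to 0x10000, and
// 0x10000 / 0x10001 == 0 != 0x10000, so the overflow is detected.
#endif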
3917 #ifndef WTF_CHANGES 3918 #ifndef WTF_CHANGES 3919 extern "C" 3920 #endif 3921 void cfree(void* ptr) { 3922 #ifndef WTF_CHANGES 3923 MallocHook::InvokeDeleteHook(ptr); 3924 #endif 3925 do_free(ptr); 3926 } 3927 #endif 3928 3929 #ifndef WTF_CHANGES 3930 extern "C" 3931 #else 3932 template <bool crashOnFailure> 3933 ALWAYS_INLINE void* realloc(void*, size_t); 3934 3935 void* fastRealloc(void* old_ptr, size_t new_size) 3936 { 3937 return realloc<true>(old_ptr, new_size); 3938 } 3939 3940 TryMallocReturnValue tryFastRealloc(void* old_ptr, size_t new_size) 3941 { 3942 return realloc<false>(old_ptr, new_size); 3943 } 3944 3945 template <bool crashOnFailure> 3946 ALWAYS_INLINE 3947 #endif 3948 void* realloc(void* old_ptr, size_t new_size) { 3949 if (old_ptr == NULL) { 3950 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 3951 void* result = malloc(new_size); 3952 #else 3953 void* result = do_malloc(new_size); 3954 #ifndef WTF_CHANGES 3955 MallocHook::InvokeNewHook(result, new_size); 3956 #endif 3957 #endif 3958 return result; 3959 } 3960 if (new_size == 0) { 3961 #ifndef WTF_CHANGES 3962 MallocHook::InvokeDeleteHook(old_ptr); 3963 #endif 3964 free(old_ptr); 3965 return NULL; 3966 } 3967 3968 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 3969 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= new_size) // If overflow would occur... 3970 return 0; 3971 new_size += sizeof(AllocAlignmentInteger); 3972 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(old_ptr); 3973 if (*header != Internal::AllocTypeMalloc) 3974 Internal::fastMallocMatchFailed(old_ptr); 3975 old_ptr = header; 3976 #endif 3977 3978 // Get the size of the old entry 3979 const PageID p = reinterpret_cast<uintptr_t>(old_ptr) >> kPageShift; 3980 size_t cl = pageheap->GetSizeClassIfCached(p); 3981 Span *span = NULL; 3982 size_t old_size; 3983 if (cl == 0) { 3984 span = pageheap->GetDescriptor(p); 3985 cl = span->sizeclass; 3986 pageheap->CacheSizeClass(p, cl); 3987 } 3988 if (cl != 0) { 3989 old_size = ByteSizeForClass(cl); 3990 } else { 3991 ASSERT(span != NULL); 3992 old_size = span->length << kPageShift; 3993 } 3994 3995 // Reallocate if the new size is larger than the old size, 3996 // or if the new size is significantly smaller than the old size. 3997 if ((new_size > old_size) || (AllocationSize(new_size) < old_size)) { 3998 // Need to reallocate 3999 void* new_ptr = do_malloc(new_size); 4000 if (new_ptr == NULL) { 4001 return NULL; 4002 } 4003 #ifndef WTF_CHANGES 4004 MallocHook::InvokeNewHook(new_ptr, new_size); 4005 #endif 4006 memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size)); 4007 #ifndef WTF_CHANGES 4008 MallocHook::InvokeDeleteHook(old_ptr); 4009 #endif 4010 // We could use a variant of do_free() that leverages the fact 4011 // that we already know the sizeclass of old_ptr. The benefit 4012 // would be small, so don't bother. 4013 do_free(old_ptr); 4014 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 4015 new_ptr = static_cast<AllocAlignmentInteger*>(new_ptr) + 1; 4016 #endif 4017 return new_ptr; 4018 } else { 4019 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 4020 old_ptr = static_cast<AllocAlignmentInteger*>(old_ptr) + 1; // Set old_ptr back to the user pointer. 
4021 #endif 4022 return old_ptr; 4023 } 4024 } 4025 4026 #ifdef WTF_CHANGES 4027 #undef do_malloc 4028 #else 4029 4030 static SpinLock set_new_handler_lock = SPINLOCK_INITIALIZER; 4031 4032 static inline void* cpp_alloc(size_t size, bool nothrow) { 4033 for (;;) { 4034 void* p = do_malloc(size); 4035 #ifdef PREANSINEW 4036 return p; 4037 #else 4038 if (p == NULL) { // allocation failed 4039 // Get the current new handler. NB: this function is not 4040 // thread-safe. We make a feeble stab at making it so here, but 4041 // this lock only protects against tcmalloc interfering with 4042 // itself, not with other libraries calling set_new_handler. 4043 std::new_handler nh; 4044 { 4045 SpinLockHolder h(&set_new_handler_lock); 4046 nh = std::set_new_handler(0); 4047 (void) std::set_new_handler(nh); 4048 } 4049 // If no new_handler is established, the allocation failed. 4050 if (!nh) { 4051 if (nothrow) return 0; 4052 throw std::bad_alloc(); 4053 } 4054 // Otherwise, try the new_handler. If it returns, retry the 4055 // allocation. If it throws std::bad_alloc, fail the allocation. 4056 // if it throws something else, don't interfere. 4057 try { 4058 (*nh)(); 4059 } catch (const std::bad_alloc&) { 4060 if (!nothrow) throw; 4061 return p; 4062 } 4063 } else { // allocation success 4064 return p; 4065 } 4066 #endif 4067 } 4068 } 4069 4070 #if ENABLE(GLOBAL_FASTMALLOC_NEW) 4071 4072 void* operator new(size_t size) { 4073 void* p = cpp_alloc(size, false); 4074 // We keep this next instruction out of cpp_alloc for a reason: when 4075 // it's in, and new just calls cpp_alloc, the optimizer may fold the 4076 // new call into cpp_alloc, which messes up our whole section-based 4077 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc 4078 // isn't the last thing this fn calls, and prevents the folding. 4079 MallocHook::InvokeNewHook(p, size); 4080 return p; 4081 } 4082 4083 void* operator new(size_t size, const std::nothrow_t&) __THROW { 4084 void* p = cpp_alloc(size, true); 4085 MallocHook::InvokeNewHook(p, size); 4086 return p; 4087 } 4088 4089 void operator delete(void* p) __THROW { 4090 MallocHook::InvokeDeleteHook(p); 4091 do_free(p); 4092 } 4093 4094 void operator delete(void* p, const std::nothrow_t&) __THROW { 4095 MallocHook::InvokeDeleteHook(p); 4096 do_free(p); 4097 } 4098 4099 void* operator new[](size_t size) { 4100 void* p = cpp_alloc(size, false); 4101 // We keep this next instruction out of cpp_alloc for a reason: when 4102 // it's in, and new just calls cpp_alloc, the optimizer may fold the 4103 // new call into cpp_alloc, which messes up our whole section-based 4104 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc 4105 // isn't the last thing this fn calls, and prevents the folding. 
4106 MallocHook::InvokeNewHook(p, size); 4107 return p; 4108 } 4109 4110 void* operator new[](size_t size, const std::nothrow_t&) __THROW { 4111 void* p = cpp_alloc(size, true); 4112 MallocHook::InvokeNewHook(p, size); 4113 return p; 4114 } 4115 4116 void operator delete[](void* p) __THROW { 4117 MallocHook::InvokeDeleteHook(p); 4118 do_free(p); 4119 } 4120 4121 void operator delete[](void* p, const std::nothrow_t&) __THROW { 4122 MallocHook::InvokeDeleteHook(p); 4123 do_free(p); 4124 } 4125 4126 #endif 4127 4128 extern "C" void* memalign(size_t align, size_t size) __THROW { 4129 void* result = do_memalign(align, size); 4130 MallocHook::InvokeNewHook(result, size); 4131 return result; 4132 } 4133 4134 extern "C" int posix_memalign(void** result_ptr, size_t align, size_t size) 4135 __THROW { 4136 if (((align % sizeof(void*)) != 0) || 4137 ((align & (align - 1)) != 0) || 4138 (align == 0)) { 4139 return EINVAL; 4140 } 4141 4142 void* result = do_memalign(align, size); 4143 MallocHook::InvokeNewHook(result, size); 4144 if (result == NULL) { 4145 return ENOMEM; 4146 } else { 4147 *result_ptr = result; 4148 return 0; 4149 } 4150 } 4151 4152 static size_t pagesize = 0; 4153 4154 extern "C" void* valloc(size_t size) __THROW { 4155 // Allocate page-aligned object of length >= size bytes 4156 if (pagesize == 0) pagesize = getpagesize(); 4157 void* result = do_memalign(pagesize, size); 4158 MallocHook::InvokeNewHook(result, size); 4159 return result; 4160 } 4161 4162 extern "C" void* pvalloc(size_t size) __THROW { 4163 // Round up size to a multiple of pagesize 4164 if (pagesize == 0) pagesize = getpagesize(); 4165 size = (size + pagesize - 1) & ~(pagesize - 1); 4166 void* result = do_memalign(pagesize, size); 4167 MallocHook::InvokeNewHook(result, size); 4168 return result; 4169 } 4170 4171 extern "C" void malloc_stats(void) { 4172 do_malloc_stats(); 4173 } 4174 4175 extern "C" int mallopt(int cmd, int value) { 4176 return do_mallopt(cmd, value); 4177 } 4178 4179 #ifdef HAVE_STRUCT_MALLINFO 4180 extern "C" struct mallinfo mallinfo(void) { 4181 return do_mallinfo(); 4182 } 4183 #endif 4184 4185 //------------------------------------------------------------------- 4186 // Some library routines on RedHat 9 allocate memory using malloc() 4187 // and free it using __libc_free() (or vice-versa). Since we provide 4188 // our own implementations of malloc/free, we need to make sure that 4189 // the __libc_XXX variants (defined as part of glibc) also point to 4190 // the same implementations. 4191 //------------------------------------------------------------------- 4192 4193 #if defined(__GLIBC__) 4194 extern "C" { 4195 #if COMPILER(GCC) && !defined(__MACH__) && defined(HAVE___ATTRIBUTE__) 4196 // Potentially faster variants that use the gcc alias extension. 4197 // Mach-O (Darwin) does not support weak aliases, hence the __MACH__ check. 
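// [Editorial note] A weak alias makes an additional symbol resolve to an
// existing definition in the same translation unit, which is how the
// __libc_XXX entry points below end up calling the replacements defined in
// this file. A standalone sketch of the mechanism, with names that are ours
// and purely illustrative:
#if 0
#include <stddef.h>
extern "C" void* my_malloc(size_t size) { (void)size; return 0; }
// my_malloc_alias shares my_malloc's definition; being weak, it also yields
// to any strong definition of the same symbol supplied elsewhere at link time.
extern "C" void* my_malloc_alias(size_t size) __attribute__((weak, alias("my_malloc")));
#endif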
4198 # define ALIAS(x) __attribute__ ((weak, alias (x))) 4199 void* __libc_malloc(size_t size) ALIAS("malloc"); 4200 void __libc_free(void* ptr) ALIAS("free"); 4201 void* __libc_realloc(void* ptr, size_t size) ALIAS("realloc"); 4202 void* __libc_calloc(size_t n, size_t size) ALIAS("calloc"); 4203 void __libc_cfree(void* ptr) ALIAS("cfree"); 4204 void* __libc_memalign(size_t align, size_t s) ALIAS("memalign"); 4205 void* __libc_valloc(size_t size) ALIAS("valloc"); 4206 void* __libc_pvalloc(size_t size) ALIAS("pvalloc"); 4207 int __posix_memalign(void** r, size_t a, size_t s) ALIAS("posix_memalign"); 4208 # undef ALIAS 4209 # else /* not __GNUC__ */ 4210 // Portable wrappers 4211 void* __libc_malloc(size_t size) { return malloc(size); } 4212 void __libc_free(void* ptr) { free(ptr); } 4213 void* __libc_realloc(void* ptr, size_t size) { return realloc(ptr, size); } 4214 void* __libc_calloc(size_t n, size_t size) { return calloc(n, size); } 4215 void __libc_cfree(void* ptr) { cfree(ptr); } 4216 void* __libc_memalign(size_t align, size_t s) { return memalign(align, s); } 4217 void* __libc_valloc(size_t size) { return valloc(size); } 4218 void* __libc_pvalloc(size_t size) { return pvalloc(size); } 4219 int __posix_memalign(void** r, size_t a, size_t s) { 4220 return posix_memalign(r, a, s); 4221 } 4222 # endif /* __GNUC__ */ 4223 } 4224 #endif /* __GLIBC__ */ 4225 4226 // Override __libc_memalign in libc on linux boxes specially. 4227 // They have a bug in libc that causes them to (very rarely) allocate 4228 // with __libc_memalign() yet deallocate with free() and the 4229 // definitions above don't catch it. 4230 // This function is an exception to the rule of calling MallocHook method 4231 // from the stack frame of the allocation function; 4232 // heap-checker handles this special case explicitly. 4233 static void *MemalignOverride(size_t align, size_t size, const void *caller) 4234 __THROW { 4235 void* result = do_memalign(align, size); 4236 MallocHook::InvokeNewHook(result, size); 4237 return result; 4238 } 4239 void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride; 4240 4241 #endif 4242 4243 #ifdef WTF_CHANGES 4244 void releaseFastMallocFreeMemory() 4245 { 4246 // Flush free pages in the current thread cache back to the page heap. 4247 // Low watermark mechanism in Scavenge() prevents full return on the first pass. 4248 // The second pass flushes everything. 
4249 if (TCMalloc_ThreadCache* threadCache = TCMalloc_ThreadCache::GetCacheIfPresent()) { 4250 threadCache->Scavenge(); 4251 threadCache->Scavenge(); 4252 } 4253 4254 SpinLockHolder h(&pageheap_lock); 4255 pageheap->ReleaseFreePages(); 4256 } 4257 4258 FastMallocStatistics fastMallocStatistics() 4259 { 4260 FastMallocStatistics statistics; 4261 4262 SpinLockHolder lockHolder(&pageheap_lock); 4263 statistics.reservedVMBytes = static_cast<size_t>(pageheap->SystemBytes()); 4264 statistics.committedVMBytes = statistics.reservedVMBytes - pageheap->ReturnedBytes(); 4265 4266 statistics.freeListBytes = 0; 4267 for (unsigned cl = 0; cl < kNumClasses; ++cl) { 4268 const int length = central_cache[cl].length(); 4269 const int tc_length = central_cache[cl].tc_length(); 4270 4271 statistics.freeListBytes += ByteSizeForClass(cl) * (length + tc_length); 4272 } 4273 for (TCMalloc_ThreadCache* threadCache = thread_heaps; threadCache ; threadCache = threadCache->next_) 4274 statistics.freeListBytes += threadCache->Size(); 4275 4276 return statistics; 4277 } 4278 4279 size_t fastMallocSize(const void* ptr) 4280 { 4281 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; 4282 Span* span = pageheap->GetDescriptorEnsureSafe(p); 4283 4284 if (!span || span->free) 4285 return 0; 4286 4287 for (void* free = span->objects; free != NULL; free = *((void**) free)) { 4288 if (ptr == free) 4289 return 0; 4290 } 4291 4292 if (size_t cl = span->sizeclass) 4293 return ByteSizeForClass(cl); 4294 4295 return span->length << kPageShift; 4296 } 4297 4298 #if OS(DARWIN) 4299 4300 class FreeObjectFinder { 4301 const RemoteMemoryReader& m_reader; 4302 HashSet<void*> m_freeObjects; 4303 4304 public: 4305 FreeObjectFinder(const RemoteMemoryReader& reader) : m_reader(reader) { } 4306 4307 void visit(void* ptr) { m_freeObjects.add(ptr); } 4308 bool isFreeObject(void* ptr) const { return m_freeObjects.contains(ptr); } 4309 bool isFreeObject(vm_address_t ptr) const { return isFreeObject(reinterpret_cast<void*>(ptr)); } 4310 size_t freeObjectCount() const { return m_freeObjects.size(); } 4311 4312 void findFreeObjects(TCMalloc_ThreadCache* threadCache) 4313 { 4314 for (; threadCache; threadCache = (threadCache->next_ ? 
m_reader(threadCache->next_) : 0)) 4315 threadCache->enumerateFreeObjects(*this, m_reader); 4316 } 4317 4318 void findFreeObjects(TCMalloc_Central_FreeListPadded* centralFreeList, size_t numSizes, TCMalloc_Central_FreeListPadded* remoteCentralFreeList) 4319 { 4320 for (unsigned i = 0; i < numSizes; i++) 4321 centralFreeList[i].enumerateFreeObjects(*this, m_reader, remoteCentralFreeList + i); 4322 } 4323 }; 4324 4325 class PageMapFreeObjectFinder { 4326 const RemoteMemoryReader& m_reader; 4327 FreeObjectFinder& m_freeObjectFinder; 4328 4329 public: 4330 PageMapFreeObjectFinder(const RemoteMemoryReader& reader, FreeObjectFinder& freeObjectFinder) 4331 : m_reader(reader) 4332 , m_freeObjectFinder(freeObjectFinder) 4333 { } 4334 4335 int visit(void* ptr) const 4336 { 4337 if (!ptr) 4338 return 1; 4339 4340 Span* span = m_reader(reinterpret_cast<Span*>(ptr)); 4341 if (!span) 4342 return 1; 4343 4344 if (span->free) { 4345 void* ptr = reinterpret_cast<void*>(span->start << kPageShift); 4346 m_freeObjectFinder.visit(ptr); 4347 } else if (span->sizeclass) { 4348 // Walk the free list of the small-object span, keeping track of each object seen 4349 for (void* nextObject = span->objects; nextObject; nextObject = m_reader.nextEntryInLinkedList(reinterpret_cast<void**>(nextObject))) 4350 m_freeObjectFinder.visit(nextObject); 4351 } 4352 return span->length; 4353 } 4354 }; 4355 4356 class PageMapMemoryUsageRecorder { 4357 task_t m_task; 4358 void* m_context; 4359 unsigned m_typeMask; 4360 vm_range_recorder_t* m_recorder; 4361 const RemoteMemoryReader& m_reader; 4362 const FreeObjectFinder& m_freeObjectFinder; 4363 4364 HashSet<void*> m_seenPointers; 4365 Vector<Span*> m_coalescedSpans; 4366 4367 public: 4368 PageMapMemoryUsageRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader, const FreeObjectFinder& freeObjectFinder) 4369 : m_task(task) 4370 , m_context(context) 4371 , m_typeMask(typeMask) 4372 , m_recorder(recorder) 4373 , m_reader(reader) 4374 , m_freeObjectFinder(freeObjectFinder) 4375 { } 4376 4377 ~PageMapMemoryUsageRecorder() 4378 { 4379 ASSERT(!m_coalescedSpans.size()); 4380 } 4381 4382 void recordPendingRegions() 4383 { 4384 Span* lastSpan = m_coalescedSpans[m_coalescedSpans.size() - 1]; 4385 vm_range_t ptrRange = { m_coalescedSpans[0]->start << kPageShift, 0 }; 4386 ptrRange.size = (lastSpan->start << kPageShift) - ptrRange.address + (lastSpan->length * kPageSize); 4387 4388 // Mark the memory region the spans represent as a candidate for containing pointers 4389 if (m_typeMask & MALLOC_PTR_REGION_RANGE_TYPE) 4390 (*m_recorder)(m_task, m_context, MALLOC_PTR_REGION_RANGE_TYPE, &ptrRange, 1); 4391 4392 if (!(m_typeMask & MALLOC_PTR_IN_USE_RANGE_TYPE)) { 4393 m_coalescedSpans.clear(); 4394 return; 4395 } 4396 4397 Vector<vm_range_t, 1024> allocatedPointers; 4398 for (size_t i = 0; i < m_coalescedSpans.size(); ++i) { 4399 Span *theSpan = m_coalescedSpans[i]; 4400 if (theSpan->free) 4401 continue; 4402 4403 vm_address_t spanStartAddress = theSpan->start << kPageShift; 4404 vm_size_t spanSizeInBytes = theSpan->length * kPageSize; 4405 4406 if (!theSpan->sizeclass) { 4407 // If it's an allocated large object span, mark it as in use 4408 if (!m_freeObjectFinder.isFreeObject(spanStartAddress)) 4409 allocatedPointers.append((vm_range_t){spanStartAddress, spanSizeInBytes}); 4410 } else { 4411 const size_t objectSize = ByteSizeForClass(theSpan->sizeclass); 4412 4413 // Mark each allocated small object within the span as in use 4414 
const vm_address_t endOfSpan = spanStartAddress + spanSizeInBytes; 4415 for (vm_address_t object = spanStartAddress; object + objectSize <= endOfSpan; object += objectSize) { 4416 if (!m_freeObjectFinder.isFreeObject(object)) 4417 allocatedPointers.append((vm_range_t){object, objectSize}); 4418 } 4419 } 4420 } 4421 4422 (*m_recorder)(m_task, m_context, MALLOC_PTR_IN_USE_RANGE_TYPE, allocatedPointers.data(), allocatedPointers.size()); 4423 4424 m_coalescedSpans.clear(); 4425 } 4426 4427 int visit(void* ptr) 4428 { 4429 if (!ptr) 4430 return 1; 4431 4432 Span* span = m_reader(reinterpret_cast<Span*>(ptr)); 4433 if (!span || !span->start) 4434 return 1; 4435 4436 if (m_seenPointers.contains(ptr)) 4437 return span->length; 4438 m_seenPointers.add(ptr); 4439 4440 if (!m_coalescedSpans.size()) { 4441 m_coalescedSpans.append(span); 4442 return span->length; 4443 } 4444 4445 Span* previousSpan = m_coalescedSpans[m_coalescedSpans.size() - 1]; 4446 vm_address_t previousSpanStartAddress = previousSpan->start << kPageShift; 4447 vm_size_t previousSpanSizeInBytes = previousSpan->length * kPageSize; 4448 4449 // If the new span is adjacent to the previous span, do nothing for now. 4450 vm_address_t spanStartAddress = span->start << kPageShift; 4451 if (spanStartAddress == previousSpanStartAddress + previousSpanSizeInBytes) { 4452 m_coalescedSpans.append(span); 4453 return span->length; 4454 } 4455 4456 // New span is not adjacent to previous span, so record the spans coalesced so far. 4457 recordPendingRegions(); 4458 m_coalescedSpans.append(span); 4459 4460 return span->length; 4461 } 4462 }; 4463 4464 class AdminRegionRecorder { 4465 task_t m_task; 4466 void* m_context; 4467 unsigned m_typeMask; 4468 vm_range_recorder_t* m_recorder; 4469 const RemoteMemoryReader& m_reader; 4470 4471 Vector<vm_range_t, 1024> m_pendingRegions; 4472 4473 public: 4474 AdminRegionRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader) 4475 : m_task(task) 4476 , m_context(context) 4477 , m_typeMask(typeMask) 4478 , m_recorder(recorder) 4479 , m_reader(reader) 4480 { } 4481 4482 void recordRegion(vm_address_t ptr, size_t size) 4483 { 4484 if (m_typeMask & MALLOC_ADMIN_REGION_RANGE_TYPE) 4485 m_pendingRegions.append((vm_range_t){ ptr, size }); 4486 } 4487 4488 void visit(void *ptr, size_t size) 4489 { 4490 recordRegion(reinterpret_cast<vm_address_t>(ptr), size); 4491 } 4492 4493 void recordPendingRegions() 4494 { 4495 if (m_pendingRegions.size()) { 4496 (*m_recorder)(m_task, m_context, MALLOC_ADMIN_REGION_RANGE_TYPE, m_pendingRegions.data(), m_pendingRegions.size()); 4497 m_pendingRegions.clear(); 4498 } 4499 } 4500 4501 ~AdminRegionRecorder() 4502 { 4503 ASSERT(!m_pendingRegions.size()); 4504 } 4505 }; 4506 4507 kern_return_t FastMallocZone::enumerate(task_t task, void* context, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t reader, vm_range_recorder_t recorder) 4508 { 4509 RemoteMemoryReader memoryReader(task, reader); 4510 4511 InitSizeClasses(); 4512 4513 FastMallocZone* mzone = memoryReader(reinterpret_cast<FastMallocZone*>(zoneAddress)); 4514 TCMalloc_PageHeap* pageHeap = memoryReader(mzone->m_pageHeap); 4515 TCMalloc_ThreadCache** threadHeapsPointer = memoryReader(mzone->m_threadHeaps); 4516 TCMalloc_ThreadCache* threadHeaps = memoryReader(*threadHeapsPointer); 4517 4518 TCMalloc_Central_FreeListPadded* centralCaches = memoryReader(mzone->m_centralCaches, sizeof(TCMalloc_Central_FreeListPadded) * kNumClasses); 4519 4520 FreeObjectFinder 
finder(memoryReader); 4521 finder.findFreeObjects(threadHeaps); 4522 finder.findFreeObjects(centralCaches, kNumClasses, mzone->m_centralCaches); 4523 4524 TCMalloc_PageHeap::PageMap* pageMap = &pageHeap->pagemap_; 4525 PageMapFreeObjectFinder pageMapFinder(memoryReader, finder); 4526 pageMap->visitValues(pageMapFinder, memoryReader); 4527 4528 PageMapMemoryUsageRecorder usageRecorder(task, context, typeMask, recorder, memoryReader, finder); 4529 pageMap->visitValues(usageRecorder, memoryReader); 4530 usageRecorder.recordPendingRegions(); 4531 4532 AdminRegionRecorder adminRegionRecorder(task, context, typeMask, recorder, memoryReader); 4533 pageMap->visitAllocations(adminRegionRecorder, memoryReader); 4534 4535 PageHeapAllocator<Span>* spanAllocator = memoryReader(mzone->m_spanAllocator); 4536 PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator = memoryReader(mzone->m_pageHeapAllocator); 4537 4538 spanAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader); 4539 pageHeapAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader); 4540 4541 adminRegionRecorder.recordPendingRegions(); 4542 4543 return 0; 4544 } 4545 4546 size_t FastMallocZone::size(malloc_zone_t*, const void*) 4547 { 4548 return 0; 4549 } 4550 4551 void* FastMallocZone::zoneMalloc(malloc_zone_t*, size_t) 4552 { 4553 return 0; 4554 } 4555 4556 void* FastMallocZone::zoneCalloc(malloc_zone_t*, size_t, size_t) 4557 { 4558 return 0; 4559 } 4560 4561 void FastMallocZone::zoneFree(malloc_zone_t*, void* ptr) 4562 { 4563 // Due to <rdar://problem/5671357> zoneFree may be called by the system free even if the pointer 4564 // is not in this zone. When this happens, the pointer being freed was not allocated by any 4565 // zone so we need to print a useful error for the application developer. 4566 malloc_printf("*** error for object %p: pointer being freed was not allocated\n", ptr); 4567 } 4568 4569 void* FastMallocZone::zoneRealloc(malloc_zone_t*, void*, size_t) 4570 { 4571 return 0; 4572 } 4573 4574 4575 #undef malloc 4576 #undef free 4577 #undef realloc 4578 #undef calloc 4579 4580 extern "C" { 4581 malloc_introspection_t jscore_fastmalloc_introspection = { &FastMallocZone::enumerate, &FastMallocZone::goodSize, &FastMallocZone::check, &FastMallocZone::print, 4582 &FastMallocZone::log, &FastMallocZone::forceLock, &FastMallocZone::forceUnlock, &FastMallocZone::statistics 4583 4584 #if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) 4585 , 0 // zone_locked will not be called on the zone unless it advertises itself as version five or higher. 4586 #endif 4587 #if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !defined(BUILDING_ON_SNOW_LEOPARD) 4588 , 0, 0, 0, 0 // These members will not be used unless the zone advertises itself as version seven or higher. 
4589 #endif 4590 4591 }; 4592 } 4593 4594 FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache** threadHeaps, TCMalloc_Central_FreeListPadded* centralCaches, PageHeapAllocator<Span>* spanAllocator, PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator) 4595 : m_pageHeap(pageHeap) 4596 , m_threadHeaps(threadHeaps) 4597 , m_centralCaches(centralCaches) 4598 , m_spanAllocator(spanAllocator) 4599 , m_pageHeapAllocator(pageHeapAllocator) 4600 { 4601 memset(&m_zone, 0, sizeof(m_zone)); 4602 m_zone.version = 4; 4603 m_zone.zone_name = "JavaScriptCore FastMalloc"; 4604 m_zone.size = &FastMallocZone::size; 4605 m_zone.malloc = &FastMallocZone::zoneMalloc; 4606 m_zone.calloc = &FastMallocZone::zoneCalloc; 4607 m_zone.realloc = &FastMallocZone::zoneRealloc; 4608 m_zone.free = &FastMallocZone::zoneFree; 4609 m_zone.valloc = &FastMallocZone::zoneValloc; 4610 m_zone.destroy = &FastMallocZone::zoneDestroy; 4611 m_zone.introspect = &jscore_fastmalloc_introspection; 4612 malloc_zone_register(&m_zone); 4613 } 4614 4615 4616 void FastMallocZone::init() 4617 { 4618 static FastMallocZone zone(pageheap, &thread_heaps, static_cast<TCMalloc_Central_FreeListPadded*>(central_cache), &span_allocator, &threadheap_allocator); 4619 } 4620 4621 #endif // OS(DARWIN) 4622 4623 } // namespace WTF 4624 #endif // WTF_CHANGES 4625 4626 #endif // FORCE_SYSTEM_MALLOC 4627
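// [Editorial note] End-of-file usage sketch for the WTF-facing entry points
// defined above in the non-FORCE_SYSTEM_MALLOC configuration. Not compiled;
// it assumes the usual declarations in FastMalloc.h (including fastFree(),
// the release counterpart to fastMalloc(), which is declared there rather
// than in this section).
#if 0
#include "FastMalloc.h"

static void fastMallocUsageSketch()
{
    using namespace WTF;

    // fastMalloc() crashes on failure (it is malloc<true> above), so the
    // result needs no null check; fastMallocSize() reports the size backing
    // the allocation (its size class, or whole pages for large requests).
    void* block = fastMalloc(100);
    size_t usable = fastMallocSize(block);
    (void)usable;
    fastFree(block);

    // Snapshot allocator-wide numbers, then push free pages back to the OS.
    FastMallocStatistics stats = fastMallocStatistics();
    (void)stats.reservedVMBytes;
    (void)stats.committedVMBytes;
    (void)stats.freeListBytes;
    releaseFastMallocFreeMemory();
}
#endif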