// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat <opensource (at) google.com>
//
// A malloc that uses a per-thread cache to satisfy small malloc requests.
// (The time for malloc/free of a small object drops from 300 ns to 50 ns.)
//
// See doc/tcmalloc.html for a high-level
// description of how this malloc works.
//
// SYNCHRONIZATION
//  1. The thread-specific lists are accessed without acquiring any locks.
//     This is safe because each such list is only accessed by one thread.
//  2. We have a lock per central free-list, and hold it while manipulating
//     the central free list for a particular size.
//  3. The central page allocator is protected by "pageheap_lock".
//  4. The pagemap (which maps from page-number to descriptor)
//     can be read without holding any locks, and written while holding
//     the "pageheap_lock".
//  5. To improve performance, a subset of the information one can get
//     from the pagemap is cached in a data structure, pagemap_cache_,
//     that atomically reads and writes its entries. This cache can be
//     read and written without locking.
//
//     This multi-threaded access to the pagemap is safe for fairly
//     subtle reasons. We basically assume that when an object X is
//     allocated by thread A and deallocated by thread B, there must
//     have been appropriate synchronization in the handoff of object
//     X from thread A to thread B. The same logic applies to pagemap_cache_.
//
// THE PAGEID-TO-SIZECLASS CACHE
// Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache
// returns 0 for a particular PageID then that means "no information," not that
// the sizeclass is 0. The cache may have stale information for pages that do
// not hold the beginning of any free()'able object. Staleness is eliminated
// in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and
// do_memalign() for all other relevant pages.
//
// PAGEMAP
// -------
// Page map contains a mapping from page id to Span.
//
// If Span s occupies pages [p..q],
//      pagemap[p] == s
//      pagemap[q] == s
//      pagemap[p+1..q-1] are undefined
//      pagemap[p-1] and pagemap[q+1] are defined:
//         NULL if the corresponding page is not yet in the address space.
//         Otherwise it points to a Span. This span may be free
//         or allocated. If free, it is in one of pageheap's freelists.
//
// TODO: Bias reclamation to larger addresses
// TODO: implement mallinfo/mallopt
// TODO: Better testing
//
// 9/28/2003 (new page-level allocator replaces ptmalloc2):
//   * malloc/free of small objects goes from ~300 ns to ~50 ns.
//   * allocation of a reasonably complicated struct
//     goes from about 1100 ns to about 300 ns.

#include "config.h"
#include <gperftools/tcmalloc.h>

#include <errno.h>                      // for ENOMEM, EINVAL, errno
#ifdef HAVE_SYS_CDEFS_H
#include <sys/cdefs.h>                  // for __THROW
#endif
#if defined HAVE_STDINT_H
#include <stdint.h>
#elif defined HAVE_INTTYPES_H
#include <inttypes.h>
#else
#include <sys/types.h>
#endif
#include <stddef.h>                     // for size_t, NULL
#include <stdlib.h>                     // for getenv
#include <string.h>                     // for strcmp, memset, strlen, etc
#ifdef HAVE_UNISTD_H
#include <unistd.h>                     // for getpagesize, write, etc
#endif
#include <algorithm>                    // for max, min
#include <limits>                       // for numeric_limits
#include <new>                          // for nothrow_t (ptr only), etc
#include <vector>                       // for vector

#include <gperftools/malloc_extension.h>
#include <gperftools/malloc_hook.h>     // for MallocHook
#include "base/basictypes.h"            // for int64
#include "base/commandlineflags.h"      // for RegisterFlagValidator, etc
#include "base/dynamic_annotations.h"   // for RunningOnValgrind
#include "base/spinlock.h"              // for SpinLockHolder
#include "central_freelist.h"           // for CentralFreeListPadded
#include "common.h"                     // for StackTrace, kPageShift, etc
#include "internal_logging.h"           // for ASSERT, TCMalloc_Printer, etc
#include "linked_list.h"                // for SLL_SetNext
#include "malloc_hook-inl.h"            // for MallocHook::InvokeNewHook, etc
#include "page_heap.h"                  // for PageHeap, PageHeap::Stats
#include "page_heap_allocator.h"        // for PageHeapAllocator
#include "span.h"                       // for Span, DLL_Prepend, etc
#include "stack_trace_table.h"          // for StackTraceTable
#include "static_vars.h"                // for Static
#include "system-alloc.h"               // for DumpSystemAllocatorStats, etc
#include "tcmalloc_guard.h"             // for TCMallocGuard
#include "thread_cache.h"               // for ThreadCache

// We only need malloc.h for struct mallinfo.
#ifdef HAVE_STRUCT_MALLINFO
// Malloc can be in several places on older versions of OS X.
# if defined(HAVE_MALLOC_H)
#  include <malloc.h>
# elif defined(HAVE_SYS_MALLOC_H)
#  include <sys/malloc.h>
# elif defined(HAVE_MALLOC_MALLOC_H)
#  include <malloc/malloc.h>
# endif
#endif

#if (defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)) && !defined(WIN32_OVERRIDE_ALLOCATORS)
# define WIN32_DO_PATCHING 1
#endif

// Some windows file somewhere (at least on cygwin) #define's small (!)
#undef small

using STL_NAMESPACE::max;
using STL_NAMESPACE::numeric_limits;
using STL_NAMESPACE::vector;

#include "libc_override.h"

// __THROW is defined in glibc (via <sys/cdefs.h>). It means,
// counter-intuitively, "This function will never throw an exception."
// It's an optional optimization tool, but we may need to use it to
// match glibc prototypes.
#ifndef __THROW    // I guess we're not on a glibc system
# define __THROW   // __THROW is just an optimization, so ok to make it ""
#endif

using tcmalloc::AlignmentForSize;
using tcmalloc::kLog;
using tcmalloc::kCrash;
using tcmalloc::kCrashWithStats;
using tcmalloc::Log;
using tcmalloc::PageHeap;
using tcmalloc::PageHeapAllocator;
using tcmalloc::SizeMap;
using tcmalloc::Span;
using tcmalloc::StackTrace;
using tcmalloc::Static;
using tcmalloc::ThreadCache;

DECLARE_int64(tcmalloc_sample_parameter);
DECLARE_double(tcmalloc_release_rate);

// For windows, the printf we use to report large allocs is
// potentially dangerous: it could cause a malloc that would cause an
// infinite loop. So by default we set the threshold to a huge number
// on windows, so this bad situation will never trigger. You can
// always set TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD manually if you
// want this functionality.
#ifdef _WIN32
const int64 kDefaultLargeAllocReportThreshold = static_cast<int64>(1) << 62;
#else
const int64 kDefaultLargeAllocReportThreshold = static_cast<int64>(1) << 30;
#endif
DEFINE_int64(tcmalloc_large_alloc_report_threshold,
             EnvToInt64("TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD",
                        kDefaultLargeAllocReportThreshold),
             "Allocations larger than this value cause a stack "
             "trace to be dumped to stderr. The threshold for "
             "dumping stack traces is increased by a factor of 1.125 "
             "every time we print a message so that the threshold "
             "automatically goes up by a factor of ~1000 every 60 "
             "messages. This bounds the amount of extra logging "
             "generated by this flag. Default value of this flag "
             "is very large and therefore you should see no extra "
             "logging unless the flag is overridden. Set to 0 to "
             "disable reporting entirely.");


// We already declared these functions in tcmalloc.h, but we have to
// declare them again to give them an ATTRIBUTE_SECTION: we want to
// put all callers of MallocHook::Invoke* in this module into
// ATTRIBUTE_SECTION(google_malloc) section, so that
// MallocHook::GetCallerStackTrace can function accurately.
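// (Aside on FLAGS_tcmalloc_large_alloc_report_threshold, defined above: the
// "factor of ~1000 every 60 messages" quoted in its description is just
// 1.125^60.  A rough check: ln(1.125) ~= 0.1178, 60 * 0.1178 ~= 7.07, and
// e^7.07 ~= 1.2e3, so sixty reports raise the threshold by roughly three
// orders of magnitude.)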
214 #ifndef _WIN32 // windows doesn't have attribute_section, so don't bother 215 extern "C" { 216 void* tc_malloc(size_t size) __THROW 217 ATTRIBUTE_SECTION(google_malloc); 218 void tc_free(void* ptr) __THROW 219 ATTRIBUTE_SECTION(google_malloc); 220 void* tc_realloc(void* ptr, size_t size) __THROW 221 ATTRIBUTE_SECTION(google_malloc); 222 void* tc_calloc(size_t nmemb, size_t size) __THROW 223 ATTRIBUTE_SECTION(google_malloc); 224 void tc_cfree(void* ptr) __THROW 225 ATTRIBUTE_SECTION(google_malloc); 226 227 void* tc_memalign(size_t __alignment, size_t __size) __THROW 228 ATTRIBUTE_SECTION(google_malloc); 229 int tc_posix_memalign(void** ptr, size_t align, size_t size) __THROW 230 ATTRIBUTE_SECTION(google_malloc); 231 void* tc_valloc(size_t __size) __THROW 232 ATTRIBUTE_SECTION(google_malloc); 233 void* tc_pvalloc(size_t __size) __THROW 234 ATTRIBUTE_SECTION(google_malloc); 235 236 void tc_malloc_stats(void) __THROW 237 ATTRIBUTE_SECTION(google_malloc); 238 int tc_mallopt(int cmd, int value) __THROW 239 ATTRIBUTE_SECTION(google_malloc); 240 #ifdef HAVE_STRUCT_MALLINFO 241 struct mallinfo tc_mallinfo(void) __THROW 242 ATTRIBUTE_SECTION(google_malloc); 243 #endif 244 245 void* tc_new(size_t size) 246 ATTRIBUTE_SECTION(google_malloc); 247 void tc_delete(void* p) __THROW 248 ATTRIBUTE_SECTION(google_malloc); 249 void* tc_newarray(size_t size) 250 ATTRIBUTE_SECTION(google_malloc); 251 void tc_deletearray(void* p) __THROW 252 ATTRIBUTE_SECTION(google_malloc); 253 254 // And the nothrow variants of these: 255 void* tc_new_nothrow(size_t size, const std::nothrow_t&) __THROW 256 ATTRIBUTE_SECTION(google_malloc); 257 void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) __THROW 258 ATTRIBUTE_SECTION(google_malloc); 259 // Surprisingly, standard C++ library implementations use a 260 // nothrow-delete internally. See, eg: 261 // http://www.dinkumware.com/manuals/?manual=compleat&page=new.html 262 void tc_delete_nothrow(void* ptr, const std::nothrow_t&) __THROW 263 ATTRIBUTE_SECTION(google_malloc); 264 void tc_deletearray_nothrow(void* ptr, const std::nothrow_t&) __THROW 265 ATTRIBUTE_SECTION(google_malloc); 266 267 // Some non-standard extensions that we support. 268 269 // This is equivalent to 270 // OS X: malloc_size() 271 // glibc: malloc_usable_size() 272 // Windows: _msize() 273 size_t tc_malloc_size(void* p) __THROW 274 ATTRIBUTE_SECTION(google_malloc); 275 } // extern "C" 276 #endif // #ifndef _WIN32 277 278 // ----------------------- IMPLEMENTATION ------------------------------- 279 280 static int tc_new_mode = 0; // See tc_set_new_mode(). 281 282 // Routines such as free() and realloc() catch some erroneous pointers 283 // passed to them, and invoke the below when they do. (An erroneous pointer 284 // won't be caught if it's within a valid span or a stale span for which 285 // the pagemap cache has a non-zero sizeclass.) This is a cheap (source-editing 286 // required) kind of exception handling for these routines. 
287 namespace { 288 void InvalidFree(void* ptr) { 289 Log(kCrash, __FILE__, __LINE__, "Attempt to free invalid pointer", ptr); 290 } 291 292 size_t InvalidGetSizeForRealloc(const void* old_ptr) { 293 Log(kCrash, __FILE__, __LINE__, 294 "Attempt to realloc invalid pointer", old_ptr); 295 return 0; 296 } 297 298 size_t InvalidGetAllocatedSize(const void* ptr) { 299 Log(kCrash, __FILE__, __LINE__, 300 "Attempt to get the size of an invalid pointer", ptr); 301 return 0; 302 } 303 } // unnamed namespace 304 305 // Extract interesting stats 306 struct TCMallocStats { 307 uint64_t thread_bytes; // Bytes in thread caches 308 uint64_t central_bytes; // Bytes in central cache 309 uint64_t transfer_bytes; // Bytes in central transfer cache 310 uint64_t metadata_bytes; // Bytes alloced for metadata 311 PageHeap::Stats pageheap; // Stats from page heap 312 }; 313 314 // Get stats into "r". Also get per-size-class counts if class_count != NULL 315 static void ExtractStats(TCMallocStats* r, uint64_t* class_count, 316 PageHeap::SmallSpanStats* small_spans, 317 PageHeap::LargeSpanStats* large_spans) { 318 r->central_bytes = 0; 319 r->transfer_bytes = 0; 320 for (int cl = 0; cl < kNumClasses; ++cl) { 321 const int length = Static::central_cache()[cl].length(); 322 const int tc_length = Static::central_cache()[cl].tc_length(); 323 const size_t cache_overhead = Static::central_cache()[cl].OverheadBytes(); 324 const size_t size = static_cast<uint64_t>( 325 Static::sizemap()->ByteSizeForClass(cl)); 326 r->central_bytes += (size * length) + cache_overhead; 327 r->transfer_bytes += (size * tc_length); 328 if (class_count) class_count[cl] = length + tc_length; 329 } 330 331 // Add stats from per-thread heaps 332 r->thread_bytes = 0; 333 { // scope 334 SpinLockHolder h(Static::pageheap_lock()); 335 ThreadCache::GetThreadStats(&r->thread_bytes, class_count); 336 r->metadata_bytes = tcmalloc::metadata_system_bytes(); 337 r->pageheap = Static::pageheap()->stats(); 338 if (small_spans != NULL) { 339 Static::pageheap()->GetSmallSpanStats(small_spans); 340 } 341 if (large_spans != NULL) { 342 Static::pageheap()->GetLargeSpanStats(large_spans); 343 } 344 } 345 } 346 347 static double PagesToMiB(uint64_t pages) { 348 return (pages << kPageShift) / 1048576.0; 349 } 350 351 // WRITE stats to "out" 352 static void DumpStats(TCMalloc_Printer* out, int level) { 353 TCMallocStats stats; 354 uint64_t class_count[kNumClasses]; 355 PageHeap::SmallSpanStats small; 356 PageHeap::LargeSpanStats large; 357 if (level >= 2) { 358 ExtractStats(&stats, class_count, &small, &large); 359 } else { 360 ExtractStats(&stats, NULL, NULL, NULL); 361 } 362 363 static const double MiB = 1048576.0; 364 365 const uint64_t virtual_memory_used = (stats.pageheap.system_bytes 366 + stats.metadata_bytes); 367 const uint64_t physical_memory_used = (virtual_memory_used 368 - stats.pageheap.unmapped_bytes); 369 const uint64_t bytes_in_use_by_app = (physical_memory_used 370 - stats.metadata_bytes 371 - stats.pageheap.free_bytes 372 - stats.central_bytes 373 - stats.transfer_bytes 374 - stats.thread_bytes); 375 376 #ifdef TCMALLOC_SMALL_BUT_SLOW 377 out->printf( 378 "NOTE: SMALL MEMORY MODEL IS IN USE, PERFORMANCE MAY SUFFER.\n"); 379 #endif 380 out->printf( 381 "------------------------------------------------\n" 382 "MALLOC: %12" PRIu64 " (%7.1f MiB) Bytes in use by application\n" 383 "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in page heap freelist\n" 384 "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in central cache freelist\n" 385 "MALLOC: + %12" PRIu64 " 
(%7.1f MiB) Bytes in transfer cache freelist\n" 386 "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in thread cache freelists\n" 387 "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in malloc metadata\n" 388 "MALLOC: ------------\n" 389 "MALLOC: = %12" PRIu64 " (%7.1f MiB) Actual memory used (physical + swap)\n" 390 "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes released to OS (aka unmapped)\n" 391 "MALLOC: ------------\n" 392 "MALLOC: = %12" PRIu64 " (%7.1f MiB) Virtual address space used\n" 393 "MALLOC:\n" 394 "MALLOC: %12" PRIu64 " Spans in use\n" 395 "MALLOC: %12" PRIu64 " Thread heaps in use\n" 396 "MALLOC: %12" PRIu64 " Tcmalloc page size\n" 397 "------------------------------------------------\n" 398 "Call ReleaseFreeMemory() to release freelist memory to the OS" 399 " (via madvise()).\n" 400 "Bytes released to the OS take up virtual address space" 401 " but no physical memory.\n", 402 bytes_in_use_by_app, bytes_in_use_by_app / MiB, 403 stats.pageheap.free_bytes, stats.pageheap.free_bytes / MiB, 404 stats.central_bytes, stats.central_bytes / MiB, 405 stats.transfer_bytes, stats.transfer_bytes / MiB, 406 stats.thread_bytes, stats.thread_bytes / MiB, 407 stats.metadata_bytes, stats.metadata_bytes / MiB, 408 physical_memory_used, physical_memory_used / MiB, 409 stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MiB, 410 virtual_memory_used, virtual_memory_used / MiB, 411 uint64_t(Static::span_allocator()->inuse()), 412 uint64_t(ThreadCache::HeapsInUse()), 413 uint64_t(kPageSize)); 414 415 if (level >= 2) { 416 out->printf("------------------------------------------------\n"); 417 out->printf("Size class breakdown\n"); 418 out->printf("------------------------------------------------\n"); 419 uint64_t cumulative = 0; 420 for (int cl = 0; cl < kNumClasses; ++cl) { 421 if (class_count[cl] > 0) { 422 uint64_t class_bytes = 423 class_count[cl] * Static::sizemap()->ByteSizeForClass(cl); 424 cumulative += class_bytes; 425 out->printf("class %3d [ %8" PRIuS " bytes ] : " 426 "%8" PRIu64 " objs; %5.1f MiB; %5.1f cum MiB\n", 427 cl, Static::sizemap()->ByteSizeForClass(cl), 428 class_count[cl], 429 class_bytes / MiB, 430 cumulative / MiB); 431 } 432 } 433 434 // append page heap info 435 int nonempty_sizes = 0; 436 for (int s = 0; s < kMaxPages; s++) { 437 if (small.normal_length[s] + small.returned_length[s] > 0) { 438 nonempty_sizes++; 439 } 440 } 441 out->printf("------------------------------------------------\n"); 442 out->printf("PageHeap: %d sizes; %6.1f MiB free; %6.1f MiB unmapped\n", 443 nonempty_sizes, stats.pageheap.free_bytes / MiB, 444 stats.pageheap.unmapped_bytes / MiB); 445 out->printf("------------------------------------------------\n"); 446 uint64_t total_normal = 0; 447 uint64_t total_returned = 0; 448 for (int s = 0; s < kMaxPages; s++) { 449 const int n_length = small.normal_length[s]; 450 const int r_length = small.returned_length[s]; 451 if (n_length + r_length > 0) { 452 uint64_t n_pages = s * n_length; 453 uint64_t r_pages = s * r_length; 454 total_normal += n_pages; 455 total_returned += r_pages; 456 out->printf("%6u pages * %6u spans ~ %6.1f MiB; %6.1f MiB cum" 457 "; unmapped: %6.1f MiB; %6.1f MiB cum\n", 458 s, 459 (n_length + r_length), 460 PagesToMiB(n_pages + r_pages), 461 PagesToMiB(total_normal + total_returned), 462 PagesToMiB(r_pages), 463 PagesToMiB(total_returned)); 464 } 465 } 466 467 total_normal += large.normal_pages; 468 total_returned += large.returned_pages; 469 out->printf(">255 large * %6u spans ~ %6.1f MiB; %6.1f MiB cum" 470 "; unmapped: %6.1f MiB; 
%6.1f MiB cum\n", 471 static_cast<unsigned int>(large.spans), 472 PagesToMiB(large.normal_pages + large.returned_pages), 473 PagesToMiB(total_normal + total_returned), 474 PagesToMiB(large.returned_pages), 475 PagesToMiB(total_returned)); 476 } 477 } 478 479 static void PrintStats(int level) { 480 const int kBufferSize = 16 << 10; 481 char* buffer = new char[kBufferSize]; 482 TCMalloc_Printer printer(buffer, kBufferSize); 483 DumpStats(&printer, level); 484 write(STDERR_FILENO, buffer, strlen(buffer)); 485 delete[] buffer; 486 } 487 488 static void** DumpHeapGrowthStackTraces() { 489 // Count how much space we need 490 int needed_slots = 0; 491 { 492 SpinLockHolder h(Static::pageheap_lock()); 493 for (StackTrace* t = Static::growth_stacks(); 494 t != NULL; 495 t = reinterpret_cast<StackTrace*>( 496 t->stack[tcmalloc::kMaxStackDepth-1])) { 497 needed_slots += 3 + t->depth; 498 } 499 needed_slots += 100; // Slop in case list grows 500 needed_slots += needed_slots/8; // An extra 12.5% slop 501 } 502 503 void** result = new void*[needed_slots]; 504 if (result == NULL) { 505 Log(kLog, __FILE__, __LINE__, 506 "tcmalloc: allocation failed for stack trace slots", 507 needed_slots * sizeof(*result)); 508 return NULL; 509 } 510 511 SpinLockHolder h(Static::pageheap_lock()); 512 int used_slots = 0; 513 for (StackTrace* t = Static::growth_stacks(); 514 t != NULL; 515 t = reinterpret_cast<StackTrace*>( 516 t->stack[tcmalloc::kMaxStackDepth-1])) { 517 ASSERT(used_slots < needed_slots); // Need to leave room for terminator 518 if (used_slots + 3 + t->depth >= needed_slots) { 519 // No more room 520 break; 521 } 522 523 result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1)); 524 result[used_slots+1] = reinterpret_cast<void*>(t->size); 525 result[used_slots+2] = reinterpret_cast<void*>(t->depth); 526 for (int d = 0; d < t->depth; d++) { 527 result[used_slots+3+d] = t->stack[d]; 528 } 529 used_slots += 3 + t->depth; 530 } 531 result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0)); 532 return result; 533 } 534 535 static void IterateOverRanges(void* arg, MallocExtension::RangeFunction func) { 536 PageID page = 1; // Some code may assume that page==0 is never used 537 bool done = false; 538 while (!done) { 539 // Accumulate a small number of ranges in a local buffer 540 static const int kNumRanges = 16; 541 static base::MallocRange ranges[kNumRanges]; 542 int n = 0; 543 { 544 SpinLockHolder h(Static::pageheap_lock()); 545 while (n < kNumRanges) { 546 if (!Static::pageheap()->GetNextRange(page, &ranges[n])) { 547 done = true; 548 break; 549 } else { 550 uintptr_t limit = ranges[n].address + ranges[n].length; 551 page = (limit + kPageSize - 1) >> kPageShift; 552 n++; 553 } 554 } 555 } 556 557 for (int i = 0; i < n; i++) { 558 (*func)(arg, &ranges[i]); 559 } 560 } 561 } 562 563 // TCMalloc's support for extra malloc interfaces 564 class TCMallocImplementation : public MallocExtension { 565 private: 566 // ReleaseToSystem() might release more than the requested bytes because 567 // the page heap releases at the span granularity, and spans are of wildly 568 // different sizes. This member keeps track of the extra bytes bytes 569 // released so that the app can periodically call ReleaseToSystem() to 570 // release memory at a constant rate. 571 // NOTE: Protected by Static::pageheap_lock(). 
572 size_t extra_bytes_released_; 573 574 public: 575 TCMallocImplementation() 576 : extra_bytes_released_(0) { 577 } 578 579 virtual void GetStats(char* buffer, int buffer_length) { 580 ASSERT(buffer_length > 0); 581 TCMalloc_Printer printer(buffer, buffer_length); 582 583 // Print level one stats unless lots of space is available 584 if (buffer_length < 10000) { 585 DumpStats(&printer, 1); 586 } else { 587 DumpStats(&printer, 2); 588 } 589 } 590 591 // We may print an extra, tcmalloc-specific warning message here. 592 virtual void GetHeapSample(MallocExtensionWriter* writer) { 593 if (FLAGS_tcmalloc_sample_parameter == 0) { 594 const char* const kWarningMsg = 595 "%warn\n" 596 "%warn This heap profile does not have any data in it, because\n" 597 "%warn the application was run with heap sampling turned off.\n" 598 "%warn To get useful data from GetHeapSample(), you must\n" 599 "%warn set the environment variable TCMALLOC_SAMPLE_PARAMETER to\n" 600 "%warn a positive sampling period, such as 524288.\n" 601 "%warn\n"; 602 writer->append(kWarningMsg, strlen(kWarningMsg)); 603 } 604 MallocExtension::GetHeapSample(writer); 605 } 606 607 virtual void** ReadStackTraces(int* sample_period) { 608 tcmalloc::StackTraceTable table; 609 { 610 SpinLockHolder h(Static::pageheap_lock()); 611 Span* sampled = Static::sampled_objects(); 612 for (Span* s = sampled->next; s != sampled; s = s->next) { 613 table.AddTrace(*reinterpret_cast<StackTrace*>(s->objects)); 614 } 615 } 616 *sample_period = ThreadCache::GetCache()->GetSamplePeriod(); 617 return table.ReadStackTracesAndClear(); // grabs and releases pageheap_lock 618 } 619 620 virtual void** ReadHeapGrowthStackTraces() { 621 return DumpHeapGrowthStackTraces(); 622 } 623 624 virtual void Ranges(void* arg, RangeFunction func) { 625 IterateOverRanges(arg, func); 626 } 627 628 virtual bool GetNumericProperty(const char* name, size_t* value) { 629 ASSERT(name != NULL); 630 631 if (strcmp(name, "generic.current_allocated_bytes") == 0) { 632 TCMallocStats stats; 633 ExtractStats(&stats, NULL, NULL, NULL); 634 *value = stats.pageheap.system_bytes 635 - stats.thread_bytes 636 - stats.central_bytes 637 - stats.transfer_bytes 638 - stats.pageheap.free_bytes 639 - stats.pageheap.unmapped_bytes; 640 return true; 641 } 642 643 if (strcmp(name, "generic.heap_size") == 0) { 644 TCMallocStats stats; 645 ExtractStats(&stats, NULL, NULL, NULL); 646 *value = stats.pageheap.system_bytes; 647 return true; 648 } 649 650 if (strcmp(name, "tcmalloc.slack_bytes") == 0) { 651 // Kept for backwards compatibility. Now defined externally as: 652 // pageheap_free_bytes + pageheap_unmapped_bytes. 
653 SpinLockHolder l(Static::pageheap_lock()); 654 PageHeap::Stats stats = Static::pageheap()->stats(); 655 *value = stats.free_bytes + stats.unmapped_bytes; 656 return true; 657 } 658 659 if (strcmp(name, "tcmalloc.central_cache_free_bytes") == 0) { 660 TCMallocStats stats; 661 ExtractStats(&stats, NULL, NULL, NULL); 662 *value = stats.central_bytes; 663 return true; 664 } 665 666 if (strcmp(name, "tcmalloc.transfer_cache_free_bytes") == 0) { 667 TCMallocStats stats; 668 ExtractStats(&stats, NULL, NULL, NULL); 669 *value = stats.transfer_bytes; 670 return true; 671 } 672 673 if (strcmp(name, "tcmalloc.thread_cache_free_bytes") == 0) { 674 TCMallocStats stats; 675 ExtractStats(&stats, NULL, NULL, NULL); 676 *value = stats.thread_bytes; 677 return true; 678 } 679 680 if (strcmp(name, "tcmalloc.pageheap_free_bytes") == 0) { 681 SpinLockHolder l(Static::pageheap_lock()); 682 *value = Static::pageheap()->stats().free_bytes; 683 return true; 684 } 685 686 if (strcmp(name, "tcmalloc.pageheap_unmapped_bytes") == 0) { 687 SpinLockHolder l(Static::pageheap_lock()); 688 *value = Static::pageheap()->stats().unmapped_bytes; 689 return true; 690 } 691 692 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) { 693 SpinLockHolder l(Static::pageheap_lock()); 694 *value = ThreadCache::overall_thread_cache_size(); 695 return true; 696 } 697 698 if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) { 699 TCMallocStats stats; 700 ExtractStats(&stats, NULL, NULL, NULL); 701 *value = stats.thread_bytes; 702 return true; 703 } 704 705 return false; 706 } 707 708 virtual bool SetNumericProperty(const char* name, size_t value) { 709 ASSERT(name != NULL); 710 711 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) { 712 SpinLockHolder l(Static::pageheap_lock()); 713 ThreadCache::set_overall_thread_cache_size(value); 714 return true; 715 } 716 717 return false; 718 } 719 720 virtual void MarkThreadIdle() { 721 ThreadCache::BecomeIdle(); 722 } 723 724 virtual void MarkThreadBusy(); // Implemented below 725 726 virtual SysAllocator* GetSystemAllocator() { 727 SpinLockHolder h(Static::pageheap_lock()); 728 return sys_alloc; 729 } 730 731 virtual void SetSystemAllocator(SysAllocator* alloc) { 732 SpinLockHolder h(Static::pageheap_lock()); 733 sys_alloc = alloc; 734 } 735 736 virtual void ReleaseToSystem(size_t num_bytes) { 737 SpinLockHolder h(Static::pageheap_lock()); 738 if (num_bytes <= extra_bytes_released_) { 739 // We released too much on a prior call, so don't release any 740 // more this time. 741 extra_bytes_released_ = extra_bytes_released_ - num_bytes; 742 return; 743 } 744 num_bytes = num_bytes - extra_bytes_released_; 745 // num_bytes might be less than one page. If we pass zero to 746 // ReleaseAtLeastNPages, it won't do anything, so we release a whole 747 // page now and let extra_bytes_released_ smooth it out over time. 748 Length num_pages = max<Length>(num_bytes >> kPageShift, 1); 749 size_t bytes_released = Static::pageheap()->ReleaseAtLeastNPages( 750 num_pages) << kPageShift; 751 if (bytes_released > num_bytes) { 752 extra_bytes_released_ = bytes_released - num_bytes; 753 } else { 754 // The PageHeap wasn't able to release num_bytes. Don't try to 755 // compensate with a big release next time. Specifically, 756 // ReleaseFreeMemory() calls ReleaseToSystem(LONG_MAX). 
757 extra_bytes_released_ = 0; 758 } 759 } 760 761 virtual void SetMemoryReleaseRate(double rate) { 762 FLAGS_tcmalloc_release_rate = rate; 763 } 764 765 virtual double GetMemoryReleaseRate() { 766 return FLAGS_tcmalloc_release_rate; 767 } 768 virtual size_t GetEstimatedAllocatedSize(size_t size) { 769 if (size <= kMaxSize) { 770 const size_t cl = Static::sizemap()->SizeClass(size); 771 const size_t alloc_size = Static::sizemap()->ByteSizeForClass(cl); 772 return alloc_size; 773 } else { 774 return tcmalloc::pages(size) << kPageShift; 775 } 776 } 777 778 // This just calls GetSizeWithCallback, but because that's in an 779 // unnamed namespace, we need to move the definition below it in the 780 // file. 781 virtual size_t GetAllocatedSize(const void* ptr); 782 783 // This duplicates some of the logic in GetSizeWithCallback, but is 784 // faster. This is important on OS X, where this function is called 785 // on every allocation operation. 786 virtual Ownership GetOwnership(const void* ptr) { 787 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; 788 // The rest of tcmalloc assumes that all allocated pointers use at 789 // most kAddressBits bits. If ptr doesn't, then it definitely 790 // wasn't alloacted by tcmalloc. 791 if ((p >> (kAddressBits - kPageShift)) > 0) { 792 return kNotOwned; 793 } 794 size_t cl = Static::pageheap()->GetSizeClassIfCached(p); 795 if (cl != 0) { 796 return kOwned; 797 } 798 const Span *span = Static::pageheap()->GetDescriptor(p); 799 return span ? kOwned : kNotOwned; 800 } 801 802 virtual void GetFreeListSizes(vector<MallocExtension::FreeListInfo>* v) { 803 static const char* kCentralCacheType = "tcmalloc.central"; 804 static const char* kTransferCacheType = "tcmalloc.transfer"; 805 static const char* kThreadCacheType = "tcmalloc.thread"; 806 static const char* kPageHeapType = "tcmalloc.page"; 807 static const char* kPageHeapUnmappedType = "tcmalloc.page_unmapped"; 808 static const char* kLargeSpanType = "tcmalloc.large"; 809 static const char* kLargeUnmappedSpanType = "tcmalloc.large_unmapped"; 810 811 v->clear(); 812 813 // central class information 814 int64 prev_class_size = 0; 815 for (int cl = 1; cl < kNumClasses; ++cl) { 816 size_t class_size = Static::sizemap()->ByteSizeForClass(cl); 817 MallocExtension::FreeListInfo i; 818 i.min_object_size = prev_class_size + 1; 819 i.max_object_size = class_size; 820 i.total_bytes_free = 821 Static::central_cache()[cl].length() * class_size; 822 i.type = kCentralCacheType; 823 v->push_back(i); 824 825 // transfer cache 826 i.total_bytes_free = 827 Static::central_cache()[cl].tc_length() * class_size; 828 i.type = kTransferCacheType; 829 v->push_back(i); 830 831 prev_class_size = Static::sizemap()->ByteSizeForClass(cl); 832 } 833 834 // Add stats from per-thread heaps 835 uint64_t class_count[kNumClasses]; 836 memset(class_count, 0, sizeof(class_count)); 837 { 838 SpinLockHolder h(Static::pageheap_lock()); 839 uint64_t thread_bytes = 0; 840 ThreadCache::GetThreadStats(&thread_bytes, class_count); 841 } 842 843 prev_class_size = 0; 844 for (int cl = 1; cl < kNumClasses; ++cl) { 845 MallocExtension::FreeListInfo i; 846 i.min_object_size = prev_class_size + 1; 847 i.max_object_size = Static::sizemap()->ByteSizeForClass(cl); 848 i.total_bytes_free = 849 class_count[cl] * Static::sizemap()->ByteSizeForClass(cl); 850 i.type = kThreadCacheType; 851 v->push_back(i); 852 } 853 854 // append page heap info 855 PageHeap::SmallSpanStats small; 856 PageHeap::LargeSpanStats large; 857 { 858 SpinLockHolder 
h(Static::pageheap_lock()); 859 Static::pageheap()->GetSmallSpanStats(&small); 860 Static::pageheap()->GetLargeSpanStats(&large); 861 } 862 863 // large spans: mapped 864 MallocExtension::FreeListInfo span_info; 865 span_info.type = kLargeSpanType; 866 span_info.max_object_size = (numeric_limits<size_t>::max)(); 867 span_info.min_object_size = kMaxPages << kPageShift; 868 span_info.total_bytes_free = large.normal_pages << kPageShift; 869 v->push_back(span_info); 870 871 // large spans: unmapped 872 span_info.type = kLargeUnmappedSpanType; 873 span_info.total_bytes_free = large.returned_pages << kPageShift; 874 v->push_back(span_info); 875 876 // small spans 877 for (int s = 1; s < kMaxPages; s++) { 878 MallocExtension::FreeListInfo i; 879 i.max_object_size = (s << kPageShift); 880 i.min_object_size = ((s - 1) << kPageShift); 881 882 i.type = kPageHeapType; 883 i.total_bytes_free = (s << kPageShift) * small.normal_length[s]; 884 v->push_back(i); 885 886 i.type = kPageHeapUnmappedType; 887 i.total_bytes_free = (s << kPageShift) * small.returned_length[s]; 888 v->push_back(i); 889 } 890 } 891 }; 892 893 // The constructor allocates an object to ensure that initialization 894 // runs before main(), and therefore we do not have a chance to become 895 // multi-threaded before initialization. We also create the TSD key 896 // here. Presumably by the time this constructor runs, glibc is in 897 // good enough shape to handle pthread_key_create(). 898 // 899 // The constructor also takes the opportunity to tell STL to use 900 // tcmalloc. We want to do this early, before construct time, so 901 // all user STL allocations go through tcmalloc (which works really 902 // well for STL). 903 // 904 // The destructor prints stats when the program exits. 905 static int tcmallocguard_refcount = 0; // no lock needed: runs before main() 906 TCMallocGuard::TCMallocGuard() { 907 if (tcmallocguard_refcount++ == 0) { 908 #ifdef HAVE_TLS // this is true if the cc/ld/libc combo support TLS 909 // Check whether the kernel also supports TLS (needs to happen at runtime) 910 tcmalloc::CheckIfKernelSupportsTLS(); 911 #endif 912 ReplaceSystemAlloc(); // defined in libc_override_*.h 913 tc_free(tc_malloc(1)); 914 ThreadCache::InitTSD(); 915 tc_free(tc_malloc(1)); 916 // Either we, or debugallocation.cc, or valgrind will control memory 917 // management. We register our extension if we're the winner. 918 #ifdef TCMALLOC_USING_DEBUGALLOCATION 919 // Let debugallocation register its extension. 920 #else 921 if (RunningOnValgrind()) { 922 // Let Valgrind uses its own malloc (so don't register our extension). 
923 } else { 924 MallocExtension::Register(new TCMallocImplementation); 925 } 926 #endif 927 } 928 } 929 930 TCMallocGuard::~TCMallocGuard() { 931 if (--tcmallocguard_refcount == 0) { 932 const char* env = getenv("MALLOCSTATS"); 933 if (env != NULL) { 934 int level = atoi(env); 935 if (level < 1) level = 1; 936 PrintStats(level); 937 } 938 } 939 } 940 #ifndef WIN32_OVERRIDE_ALLOCATORS 941 static TCMallocGuard module_enter_exit_hook; 942 #endif 943 944 //------------------------------------------------------------------- 945 // Helpers for the exported routines below 946 //------------------------------------------------------------------- 947 948 static inline bool CheckCachedSizeClass(void *ptr) { 949 PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; 950 size_t cached_value = Static::pageheap()->GetSizeClassIfCached(p); 951 return cached_value == 0 || 952 cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass; 953 } 954 955 static inline void* CheckedMallocResult(void *result) { 956 ASSERT(result == NULL || CheckCachedSizeClass(result)); 957 return result; 958 } 959 960 static inline void* SpanToMallocResult(Span *span) { 961 Static::pageheap()->CacheSizeClass(span->start, 0); 962 return 963 CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift)); 964 } 965 966 static void* DoSampledAllocation(size_t size) { 967 // Grab the stack trace outside the heap lock 968 StackTrace tmp; 969 tmp.depth = GetStackTrace(tmp.stack, tcmalloc::kMaxStackDepth, 1); 970 tmp.size = size; 971 972 SpinLockHolder h(Static::pageheap_lock()); 973 // Allocate span 974 Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size)); 975 if (span == NULL) { 976 return NULL; 977 } 978 979 // Allocate stack trace 980 StackTrace *stack = Static::stacktrace_allocator()->New(); 981 if (stack == NULL) { 982 // Sampling failed because of lack of memory 983 return span; 984 } 985 *stack = tmp; 986 span->sample = 1; 987 span->objects = stack; 988 tcmalloc::DLL_Prepend(Static::sampled_objects(), span); 989 990 return SpanToMallocResult(span); 991 } 992 993 namespace { 994 995 // Copy of FLAGS_tcmalloc_large_alloc_report_threshold with 996 // automatic increases factored in. 997 static int64_t large_alloc_threshold = 998 (kPageSize > FLAGS_tcmalloc_large_alloc_report_threshold 999 ? kPageSize : FLAGS_tcmalloc_large_alloc_report_threshold); 1000 1001 static void ReportLargeAlloc(Length num_pages, void* result) { 1002 StackTrace stack; 1003 stack.depth = GetStackTrace(stack.stack, tcmalloc::kMaxStackDepth, 1); 1004 1005 static const int N = 1000; 1006 char buffer[N]; 1007 TCMalloc_Printer printer(buffer, N); 1008 printer.printf("tcmalloc: large alloc %"PRIu64" bytes == %p @ ", 1009 static_cast<uint64>(num_pages) << kPageShift, 1010 result); 1011 for (int i = 0; i < stack.depth; i++) { 1012 printer.printf(" %p", stack.stack[i]); 1013 } 1014 printer.printf("\n"); 1015 write(STDERR_FILENO, buffer, strlen(buffer)); 1016 } 1017 1018 inline void* cpp_alloc(size_t size, bool nothrow); 1019 inline void* do_malloc(size_t size); 1020 1021 // TODO(willchan): Investigate whether or not lining this much is harmful to 1022 // performance. 1023 // This is equivalent to do_malloc() except when tc_new_mode is set to true. 1024 // Otherwise, it will run the std::new_handler if set. 1025 inline void* do_malloc_or_cpp_alloc(size_t size) { 1026 return tc_new_mode ? 
cpp_alloc(size, true) : do_malloc(size); 1027 } 1028 1029 void* cpp_memalign(size_t align, size_t size); 1030 void* do_memalign(size_t align, size_t size); 1031 1032 inline void* do_memalign_or_cpp_memalign(size_t align, size_t size) { 1033 return tc_new_mode ? cpp_memalign(align, size) : do_memalign(align, size); 1034 } 1035 1036 // Must be called with the page lock held. 1037 inline bool should_report_large(Length num_pages) { 1038 const int64 threshold = large_alloc_threshold; 1039 if (threshold > 0 && num_pages >= (threshold >> kPageShift)) { 1040 // Increase the threshold by 1/8 every time we generate a report. 1041 // We cap the threshold at 8GiB to avoid overflow problems. 1042 large_alloc_threshold = (threshold + threshold/8 < 8ll<<30 1043 ? threshold + threshold/8 : 8ll<<30); 1044 return true; 1045 } 1046 return false; 1047 } 1048 1049 // Helper for do_malloc(). 1050 inline void* do_malloc_pages(ThreadCache* heap, size_t size) { 1051 void* result; 1052 bool report_large; 1053 1054 Length num_pages = tcmalloc::pages(size); 1055 size = num_pages << kPageShift; 1056 1057 if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) { 1058 result = DoSampledAllocation(size); 1059 1060 SpinLockHolder h(Static::pageheap_lock()); 1061 report_large = should_report_large(num_pages); 1062 } else { 1063 SpinLockHolder h(Static::pageheap_lock()); 1064 Span* span = Static::pageheap()->New(num_pages); 1065 result = (span == NULL ? NULL : SpanToMallocResult(span)); 1066 report_large = should_report_large(num_pages); 1067 } 1068 1069 if (report_large) { 1070 ReportLargeAlloc(num_pages, result); 1071 } 1072 return result; 1073 } 1074 1075 inline void* do_malloc(size_t size) { 1076 void* ret = NULL; 1077 1078 // The following call forces module initialization 1079 ThreadCache* heap = ThreadCache::GetCache(); 1080 if (size <= kMaxSize) { 1081 size_t cl = Static::sizemap()->SizeClass(size); 1082 size = Static::sizemap()->class_to_size(cl); 1083 1084 if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) { 1085 ret = DoSampledAllocation(size); 1086 } else { 1087 // The common case, and also the simplest. This just pops the 1088 // size-appropriate freelist, after replenishing it if it's empty. 1089 ret = CheckedMallocResult(heap->Allocate(size, cl)); 1090 } 1091 } else { 1092 ret = do_malloc_pages(heap, size); 1093 } 1094 if (ret == NULL) errno = ENOMEM; 1095 return ret; 1096 } 1097 1098 inline void* do_calloc(size_t n, size_t elem_size) { 1099 // Overflow check 1100 const size_t size = n * elem_size; 1101 if (elem_size != 0 && size / elem_size != n) return NULL; 1102 1103 void* result = do_malloc_or_cpp_alloc(size); 1104 if (result != NULL) { 1105 memset(result, 0, size); 1106 } 1107 return result; 1108 } 1109 1110 static inline ThreadCache* GetCacheIfPresent() { 1111 void* const p = ThreadCache::GetCacheIfPresent(); 1112 return reinterpret_cast<ThreadCache*>(p); 1113 } 1114 1115 // This lets you call back to a given function pointer if ptr is invalid. 1116 // It is used primarily by windows code which wants a specialized callback. 1117 inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) { 1118 if (ptr == NULL) return; 1119 if (Static::pageheap() == NULL) { 1120 // We called free() before malloc(). 
This can occur if the 1121 // (system) malloc() is called before tcmalloc is loaded, and then 1122 // free() is called after tcmalloc is loaded (and tc_free has 1123 // replaced free), but before the global constructor has run that 1124 // sets up the tcmalloc data structures. 1125 (*invalid_free_fn)(ptr); // Decide how to handle the bad free request 1126 return; 1127 } 1128 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; 1129 Span* span = NULL; 1130 size_t cl = Static::pageheap()->GetSizeClassIfCached(p); 1131 1132 if (cl == 0) { 1133 span = Static::pageheap()->GetDescriptor(p); 1134 if (!span) { 1135 // span can be NULL because the pointer passed in is invalid 1136 // (not something returned by malloc or friends), or because the 1137 // pointer was allocated with some other allocator besides 1138 // tcmalloc. The latter can happen if tcmalloc is linked in via 1139 // a dynamic library, but is not listed last on the link line. 1140 // In that case, libraries after it on the link line will 1141 // allocate with libc malloc, but free with tcmalloc's free. 1142 (*invalid_free_fn)(ptr); // Decide how to handle the bad free request 1143 return; 1144 } 1145 cl = span->sizeclass; 1146 Static::pageheap()->CacheSizeClass(p, cl); 1147 } 1148 if (cl != 0) { 1149 ASSERT(!Static::pageheap()->GetDescriptor(p)->sample); 1150 ThreadCache* heap = GetCacheIfPresent(); 1151 if (heap != NULL) { 1152 heap->Deallocate(ptr, cl); 1153 } else { 1154 // Delete directly into central cache 1155 tcmalloc::SLL_SetNext(ptr, NULL); 1156 Static::central_cache()[cl].InsertRange(ptr, ptr, 1); 1157 } 1158 } else { 1159 SpinLockHolder h(Static::pageheap_lock()); 1160 ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); 1161 ASSERT(span != NULL && span->start == p); 1162 if (span->sample) { 1163 StackTrace* st = reinterpret_cast<StackTrace*>(span->objects); 1164 tcmalloc::DLL_Remove(span); 1165 Static::stacktrace_allocator()->Delete(st); 1166 span->objects = NULL; 1167 } 1168 Static::pageheap()->Delete(span); 1169 } 1170 } 1171 1172 // The default "do_free" that uses the default callback. 1173 inline void do_free(void* ptr) { 1174 return do_free_with_callback(ptr, &InvalidFree); 1175 } 1176 1177 // NOTE: some logic here is duplicated in GetOwnership (above), for 1178 // speed. If you change this function, look at that one too. 1179 inline size_t GetSizeWithCallback(const void* ptr, 1180 size_t (*invalid_getsize_fn)(const void*)) { 1181 if (ptr == NULL) 1182 return 0; 1183 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; 1184 size_t cl = Static::pageheap()->GetSizeClassIfCached(p); 1185 if (cl != 0) { 1186 return Static::sizemap()->ByteSizeForClass(cl); 1187 } else { 1188 const Span *span = Static::pageheap()->GetDescriptor(p); 1189 if (span == NULL) { // means we do not own this memory 1190 return (*invalid_getsize_fn)(ptr); 1191 } else if (span->sizeclass != 0) { 1192 Static::pageheap()->CacheSizeClass(p, span->sizeclass); 1193 return Static::sizemap()->ByteSizeForClass(span->sizeclass); 1194 } else { 1195 return span->length << kPageShift; 1196 } 1197 } 1198 } 1199 1200 // This lets you call back to a given function pointer if ptr is invalid. 1201 // It is used primarily by windows code which wants a specialized callback. 
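// The resize policy below uses hysteresis (see the comment inside the
// function).  A worked example with illustrative numbers: for
// old_size == 1024, lower_bound_to_grow == 1280 and
// upper_bound_to_shrink == 512, so
//   - new_size == 1100 reallocates, first trying a 1280-byte block;
//   - new_size ==  700 keeps the existing block (only the hooks are re-run);
//   - new_size ==  400 reallocates in order to shrink.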
1202 inline void* do_realloc_with_callback( 1203 void* old_ptr, size_t new_size, 1204 void (*invalid_free_fn)(void*), 1205 size_t (*invalid_get_size_fn)(const void*)) { 1206 // Get the size of the old entry 1207 const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn); 1208 1209 // Reallocate if the new size is larger than the old size, 1210 // or if the new size is significantly smaller than the old size. 1211 // We do hysteresis to avoid resizing ping-pongs: 1212 // . If we need to grow, grow to max(new_size, old_size * 1.X) 1213 // . Don't shrink unless new_size < old_size * 0.Y 1214 // X and Y trade-off time for wasted space. For now we do 1.25 and 0.5. 1215 const int lower_bound_to_grow = old_size + old_size / 4; 1216 const int upper_bound_to_shrink = old_size / 2; 1217 if ((new_size > old_size) || (new_size < upper_bound_to_shrink)) { 1218 // Need to reallocate. 1219 void* new_ptr = NULL; 1220 1221 if (new_size > old_size && new_size < lower_bound_to_grow) { 1222 new_ptr = do_malloc_or_cpp_alloc(lower_bound_to_grow); 1223 } 1224 if (new_ptr == NULL) { 1225 // Either new_size is not a tiny increment, or last do_malloc failed. 1226 new_ptr = do_malloc_or_cpp_alloc(new_size); 1227 } 1228 if (new_ptr == NULL) { 1229 return NULL; 1230 } 1231 MallocHook::InvokeNewHook(new_ptr, new_size); 1232 memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size)); 1233 MallocHook::InvokeDeleteHook(old_ptr); 1234 // We could use a variant of do_free() that leverages the fact 1235 // that we already know the sizeclass of old_ptr. The benefit 1236 // would be small, so don't bother. 1237 do_free_with_callback(old_ptr, invalid_free_fn); 1238 return new_ptr; 1239 } else { 1240 // We still need to call hooks to report the updated size: 1241 MallocHook::InvokeDeleteHook(old_ptr); 1242 MallocHook::InvokeNewHook(old_ptr, new_size); 1243 return old_ptr; 1244 } 1245 } 1246 1247 inline void* do_realloc(void* old_ptr, size_t new_size) { 1248 return do_realloc_with_callback(old_ptr, new_size, 1249 &InvalidFree, &InvalidGetSizeForRealloc); 1250 } 1251 1252 // For use by exported routines below that want specific alignments 1253 // 1254 // Note: this code can be slow for alignments > 16, and can 1255 // significantly fragment memory. The expectation is that 1256 // memalign/posix_memalign/valloc/pvalloc will not be invoked very 1257 // often. This requirement simplifies our implementation and allows 1258 // us to tune for expected allocation patterns. 1259 void* do_memalign(size_t align, size_t size) { 1260 ASSERT((align & (align - 1)) == 0); 1261 ASSERT(align > 0); 1262 if (size + align < size) return NULL; // Overflow 1263 1264 // Fall back to malloc if we would already align this memory access properly. 1265 if (align <= AlignmentForSize(size)) { 1266 void* p = do_malloc(size); 1267 ASSERT((reinterpret_cast<uintptr_t>(p) % align) == 0); 1268 return p; 1269 } 1270 1271 if (Static::pageheap() == NULL) ThreadCache::InitModule(); 1272 1273 // Allocate at least one byte to avoid boundary conditions below 1274 if (size == 0) size = 1; 1275 1276 if (size <= kMaxSize && align < kPageSize) { 1277 // Search through acceptable size classes looking for one with 1278 // enough alignment. This depends on the fact that 1279 // InitSizeClasses() currently produces several size classes that 1280 // are aligned at powers of two. We will waste time and space if 1281 // we miss in the size class array, but that is deemed acceptable 1282 // since memalign() should be used rarely. 
1283 int cl = Static::sizemap()->SizeClass(size); 1284 while (cl < kNumClasses && 1285 ((Static::sizemap()->class_to_size(cl) & (align - 1)) != 0)) { 1286 cl++; 1287 } 1288 if (cl < kNumClasses) { 1289 ThreadCache* heap = ThreadCache::GetCache(); 1290 size = Static::sizemap()->class_to_size(cl); 1291 return CheckedMallocResult(heap->Allocate(size, cl)); 1292 } 1293 } 1294 1295 // We will allocate directly from the page heap 1296 SpinLockHolder h(Static::pageheap_lock()); 1297 1298 if (align <= kPageSize) { 1299 // Any page-level allocation will be fine 1300 // TODO: We could put the rest of this page in the appropriate 1301 // TODO: cache but it does not seem worth it. 1302 Span* span = Static::pageheap()->New(tcmalloc::pages(size)); 1303 return span == NULL ? NULL : SpanToMallocResult(span); 1304 } 1305 1306 // Allocate extra pages and carve off an aligned portion 1307 const Length alloc = tcmalloc::pages(size + align); 1308 Span* span = Static::pageheap()->New(alloc); 1309 if (span == NULL) return NULL; 1310 1311 // Skip starting portion so that we end up aligned 1312 Length skip = 0; 1313 while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) { 1314 skip++; 1315 } 1316 ASSERT(skip < alloc); 1317 if (skip > 0) { 1318 Span* rest = Static::pageheap()->Split(span, skip); 1319 Static::pageheap()->Delete(span); 1320 span = rest; 1321 } 1322 1323 // Skip trailing portion that we do not need to return 1324 const Length needed = tcmalloc::pages(size); 1325 ASSERT(span->length >= needed); 1326 if (span->length > needed) { 1327 Span* trailer = Static::pageheap()->Split(span, needed); 1328 Static::pageheap()->Delete(trailer); 1329 } 1330 return SpanToMallocResult(span); 1331 } 1332 1333 // Helpers for use by exported routines below: 1334 1335 inline void do_malloc_stats() { 1336 PrintStats(1); 1337 } 1338 1339 inline int do_mallopt(int cmd, int value) { 1340 return 1; // Indicates error 1341 } 1342 1343 #ifdef HAVE_STRUCT_MALLINFO 1344 inline struct mallinfo do_mallinfo() { 1345 TCMallocStats stats; 1346 ExtractStats(&stats, NULL, NULL, NULL); 1347 1348 // Just some of the fields are filled in. 1349 struct mallinfo info; 1350 memset(&info, 0, sizeof(info)); 1351 1352 // Unfortunately, the struct contains "int" field, so some of the 1353 // size values will be truncated. 1354 info.arena = static_cast<int>(stats.pageheap.system_bytes); 1355 info.fsmblks = static_cast<int>(stats.thread_bytes 1356 + stats.central_bytes 1357 + stats.transfer_bytes); 1358 info.fordblks = static_cast<int>(stats.pageheap.free_bytes + 1359 stats.pageheap.unmapped_bytes); 1360 info.uordblks = static_cast<int>(stats.pageheap.system_bytes 1361 - stats.thread_bytes 1362 - stats.central_bytes 1363 - stats.transfer_bytes 1364 - stats.pageheap.free_bytes 1365 - stats.pageheap.unmapped_bytes); 1366 1367 return info; 1368 } 1369 #endif // HAVE_STRUCT_MALLINFO 1370 1371 static SpinLock set_new_handler_lock(SpinLock::LINKER_INITIALIZED); 1372 1373 inline void* cpp_alloc(size_t size, bool nothrow) { 1374 for (;;) { 1375 void* p = do_malloc(size); 1376 #ifdef PREANSINEW 1377 return p; 1378 #else 1379 if (p == NULL) { // allocation failed 1380 // Get the current new handler. NB: this function is not 1381 // thread-safe. We make a feeble stab at making it so here, but 1382 // this lock only protects against tcmalloc interfering with 1383 // itself, not with other libraries calling set_new_handler. 
1384 std::new_handler nh; 1385 { 1386 SpinLockHolder h(&set_new_handler_lock); 1387 nh = std::set_new_handler(0); 1388 (void) std::set_new_handler(nh); 1389 } 1390 #if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) 1391 if (nh) { 1392 // Since exceptions are disabled, we don't really know if new_handler 1393 // failed. Assume it will abort if it fails. 1394 (*nh)(); 1395 continue; 1396 } 1397 return 0; 1398 #else 1399 // If no new_handler is established, the allocation failed. 1400 if (!nh) { 1401 if (nothrow) return 0; 1402 throw std::bad_alloc(); 1403 } 1404 // Otherwise, try the new_handler. If it returns, retry the 1405 // allocation. If it throws std::bad_alloc, fail the allocation. 1406 // if it throws something else, don't interfere. 1407 try { 1408 (*nh)(); 1409 } catch (const std::bad_alloc&) { 1410 if (!nothrow) throw; 1411 return p; 1412 } 1413 #endif // (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) 1414 } else { // allocation success 1415 return p; 1416 } 1417 #endif // PREANSINEW 1418 } 1419 } 1420 1421 void* cpp_memalign(size_t align, size_t size) { 1422 for (;;) { 1423 void* p = do_memalign(align, size); 1424 #ifdef PREANSINEW 1425 return p; 1426 #else 1427 if (p == NULL) { // allocation failed 1428 // Get the current new handler. NB: this function is not 1429 // thread-safe. We make a feeble stab at making it so here, but 1430 // this lock only protects against tcmalloc interfering with 1431 // itself, not with other libraries calling set_new_handler. 1432 std::new_handler nh; 1433 { 1434 SpinLockHolder h(&set_new_handler_lock); 1435 nh = std::set_new_handler(0); 1436 (void) std::set_new_handler(nh); 1437 } 1438 #if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) 1439 if (nh) { 1440 // Since exceptions are disabled, we don't really know if new_handler 1441 // failed. Assume it will abort if it fails. 1442 (*nh)(); 1443 continue; 1444 } 1445 return 0; 1446 #else 1447 // If no new_handler is established, the allocation failed. 1448 if (!nh) 1449 return 0; 1450 1451 // Otherwise, try the new_handler. If it returns, retry the 1452 // allocation. If it throws std::bad_alloc, fail the allocation. 1453 // if it throws something else, don't interfere. 1454 try { 1455 (*nh)(); 1456 } catch (const std::bad_alloc&) { 1457 return p; 1458 } 1459 #endif // (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) 1460 } else { // allocation success 1461 return p; 1462 } 1463 #endif // PREANSINEW 1464 } 1465 } 1466 1467 } // end unnamed namespace 1468 1469 // As promised, the definition of this function, declared above. 1470 size_t TCMallocImplementation::GetAllocatedSize(const void* ptr) { 1471 ASSERT(TCMallocImplementation::GetOwnership(ptr) 1472 != TCMallocImplementation::kNotOwned); 1473 return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize); 1474 } 1475 1476 void TCMallocImplementation::MarkThreadBusy() { 1477 // Allocate to force the creation of a thread cache, but avoid 1478 // invoking any hooks. 
1479 do_free(do_malloc(0)); 1480 } 1481 1482 //------------------------------------------------------------------- 1483 // Exported routines 1484 //------------------------------------------------------------------- 1485 1486 extern "C" PERFTOOLS_DLL_DECL const char* tc_version( 1487 int* major, int* minor, const char** patch) __THROW { 1488 if (major) *major = TC_VERSION_MAJOR; 1489 if (minor) *minor = TC_VERSION_MINOR; 1490 if (patch) *patch = TC_VERSION_PATCH; 1491 return TC_VERSION_STRING; 1492 } 1493 1494 // This function behaves similarly to MSVC's _set_new_mode. 1495 // If flag is 0 (default), calls to malloc will behave normally. 1496 // If flag is 1, calls to malloc will behave like calls to new, 1497 // and the std_new_handler will be invoked on failure. 1498 // Returns the previous mode. 1499 extern "C" PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW { 1500 int old_mode = tc_new_mode; 1501 tc_new_mode = flag; 1502 return old_mode; 1503 } 1504 1505 #ifndef TCMALLOC_USING_DEBUGALLOCATION // debugallocation.cc defines its own 1506 1507 // CAVEAT: The code structure below ensures that MallocHook methods are always 1508 // called from the stack frame of the invoked allocation function. 1509 // heap-checker.cc depends on this to start a stack trace from 1510 // the call to the (de)allocation function. 1511 1512 extern "C" PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) __THROW { 1513 void* result = do_malloc_or_cpp_alloc(size); 1514 MallocHook::InvokeNewHook(result, size); 1515 return result; 1516 } 1517 1518 extern "C" PERFTOOLS_DLL_DECL void tc_free(void* ptr) __THROW { 1519 MallocHook::InvokeDeleteHook(ptr); 1520 do_free(ptr); 1521 } 1522 1523 extern "C" PERFTOOLS_DLL_DECL void* tc_calloc(size_t n, 1524 size_t elem_size) __THROW { 1525 void* result = do_calloc(n, elem_size); 1526 MallocHook::InvokeNewHook(result, n * elem_size); 1527 return result; 1528 } 1529 1530 extern "C" PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) __THROW { 1531 MallocHook::InvokeDeleteHook(ptr); 1532 do_free(ptr); 1533 } 1534 1535 extern "C" PERFTOOLS_DLL_DECL void* tc_realloc(void* old_ptr, 1536 size_t new_size) __THROW { 1537 if (old_ptr == NULL) { 1538 void* result = do_malloc_or_cpp_alloc(new_size); 1539 MallocHook::InvokeNewHook(result, new_size); 1540 return result; 1541 } 1542 if (new_size == 0) { 1543 MallocHook::InvokeDeleteHook(old_ptr); 1544 do_free(old_ptr); 1545 return NULL; 1546 } 1547 return do_realloc(old_ptr, new_size); 1548 } 1549 1550 extern "C" PERFTOOLS_DLL_DECL void* tc_new(size_t size) { 1551 void* p = cpp_alloc(size, false); 1552 // We keep this next instruction out of cpp_alloc for a reason: when 1553 // it's in, and new just calls cpp_alloc, the optimizer may fold the 1554 // new call into cpp_alloc, which messes up our whole section-based 1555 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc 1556 // isn't the last thing this fn calls, and prevents the folding. 1557 MallocHook::InvokeNewHook(p, size); 1558 return p; 1559 } 1560 1561 extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) __THROW { 1562 void* p = cpp_alloc(size, true); 1563 MallocHook::InvokeNewHook(p, size); 1564 return p; 1565 } 1566 1567 extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW { 1568 MallocHook::InvokeDeleteHook(p); 1569 do_free(p); 1570 } 1571 1572 // Standard C++ library implementations define and use this 1573 // (via ::operator delete(ptr, nothrow)). 
1574 // But it's really the same as normal delete, so we just do the same thing. 1575 extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) __THROW { 1576 MallocHook::InvokeDeleteHook(p); 1577 do_free(p); 1578 } 1579 1580 extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size) { 1581 void* p = cpp_alloc(size, false); 1582 // We keep this next instruction out of cpp_alloc for a reason: when 1583 // it's in, and new just calls cpp_alloc, the optimizer may fold the 1584 // new call into cpp_alloc, which messes up our whole section-based 1585 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc 1586 // isn't the last thing this fn calls, and prevents the folding. 1587 MallocHook::InvokeNewHook(p, size); 1588 return p; 1589 } 1590 1591 extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) 1592 __THROW { 1593 void* p = cpp_alloc(size, true); 1594 MallocHook::InvokeNewHook(p, size); 1595 return p; 1596 } 1597 1598 extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW { 1599 MallocHook::InvokeDeleteHook(p); 1600 do_free(p); 1601 } 1602 1603 extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) __THROW { 1604 MallocHook::InvokeDeleteHook(p); 1605 do_free(p); 1606 } 1607 1608 extern "C" PERFTOOLS_DLL_DECL void* tc_memalign(size_t align, 1609 size_t size) __THROW { 1610 void* result = do_memalign_or_cpp_memalign(align, size); 1611 MallocHook::InvokeNewHook(result, size); 1612 return result; 1613 } 1614 1615 extern "C" PERFTOOLS_DLL_DECL int tc_posix_memalign( 1616 void** result_ptr, size_t align, size_t size) __THROW { 1617 if (((align % sizeof(void*)) != 0) || 1618 ((align & (align - 1)) != 0) || 1619 (align == 0)) { 1620 return EINVAL; 1621 } 1622 1623 void* result = do_memalign_or_cpp_memalign(align, size); 1624 MallocHook::InvokeNewHook(result, size); 1625 if (result == NULL) { 1626 return ENOMEM; 1627 } else { 1628 *result_ptr = result; 1629 return 0; 1630 } 1631 } 1632 1633 static size_t pagesize = 0; 1634 1635 extern "C" PERFTOOLS_DLL_DECL void* tc_valloc(size_t size) __THROW { 1636 // Allocate page-aligned object of length >= size bytes 1637 if (pagesize == 0) pagesize = getpagesize(); 1638 void* result = do_memalign_or_cpp_memalign(pagesize, size); 1639 MallocHook::InvokeNewHook(result, size); 1640 return result; 1641 } 1642 1643 extern "C" PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t size) __THROW { 1644 // Round up size to a multiple of pagesize 1645 if (pagesize == 0) pagesize = getpagesize(); 1646 if (size == 0) { // pvalloc(0) should allocate one page, according to 1647 size = pagesize; // http://man.free4web.biz/man3/libmpatrol.3.html 1648 } 1649 size = (size + pagesize - 1) & ~(pagesize - 1); 1650 void* result = do_memalign_or_cpp_memalign(pagesize, size); 1651 MallocHook::InvokeNewHook(result, size); 1652 return result; 1653 } 1654 1655 extern "C" PERFTOOLS_DLL_DECL void tc_malloc_stats(void) __THROW { 1656 do_malloc_stats(); 1657 } 1658 1659 extern "C" PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) __THROW { 1660 return do_mallopt(cmd, value); 1661 } 1662 1663 #ifdef HAVE_STRUCT_MALLINFO 1664 extern "C" PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) __THROW { 1665 return do_mallinfo(); 1666 } 1667 #endif 1668 1669 extern "C" PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) __THROW { 1670 return MallocExtension::instance()->GetAllocatedSize(ptr); 1671 } 1672 1673 #endif // TCMALLOC_USING_DEBUGALLOCATION 1674
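// -------------------------------------------------------------------
// Usage note (an illustrative sketch, not compiled as part of this file):
// a program linked against tcmalloc can query the statistics exported by
// TCMallocImplementation above through the MallocExtension interface, e.g.
//
//   #include <gperftools/malloc_extension.h>
//
//   size_t heap_size = 0, in_use = 0;
//   MallocExtension::instance()->GetNumericProperty(
//       "generic.heap_size", &heap_size);
//   MallocExtension::instance()->GetNumericProperty(
//       "generic.current_allocated_bytes", &in_use);
//   // ReleaseFreeMemory() calls ReleaseToSystem(LONG_MAX); see above.
//   MallocExtension::instance()->ReleaseFreeMemory();
//
// The property names are the ones handled in GetNumericProperty() above.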