// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/sampling_heap_profiler/sampling_heap_profiler.h"

#include <algorithm>
#include <cmath>
#include <utility>

#include "base/allocator/allocator_shim.h"
#include "base/allocator/buildflags.h"
#include "base/allocator/partition_allocator/partition_alloc.h"
#include "base/atomicops.h"
#include "base/debug/stack_trace.h"
#include "base/macros.h"
#include "base/no_destructor.h"
#include "base/partition_alloc_buildflags.h"
#include "base/rand_util.h"
#include "base/sampling_heap_profiler/lock_free_address_hash_set.h"
#include "base/threading/thread_local_storage.h"
#include "build/build_config.h"

#if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
    defined(OFFICIAL_BUILD)
#include "base/trace_event/cfi_backtrace_android.h"
#endif

namespace base {

using base::allocator::AllocatorDispatch;
using base::subtle::Atomic32;
using base::subtle::AtomicWord;

namespace {

// Controls how many top frames to skip when recording the call stack.
// These frames correspond to the profiler's own frames.
const uint32_t kSkipBaseAllocatorFrames = 2;

const size_t kDefaultSamplingIntervalBytes = 128 * 1024;

// When true, sample intervals are not randomized. Used for testing.
bool g_deterministic;

// A positive value if profiling is running, otherwise zero.
Atomic32 g_running;

// Pointer to the current |LockFreeAddressHashSet|.
AtomicWord g_sampled_addresses_set;

// Sampling interval parameter, the mean value for intervals between samples.
AtomicWord g_sampling_interval = kDefaultSamplingIntervalBytes;

void (*g_hooks_install_callback)();
Atomic32 g_hooks_installed;

void* AllocFn(const AllocatorDispatch* self, size_t size, void* context) {
  void* address = self->next->alloc_function(self->next, size, context);
  SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
  return address;
}

void* AllocZeroInitializedFn(const AllocatorDispatch* self,
                             size_t n,
                             size_t size,
                             void* context) {
  void* address =
      self->next->alloc_zero_initialized_function(self->next, n, size, context);
  SamplingHeapProfiler::RecordAlloc(address, n * size,
                                    kSkipBaseAllocatorFrames);
  return address;
}

void* AllocAlignedFn(const AllocatorDispatch* self,
                     size_t alignment,
                     size_t size,
                     void* context) {
  void* address =
      self->next->alloc_aligned_function(self->next, alignment, size, context);
  SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
  return address;
}

void* ReallocFn(const AllocatorDispatch* self,
                void* address,
                size_t size,
                void* context) {
  // Note: size == 0 actually performs free.
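  // For sampling purposes a realloc is treated as a free of the old block
  // followed by an allocation at the (possibly different) returned address.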
  SamplingHeapProfiler::RecordFree(address);
  address = self->next->realloc_function(self->next, address, size, context);
  SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
  return address;
}

void FreeFn(const AllocatorDispatch* self, void* address, void* context) {
  SamplingHeapProfiler::RecordFree(address);
  self->next->free_function(self->next, address, context);
}

size_t GetSizeEstimateFn(const AllocatorDispatch* self,
                         void* address,
                         void* context) {
  return self->next->get_size_estimate_function(self->next, address, context);
}

unsigned BatchMallocFn(const AllocatorDispatch* self,
                       size_t size,
                       void** results,
                       unsigned num_requested,
                       void* context) {
  unsigned num_allocated = self->next->batch_malloc_function(
      self->next, size, results, num_requested, context);
  for (unsigned i = 0; i < num_allocated; ++i) {
    SamplingHeapProfiler::RecordAlloc(results[i], size,
                                      kSkipBaseAllocatorFrames);
  }
  return num_allocated;
}

void BatchFreeFn(const AllocatorDispatch* self,
                 void** to_be_freed,
                 unsigned num_to_be_freed,
                 void* context) {
  for (unsigned i = 0; i < num_to_be_freed; ++i)
    SamplingHeapProfiler::RecordFree(to_be_freed[i]);
  self->next->batch_free_function(self->next, to_be_freed, num_to_be_freed,
                                  context);
}

void FreeDefiniteSizeFn(const AllocatorDispatch* self,
                        void* address,
                        size_t size,
                        void* context) {
  SamplingHeapProfiler::RecordFree(address);
  self->next->free_definite_size_function(self->next, address, size, context);
}

AllocatorDispatch g_allocator_dispatch = {&AllocFn,
                                          &AllocZeroInitializedFn,
                                          &AllocAlignedFn,
                                          &ReallocFn,
                                          &FreeFn,
                                          &GetSizeEstimateFn,
                                          &BatchMallocFn,
                                          &BatchFreeFn,
                                          &FreeDefiniteSizeFn,
                                          nullptr};

#if BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)

void PartitionAllocHook(void* address, size_t size, const char*) {
  SamplingHeapProfiler::RecordAlloc(address, size);
}

void PartitionFreeHook(void* address) {
  SamplingHeapProfiler::RecordFree(address);
}

#endif  // BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)

ThreadLocalStorage::Slot& AccumulatedBytesTLS() {
  static base::NoDestructor<base::ThreadLocalStorage::Slot>
      accumulated_bytes_tls;
  return *accumulated_bytes_tls;
}

}  // namespace

SamplingHeapProfiler::Sample::Sample(size_t size,
                                     size_t total,
                                     uint32_t ordinal)
    : size(size), total(total), ordinal(ordinal) {}

SamplingHeapProfiler::Sample::Sample(const Sample&) = default;

SamplingHeapProfiler::Sample::~Sample() = default;

SamplingHeapProfiler* SamplingHeapProfiler::instance_;

SamplingHeapProfiler::SamplingHeapProfiler() {
  instance_ = this;
  auto sampled_addresses = std::make_unique<LockFreeAddressHashSet>(64);
  base::subtle::NoBarrier_Store(
      &g_sampled_addresses_set,
      reinterpret_cast<AtomicWord>(sampled_addresses.get()));
  sampled_addresses_stack_.push(std::move(sampled_addresses));
}

// static
void SamplingHeapProfiler::InitTLSSlot() {
  // Preallocate the TLS slot early, so it can't cause reentrancy issues
  // when sampling is started.
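  // Touching the slot here forces the lazy construction of the underlying
  // ThreadLocalStorage::Slot, so it does not happen later from inside an
  // allocation hook.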
  ignore_result(AccumulatedBytesTLS().Get());
}

// static
void SamplingHeapProfiler::InstallAllocatorHooksOnce() {
  static bool hook_installed = InstallAllocatorHooks();
  ignore_result(hook_installed);
}

// static
bool SamplingHeapProfiler::InstallAllocatorHooks() {
#if BUILDFLAG(USE_ALLOCATOR_SHIM)
  base::allocator::InsertAllocatorDispatch(&g_allocator_dispatch);
#else
  ignore_result(g_allocator_dispatch);
  DLOG(WARNING)
      << "base::allocator shims are not available for memory sampling.";
#endif  // BUILDFLAG(USE_ALLOCATOR_SHIM)

#if BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
  base::PartitionAllocHooks::SetAllocationHook(&PartitionAllocHook);
  base::PartitionAllocHooks::SetFreeHook(&PartitionFreeHook);
#endif  // BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)

  int32_t hooks_install_callback_has_been_set =
      base::subtle::Acquire_CompareAndSwap(&g_hooks_installed, 0, 1);
  if (hooks_install_callback_has_been_set)
    g_hooks_install_callback();

  return true;
}

// static
void SamplingHeapProfiler::SetHooksInstallCallback(
    void (*hooks_install_callback)()) {
  CHECK(!g_hooks_install_callback && hooks_install_callback);
  g_hooks_install_callback = hooks_install_callback;

  int32_t profiler_has_already_been_initialized =
      base::subtle::Release_CompareAndSwap(&g_hooks_installed, 0, 1);
  if (profiler_has_already_been_initialized)
    g_hooks_install_callback();
}

uint32_t SamplingHeapProfiler::Start() {
#if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
    defined(OFFICIAL_BUILD)
  if (!base::trace_event::CFIBacktraceAndroid::GetInitializedInstance()
           ->can_unwind_stack_frames()) {
    LOG(WARNING) << "Sampling heap profiler: Stack unwinding is not available.";
    return 0;
  }
#endif
  InstallAllocatorHooksOnce();
  base::subtle::Barrier_AtomicIncrement(&g_running, 1);
  return last_sample_ordinal_;
}

void SamplingHeapProfiler::Stop() {
  AtomicWord count = base::subtle::Barrier_AtomicIncrement(&g_running, -1);
  CHECK_GE(count, 0);
}

void SamplingHeapProfiler::SetSamplingInterval(size_t sampling_interval) {
  // TODO(alph): Reset the sample being collected if running.
  base::subtle::Release_Store(&g_sampling_interval,
                              static_cast<AtomicWord>(sampling_interval));
}

// static
size_t SamplingHeapProfiler::GetNextSampleInterval(size_t interval) {
  if (UNLIKELY(g_deterministic))
    return interval;

  // We sample with a Poisson process, with a constant average sampling
  // interval. This follows the exponential probability distribution with
  // parameter λ = 1/interval, where |interval| is the average number of bytes
  // between samples.
  // Let u be a uniformly distributed random number between 0 and 1, then
  // next_sample = -ln(u) / λ = -ln(u) * interval.
  double uniform = base::RandDouble();
  double value = -log(uniform) * interval;
  size_t min_value = sizeof(intptr_t);
  // We limit the upper bound of a sample interval to make sure we don't have
  // huge gaps in the sampling stream. The probability of hitting the upper
  // bound is exp(-20) ~ 2e-9, so it should not skew the distribution.
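  // For example, with the default 128 KiB mean interval this caps a single
  // sampling gap at 20 * 128 KiB = 2.5 MiB.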
  size_t max_value = interval * 20;
  if (UNLIKELY(value < min_value))
    return min_value;
  if (UNLIKELY(value > max_value))
    return max_value;
  return static_cast<size_t>(value);
}

// static
void SamplingHeapProfiler::RecordAlloc(void* address,
                                       size_t size,
                                       uint32_t skip_frames) {
  if (UNLIKELY(!base::subtle::NoBarrier_Load(&g_running)))
    return;
  if (UNLIKELY(base::ThreadLocalStorage::HasBeenDestroyed()))
    return;

  // TODO(alph): On macOS the hook may be called several times for a single
  // allocation. Handle that case.

  // The TLS slot stores the running byte counter offset by the next sample
  // interval, so it stays negative until enough bytes have been allocated to
  // warrant a sample.
  intptr_t accumulated_bytes =
      reinterpret_cast<intptr_t>(AccumulatedBytesTLS().Get());
  accumulated_bytes += size;
  if (LIKELY(accumulated_bytes < 0)) {
    AccumulatedBytesTLS().Set(reinterpret_cast<void*>(accumulated_bytes));
    return;
  }

  size_t mean_interval = base::subtle::NoBarrier_Load(&g_sampling_interval);
  size_t samples = accumulated_bytes / mean_interval;
  accumulated_bytes %= mean_interval;

  do {
    accumulated_bytes -= GetNextSampleInterval(mean_interval);
    ++samples;
  } while (accumulated_bytes >= 0);

  AccumulatedBytesTLS().Set(reinterpret_cast<void*>(accumulated_bytes));

  instance_->DoRecordAlloc(samples * mean_interval, size, address, skip_frames);
}

void SamplingHeapProfiler::RecordStackTrace(Sample* sample,
                                            uint32_t skip_frames) {
#if !defined(OS_NACL)
  constexpr uint32_t kMaxStackEntries = 256;
  constexpr uint32_t kSkipProfilerOwnFrames = 2;
  skip_frames += kSkipProfilerOwnFrames;
#if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
    defined(OFFICIAL_BUILD)
  const void* frames[kMaxStackEntries];
  size_t frame_count =
      base::trace_event::CFIBacktraceAndroid::GetInitializedInstance()->Unwind(
          frames, kMaxStackEntries);
#elif BUILDFLAG(CAN_UNWIND_WITH_FRAME_POINTERS)
  const void* frames[kMaxStackEntries];
  size_t frame_count = base::debug::TraceStackFramePointers(
      frames, kMaxStackEntries, skip_frames);
  skip_frames = 0;
#else
  // Fall back to capturing the stack with base::debug::StackTrace,
  // which is likely slower, but more reliable.
  base::debug::StackTrace stack_trace(kMaxStackEntries);
  size_t frame_count = 0;
  const void* const* frames = stack_trace.Addresses(&frame_count);
#endif

  sample->stack.insert(
      sample->stack.end(), const_cast<void**>(&frames[skip_frames]),
      const_cast<void**>(&frames[std::max<size_t>(frame_count, skip_frames)]));
#endif
}

void SamplingHeapProfiler::DoRecordAlloc(size_t total_allocated,
                                         size_t size,
                                         void* address,
                                         uint32_t skip_frames) {
  if (entered_.Get())
    return;
  entered_.Set(true);
  {
    base::AutoLock lock(mutex_);
    Sample sample(size, total_allocated, ++last_sample_ordinal_);
    RecordStackTrace(&sample, skip_frames);
    for (auto* observer : observers_)
      observer->SampleAdded(sample.ordinal, size, total_allocated);
    samples_.emplace(address, std::move(sample));
    // TODO(alph): Sometimes RecordAlloc is called twice in a row without
    // a RecordFree in between. Investigate it.
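    // The Contains() check keeps that case from inserting a duplicate entry
    // into the sampled-address set.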
    if (!sampled_addresses_set().Contains(address))
      sampled_addresses_set().Insert(address);
    BalanceAddressesHashSet();
  }
  entered_.Set(false);
}

// static
void SamplingHeapProfiler::RecordFree(void* address) {
  if (UNLIKELY(address == nullptr))
    return;
  if (UNLIKELY(sampled_addresses_set().Contains(address)))
    instance_->DoRecordFree(address);
}

void SamplingHeapProfiler::DoRecordFree(void* address) {
  if (UNLIKELY(base::ThreadLocalStorage::HasBeenDestroyed()))
    return;
  if (entered_.Get())
    return;
  entered_.Set(true);
  {
    base::AutoLock lock(mutex_);
    auto it = samples_.find(address);
    CHECK(it != samples_.end());
    for (auto* observer : observers_)
      observer->SampleRemoved(it->second.ordinal);
    samples_.erase(it);
    sampled_addresses_set().Remove(address);
  }
  entered_.Set(false);
}

void SamplingHeapProfiler::BalanceAddressesHashSet() {
  // If the load factor of the current addresses hash set exceeds 1, allocate
  // a new set twice as large, copy all the data over, and switch to using it.
  // No writes are made to either set during the copy, since the whole
  // operation happens under the lock.
  // Readers keep using the old set until the atomic switch below takes place.
  LockFreeAddressHashSet& current_set = sampled_addresses_set();
  if (current_set.load_factor() < 1)
    return;
  auto new_set =
      std::make_unique<LockFreeAddressHashSet>(current_set.buckets_count() * 2);
  new_set->Copy(current_set);
  // Atomically switch all the new readers to the new set.
  base::subtle::Release_Store(&g_sampled_addresses_set,
                              reinterpret_cast<AtomicWord>(new_set.get()));
  // We still have to keep all the old sets alive to resolve a theoretical
  // race with readers in |RecordFree| that have already obtained the set
  // pointer but haven't yet dereferenced it.
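  // Retired sets are therefore parked on |sampled_addresses_stack_| rather
  // than freed, and stay alive for the lifetime of the profiler.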
  sampled_addresses_stack_.push(std::move(new_set));
}

// static
LockFreeAddressHashSet& SamplingHeapProfiler::sampled_addresses_set() {
  return *reinterpret_cast<LockFreeAddressHashSet*>(
      base::subtle::NoBarrier_Load(&g_sampled_addresses_set));
}

// static
SamplingHeapProfiler* SamplingHeapProfiler::GetInstance() {
  static base::NoDestructor<SamplingHeapProfiler> instance;
  return instance.get();
}

// static
void SamplingHeapProfiler::SuppressRandomnessForTest(bool suppress) {
  g_deterministic = suppress;
}

void SamplingHeapProfiler::AddSamplesObserver(SamplesObserver* observer) {
  CHECK(!entered_.Get());
  entered_.Set(true);
  {
    base::AutoLock lock(mutex_);
    observers_.push_back(observer);
  }
  entered_.Set(false);
}

void SamplingHeapProfiler::RemoveSamplesObserver(SamplesObserver* observer) {
  CHECK(!entered_.Get());
  entered_.Set(true);
  {
    base::AutoLock lock(mutex_);
    auto it = std::find(observers_.begin(), observers_.end(), observer);
    CHECK(it != observers_.end());
    observers_.erase(it);
  }
  entered_.Set(false);
}

std::vector<SamplingHeapProfiler::Sample> SamplingHeapProfiler::GetSamples(
    uint32_t profile_id) {
  CHECK(!entered_.Get());
  entered_.Set(true);
  std::vector<Sample> samples;
  {
    base::AutoLock lock(mutex_);
    for (auto& it : samples_) {
      Sample& sample = it.second;
      if (sample.ordinal > profile_id)
        samples.push_back(sample);
    }
  }
  entered_.Set(false);
  return samples;
}

}  // namespace base