// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/sampling_heap_profiler/sampling_heap_profiler.h"

#include <algorithm>
#include <cmath>
#include <utility>

#include "base/allocator/allocator_shim.h"
#include "base/allocator/buildflags.h"
#include "base/allocator/partition_allocator/partition_alloc.h"
#include "base/atomicops.h"
#include "base/debug/stack_trace.h"
#include "base/macros.h"
#include "base/no_destructor.h"
#include "base/partition_alloc_buildflags.h"
#include "base/rand_util.h"
#include "base/sampling_heap_profiler/lock_free_address_hash_set.h"
#include "base/threading/thread_local_storage.h"
#include "build/build_config.h"

#if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
    defined(OFFICIAL_BUILD)
#include "base/trace_event/cfi_backtrace_android.h"
#endif

namespace base {

using base::allocator::AllocatorDispatch;
using base::subtle::Atomic32;
using base::subtle::AtomicWord;

namespace {

// Controls how many top frames to skip when recording a call stack.
// These frames correspond to the profiler's own frames.
const uint32_t kSkipBaseAllocatorFrames = 2;

const size_t kDefaultSamplingIntervalBytes = 128 * 1024;

// When true, sample intervals are not randomized. Used for testing.
bool g_deterministic;

// A positive value while profiling is running; zero otherwise.
Atomic32 g_running;

// Pointer to the current |LockFreeAddressHashSet|.
AtomicWord g_sampled_addresses_set;

// Sampling interval parameter: the mean interval, in bytes, between samples.
AtomicWord g_sampling_interval = kDefaultSamplingIntervalBytes;

void (*g_hooks_install_callback)();
Atomic32 g_hooks_installed;

void* AllocFn(const AllocatorDispatch* self, size_t size, void* context) {
  void* address = self->next->alloc_function(self->next, size, context);
  SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
  return address;
}

void* AllocZeroInitializedFn(const AllocatorDispatch* self,
                             size_t n,
                             size_t size,
                             void* context) {
  void* address =
      self->next->alloc_zero_initialized_function(self->next, n, size, context);
  SamplingHeapProfiler::RecordAlloc(address, n * size,
                                    kSkipBaseAllocatorFrames);
  return address;
}

void* AllocAlignedFn(const AllocatorDispatch* self,
                     size_t alignment,
                     size_t size,
                     void* context) {
  void* address =
      self->next->alloc_aligned_function(self->next, alignment, size, context);
  SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
  return address;
}

void* ReallocFn(const AllocatorDispatch* self,
                void* address,
                size_t size,
                void* context) {
  // Note: realloc with size == 0 actually performs a free.
  SamplingHeapProfiler::RecordFree(address);
  address = self->next->realloc_function(self->next, address, size, context);
  SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
  return address;
}

void FreeFn(const AllocatorDispatch* self, void* address, void* context) {
  SamplingHeapProfiler::RecordFree(address);
  self->next->free_function(self->next, address, context);
}

size_t GetSizeEstimateFn(const AllocatorDispatch* self,
                         void* address,
                         void* context) {
  return self->next->get_size_estimate_function(self->next, address, context);
}

unsigned BatchMallocFn(const AllocatorDispatch* self,
                       size_t size,
                       void** results,
                       unsigned num_requested,
                       void* context) {
  unsigned num_allocated = self->next->batch_malloc_function(
      self->next, size, results, num_requested, context);
  for (unsigned i = 0; i < num_allocated; ++i) {
    SamplingHeapProfiler::RecordAlloc(results[i], size,
                                      kSkipBaseAllocatorFrames);
  }
  return num_allocated;
}

void BatchFreeFn(const AllocatorDispatch* self,
                 void** to_be_freed,
                 unsigned num_to_be_freed,
                 void* context) {
  for (unsigned i = 0; i < num_to_be_freed; ++i)
    SamplingHeapProfiler::RecordFree(to_be_freed[i]);
  self->next->batch_free_function(self->next, to_be_freed, num_to_be_freed,
                                  context);
}

void FreeDefiniteSizeFn(const AllocatorDispatch* self,
                        void* address,
                        size_t size,
                        void* context) {
  SamplingHeapProfiler::RecordFree(address);
  self->next->free_definite_size_function(self->next, address, size, context);
}

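// The dispatch below is inserted into the allocator shim chain by
// InstallAllocatorHooks(). Allocation hooks forward to self->next first and
// then report the returned address via RecordAlloc(); free hooks report via
// RecordFree() before forwarding. This way the profiler observes each shimmed
// allocation and free exactly once.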
AllocatorDispatch g_allocator_dispatch = {&AllocFn,
                                          &AllocZeroInitializedFn,
                                          &AllocAlignedFn,
                                          &ReallocFn,
                                          &FreeFn,
                                          &GetSizeEstimateFn,
                                          &BatchMallocFn,
                                          &BatchFreeFn,
                                          &FreeDefiniteSizeFn,
                                          nullptr};

#if BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)

void PartitionAllocHook(void* address, size_t size, const char*) {
  SamplingHeapProfiler::RecordAlloc(address, size);
}

void PartitionFreeHook(void* address) {
  SamplingHeapProfiler::RecordFree(address);
}

#endif  // BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)

ThreadLocalStorage::Slot& AccumulatedBytesTLS() {
  static base::NoDestructor<base::ThreadLocalStorage::Slot>
      accumulated_bytes_tls;
  return *accumulated_bytes_tls;
}

}  // namespace

SamplingHeapProfiler::Sample::Sample(size_t size,
                                     size_t total,
                                     uint32_t ordinal)
    : size(size), total(total), ordinal(ordinal) {}

SamplingHeapProfiler::Sample::Sample(const Sample&) = default;

SamplingHeapProfiler::Sample::~Sample() = default;

SamplingHeapProfiler* SamplingHeapProfiler::instance_;

SamplingHeapProfiler::SamplingHeapProfiler() {
  instance_ = this;
  auto sampled_addresses = std::make_unique<LockFreeAddressHashSet>(64);
  base::subtle::NoBarrier_Store(
      &g_sampled_addresses_set,
      reinterpret_cast<AtomicWord>(sampled_addresses.get()));
  sampled_addresses_stack_.push(std::move(sampled_addresses));
}

// static
void SamplingHeapProfiler::InitTLSSlot() {
  // Preallocate the TLS slot early, so it can't cause reentrancy issues
  // when sampling is started.
  ignore_result(AccumulatedBytesTLS().Get());
}

// static
void SamplingHeapProfiler::InstallAllocatorHooksOnce() {
  static bool hook_installed = InstallAllocatorHooks();
  ignore_result(hook_installed);
}

// static
bool SamplingHeapProfiler::InstallAllocatorHooks() {
#if BUILDFLAG(USE_ALLOCATOR_SHIM)
  base::allocator::InsertAllocatorDispatch(&g_allocator_dispatch);
#else
  ignore_result(g_allocator_dispatch);
  DLOG(WARNING)
      << "base::allocator shims are not available for memory sampling.";
#endif  // BUILDFLAG(USE_ALLOCATOR_SHIM)

#if BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
  base::PartitionAllocHooks::SetAllocationHook(&PartitionAllocHook);
  base::PartitionAllocHooks::SetFreeHook(&PartitionFreeHook);
#endif  // BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)

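  // Handshake with SetHooksInstallCallback(): whichever of the two functions
  // runs second sees g_hooks_installed already set to 1 by the other and is
  // the one that invokes the callback, so the callback fires exactly once and
  // only after both the hooks and the callback have been registered.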
  int32_t hooks_install_callback_has_been_set =
      base::subtle::Acquire_CompareAndSwap(&g_hooks_installed, 0, 1);
  if (hooks_install_callback_has_been_set)
    g_hooks_install_callback();

  return true;
}

// static
void SamplingHeapProfiler::SetHooksInstallCallback(
    void (*hooks_install_callback)()) {
  CHECK(!g_hooks_install_callback && hooks_install_callback);
  g_hooks_install_callback = hooks_install_callback;

  int32_t profiler_has_already_been_initialized =
      base::subtle::Release_CompareAndSwap(&g_hooks_installed, 0, 1);
  if (profiler_has_already_been_initialized)
    g_hooks_install_callback();
}

uint32_t SamplingHeapProfiler::Start() {
#if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
    defined(OFFICIAL_BUILD)
  if (!base::trace_event::CFIBacktraceAndroid::GetInitializedInstance()
           ->can_unwind_stack_frames()) {
    LOG(WARNING) << "Sampling heap profiler: Stack unwinding is not available.";
    return 0;
  }
#endif
  InstallAllocatorHooksOnce();
  base::subtle::Barrier_AtomicIncrement(&g_running, 1);
  return last_sample_ordinal_;
}

void SamplingHeapProfiler::Stop() {
  AtomicWord count = base::subtle::Barrier_AtomicIncrement(&g_running, -1);
  CHECK_GE(count, 0);
}

void SamplingHeapProfiler::SetSamplingInterval(size_t sampling_interval) {
  // TODO(alph): Reset the sample being collected if running.
  base::subtle::Release_Store(&g_sampling_interval,
                              static_cast<AtomicWord>(sampling_interval));
}

// static
size_t SamplingHeapProfiler::GetNextSampleInterval(size_t interval) {
  if (UNLIKELY(g_deterministic))
    return interval;

  // We sample with a Poisson process, with constant average sampling
  // interval. This follows the exponential probability distribution with
  // parameter λ = 1/interval where |interval| is the average number of bytes
  // between samples.
  // Let u be a uniformly distributed random number between 0 and 1, then
  // next_sample = -ln(u) / λ
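  // For instance, with the default 128 KiB mean interval and u = 0.5 the next
  // sample would fire after roughly -ln(0.5) * 131072 ~ 90850 bytes of
  // allocations; u close to 1 gives short intervals, u close to 0 gives long
  // ones (capped below).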
  double uniform = base::RandDouble();
  double value = -log(uniform) * interval;
  size_t min_value = sizeof(intptr_t);
  // We limit the upper bound of a sample interval to make sure we don't have
  // huge gaps in the sampling stream. The probability of hitting the upper
  // bound is exp(-20) ~ 2e-9, so it should not skew the distribution.
  size_t max_value = interval * 20;
  if (UNLIKELY(value < min_value))
    return min_value;
  if (UNLIKELY(value > max_value))
    return max_value;
  return static_cast<size_t>(value);
}

// static
void SamplingHeapProfiler::RecordAlloc(void* address,
                                       size_t size,
                                       uint32_t skip_frames) {
  if (UNLIKELY(!base::subtle::NoBarrier_Load(&g_running)))
    return;
  if (UNLIKELY(base::ThreadLocalStorage::HasBeenDestroyed()))
    return;

  // TODO(alph): On macOS the hook may be called several times for a single
  // allocation. Handle that case.

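  // The TLS slot holds a per-thread byte countdown stored as a negative
  // intptr_t: every allocation adds its size, and as long as the total stays
  // negative no sample is due. Once it reaches zero or above, one or more
  // samples are emitted below and the counter is re-armed with fresh
  // randomized intervals. On a thread's first allocation the slot reads as
  // zero, so the slow path runs immediately and initializes the countdown.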
  intptr_t accumulated_bytes =
      reinterpret_cast<intptr_t>(AccumulatedBytesTLS().Get());
  accumulated_bytes += size;
  if (LIKELY(accumulated_bytes < 0)) {
    AccumulatedBytesTLS().Set(reinterpret_cast<void*>(accumulated_bytes));
    return;
  }

  size_t mean_interval = base::subtle::NoBarrier_Load(&g_sampling_interval);
  size_t samples = accumulated_bytes / mean_interval;
  accumulated_bytes %= mean_interval;

  do {
    accumulated_bytes -= GetNextSampleInterval(mean_interval);
    ++samples;
  } while (accumulated_bytes >= 0);

  AccumulatedBytesTLS().Set(reinterpret_cast<void*>(accumulated_bytes));

  instance_->DoRecordAlloc(samples * mean_interval, size, address, skip_frames);
}

void SamplingHeapProfiler::RecordStackTrace(Sample* sample,
                                            uint32_t skip_frames) {
#if !defined(OS_NACL)
  constexpr uint32_t kMaxStackEntries = 256;
  constexpr uint32_t kSkipProfilerOwnFrames = 2;
  skip_frames += kSkipProfilerOwnFrames;
#if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
    defined(OFFICIAL_BUILD)
  const void* frames[kMaxStackEntries];
  size_t frame_count =
      base::trace_event::CFIBacktraceAndroid::GetInitializedInstance()->Unwind(
          frames, kMaxStackEntries);
#elif BUILDFLAG(CAN_UNWIND_WITH_FRAME_POINTERS)
  const void* frames[kMaxStackEntries];
  size_t frame_count = base::debug::TraceStackFramePointers(
      frames, kMaxStackEntries, skip_frames);
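  // TraceStackFramePointers() has already skipped |skip_frames| entries while
  // unwinding, so reset it to avoid trimming the captured stack again below.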
  skip_frames = 0;
#else
  // Fall back to capturing the stack with base::debug::StackTrace,
  // which is likely slower, but more reliable.
  base::debug::StackTrace stack_trace(kMaxStackEntries);
  size_t frame_count = 0;
  const void* const* frames = stack_trace.Addresses(&frame_count);
#endif

  sample->stack.insert(
      sample->stack.end(), const_cast<void**>(&frames[skip_frames]),
      const_cast<void**>(&frames[std::max<size_t>(frame_count, skip_frames)]));
#endif
}

void SamplingHeapProfiler::DoRecordAlloc(size_t total_allocated,
                                         size_t size,
                                         void* address,
                                         uint32_t skip_frames) {
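  // |entered_| acts as a thread-local reentrancy guard: the bookkeeping below
  // (lock acquisition, stack capture, map insertion) may itself allocate,
  // which could otherwise lead to a nested DoRecordAlloc trying to re-acquire
  // |mutex_| on the same thread.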
  if (entered_.Get())
    return;
  entered_.Set(true);
  {
    base::AutoLock lock(mutex_);
    Sample sample(size, total_allocated, ++last_sample_ordinal_);
    RecordStackTrace(&sample, skip_frames);
    for (auto* observer : observers_)
      observer->SampleAdded(sample.ordinal, size, total_allocated);
    samples_.emplace(address, std::move(sample));
    // TODO(alph): Sometimes RecordAlloc is called twice in a row without
    // a RecordFree in between. Investigate it.
    if (!sampled_addresses_set().Contains(address))
      sampled_addresses_set().Insert(address);
    BalanceAddressesHashSet();
  }
  entered_.Set(false);
}

// static
void SamplingHeapProfiler::RecordFree(void* address) {
  if (UNLIKELY(address == nullptr))
    return;
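  // The common case is a free of an address that was never sampled, so the
  // hot path is a single lock-free Contains() lookup; only sampled addresses
  // take the locked DoRecordFree() path.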
  if (UNLIKELY(sampled_addresses_set().Contains(address)))
    instance_->DoRecordFree(address);
}

void SamplingHeapProfiler::DoRecordFree(void* address) {
  if (UNLIKELY(base::ThreadLocalStorage::HasBeenDestroyed()))
    return;
  if (entered_.Get())
    return;
  entered_.Set(true);
  {
    base::AutoLock lock(mutex_);
    auto it = samples_.find(address);
    CHECK(it != samples_.end());
    for (auto* observer : observers_)
      observer->SampleRemoved(it->second.ordinal);
    samples_.erase(it);
    sampled_addresses_set().Remove(address);
  }
  entered_.Set(false);
}

void SamplingHeapProfiler::BalanceAddressesHashSet() {
  // If the load factor of the current addresses hash set reaches 1, allocate
  // a new set twice as large, copy all the data over, and switch to using it.
  // No writes are made to either set during the copy, since the whole
  // operation happens under the lock.
  // Readers continue to use the old set until the atomic switch takes place.
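  // For example, assuming load_factor() is the number of stored addresses
  // divided by buckets_count(), the initial 64-bucket set created in the
  // constructor grows to 128 buckets once 64 or more sampled allocations are
  // live at the same time.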
  LockFreeAddressHashSet& current_set = sampled_addresses_set();
  if (current_set.load_factor() < 1)
    return;
  auto new_set =
      std::make_unique<LockFreeAddressHashSet>(current_set.buckets_count() * 2);
  new_set->Copy(current_set);
  // Atomically switch all the new readers to the new set.
  base::subtle::Release_Store(&g_sampled_addresses_set,
                              reinterpret_cast<AtomicWord>(new_set.get()));
  // We still have to keep all the old sets alive to resolve the theoretical
  // race with readers in |RecordFree| that have already obtained the set,
  // but haven't yet managed to access it.
  sampled_addresses_stack_.push(std::move(new_set));
}

// static
LockFreeAddressHashSet& SamplingHeapProfiler::sampled_addresses_set() {
  return *reinterpret_cast<LockFreeAddressHashSet*>(
      base::subtle::NoBarrier_Load(&g_sampled_addresses_set));
}

// static
SamplingHeapProfiler* SamplingHeapProfiler::GetInstance() {
  static base::NoDestructor<SamplingHeapProfiler> instance;
  return instance.get();
}

// static
void SamplingHeapProfiler::SuppressRandomnessForTest(bool suppress) {
  g_deterministic = suppress;
}

void SamplingHeapProfiler::AddSamplesObserver(SamplesObserver* observer) {
  CHECK(!entered_.Get());
  entered_.Set(true);
  {
    base::AutoLock lock(mutex_);
    observers_.push_back(observer);
  }
  entered_.Set(false);
}

void SamplingHeapProfiler::RemoveSamplesObserver(SamplesObserver* observer) {
  CHECK(!entered_.Get());
  entered_.Set(true);
  {
    base::AutoLock lock(mutex_);
    auto it = std::find(observers_.begin(), observers_.end(), observer);
    CHECK(it != observers_.end());
    observers_.erase(it);
  }
  entered_.Set(false);
}

std::vector<SamplingHeapProfiler::Sample> SamplingHeapProfiler::GetSamples(
    uint32_t profile_id) {
  CHECK(!entered_.Get());
  entered_.Set(true);
  std::vector<Sample> samples;
  {
    base::AutoLock lock(mutex_);
    for (auto& it : samples_) {
      Sample& sample = it.second;
      if (sample.ordinal > profile_id)
        samples.push_back(sample);
    }
  }
  entered_.Set(false);
  return samples;
}

}  // namespace base