// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/trace_event/heap_profiler_heap_dump_writer.h"

#include <stdint.h>

#include <algorithm>
#include <iterator>
#include <tuple>
#include <utility>
#include <vector>

#include "base/format_macros.h"
#include "base/logging.h"
#include "base/macros.h"
#include "base/strings/stringprintf.h"
#include "base/trace_event/heap_profiler_stack_frame_deduplicator.h"
#include "base/trace_event/heap_profiler_type_name_deduplicator.h"
#include "base/trace_event/memory_dump_session_state.h"
#include "base/trace_event/trace_config.h"
#include "base/trace_event/trace_event_argument.h"
#include "base/trace_event/trace_log.h"

// Most of what the |HeapDumpWriter| does is aggregating detailed information
// about the heap and deciding what to dump. The input to this process is a
// list of (|AllocationContext|, |AllocationMetrics|) pairs.
//
// The pairs are grouped into |Bucket|s. A bucket is a group of (context,
// metrics) pairs where the properties of the contexts share a prefix. (Type
// name is considered a list of length one here.) First all pairs are put into
// one bucket that represents the entire heap. Then this bucket is recursively
// broken down into smaller buckets. Each bucket keeps track of whether further
// breakdown is possible.

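// As a hypothetical illustration (frame and type names are made up, not taken
// from any real trace), breaking a root bucket of 4096 bytes down first by
// backtrace and then by type name might produce:
//
//   [root, 4096 bytes]
//     [main, 4096]                       (backtrace_cursor = 1)
//       [main <- ParseHtml, 3584]        (backtrace_cursor = 2)
//         [main <- ParseHtml, type Node, 3584]
//       [main <- RunLoop, 512]           (backtrace_cursor = 2)
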
namespace base {
namespace trace_event {
namespace internal {
namespace {

// Denotes a property of |AllocationContext| to break down by.
enum class BreakDownMode { kByBacktrace, kByTypeName };

// A group of bytes for which the context shares a prefix.
struct Bucket {
  Bucket()
      : size(0),
        count(0),
        backtrace_cursor(0),
        is_broken_down_by_type_name(false) {}

  std::vector<std::pair<const AllocationContext*, AllocationMetrics>>
      metrics_by_context;

  // The sum of the sizes of |metrics_by_context|.
  size_t size;

  // The sum of the allocation counts of |metrics_by_context|.
  size_t count;

  // The index of the first stack frame that has not yet been broken down by.
  // For all elements in this bucket, stack frames 0 up to (but not including)
  // the cursor must be equal. E.g. with a cursor of 2, every context in this
  // bucket shares frames[0] and frames[1].
  size_t backtrace_cursor;

  // When true, the type name for all elements in this bucket must be equal.
  bool is_broken_down_by_type_name;
};

// Comparison operator to order buckets by their size.
bool operator<(const Bucket& lhs, const Bucket& rhs) {
  return lhs.size < rhs.size;
}

// Groups the allocations in the bucket by |break_by|. The buckets in the
// returned list will have |backtrace_cursor| advanced or
// |is_broken_down_by_type_name| set depending on the property to group by.
std::vector<Bucket> GetSubbuckets(const Bucket& bucket,
                                  BreakDownMode break_by) {
  base::hash_map<const void*, Bucket> breakdown;

  if (break_by == BreakDownMode::kByBacktrace) {
    for (const auto& context_and_metrics : bucket.metrics_by_context) {
      const Backtrace& backtrace = context_and_metrics.first->backtrace;
      const StackFrame* begin = std::begin(backtrace.frames);
      const StackFrame* end = begin + backtrace.frame_count;
      const StackFrame* cursor = begin + bucket.backtrace_cursor;

      DCHECK_LE(cursor, end);

      if (cursor != end) {
        Bucket& subbucket = breakdown[cursor->value];
        subbucket.size += context_and_metrics.second.size;
        subbucket.count += context_and_metrics.second.count;
        subbucket.metrics_by_context.push_back(context_and_metrics);
        subbucket.backtrace_cursor = bucket.backtrace_cursor + 1;
        subbucket.is_broken_down_by_type_name =
            bucket.is_broken_down_by_type_name;
        DCHECK_GT(subbucket.size, 0u);
        DCHECK_GT(subbucket.count, 0u);
      }
    }
  } else if (break_by == BreakDownMode::kByTypeName) {
    if (!bucket.is_broken_down_by_type_name) {
      for (const auto& context_and_metrics : bucket.metrics_by_context) {
        const AllocationContext* context = context_and_metrics.first;
        Bucket& subbucket = breakdown[context->type_name];
        subbucket.size += context_and_metrics.second.size;
        subbucket.count += context_and_metrics.second.count;
        subbucket.metrics_by_context.push_back(context_and_metrics);
        subbucket.backtrace_cursor = bucket.backtrace_cursor;
        subbucket.is_broken_down_by_type_name = true;
        DCHECK_GT(subbucket.size, 0u);
        DCHECK_GT(subbucket.count, 0u);
      }
    }
  }

  std::vector<Bucket> buckets;
  buckets.reserve(breakdown.size());
  for (const auto& key_bucket : breakdown)
    buckets.push_back(key_bucket.second);

  return buckets;
}
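
// For example (hypothetical frames): a bucket with |backtrace_cursor| == 1
// whose contexts have backtraces [main, ParseHtml] and [main, RunLoop] is
// broken down by backtrace into two subbuckets, keyed on the ParseHtml and
// RunLoop frames, each with |backtrace_cursor| == 2.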

// Breaks down the bucket by |break_by|. Returns only buckets that contribute
// at least |min_size_bytes| to the total size. The long tail is omitted.
std::vector<Bucket> BreakDownBy(const Bucket& bucket,
                                BreakDownMode break_by,
                                size_t min_size_bytes) {
  std::vector<Bucket> buckets = GetSubbuckets(bucket, break_by);

  // Ensure that |buckets| is a max-heap (the data structure, not memory heap),
  // so its front contains the largest bucket. Buckets should be iterated
  // ordered by size, but sorting the vector is overkill because the long tail
  // of small buckets will be discarded. By using a max-heap, the optimal case
  // where all but the first bucket are discarded is O(n). The worst case where
  // no bucket is discarded is doing a heap sort, which is O(n log n).
  std::make_heap(buckets.begin(), buckets.end());

  // Keep moving the largest remaining bucket out of the heap while it
  // contributes at least |min_size_bytes|. The large buckets end up in
  // [it, end()); [begin(), it) is the part that still contains the max-heap
  // of small buckets.
  std::vector<Bucket>::iterator it;
  for (it = buckets.end(); it != buckets.begin(); --it) {
    if (buckets.front().size < min_size_bytes)
      break;

    // Put the largest bucket in [begin, it) at |it - 1| and max-heapify
    // [begin, it - 1). This puts the next largest bucket at |buckets.front()|.
    std::pop_heap(buckets.begin(), it);
  }

  // At this point, |buckets| looks like this (numbers are bucket sizes):
  //
  // <-- max-heap of small buckets --->
  //                                  <-- large buckets by ascending size -->
  // [ 19 | 11 | 13 | 7 | 2 | 5 | ... | 83 | 89 | 97 ]
  //   ^                                ^              ^
  //   |                                |              |
  //   begin()                          it             end()
  // Discard the long tail of buckets that contribute less than
  // |min_size_bytes| each.
  buckets.erase(buckets.begin(), it);

  return buckets;
}
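
// A minimal standalone sketch of the same selection technique on plain ints
// (the values and the threshold of 80 are made up for illustration):
//
//   std::vector<int> v = {19, 83, 11, 97, 7, 89};
//   std::make_heap(v.begin(), v.end());
//   auto it = v.end();
//   while (it != v.begin() && v.front() >= 80) {
//     std::pop_heap(v.begin(), it);
//     --it;
//   }
//   // Now [it, v.end()) holds 83, 89, 97 in ascending order.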

}  // namespace

bool operator<(Entry lhs, Entry rhs) {
  // There is no need to compare |size|. If the backtrace and type name are
  // equal then the sizes must be equal as well.
  return std::tie(lhs.stack_frame_id, lhs.type_id) <
         std::tie(rhs.stack_frame_id, rhs.type_id);
}

HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,
                               TypeNameDeduplicator* type_name_deduplicator,
                               uint32_t breakdown_threshold_bytes)
    : stack_frame_deduplicator_(stack_frame_deduplicator),
      type_name_deduplicator_(type_name_deduplicator),
      breakdown_threshold_bytes_(breakdown_threshold_bytes) {}

HeapDumpWriter::~HeapDumpWriter() {}

bool HeapDumpWriter::AddEntryForBucket(const Bucket& bucket) {
  // The contexts in the bucket are all different, but the [begin, cursor)
  // range of stack frames is equal for all contexts in the bucket, and the
  // type names are equal if |is_broken_down_by_type_name| is set.
  DCHECK(!bucket.metrics_by_context.empty());

  const AllocationContext* context = bucket.metrics_by_context.front().first;

  const StackFrame* backtrace_begin = std::begin(context->backtrace.frames);
  const StackFrame* backtrace_end = backtrace_begin + bucket.backtrace_cursor;
  DCHECK_LE(bucket.backtrace_cursor, arraysize(context->backtrace.frames));

  Entry entry;
  entry.stack_frame_id = stack_frame_deduplicator_->Insert(
      backtrace_begin, backtrace_end);

  // Deduplicate the type name, or use ID -1 if the type name is not set.
  entry.type_id = bucket.is_broken_down_by_type_name
                      ? type_name_deduplicator_->Insert(context->type_name)
                      : -1;

  entry.size = bucket.size;
  entry.count = bucket.count;

  auto position_and_inserted = entries_.insert(entry);
  return position_and_inserted.second;
}

void HeapDumpWriter::BreakDown(const Bucket& bucket) {
  auto by_backtrace = BreakDownBy(bucket,
                                  BreakDownMode::kByBacktrace,
                                  breakdown_threshold_bytes_);
  auto by_type_name = BreakDownBy(bucket,
                                  BreakDownMode::kByTypeName,
                                  breakdown_threshold_bytes_);

  // Insert entries for the buckets. If a bucket was not present before, it has
  // not been broken down before, so recursively continue breaking down in that
  // case. There might be multiple routes to the same entry (first break down
  // by type name, then by backtrace, or first by backtrace and then by type),
  // so a set is used to avoid dumping and breaking down entries more than once.

  for (const Bucket& subbucket : by_backtrace)
    if (AddEntryForBucket(subbucket))
      BreakDown(subbucket);

  for (const Bucket& subbucket : by_type_name)
    if (AddEntryForBucket(subbucket))
      BreakDown(subbucket);
}
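
// To illustrate the deduplication above with a hypothetical example: the
// entry for (backtrace [main, Init], type Widget) is reachable both by
// breaking the [main, Init] bucket down by type name and by breaking the
// ([main], Widget) bucket down by backtrace. Because AddEntryForBucket()
// returns false the second time, that subtree is dumped and recursed into
// only once.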

const std::set<Entry>& HeapDumpWriter::Summarize(
    const hash_map<AllocationContext, AllocationMetrics>& metrics_by_context) {
  // Start with one bucket that represents the entire heap. Iterate by
  // reference, because the buckets store pointers to the allocation contexts
  // inside |metrics_by_context|; iterating by value would take the address of
  // a temporary.
  Bucket root_bucket;
  for (const auto& context_and_metrics : metrics_by_context) {
    DCHECK_GT(context_and_metrics.second.size, 0u);
    DCHECK_GT(context_and_metrics.second.count, 0u);
    const AllocationContext* context = &context_and_metrics.first;
    root_bucket.metrics_by_context.push_back(
        std::make_pair(context, context_and_metrics.second));
    root_bucket.size += context_and_metrics.second.size;
    root_bucket.count += context_and_metrics.second.count;
  }

  AddEntryForBucket(root_bucket);

  // Recursively break down the heap and fill |entries_| with entries to dump.
  BreakDown(root_bucket);

  return entries_;
}

std::unique_ptr<TracedValue> Serialize(const std::set<Entry>& entries) {
  std::string buffer;
  std::unique_ptr<TracedValue> traced_value(new TracedValue);

  traced_value->BeginArray("entries");

  for (const Entry& entry : entries) {
    traced_value->BeginDictionary();

    // Format size as hexadecimal string into |buffer|.
    SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size));
    traced_value->SetString("size", buffer);

    // Format count as a hexadecimal string as well.
    SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.count));
    traced_value->SetString("count", buffer);

    if (entry.stack_frame_id == -1) {
      // An empty backtrace (which will have ID -1) is represented by the empty
      // string, because there is no leaf frame to reference in |stackFrames|.
      traced_value->SetString("bt", "");
    } else {
      // Format index of the leaf frame as a string, because |stackFrames| is a
      // dictionary, not an array.
      SStringPrintf(&buffer, "%i", entry.stack_frame_id);
      traced_value->SetString("bt", buffer);
    }

    // Type ID -1 (cumulative size for all types) is represented by the absence
    // of the "type" key in the dictionary.
    if (entry.type_id != -1) {
      // Format the type ID as a string.
      SStringPrintf(&buffer, "%i", entry.type_id);
      traced_value->SetString("type", buffer);
    }

    traced_value->EndDictionary();
  }

  traced_value->EndArray();  // "entries"
  return traced_value;
}
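
// As a sketch of the serialized structure (IDs and sizes made up): one entry
// of 4096 bytes over 32 allocations, with leaf stack frame ID 7 and type ID 4,
// would serialize to:
//
//   {"entries": [{"size": "1000", "count": "20", "bt": "7", "type": "4"}]}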

}  // namespace internal

std::unique_ptr<TracedValue> ExportHeapDump(
    const hash_map<AllocationContext, AllocationMetrics>& metrics_by_context,
    const MemoryDumpSessionState& session_state) {
  internal::HeapDumpWriter writer(
      session_state.stack_frame_deduplicator(),
      session_state.type_name_deduplicator(),
      session_state.heap_profiler_breakdown_threshold_bytes());
  return Serialize(writer.Summarize(metrics_by_context));
}
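
// Hypothetical call site (|metrics| and |session_state| stand in for values a
// heap dump provider would supply; not taken from a real call site):
//
//   hash_map<AllocationContext, AllocationMetrics> metrics = ...;
//   std::unique_ptr<TracedValue> dump = ExportHeapDump(metrics, session_state);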

}  // namespace trace_event
}  // namespace base