// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This is an OS-independent* module whose purpose is tracking allocations and
// their call sites (stack traces). It is able to deal with hole punching
// (read: munmap). Also, it has low overhead and its presence in the system is
// barely noticeable, even when tracing *all* the processes.
// This module does NOT know how to deal with stack unwinding. The caller must
// do that and pass the addresses of the unwound stack.
// * (Modulo three lines for mutexes.)
//
// Exposed API:
//   void heap_profiler_init(HeapStats*);
//   void heap_profiler_alloc(addr, size, stack_frames, depth, flags);
//   void heap_profiler_free(addr, size, old_flags);  (size == 0 means free
//                                                     the entire region.)
//   void heap_profiler_cleanup(void);
//
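// Example (sketch) of how a caller might drive this module; unwind_stack()
// below is a hypothetical, caller-provided unwinder (as said above, this
// module does not unwind):
//
//   uintptr_t frames[HEAP_PROFILER_MAX_DEPTH];
//   uint32_t depth = unwind_stack(frames, HEAP_PROFILER_MAX_DEPTH);
//   heap_profiler_alloc(ptr, size, frames, depth, 0 /* flags */);
//   ...
//   heap_profiler_free(ptr, 0, NULL);  // size == 0: free the whole region.
//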
// The profiling information is tracked in two data structures:
// 1) An RB-Tree of non-overlapping VM regions (allocs) sorted by their start
//    addr. Each entry tracks the start and end addresses and points to the
//    stack trace which created that allocation (see below).
// 2) A (hash) table of stack traces. In general #allocations >> #call sites
//    which create those allocations. In order to avoid duplicating the latter,
//    they are stored distinctly in this hash table and used by reference.
//
//   /  Process virtual address space  \
//   +------+      +------+      +------+
//   |Alloc1|      |Alloc2|      |Alloc3|    <- Allocs (an RB-Tree underneath)
//   +------+      +------+      +------+
//    Len: 12       Len: 4        Len: 4
//       |            |             |                     stack_traces
//       |            |             |              +-----------+--------------+
//       |            |             |              | Alloc tot | stack frames |
//       |            |             |              +-----------+--------------+
//       +------------|-------------+------------> |    16     | 0x1234 ....  |
//                    |                            +-----------+--------------+
//                    +--------------------------> |     4     | 0x5678 ....  |
//                                                 +-----------+--------------+
//                                                   (A hash-table underneath)
//
// Final note: the memory for both 1) and 2) entries is carved out from two
// static pools (i.e. stack_traces and allocs). The pools are essentially
// treated as a sbrk and are kept compact by reusing freed elements (hence
// having a freelist for each of them).
//
// All the internal (static) functions here assume that the |lock| is held.

#include <assert.h>
#include <string.h>

// Platform-dependent mutex boilerplate.
#if defined(__linux__) || defined(__ANDROID__)
#include <pthread.h>
#define DEFINE_MUTEX(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER
#define LOCK_MUTEX(x) pthread_mutex_lock(&x)
#define UNLOCK_MUTEX(x) pthread_mutex_unlock(&x)
#else
#error OS not supported.
#endif

#include "tools/android/heap_profiler/heap_profiler.h"


static DEFINE_MUTEX(lock);

// |stats| contains the global tracking metadata and is the entry point which
// is read by the heap_dump tool.
static HeapStats* stats;

// +---------------------------------------------------------------------------+
// + Stack traces hash-table                                                   +
// +---------------------------------------------------------------------------+
#define ST_ENTRIES_MAX (64 * 1024)
#define ST_HASHTABLE_BUCKETS (64 * 1024) /* Must be a power of 2. */

static StacktraceEntry stack_traces[ST_ENTRIES_MAX];
static StacktraceEntry* stack_traces_freelist;
static StacktraceEntry* stack_traces_ht[ST_HASHTABLE_BUCKETS];

// Looks up a stack trace from the stack frames. Creates a new one if necessary.
static StacktraceEntry* record_stacktrace(uintptr_t* frames, uint32_t depth) {
  if (depth == 0)
    return NULL;

  if (depth > HEAP_PROFILER_MAX_DEPTH)
    depth = HEAP_PROFILER_MAX_DEPTH;

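  // Compute a cheap shift-and-XOR rolling hash of the frames and map it onto
  // a bucket. ST_HASHTABLE_BUCKETS is a power of two, so the masking below is
  // equivalent to a modulo.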
  uint32_t i;
  uintptr_t hash = 0;
  for (i = 0; i < depth; ++i)
    hash = (hash << 1) ^ (frames[i]);
  const uint32_t slot = hash & (ST_HASHTABLE_BUCKETS - 1);
  StacktraceEntry* st = stack_traces_ht[slot];

  // Look for an existing entry in the hash-table. A match requires both the
  // hash and the raw frames to be equal (the hash alone can collide).
  const size_t frames_length = depth * sizeof(uintptr_t);
  while (st != NULL &&
         (st->hash != hash || memcmp(frames, st->frames, frames_length) != 0)) {
    st = st->next;
  }

  // If not found, create a new one from the stack_traces array and add it to
  // the hash-table.
  if (st == NULL) {
    // Get a free element either from the freelist or from the pool.
    if (stack_traces_freelist != NULL) {
      st = stack_traces_freelist;
      stack_traces_freelist = stack_traces_freelist->next;
    } else if (stats->max_stack_traces < ST_ENTRIES_MAX) {
      st = &stack_traces[stats->max_stack_traces];
      ++stats->max_stack_traces;
    } else {
      return NULL;
    }

    memset(st, 0, sizeof(*st));
    memcpy(st->frames, frames, frames_length);
    st->hash = hash;
    st->next = stack_traces_ht[slot];
    stack_traces_ht[slot] = st;
    ++stats->num_stack_traces;
  }

  return st;
}

// Frees up a stack trace and appends it to the corresponding freelist.
static void free_stacktrace(StacktraceEntry* st) {
  assert(st->alloc_bytes == 0);
  const uint32_t slot = st->hash & (ST_HASHTABLE_BUCKETS - 1);

  // The expected load factor of the hash-table is very low and frees should be
  // pretty rare. Hence don't bother with a doubly linked list; it would likely
  // cost more than this linear walk.
  StacktraceEntry** prev = &stack_traces_ht[slot];
  while (*prev != st)
    prev = &((*prev)->next);

  // Remove from the hash-table bucket.
  assert(*prev == st);
  *prev = st->next;

  // Add to the freelist.
  st->next = stack_traces_freelist;
  stack_traces_freelist = st;
  --stats->num_stack_traces;
}

// +---------------------------------------------------------------------------+
// + Allocs RB-tree                                                            +
// +---------------------------------------------------------------------------+
#define ALLOCS_ENTRIES_MAX (256 * 1024)

static Alloc allocs[ALLOCS_ENTRIES_MAX];
static Alloc* allocs_freelist;
static RB_HEAD(HeapEntriesTree, Alloc) allocs_tree =
    RB_INITIALIZER(&allocs_tree);

// Comparator used by the RB-Tree (mind the overflow, avoid arith on addresses).
static int allocs_tree_cmp(Alloc *alloc_1, Alloc *alloc_2) {
  if (alloc_1->start < alloc_2->start)
    return -1;
  if (alloc_1->start > alloc_2->start)
    return 1;
  return 0;
}

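// Emit the red-black tree helpers (RB_INSERT, RB_REMOVE, RB_NEXT, ...) used
// below. The RB_* macros are the BSD sys/tree.h-style intrusive red-black
// tree macros.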
RB_PROTOTYPE(HeapEntriesTree, Alloc, rb_node, allocs_tree_cmp);
RB_GENERATE(HeapEntriesTree, Alloc, rb_node, allocs_tree_cmp);

// Allocates a new Alloc and inserts it in the tree.
static Alloc* insert_alloc(
    uintptr_t start, uintptr_t end, StacktraceEntry* st, uint32_t flags) {
  Alloc* alloc = NULL;

  // First of all, get a free element either from the freelist or from the pool.
  if (allocs_freelist != NULL) {
    alloc = allocs_freelist;
    allocs_freelist = alloc->next_free;
  } else if (stats->max_allocs < ALLOCS_ENTRIES_MAX) {
    alloc = &allocs[stats->max_allocs];
    ++stats->max_allocs;
  } else {
    return NULL;  // OOM.
  }

  alloc->start = start;
  alloc->end = end;
  alloc->st = st;
  alloc->flags = flags;
  alloc->next_free = NULL;
  RB_INSERT(HeapEntriesTree, &allocs_tree, alloc);
  ++stats->num_allocs;
  return alloc;
}

// Deletes all the allocs in the range [addr, addr+size[, dealing with partial
// frees and hole punching. Note that in the general case this function might
// need to deal with rather unfortunate situations, as in the example below:
//
// Alloc tree begin: [Alloc 1]----[Alloc 2]-------[Alloc 3][Alloc 4]---[Alloc 5]
// Deletion range:                      [xxxxxxxxxxxxxxxxxxxx]
// Alloc tree end:   [Alloc 1]----[Al.2]----------------------[Al.4]---[Alloc 5]
//                   Alloc 3 has to be deleted; Alloc 2 and 4 must be shrunk.
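// A note on the interval convention used below: an Alloc covers the closed
// range [start, end], and the deletion range is [del_start, del_end] with
// del_end == del_start + size - 1.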
static uint32_t delete_allocs_in_range(void* addr, size_t size) {
  uintptr_t del_start = (uintptr_t) addr;
  uintptr_t del_end = del_start + size - 1;
  uint32_t flags = 0;

  Alloc* alloc = NULL;
  Alloc* next_alloc = RB_ROOT(&allocs_tree);

  // Lookup the first (by address) relevant Alloc to initiate the deletion walk.
  // At the end of the loop next_alloc is either:
  // - the closest alloc starting before (or exactly at) the start of the
  //   deletion range (i.e. addr == del_start).
  // - the first alloc inside the deletion range.
  // - the first alloc after the deletion range iff the range was already empty
  //   (in this case the next loop will just bail out doing nothing).
  // - NULL: iff the entire tree is empty (as above).
  while (next_alloc != NULL) {
    alloc = next_alloc;
    if (alloc->start > del_start) {
      next_alloc = RB_LEFT(alloc, rb_node);
    } else if (alloc->end < del_start) {
      next_alloc = RB_RIGHT(alloc, rb_node);
    } else {  // alloc->start <= del_start && alloc->end >= del_start
      break;
    }
  }

  // Now scan the allocs linearly, deleting chunks (or possibly whole allocs)
  // until we get past the end of the deletion range.
  next_alloc = alloc;
  while (next_alloc != NULL) {
    alloc = next_alloc;
    next_alloc = RB_NEXT(HeapEntriesTree, &allocs_tree, alloc);

    if (size != 0) {
      // In the general case we stop once past the end of the deletion range.
      if (alloc->start > del_end)
        break;

      // This deals with the case of the first Alloc lying before the range.
      if (alloc->end < del_start)
        continue;
    } else {
      // size == 0 is a special case. It means deleting only the alloc which
      // starts exactly at |del_start|, if any (to deal with free(ptr)).
      if (alloc->start > del_start)
        break;
      if (alloc->start < del_start)
        continue;
      del_end = alloc->end;
    }

    // At this point the Alloc must overlap (partially or completely) with the
    // deletion range.
    assert(!(alloc->start > del_end || alloc->end < del_start));

    StacktraceEntry* st = alloc->st;
    flags |= alloc->flags;
    uintptr_t freed_bytes = 0;  // Bytes freed in this cycle.

    if (del_start <= alloc->start) {
      if (del_end >= alloc->end) {
        // Complete overlap. Delete the full Alloc. Note: the range might
        // still overlap with the next allocs.
        // Begin:      ------[alloc.start    alloc.end]-[next alloc]
        // Del range:      [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx]
        // Result:     ---------------------------------[next alloc]
        //             [next alloc] will be shrunk on the next iteration.
        freed_bytes = alloc->end - alloc->start + 1;
        RB_REMOVE(HeapEntriesTree, &allocs_tree, alloc);

        // Clean up, so heap_dump can tell this is a free entry and skip it.
        alloc->start = alloc->end = 0;
        alloc->st = NULL;

        // Put it in the freelist.
        alloc->next_free = allocs_freelist;
        allocs_freelist = alloc;
        --stats->num_allocs;
      } else {
        // Partial overlap at the beginning. Cut the first part and shrink the
        // alloc.
        // Begin:      ------[alloc.start  alloc.end]-[next alloc]
        // Del range:      [xxxxxx]
        // Result:     ------------[start  alloc.end]-[next alloc]
        freed_bytes = del_end - alloc->start + 1;
        alloc->start = del_end + 1;
        // No need to update the tree even if we changed the key. The keys are
        // still monotonic (because the ranges are guaranteed not to overlap).
      }
    } else {
      if (del_end >= alloc->end) {
        // Partial overlap at the end. Cut the last part, leaving the shrunk
        // alloc on the left.
        // Begin:      ------[alloc.start     alloc.end]-[next alloc]
        // Del range:                               [xxxxxxxx]
        // Result:     ------[alloc.start alloc.end]-----[next alloc]
        //             [next alloc] will be shrunk on the next iteration.
        freed_bytes = alloc->end - del_start + 1;
        alloc->end = del_start - 1;
      } else {
        // Hole punching. Requires creating an extra alloc.
        // Begin:      ------[alloc.start     alloc.end]-[next alloc]
        // Del range:                   [xxx]
        // Result:     ------[ alloc 1 ]-----[ alloc 2 ]-[next alloc]
        freed_bytes = del_end - del_start + 1;
        const uintptr_t old_end = alloc->end;
        alloc->end = del_start - 1;

        // If the second alloc cannot be created (OOM), account its bytes as
        // freed, since they are no longer tracked.
        if (insert_alloc(del_end + 1, old_end, st, alloc->flags) == NULL)
          freed_bytes += (old_end - del_end);
      }
    }
    // Now update the StacktraceEntry the Alloc was pointing to, possibly
    // freeing it up.
    assert(st->alloc_bytes >= freed_bytes);
    st->alloc_bytes -= freed_bytes;
    if (st->alloc_bytes == 0)
      free_stacktrace(st);
    stats->total_alloc_bytes -= freed_bytes;
  }
  return flags;
}

// +---------------------------------------------------------------------------+
// + Library entry points (refer to heap_profiler.h for API doc).              +
// +---------------------------------------------------------------------------+
void heap_profiler_free(void* addr, size_t size, uint32_t* old_flags) {
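  // size == 0 means "free the alloc that starts exactly at |addr|" (the plain
  // free() case); otherwise check that |addr| + |size| does not wrap around
  // the address space.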
  assert(size == 0 || ((uintptr_t) addr + (size - 1)) >= (uintptr_t) addr);

  LOCK_MUTEX(lock);
  uint32_t flags = delete_allocs_in_range(addr, size);
  UNLOCK_MUTEX(lock);

  if (old_flags != NULL)
    *old_flags = flags;
}

void heap_profiler_alloc(void* addr, size_t size, uintptr_t* frames,
                         uint32_t depth, uint32_t flags) {
  if (depth > HEAP_PROFILER_MAX_DEPTH)
    depth = HEAP_PROFILER_MAX_DEPTH;

  if (size == 0)  // Some apps call malloc(0); it happens, just ignore it.
    return;

  const uintptr_t start = (uintptr_t) addr;
  const uintptr_t end = start + (size - 1);
  assert(start <= end);

  LOCK_MUTEX(lock);

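  // If the new region overlaps previously tracked allocs (e.g. a mapping
  // created on top of an existing one), drop the stale entries first.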
  delete_allocs_in_range(addr, size);

  StacktraceEntry* st = record_stacktrace(frames, depth);
  if (st != NULL) {
    Alloc* alloc = insert_alloc(start, end, st, flags);
    if (alloc != NULL) {
      st->alloc_bytes += size;
      stats->total_alloc_bytes += size;
    }
  }

  UNLOCK_MUTEX(lock);
}

void heap_profiler_init(HeapStats* heap_stats) {
  LOCK_MUTEX(lock);

  assert(stats == NULL);
  stats = heap_stats;
  memset(stats, 0, sizeof(HeapStats));
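  // The magic marker is likely what lets the out-of-process heap_dump tool
  // (which reads |stats|, see the comment at the top) sanity-check that it is
  // looking at a well-formed HeapStats structure.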
  stats->magic_start = HEAP_PROFILER_MAGIC_MARKER;
  stats->allocs = &allocs[0];
  stats->stack_traces = &stack_traces[0];

  UNLOCK_MUTEX(lock);
}

void heap_profiler_cleanup(void) {
  LOCK_MUTEX(lock);

  assert(stats != NULL);
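  // Scrub only the pool entries that were actually handed out (the |max_*|
  // counters are high-water marks), then reset the freelists and the tree so
  // that the profiler can be re-initialized.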
  memset(stack_traces, 0, sizeof(StacktraceEntry) * stats->max_stack_traces);
  memset(stack_traces_ht, 0, sizeof(stack_traces_ht));
  stack_traces_freelist = NULL;

  memset(allocs, 0, sizeof(Alloc) * stats->max_allocs);
  allocs_freelist = NULL;
  RB_INIT(&allocs_tree);

  stats = NULL;

  UNLOCK_MUTEX(lock);
}