1 # Copyright 2014 The Chromium Authors. All rights reserved. 2 # Use of this source code is governed by a BSD-style license that can be 3 # found in the LICENSE file. 4 5 """This module classifies NativeHeap objects filtering their allocations. 6 7 The only filter currently available is 'stacktrace', which works as follows: 8 9 {'name': 'rule-1', 'stacktrace': 'foo' } 10 {'name': 'rule-2', 'stacktrace': ['foo', r'bar\s+baz']} 11 {'name': 'rule-3', 'source_path': 'sk.*allocator'} 12 {'name': 'rule-3', 'source_path': 'sk', 'stacktrace': 'SkAllocator'} 13 14 15 rule-1 will match any allocation that has 'foo' in one of its stack frames. 16 rule-2 will match any allocation that has a stack frame matching 'foo' AND a 17 followed by a stack frame matching 'bar baz'. Note that order matters, so rule-2 18 will not match a stacktrace like ['bar baz', 'foo']. 19 rule-3 will match any allocation in which at least one of the source paths in 20 its stack frames matches the regex sk.*allocator. 21 rule-4 will match any allocation which satisfies both the conditions. 22 23 TODO(primiano): introduce more filters after the first prototype with UI, for 24 instance, filter by library file name or by allocation size. 25 """ 26 27 import collections 28 import posixpath 29 import re 30 31 from memory_inspector.classification import results 32 from memory_inspector.classification import rules 33 from memory_inspector.core import exceptions 34 from memory_inspector.core import native_heap 35 36 37 _RESULT_KEYS = ['bytes_allocated', 'bytes_resident'] 38 39 40 def LoadRules(content): 41 """Loads and parses a native-heap rule tree from a content (string). 42 43 Returns: 44 An instance of |rules.Rule|, which nodes are |_NHeapRule| instances. 45 """ 46 return rules.Load(content, _NHeapRule) 47 48 49 def Classify(nativeheap, rule_tree): 50 """Creates aggregated results of native heaps using the provided rules. 51 52 Args: 53 nativeheap: the heap dump being processed (a |NativeHeap| instance). 54 rule_tree: the user-defined rules that define the filtering categories. 55 56 Returns: 57 An instance of |AggreatedResults|. 58 """ 59 assert(isinstance(nativeheap, native_heap.NativeHeap)) 60 assert(isinstance(rule_tree, rules.Rule)) 61 62 res = results.AggreatedResults(rule_tree, _RESULT_KEYS) 63 for allocation in nativeheap.allocations: 64 res.AddToMatchingNodes(allocation, 65 [allocation.size, allocation.resident_size]) 66 return res 67 68 69 def InferHeuristicRulesFromHeap(nheap, max_depth=3, threshold=0.02): 70 """Infers the rules tree from a symbolized heap snapshot. 71 72 In lack of a specific set of rules, this method can be invoked to infer a 73 meaningful rule tree starting from a heap snapshot. It will build a compact 74 radix tree from the source paths of the stack traces, which height is at most 75 |max_depth|, selecting only those nodes which contribute for at least 76 |threshold| (1.0 = 100%) w.r.t. the total allocation of the heap snapshot. 77 """ 78 assert(isinstance(nheap, native_heap.NativeHeap)) 79 80 def RadixTreeInsert(node, path): 81 """Inserts a string (path) into a radix tree (a python recursive dict). 82 83 e.g.: [/a/b/c, /a/b/d, /z/h] -> {'/a/b/': {'c': {}, 'd': {}}, '/z/h': {}} 84 """ 85 86 def GetCommonPrefix(args): 87 """Returns the common prefix between two paths (no partial paths). 88 89 e.g.: /tmp/bar, /tmp/baz will return /tmp/ (and not /tmp/ba as the dumb 90 posixpath.commonprefix implementation would do) 91 """ 92 parts = posixpath.commonprefix(args).rpartition(posixpath.sep)[0] 93 return parts + posixpath.sep if parts else '' 94 95 for node_path in node.iterkeys(): 96 pfx = GetCommonPrefix([node_path, path]) 97 if not pfx: 98 continue 99 if len(pfx) < len(node_path): 100 node[pfx] = {node_path[len(pfx):] : node[node_path]} 101 del node[node_path] 102 if len(path) > len(pfx): 103 RadixTreeInsert(node[pfx], path[len(pfx):]) 104 return 105 node[path] = {} # No common prefix, create new child in current node. 106 107 # Given an allocation of size N and its stack trace, heuristically determines 108 # the source directory to be blamed for the N bytes. 109 # The blamed_dir is the one which appears more times in the top 8 stack frames 110 # (excluding the first 2, which usually are just the (m|c)alloc call sites). 111 # At the end, this will generate a *leaderboard* (|blamed_dirs|) which 112 # associates, to each source path directory, the number of bytes allocated. 113 114 blamed_dirs = collections.Counter() # '/s/path' : bytes_from_this_path (int) 115 total_allocated = 0 116 for alloc in nheap.allocations: 117 dir_histogram = collections.Counter() 118 for frame in alloc.stack_trace.frames[2:10]: 119 # Compute a histogram (for each allocation) of the top source dirs. 120 if not frame.symbol or not frame.symbol.source_info: 121 continue 122 src_file = frame.symbol.source_info[0].source_file_path 123 src_dir = posixpath.dirname(src_file.replace('\\', '/')) + '/' 124 dir_histogram.update([src_dir]) 125 if not dir_histogram: 126 continue 127 # Add the blamed dir to the leaderboard. 128 blamed_dir = dir_histogram.most_common()[0][0] 129 blamed_dirs.update({blamed_dir : alloc.size}) 130 total_allocated += alloc.size 131 132 # Select only the top paths from the leaderboard which contribute for more 133 # than |threshold| and make a radix tree out of them. 134 radix_tree = {} 135 for blamed_dir, alloc_size in blamed_dirs.most_common(): 136 if (1.0 * alloc_size / total_allocated) < threshold: 137 break 138 RadixTreeInsert(radix_tree, blamed_dir) 139 140 # The final step consists in generating a rule tree from the radix tree. This 141 # is a pretty straightforward tree-clone operation, they have the same shape. 142 def GenRulesFromRadixTree(radix_tree_node, max_depth, parent_path=''): 143 children = [] 144 if max_depth > 0: 145 for node_path, node_children in radix_tree_node.iteritems(): 146 child_rule = { 147 'name': node_path[-16:], 148 'source_path': '^' + re.escape(parent_path + node_path), 149 'children': GenRulesFromRadixTree( 150 node_children, max_depth - 1, parent_path + node_path)} 151 children += [child_rule] 152 return children 153 154 rules_tree = GenRulesFromRadixTree(radix_tree, max_depth) 155 return LoadRules(str(rules_tree)) 156 157 158 class _NHeapRule(rules.Rule): 159 def __init__(self, name, filters): 160 super(_NHeapRule, self).__init__(name) 161 162 # The 'stacktrace' filter can be either a string (simple case, one regex) or 163 # a list of strings (complex case, see doc in the header of this file). 164 stacktrace_regexs = filters.get('stacktrace', []) 165 if isinstance(stacktrace_regexs, basestring): 166 stacktrace_regexs = [stacktrace_regexs] 167 self._stacktrace_regexs = [] 168 for regex in stacktrace_regexs: 169 try: 170 self._stacktrace_regexs.append(re.compile(regex)) 171 except re.error, descr: 172 raise exceptions.MemoryInspectorException( 173 'Stacktrace regex error "%s" : %s' % (regex, descr)) 174 175 # The 'source_path' regex, instead, simply matches the source file path. 176 self._path_regex = None 177 path_regex = filters.get('source_path') 178 if path_regex: 179 try: 180 self._path_regex = re.compile(path_regex) 181 except re.error, descr: 182 raise exceptions.MemoryInspectorException( 183 'Path regex error "%s" : %s' % (path_regex, descr)) 184 185 def Match(self, allocation): 186 # Match the source file path, if the 'source_path' filter is specified. 187 if self._path_regex: 188 path_matches = False 189 for frame in allocation.stack_trace.frames: 190 if frame.symbol and frame.symbol.source_info: 191 if self._path_regex.search( 192 frame.symbol.source_info[0].source_file_path): 193 path_matches = True 194 break 195 if not path_matches: 196 return False 197 198 # Match the stack traces symbols, if the 'stacktrace' filter is specified. 199 if not self._stacktrace_regexs: 200 return True 201 cur_regex_idx = 0 202 cur_regex = self._stacktrace_regexs[0] 203 for frame in allocation.stack_trace.frames: 204 if frame.symbol and cur_regex.search(frame.symbol.name): 205 # The current regex has been matched. 206 if cur_regex_idx == len(self._stacktrace_regexs) - 1: 207 return True # All the provided regexs have been matched, we're happy. 208 cur_regex_idx += 1 209 cur_regex = self._stacktrace_regexs[cur_regex_idx] 210 211 return False # Not all the provided regexs have been matched.