# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import logging
import os
import re


LOGGER = logging.getLogger('dmprof')

BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json')

# Heap Profile Policy versions

# POLICY_DEEP_1 DOES NOT include allocation_type columns.
# mmap regions are distinguished by mmap frames in the pattern column.
POLICY_DEEP_1 = 'POLICY_DEEP_1'

# POLICY_DEEP_2 DOES include allocation_type columns.
# mmap regions are distinguished by the allocation_type column.
POLICY_DEEP_2 = 'POLICY_DEEP_2'

# POLICY_DEEP_3 is in JSON format.
POLICY_DEEP_3 = 'POLICY_DEEP_3'

# POLICY_DEEP_4 contains typeinfo.
POLICY_DEEP_4 = 'POLICY_DEEP_4'


class Rule(object):
  """Represents one matching rule in a policy file.

  Every *_pattern argument is a regular expression source string. A non-empty
  pattern is compiled with r'\\Z' appended; a missing or empty pattern is
  stored as None, which the matching code in Policy treats as "matches
  anything".
  """

  def __init__(self,
               name,
               allocator_type,
               stackfunction_pattern=None,
               stacksourcefile_pattern=None,
               typeinfo_pattern=None,
               mappedpathname_pattern=None,
               mappedpermission_pattern=None,
               sharedwith=None):
    self._name = name
    self._allocator_type = allocator_type

    self._stackfunction_pattern = self._compile_anchored(
        stackfunction_pattern)
    self._stacksourcefile_pattern = self._compile_anchored(
        stacksourcefile_pattern)
    self._typeinfo_pattern = self._compile_anchored(typeinfo_pattern)
    self._mappedpathname_pattern = self._compile_anchored(
        mappedpathname_pattern)
    self._mappedpermission_pattern = self._compile_anchored(
        mappedpermission_pattern)

    # A falsy |sharedwith| (None or empty) is normalized to a fresh list.
    self._sharedwith = sharedwith if sharedwith else []

  @staticmethod
  def _compile_anchored(pattern):
    """Compiles |pattern| with an end-of-string anchor appended.

    Args:
        pattern: A regular expression source string, or a falsy value.

    Returns:
        A compiled pattern object, or None if |pattern| is falsy.
    """
    if not pattern:
      return None
    # NOTE: r'\Z' is appended textually, so for a pattern with a top-level
    # '|' it anchors only the last alternative.  Kept as-is because existing
    # policy files may rely on this behavior.
    return re.compile(pattern + r'\Z')

  @property
  def name(self):
    return self._name

  @property
  def allocator_type(self):
    return self._allocator_type

  @property
  def stackfunction_pattern(self):
    return self._stackfunction_pattern

  @property
  def stacksourcefile_pattern(self):
    return self._stacksourcefile_pattern

  @property
  def typeinfo_pattern(self):
    return self._typeinfo_pattern

  @property
  def mappedpathname_pattern(self):
    return self._mappedpathname_pattern

  @property
  def mappedpermission_pattern(self):
    return self._mappedpermission_pattern

  @property
  def sharedwith(self):
    return self._sharedwith


class Policy(object):
  """Represents a policy, a content of a policy file."""

  def __init__(self, rules, version, components):
    self._rules = rules
    self._version = version
    self._components = components

  @property
  def rules(self):
    return self._rules

  @property
  def version(self):
    return self._version

  @property
  def components(self):
    return self._components

  def find_rule(self, component_name):
    """Finds a rule whose name is |component_name|.

    Returns:
        The first matching Rule in rule order, or None if there is none.
    """
    for rule in self._rules:
      if rule.name == component_name:
        return rule
    return None

  def find_malloc(self, bucket):
    """Finds a matching component name which a given |bucket| belongs to.

    The result is stored in |bucket.component_cache| and reused on later
    calls with the same bucket.

    Args:
        bucket: A Bucket object to be searched for.

    Returns:
        A string representing a component name, or 'no-bucket' if |bucket|
        is falsy.

    Raises:
        AssertionError: If no 'malloc' rule matches |bucket|.
    """
    assert not bucket or bucket.allocator_type == 'malloc'

    if not bucket:
      return 'no-bucket'
    if bucket.component_cache:
      return bucket.component_cache

    stackfunction = bucket.symbolized_joined_stackfunction
    stacksourcefile = bucket.symbolized_joined_stacksourcefile
    typeinfo = bucket.symbolized_typeinfo
    # A typeinfo that still starts with '0x' is replaced by the bucket's
    # |typeinfo_name| before matching.
    if typeinfo.startswith('0x'):
      typeinfo = bucket.typeinfo_name

    for rule in self._rules:
      if (rule.allocator_type == 'malloc' and
          self._stack_matches(rule, stackfunction, stacksourcefile) and
          (not rule.typeinfo_pattern or
           rule.typeinfo_pattern.match(typeinfo))):
        bucket.component_cache = rule.name
        return rule.name

    # Raised explicitly (instead of "assert False") so that the failure is
    # not silently turned into a None return when running with -O.
    raise AssertionError('No malloc rule matches the given bucket.')

  def find_mmap(self, region, bucket_set,
                pageframe=None, group_pfn_counts=None):
    """Finds a matching component which a given mmap |region| belongs to.

    It uses |bucket_set| to match with backtraces.  If |pageframe| is given,
    it considers memory sharing among processes.

    NOTE: Don't use Bucket's |component_cache| for mmap regions because they're
    classified not only with bucket information (mappedpathname for example).

    Args:
        region: A tuple representing a memory region.
        bucket_set: A BucketSet object to look up backtraces.
        pageframe: A PageFrame object representing a pageframe maybe including
            a pagecount.
        group_pfn_counts: A dict mapping a PFN to the number of times the
            the pageframe is mapped by the known "group (Chrome)" processes.

    Returns:
        A tuple (component name, bucket).  It is ('no-bucket', None) if no
        bucket is found in |bucket_set| for the region.

    Raises:
        AssertionError: If no 'mmap' rule matches |region|.
    """
    assert region[0] == 'hooked'
    bucket = bucket_set.get(region[1]['bucket_id'])
    assert not bucket or bucket.allocator_type == 'mmap'

    if not bucket:
      return 'no-bucket', None

    stackfunction = bucket.symbolized_joined_stackfunction
    stacksourcefile = bucket.symbolized_joined_stacksourcefile
    sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)

    for rule in self._rules:
      if (rule.allocator_type == 'mmap' and
          self._stack_matches(rule, stackfunction, stacksourcefile) and
          self._vma_matches(rule, region) and
          self._sharing_matches(rule, pageframe, sharedwith)):
        return rule.name, bucket

    # See find_malloc: must not be optimized away by -O.
    raise AssertionError('No mmap rule matches the given region.')

  def find_unhooked(self, region, pageframe=None, group_pfn_counts=None):
    """Finds a matching component which a given unhooked |region| belongs to.

    If |pageframe| is given, it considers memory sharing among processes.

    Args:
        region: A tuple representing a memory region.
        pageframe: A PageFrame object representing a pageframe maybe including
            a pagecount.
        group_pfn_counts: A dict mapping a PFN to the number of times the
            the pageframe is mapped by the known "group (Chrome)" processes.

    Returns:
        A string representing a component name.

    Raises:
        AssertionError: If no 'unhooked' rule matches |region|.
    """
    assert region[0] == 'unhooked'
    sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)

    for rule in self._rules:
      if (rule.allocator_type == 'unhooked' and
          self._vma_matches(rule, region) and
          self._sharing_matches(rule, pageframe, sharedwith)):
        return rule.name

    # See find_malloc: must not be optimized away by -O.
    raise AssertionError('No unhooked rule matches the given region.')

  @staticmethod
  def _stack_matches(rule, stackfunction, stacksourcefile):
    """Checks |rule|'s stack patterns; an unset pattern matches anything."""
    if (rule.stackfunction_pattern and
        not rule.stackfunction_pattern.match(stackfunction)):
      return False
    if (rule.stacksourcefile_pattern and
        not rule.stacksourcefile_pattern.match(stacksourcefile)):
      return False
    return True

  @staticmethod
  def _vma_matches(rule, region):
    """Checks |rule|'s mapped pathname and permission patterns.

    |region|'s 'vma' entry is read only when |rule| actually has the
    corresponding pattern; an unset pattern matches anything.
    """
    if (rule.mappedpathname_pattern and
        not rule.mappedpathname_pattern.match(region[1]['vma']['name'])):
      return False
    if rule.mappedpermission_pattern:
      vma = region[1]['vma']
      # The permission is the concatenation of the four flag strings.
      permission = (vma['readable'] + vma['writable'] +
                    vma['executable'] + vma['private'])
      if not rule.mappedpermission_pattern.match(permission):
        return False
    return True

  @staticmethod
  def _sharing_matches(rule, pageframe, sharedwith):
    """Checks |rule|'s sharedwith constraint.

    The constraint is ignored when |rule| has no sharedwith list or when no
    |pageframe| is given.
    """
    return bool(not rule.sharedwith or
                not pageframe or
                sharedwith in rule.sharedwith)

  @staticmethod
  def load(filename, filetype):
    """Loads a policy file of |filename| in a |filetype|.

    Args:
        filename: A filename to be loaded.  It is joined onto BASE_PATH.
        filetype: A string to specify a type of the file.  Only 'json' is
            supported for now.

    Returns:
        A loaded Policy object, or None if |filetype| is not supported.
    """
    with open(os.path.join(BASE_PATH, filename)) as policy_f:
      return Policy.parse(policy_f, filetype)

  @staticmethod
  def parse(policy_f, filetype):
    """Parses a policy file content in a |filetype|.

    Args:
        policy_f: An IO object to be loaded.
        filetype: A string to specify a type of the file.  Only 'json' is
            supported for now.

    Returns:
        A loaded Policy object, or None if |filetype| is not supported.
    """
    if filetype == 'json':
      return Policy._parse_json(policy_f)
    return None

  @staticmethod
  def _parse_json(policy_f):
    """Parses policy file in json format.

    A policy file contains component's names and their stacktrace pattern
    written in regular expression.  Those patterns are matched against each
    symbols of each stacktraces in the order written in the policy file.

    Args:
        policy_f: A File/IO object to read.

    Returns:
        A loaded policy object.
    """
    policy = json.load(policy_f)

    rules = []
    for rule in policy['rules']:
      # 'stacktrace' is accepted as a fallback key for 'stackfunction'.
      stackfunction = rule.get('stackfunction') or rule.get('stacktrace')
      rules.append(Rule(
          rule['name'],
          rule['allocator'],  # allocator_type
          stackfunction,
          rule.get('stacksourcefile'),
          rule.get('typeinfo'),
          rule.get('mappedpathname'),
          rule.get('mappedpermission'),
          rule.get('sharedwith')))

    return Policy(rules, policy['version'], policy['components'])

  @staticmethod
  def _categorize_pageframe(pageframe, group_pfn_counts):
    """Categorizes a pageframe based on its sharing status.

    Args:
        pageframe: A PageFrame object, or a falsy value.
        group_pfn_counts: A dict mapping a PFN to a count, or None, which is
            treated as an empty dict.

    Returns:
        'private' if |pageframe| is not shared with other processes.  'group'
        if |pageframe| is shared only with group (Chrome-related) processes.
        'others' if |pageframe| is shared with non-group processes.
    """
    if not pageframe:
      return 'private'

    # The public find_* methods default |group_pfn_counts| to None; without
    # this guard a given |pageframe| would crash on the lookups below.
    if group_pfn_counts is None:
      group_pfn_counts = {}

    if pageframe.pagecount:
      if pageframe.pagecount == 1:
        return 'private'
      if pageframe.pagecount <= group_pfn_counts.get(pageframe.pfn, 0) + 1:
        return 'group'
      return 'others'

    # Without a pagecount, fall back to whether the PFN is known at all.
    if pageframe.pfn in group_pfn_counts:
      return 'group'
    return 'private'


class PolicySet(object):
  """Represents a set of policies."""

  def __init__(self, policy_directory):
    self._policy_directory = policy_directory

  @staticmethod
  def load(labels=None):
    """Loads a set of policies via the "default policy directory".

    The "default policy directory" contains pairs of policies and their labels.
    For example, a policy "policy.l0.json" is labeled "l0" in the default
    policy directory "policies.json".

    All policies in the directory are loaded by default.  Policies can be
    limited by |labels|.  Labels that are not in the default policy
    directory are ignored.

    Args:
        labels: An array that contains policy labels to be loaded.

    Returns:
        A PolicySet object.
    """
    default_policy_directory = PolicySet._load_default_policy_directory()
    if not labels:
      return PolicySet._load_policies(default_policy_directory)

    specified_policy_directory = {}
    for label in labels:
      if label in default_policy_directory:
        specified_policy_directory[label] = default_policy_directory[label]
      # TODO(dmikurube): Load an un-labeled policy file.
    return PolicySet._load_policies(specified_policy_directory)

  def __len__(self):
    return len(self._policy_directory)

  def __iter__(self):
    for label in self._policy_directory:
      yield label

  def __getitem__(self, label):
    return self._policy_directory[label]

  @staticmethod
  def _load_default_policy_directory():
    """Reads and returns the JSON content of POLICIES_JSON_PATH."""
    with open(POLICIES_JSON_PATH, mode='r') as policies_f:
      default_policy_directory = json.load(policies_f)
    return default_policy_directory

  @staticmethod
  def _load_policies(directory):
    """Loads every policy listed in |directory| into a PolicySet.

    Args:
        directory: A dict mapping a label to a dict with 'file' and 'format'
            entries.

    Returns:
        A PolicySet of the policies that Policy.load returned; entries for
        which it returned a falsy value are skipped.
    """
    LOGGER.info('Loading policy files.')
    policies = {}
    for label in directory:
      entry = directory[label]
      LOGGER.info(' %s: %s', label, entry['file'])
      loaded = Policy.load(entry['file'], entry['format'])
      if loaded:
        policies[label] = loaded
    return PolicySet(policies)