# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import copy
import datetime
import logging
import os
import re
import time

from lib.bucket import BUCKET_ID
from lib.exceptions import EmptyDumpException, InvalidDumpException
from lib.exceptions import ObsoleteDumpVersionException, ParsingException
from lib.pageframe import PageFrame
from lib.range_dict import ExclusiveRangeDict
from lib.symbol import proc_maps


LOGGER = logging.getLogger('dmprof')


# Heap Profile Dump versions

# DUMP_DEEP_[1-4] are obsolete.
# DUMP_DEEP_2+ distinguish mmap regions from malloc chunks.
# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
DUMP_DEEP_1 = 'DUMP_DEEP_1'
DUMP_DEEP_2 = 'DUMP_DEEP_2'
DUMP_DEEP_3 = 'DUMP_DEEP_3'
DUMP_DEEP_4 = 'DUMP_DEEP_4'

DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)

# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
# malloc and mmap are identified in bucket files.
# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
DUMP_DEEP_5 = 'DUMP_DEEP_5'

# DUMP_DEEP_6 adds an mmap list to DUMP_DEEP_5.
DUMP_DEEP_6 = 'DUMP_DEEP_6'
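
# An illustrative skeleton of a DUMP_DEEP_6 file, reconstructed from the
# parsers in the Dump class below. The section contents are hypothetical,
# not taken from a real dump:
#
#   heap profile: DUMP_DEEP_6
#   META:
#   Time: 1357016400
#   PageSize: 4096
#   RunID: 20130101-0001
#   MMAP_LIST:
#   ... /proc/maps-style lines and hooked/unhooked region lines ...
#   GLOBAL_STATS:
#   ... "<name> <virtual> <committed>" lines ...
#   STACKTRACES:
#   ... stacktrace lines, each containing an "@ <bucket_id>" marker ...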


class Dump(object):
  """Represents a heap profile dump."""

  _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')

  _HOOK_PATTERN = re.compile(
      r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
      r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)

  _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                               r'(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
  _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                                 r'(?P<RESERVED>[0-9]+)')

  _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
  _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')

  _TIME_PATTERN_FORMAT = re.compile(
      r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
  _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')

  def __init__(self, path, modified_time):
    self._path = path
    matched = self._PATH_PATTERN.match(path)
    self._pid = int(matched.group(2))
    self._count = int(matched.group(3))
    self._time = modified_time
    self._map = {}
    self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
    self._stacktrace_lines = []
    self._global_stats = {}  # used only in apply_policy

    self._run_id = ''
    self._pagesize = 4096
    self._pageframe_length = 0
    self._pageframe_encoding = ''
    self._has_pagecount = False

    self._version = ''
    self._lines = []

  @property
  def path(self):
    return self._path

  @property
  def count(self):
    return self._count

  @property
  def time(self):
    return self._time

  @property
  def iter_map(self):
    for region in sorted(self._map.iteritems()):
      yield region[0], region[1]

  def iter_procmaps(self):
    # Note: the ranges live in self._procmaps (an ExclusiveRangeDict built
    # in _parse_mmap_list), not in the plain dict self._map.
    for begin, end, attr in self._procmaps.iter_range():
      yield begin, end, attr

  @property
  def iter_stacktrace(self):
    for line in self._stacktrace_lines:
      yield line

  def global_stat(self, name):
    return self._global_stats[name]

  @property
  def run_id(self):
    return self._run_id

  @property
  def pagesize(self):
    return self._pagesize

  @property
  def pageframe_length(self):
    return self._pageframe_length

  @property
  def pageframe_encoding(self):
    return self._pageframe_encoding

  @property
  def has_pagecount(self):
    return self._has_pagecount

  @staticmethod
  def load(path, log_header='Loading a heap profile dump: '):
    """Loads a heap profile dump.

    Args:
      path: A file path string to load.
      log_header: A preceding string for log messages.

    Returns:
      A loaded Dump object.

    Raises:
      ParsingException for invalid heap profile dumps.
    """
    dump = Dump(path, os.stat(path).st_mtime)
    with open(path, 'r') as f:
      dump.load_file(f, log_header)
    return dump

  def load_file(self, f, log_header):
    self._lines = [line for line in f
                   if line and not line.startswith('#')]

    try:
      self._version, ln = self._parse_version()
      self._parse_meta_information()
      if self._version == DUMP_DEEP_6:
        self._parse_mmap_list()
      self._parse_global_stats()
      self._extract_stacktrace_lines(ln)
    except EmptyDumpException:
      LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
    except ParsingException as e:
      LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
      raise
    else:
      LOGGER.info('%s%s (version:%s)' % (
          log_header, self._path, self._version))
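
  # Example usage of Dump.load (a minimal sketch; the file name is
  # hypothetical but must match _PATH_PATTERN, i.e.
  # '<prefix>.<pid>.<sequence>.heap'):
  #
  #   dump = Dump.load('chrome.12345.0001.heap')
  #   print dump.count, dump.time
  #   for stacktrace_line in dump.iter_stacktrace:
  #     ...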

  def _parse_version(self):
    """Parses a version string in self._lines.

    Returns:
      A pair of a string representing the version of the stacktrace dump,
      and an integer indicating the line number at which to continue
      parsing.

    Raises:
      ParsingException for invalid dump versions.
    """
    version = ''

    # Skip until an identifiable line.
    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
    if not self._lines:
      raise EmptyDumpException('Empty heap dump file.')
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: not self._lines[n].startswith(headers))
    if not found:
      raise InvalidDumpException('No version header.')

    # Identify a version.
    if self._lines[ln].startswith('heap profile: '):
      version = self._lines[ln][13:].strip()
      if version in (DUMP_DEEP_5, DUMP_DEEP_6):
        (ln, _) = skip_while(
            ln, len(self._lines),
            lambda n: self._lines[n] != 'STACKTRACES:\n')
      elif version in DUMP_DEEP_OBSOLETE:
        raise ObsoleteDumpVersionException(version)
      else:
        raise InvalidDumpException('Invalid version: %s' % version)
    elif self._lines[ln] == 'STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_1)
    elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_2)

    return (version, ln)

  def _parse_global_stats(self):
    """Parses lines in self._lines as global stats."""
    (ln, _) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'GLOBAL_STATS:\n')

    global_stat_names = [
        'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
        'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
        'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
        'nonprofiled-stack', 'nonprofiled-other',
        'profiled-mmap', 'profiled-malloc']

    for prefix in global_stat_names:
      (ln, _) = skip_while(
          ln, len(self._lines),
          lambda n: self._lines[n].split()[0] != prefix)
      words = self._lines[ln].split()
      self._global_stats[prefix + '_virtual'] = int(words[-2])
      self._global_stats[prefix + '_committed'] = int(words[-1])

  def _parse_meta_information(self):
    """Parses lines in self._lines for meta information."""
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'META:\n')
    if not found:
      return
    ln += 1

    # The META section is assumed to be terminated by an 'MMAP_LIST:' or
    # 'GLOBAL_STATS:' header; the loop breaks there.
    while True:
      if self._lines[ln].startswith('Time:'):
        matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
        matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
        if matched_format:
          self._time = time.mktime(datetime.datetime.strptime(
              matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
          if matched_format.group(2):
            # group(2) is the fractional part with its leading dot
            # (e.g. '.25'), so parse it directly instead of assuming
            # exactly three digits.
            self._time += float(matched_format.group(2))
        elif matched_seconds:
          self._time = float(matched_seconds.group(1))
      elif self._lines[ln].startswith('Reason:'):
        pass  # Nothing to do for 'Reason:'
      elif self._lines[ln].startswith('PageSize: '):
        self._pagesize = int(self._lines[ln][10:])
      elif self._lines[ln].startswith('CommandLine:'):
        pass  # Nothing to do for 'CommandLine:'
      elif (self._lines[ln].startswith('PageFrame: ') or
            self._lines[ln].startswith('PFN: ')):
        if self._lines[ln].startswith('PageFrame: '):
          words = self._lines[ln][11:].split(',')
        else:
          words = self._lines[ln][5:].split(',')
        for word in words:
          if word == '24':
            self._pageframe_length = 24
          elif word == 'Base64':
            self._pageframe_encoding = 'base64'
          elif word == 'PageCount':
            self._has_pagecount = True
      elif self._lines[ln].startswith('RunID: '):
        self._run_id = self._lines[ln][7:].strip()
      elif (self._lines[ln].startswith('MMAP_LIST:') or
            self._lines[ln].startswith('GLOBAL_STATS:')):
        # The meta information section ends at the next section header
        # ("MMAP_LIST:" or "GLOBAL_STATS:").
        break
      else:
        pass  # Skip unrecognized meta lines.
      ln += 1
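
  # An illustrative META section accepted by _parse_meta_information
  # (values are hypothetical):
  #
  #   META:
  #   Time: 2013/01/01 12:34:56.789
  #   PageSize: 4096
  #   PageFrame: 24,Base64,PageCount
  #   RunID: 20130101-0001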

  def _parse_mmap_list(self):
    """Parses lines in self._lines as an mmap list."""
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'MMAP_LIST:\n')
    if not found:
      return

    ln += 1
    self._map = {}
    current_vma = {}
    pageframe_list = []
    # Loop until a line matches none of the known entry formats.
    while True:
      entry = proc_maps.ProcMaps.parse_line(self._lines[ln])
      if entry:
        current_vma = {}
        for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
          for key, value in entry.as_dict().iteritems():
            attr[key] = value
            current_vma[key] = value
        ln += 1
        continue

      if self._lines[ln].startswith(' PF: '):
        for pageframe in self._lines[ln][5:].split():
          pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
        ln += 1
        continue

      matched = self._HOOK_PATTERN.match(self._lines[ln])
      if not matched:
        break
      # Groups of _HOOK_PATTERN:
      #   2: starting address
      #   5: end address
      #   7: hooked or unhooked
      #   8: additional information
      if matched.group(7) == 'hooked':
        submatched = self._HOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
      elif matched.group(7) == 'unhooked':
        submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
      else:
        assert matched.group(7) in ['hooked', 'unhooked']

      submatched_dict = submatched.groupdict()
      region_info = {'vma': current_vma}
      if submatched_dict.get('TYPE'):
        region_info['type'] = submatched_dict['TYPE'].strip()
      if submatched_dict.get('COMMITTED'):
        region_info['committed'] = int(submatched_dict['COMMITTED'])
      if submatched_dict.get('RESERVED'):
        region_info['reserved'] = int(submatched_dict['RESERVED'])
      if submatched_dict.get('BUCKETID'):
        region_info['bucket_id'] = int(submatched_dict['BUCKETID'])

      if matched.group(1) == '(':
        start = current_vma['begin']
      else:
        start = int(matched.group(2), 16)
      if matched.group(4) == '(':
        end = current_vma['end']
      else:
        end = int(matched.group(5), 16)

      if pageframe_list and pageframe_list[0].start_truncated:
        pageframe_list[0].set_size(
            pageframe_list[0].size - start % self._pagesize)
      if pageframe_list and pageframe_list[-1].end_truncated:
        pageframe_list[-1].set_size(
            pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
      region_info['pageframe'] = pageframe_list
      pageframe_list = []

      self._map[(start, end)] = (matched.group(7), region_info)
      ln += 1
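
  # Illustrative lines consumed by _parse_mmap_list (addresses, sizes and
  # bucket IDs are hypothetical). A /proc/maps-style line opens a virtual
  # memory area, and the following lines matched by _HOOK_PATTERN describe
  # regions inside it; parenthesized addresses are taken from the
  # enclosing VMA:
  #
  #   7f0000000000-7f0000040000 rw-p 00000000 00:00 0
  #    (7f0000000000)-(7f0000021000)  hooked anonymous 4096 / 8192 @ 12
  #    (7f0000021000)-(7f0000040000)  unhooked anonymous 4096 / 4096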
374 """ 375 if self._version in (DUMP_DEEP_5, DUMP_DEEP_6): 376 (line_number, _) = skip_while( 377 line_number, len(self._lines), 378 lambda n: not self._lines[n].split()[0].isdigit()) 379 stacktrace_start = line_number 380 (line_number, _) = skip_while( 381 line_number, len(self._lines), 382 lambda n: self._check_stacktrace_line(self._lines[n])) 383 self._stacktrace_lines = self._lines[stacktrace_start:line_number] 384 385 elif self._version in DUMP_DEEP_OBSOLETE: 386 raise ObsoleteDumpVersionException(self._version) 387 388 else: 389 raise InvalidDumpException('Invalid version: %s' % self._version) 390 391 @staticmethod 392 def _check_stacktrace_line(stacktrace_line): 393 """Checks if a given stacktrace_line is valid as stacktrace. 394 395 Args: 396 stacktrace_line: A string to be checked. 397 398 Returns: 399 True if the given stacktrace_line is valid. 400 """ 401 words = stacktrace_line.split() 402 if len(words) < BUCKET_ID + 1: 403 return False 404 if words[BUCKET_ID - 1] != '@': 405 return False 406 return True 407 408 409 class DumpList(object): 410 """Represents a sequence of heap profile dumps.""" 411 412 def __init__(self, dump_list): 413 self._dump_list = dump_list 414 415 @staticmethod 416 def load(path_list): 417 LOGGER.info('Loading heap dump profiles.') 418 dump_list = [] 419 for path in path_list: 420 dump_list.append(Dump.load(path, ' ')) 421 return DumpList(dump_list) 422 423 def __len__(self): 424 return len(self._dump_list) 425 426 def __iter__(self): 427 for dump in self._dump_list: 428 yield dump 429 430 def __getitem__(self, index): 431 return self._dump_list[index] 432 433 434 class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute): 435 """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict.""" 436 _DUMMY_ENTRY = proc_maps.ProcMapsEntry( 437 0, # begin 438 0, # end 439 '-', # readable 440 '-', # writable 441 '-', # executable 442 '-', # private 443 0, # offset 444 '00', # major 445 '00', # minor 446 0, # inode 447 '' # name 448 ) 449 450 def __init__(self): 451 super(ProcMapsEntryAttribute, self).__init__() 452 self._entry = self._DUMMY_ENTRY.as_dict() 453 454 def __str__(self): 455 return str(self._entry) 456 457 def __repr__(self): 458 return 'ProcMapsEntryAttribute' + str(self._entry) 459 460 def __getitem__(self, key): 461 return self._entry[key] 462 463 def __setitem__(self, key, value): 464 if key not in self._entry: 465 raise KeyError(key) 466 self._entry[key] = value 467 468 def copy(self): 469 new_entry = ProcMapsEntryAttribute() 470 for key, value in self._entry.iteritems(): 471 new_entry[key] = copy.deepcopy(value) 472 return new_entry 473 474 475 def skip_while(index, max_index, skipping_condition): 476 """Increments |index| until |skipping_condition|(|index|) is False. 477 478 Returns: 479 A pair of an integer indicating a line number after skipped, and a 480 boolean value which is True if found a line which skipping_condition 481 is False for. 482 """ 483 while skipping_condition(index): 484 index += 1 485 if index >= max_index: 486 return index, False 487 return index, True 488