# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import copy
import datetime
import logging
import os
import re
import time

from lib.bucket import BUCKET_ID
from lib.exceptions import EmptyDumpException, InvalidDumpException
from lib.exceptions import ObsoleteDumpVersionException, ParsingException
from lib.pageframe import PageFrame
from lib.range_dict import ExclusiveRangeDict
from lib.symbol import proc_maps


LOGGER = logging.getLogger('dmprof')


# Heap Profile Dump versions

# DUMP_DEEP_[1-4] are obsolete.
# DUMP_DEEP_2+ distinguish mmap regions from malloc chunks.
# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
DUMP_DEEP_1 = 'DUMP_DEEP_1'
DUMP_DEEP_2 = 'DUMP_DEEP_2'
DUMP_DEEP_3 = 'DUMP_DEEP_3'
DUMP_DEEP_4 = 'DUMP_DEEP_4'

DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)

# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
# malloc and mmap are identified in bucket files.
# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
DUMP_DEEP_5 = 'DUMP_DEEP_5'

# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
DUMP_DEEP_6 = 'DUMP_DEEP_6'
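
# An illustrative overall layout of a DUMP_DEEP_6 file, inferred from the
# parsers below.  The section names appear in the parsing code; everything
# else here is a made-up sketch, and MMAP_LIST is only parsed for
# DUMP_DEEP_6:
#   heap profile: DUMP_DEEP_6
#   META:
#   ...
#   MMAP_LIST:
#   ...
#   GLOBAL_STATS:
#   ...
#   STACKTRACES:
#   ...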

class Dump(object):
  """Represents a heap profile dump."""

  _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')

  _HOOK_PATTERN = re.compile(
      r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
      r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)

  _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                               '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
  _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                                 '(?P<RESERVED>[0-9]+)')

  _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
  _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')
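
  # Illustrative lines matched by the patterns above (all values made up):
  #   _HOOK_PATTERN matches lines like
  #     '  7f0000000000 - 7f0000001000  hooked anonymous 4096 / 8192 @ 12'
  #   (parentheses around an address mean the current VMA's boundary is
  #   used instead; see _parse_mmap_list).  The text after 'hooked' or
  #   'unhooked' is then matched by _HOOKED_PATTERN / _UNHOOKED_PATTERN:
  #     'anonymous 4096 / 8192 @ 12'  (hooked: committed / reserved @ bucket)
  #     'anonymous 4096 / 8192'       (unhooked: committed / reserved)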

  _TIME_PATTERN_FORMAT = re.compile(
      r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
  _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
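
  # Illustrative 'Time:' lines matched by the patterns above (made-up
  # values):
  #   'Time: 2013/09/13 10:00:00.123'  (_TIME_PATTERN_FORMAT)
  #   'Time: 1379062800'               (_TIME_PATTERN_SECONDS)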

  def __init__(self, path, modified_time):
    self._path = path
    matched = self._PATH_PATTERN.match(path)
    self._pid = int(matched.group(2))
    self._count = int(matched.group(3))
    self._time = modified_time
    self._map = {}
    self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
    self._stacktrace_lines = []
    self._global_stats = {}  # used only in apply_policy

    self._run_id = ''
    self._pagesize = 4096
    self._pageframe_length = 0
    self._pageframe_encoding = ''
    self._has_pagecount = False

    self._version = ''
    self._lines = []

  @property
  def path(self):
    return self._path

  @property
  def count(self):
    return self._count

  @property
  def time(self):
    return self._time

  @property
  def iter_map(self):
    for region in sorted(self._map.iteritems()):
      yield region[0], region[1]

  def iter_procmaps(self):
    # Iterate over the ExclusiveRangeDict of /proc/maps entries.  Note that
    # self._map is a plain dict and has no iter_range().
    for begin, end, attr in self._procmaps.iter_range():
      yield begin, end, attr

  @property
  def iter_stacktrace(self):
    for line in self._stacktrace_lines:
      yield line

  def global_stat(self, name):
    return self._global_stats[name]

  @property
  def run_id(self):
    return self._run_id

  @property
  def pagesize(self):
    return self._pagesize

  @property
  def pageframe_length(self):
    return self._pageframe_length

  @property
  def pageframe_encoding(self):
    return self._pageframe_encoding

  @property
  def has_pagecount(self):
    return self._has_pagecount

  @staticmethod
  def load(path, log_header='Loading a heap profile dump: '):
    """Loads a heap profile dump.

    Args:
        path: A file path string to load.
        log_header: A preceding string for log messages.

    Returns:
        A loaded Dump object.

    Raises:
        ParsingException for invalid heap profile dumps.
    """
    dump = Dump(path, os.stat(path).st_mtime)
    with open(path, 'r') as f:
      dump.load_file(f, log_header)
    return dump

  def load_file(self, f, log_header):
    """Parses a heap profile dump from the file object |f|."""
    self._lines = [line for line in f
                   if line and not line.startswith('#')]

    try:
      self._version, ln = self._parse_version()
      self._parse_meta_information()
      if self._version == DUMP_DEEP_6:
        self._parse_mmap_list()
      self._parse_global_stats()
      self._extract_stacktrace_lines(ln)
    except EmptyDumpException:
      LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
    except ParsingException as e:
      LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
      raise
    else:
      LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))

  def _parse_version(self):
    """Parses the version string in self._lines.

    Returns:
        A pair of (a string representing the dump version, and the index of
        the line in self._lines at which parsing should continue).

    Raises:
        ParsingException for invalid dump versions.
    """
    version = ''

    # Skip until an identifiable line.
    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
    if not self._lines:
      raise EmptyDumpException('Empty heap dump file.')
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: not self._lines[n].startswith(headers))
    if not found:
      raise InvalidDumpException('No version header.')

    # Identify a version.
    if self._lines[ln].startswith('heap profile: '):
      # The slice starts at the space just after 'heap profile:'; strip()
      # removes it.
      version = self._lines[ln][13:].strip()
      if version in (DUMP_DEEP_5, DUMP_DEEP_6):
        (ln, _) = skip_while(
            ln, len(self._lines),
            lambda n: self._lines[n] != 'STACKTRACES:\n')
      elif version in DUMP_DEEP_OBSOLETE:
        raise ObsoleteDumpVersionException(version)
      else:
        raise InvalidDumpException('Invalid version: %s' % version)
    elif self._lines[ln] == 'STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_1)
    elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_2)

    return (version, ln)

  def _parse_global_stats(self):
    """Parses lines in self._lines as global stats."""
    (ln, _) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'GLOBAL_STATS:\n')

    global_stat_names = [
        'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
        'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
        'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
        'nonprofiled-stack', 'nonprofiled-other',
        'profiled-mmap', 'profiled-malloc']
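
    # Each stats line is expected to end with two integers, e.g. (made-up
    # values): 'total  123456789  12345678'; the second-to-last field is
    # taken as the virtual size and the last as the committed size.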
    for prefix in global_stat_names:
      (ln, _) = skip_while(
          ln, len(self._lines),
          lambda n: self._lines[n].split()[0] != prefix)
      words = self._lines[ln].split()
      self._global_stats[prefix + '_virtual'] = int(words[-2])
      self._global_stats[prefix + '_committed'] = int(words[-1])

  def _parse_meta_information(self):
    """Parses lines in self._lines for meta information."""
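    # An illustrative META block, inferred from the checks below (field
    # values are made up):
    #   META:
    #   Time: 2013/09/13 10:00:00.123
    #   PageSize: 4096
    #   PageFrame: 24,Base64,PageCount
    #   RunID: abc123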
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'META:\n')
    if not found:
      return
    ln += 1

    while True:
      if self._lines[ln].startswith('Time:'):
        matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
        matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
        if matched_format:
          self._time = time.mktime(datetime.datetime.strptime(
              matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
          if matched_format.group(2):
            # group(2) is the fractional-second part including the leading
            # dot (e.g. '.123'); parse it directly instead of assuming
            # exactly three digits of milliseconds.
            self._time += float(matched_format.group(2))
        elif matched_seconds:
          self._time = float(matched_seconds.group(1))
      elif self._lines[ln].startswith('Reason:'):
        pass  # Nothing to do for 'Reason:'
      elif self._lines[ln].startswith('PageSize: '):
        self._pagesize = int(self._lines[ln][10:])
      elif self._lines[ln].startswith('CommandLine:'):
        pass
      elif (self._lines[ln].startswith('PageFrame: ') or
            self._lines[ln].startswith('PFN: ')):
        if self._lines[ln].startswith('PageFrame: '):
          words = self._lines[ln][11:].split(',')
        else:
          words = self._lines[ln][5:].split(',')
        for word in words:
          if word == '24':
            self._pageframe_length = 24
          elif word == 'Base64':
            self._pageframe_encoding = 'base64'
          elif word == 'PageCount':
            self._has_pagecount = True
      elif self._lines[ln].startswith('RunID: '):
        self._run_id = self._lines[ln][7:].strip()
      elif (self._lines[ln].startswith('MMAP_LIST:') or
            self._lines[ln].startswith('GLOBAL_STATS:')):
        # The META block ends when 'MMAP_LIST:' or 'GLOBAL_STATS:' appears.
        break
      else:
        pass
      ln += 1

  def _parse_mmap_list(self):
    """Parses lines in self._lines as a mmap list."""
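    # An illustrative MMAP_LIST block, inferred from the parsing below (all
    # values, including the PF token format, are made up):
    #   MMAP_LIST:
    #   7f0000000000-7f0000100000 rw-p 00000000 00:00 0
    #     PF: 0x000000000003f001 0x000000000003f002
    #     7f0000000000 - 7f0000001000  hooked anonymous 4096 / 8192 @ 12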
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'MMAP_LIST:\n')
    if not found:
      return

    ln += 1
    self._map = {}
    current_vma = {}
    pageframe_list = []
    while True:
      entry = proc_maps.ProcMaps.parse_line(self._lines[ln])
      if entry:
        current_vma = {}
        for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
          for key, value in entry.as_dict().iteritems():
            attr[key] = value
            current_vma[key] = value
        ln += 1
        continue

      if self._lines[ln].startswith('  PF: '):
        for pageframe in self._lines[ln][5:].split():
          pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
        ln += 1
        continue

      matched = self._HOOK_PATTERN.match(self._lines[ln])
      if not matched:
        break
      # 2: starting address
      # 5: end address
      # 7: hooked or unhooked
      # 8: additional information
      if matched.group(7) == 'hooked':
        submatched = self._HOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
      elif matched.group(7) == 'unhooked':
        submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
      else:
        assert matched.group(7) in ['hooked', 'unhooked']

      submatched_dict = submatched.groupdict()
      region_info = {'vma': current_vma}
      if submatched_dict.get('TYPE'):
        region_info['type'] = submatched_dict['TYPE'].strip()
      if submatched_dict.get('COMMITTED'):
        region_info['committed'] = int(submatched_dict['COMMITTED'])
      if submatched_dict.get('RESERVED'):
        region_info['reserved'] = int(submatched_dict['RESERVED'])
      if submatched_dict.get('BUCKETID'):
        region_info['bucket_id'] = int(submatched_dict['BUCKETID'])

      if matched.group(1) == '(':
        start = current_vma['begin']
      else:
        start = int(matched.group(2), 16)
      if matched.group(4) == '(':
        end = current_vma['end']
      else:
        end = int(matched.group(5), 16)

      if pageframe_list and pageframe_list[0].start_truncated:
        pageframe_list[0].set_size(
            pageframe_list[0].size - start % self._pagesize)
      if pageframe_list and pageframe_list[-1].end_truncated:
        pageframe_list[-1].set_size(
            pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
      region_info['pageframe'] = pageframe_list
      pageframe_list = []

      self._map[(start, end)] = (matched.group(7), region_info)
      ln += 1

  def _extract_stacktrace_lines(self, line_number):
    """Extracts valid stacktrace lines into self._stacktrace_lines.

    Args:
        line_number: An index into self._lines at which parsing starts.

    Raises:
        ParsingException for invalid dump versions.
    """
    if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: not self._lines[n].split()[0].isdigit())
      stacktrace_start = line_number
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: self._check_stacktrace_line(self._lines[n]))
      self._stacktrace_lines = self._lines[stacktrace_start:line_number]

    elif self._version in DUMP_DEEP_OBSOLETE:
      raise ObsoleteDumpVersionException(self._version)

    else:
      raise InvalidDumpException('Invalid version: %s' % self._version)

  @staticmethod
  def _check_stacktrace_line(stacktrace_line):
    """Checks if a given line is valid as a stacktrace line.

    Args:
        stacktrace_line: A string to be checked.

    Returns:
        True if the given stacktrace_line is valid.
    """
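    # Illustrative check (BUCKET_ID comes from lib.bucket; the value 3 here
    # is hypothetical): with BUCKET_ID == 3, a valid line has '@' as its
    # third token, followed by a bucket id, e.g.
    #   '1024 4096 @ 12 0x7f0000000000 0x7f0000000008'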
    words = stacktrace_line.split()
    if len(words) < BUCKET_ID + 1:
      return False
    if words[BUCKET_ID - 1] != '@':
      return False
    return True


class DumpList(object):
  """Represents a sequence of heap profile dumps."""

  def __init__(self, dump_list):
    self._dump_list = dump_list

  @staticmethod
  def load(path_list):
    """Loads a list of heap profile dumps from the given paths."""
    LOGGER.info('Loading heap dump profiles.')
    dump_list = []
    for path in path_list:
      dump_list.append(Dump.load(path, '  '))
    return DumpList(dump_list)

  def __len__(self):
    return len(self._dump_list)

  def __iter__(self):
    for dump in self._dump_list:
      yield dump

  def __getitem__(self, index):
    return self._dump_list[index]
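
# A minimal usage sketch (file names are hypothetical, but they must match
# Dump._PATH_PATTERN, i.e. '<prefix>.<pid>.<sequence>.heap'):
#   dumps = DumpList.load(['chrome.1234.0001.heap', 'chrome.1234.0002.heap'])
#   for dump in dumps:
#     print dump.path, dump.time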

class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
  """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
  _DUMMY_ENTRY = proc_maps.ProcMapsEntry(
      0,     # begin
      0,     # end
      '-',   # readable
      '-',   # writable
      '-',   # executable
      '-',   # private
      0,     # offset
      '00',  # major
      '00',  # minor
      0,     # inode
      ''     # name
      )

  def __init__(self):
    super(ProcMapsEntryAttribute, self).__init__()
    self._entry = self._DUMMY_ENTRY.as_dict()

  def __str__(self):
    return str(self._entry)

  def __repr__(self):
    return 'ProcMapsEntryAttribute' + str(self._entry)

  def __getitem__(self, key):
    return self._entry[key]

  def __setitem__(self, key, value):
    if key not in self._entry:
      raise KeyError(key)
    self._entry[key] = value

  def copy(self):
    new_entry = ProcMapsEntryAttribute()
    for key, value in self._entry.iteritems():
      new_entry[key] = copy.deepcopy(value)
    return new_entry


def skip_while(index, max_index, skipping_condition):
  """Increments |index| until |skipping_condition|(|index|) is False.

  Returns:
      A pair of (the first index at or after |index| for which
      |skipping_condition| is False, and True), or (|max_index|, False) if
      no such index is found below |max_index|.
  """
  while skipping_condition(index):
    index += 1
    if index >= max_index:
      return index, False
  return index, True
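

# Example: with lines == ['heap profile: DUMP_DEEP_6\n', 'META:\n',
# 'Time: 1\n'],
#   skip_while(0, len(lines), lambda n: lines[n] != 'META:\n')
# returns (1, True).  If no line matched, it would return (len(lines), False).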