Home | History | Annotate | Download | only in lib
      1 # Copyright 2013 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import json
      6 import logging
      7 import os
      8 import re
      9 
     10 
# Logger registered under the name 'dmprof'.
LOGGER = logging.getLogger('dmprof')

# Parent of the directory that contains this file.
BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Location of the "policies.json" file directly under BASE_PATH.
POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json')

# Heap Profile Policy versions

# POLICY_DEEP_1 DOES NOT include allocation_type columns.
# mmap regions are distinguished by mmap frames in the pattern column.
POLICY_DEEP_1 = 'POLICY_DEEP_1'

# POLICY_DEEP_2 DOES include allocation_type columns.
# mmap regions are distinguished by the allocation_type column.
POLICY_DEEP_2 = 'POLICY_DEEP_2'

# POLICY_DEEP_3 is in JSON format.
POLICY_DEEP_3 = 'POLICY_DEEP_3'

# POLICY_DEEP_4 contains typeinfo.
POLICY_DEEP_4 = 'POLICY_DEEP_4'
     31 
     32 
     33 class Rule(object):
     34   """Represents one matching rule in a policy file."""
     35 
     36   def __init__(self,
     37                name,
     38                allocator_type,
     39                stackfunction_pattern=None,
     40                stacksourcefile_pattern=None,
     41                typeinfo_pattern=None,
     42                mappedpathname_pattern=None,
     43                mappedpermission_pattern=None,
     44                sharedwith=None):
     45     self._name = name
     46     self._allocator_type = allocator_type
     47 
     48     self._stackfunction_pattern = None
     49     if stackfunction_pattern:
     50       self._stackfunction_pattern = re.compile(
     51           stackfunction_pattern + r'\Z')
     52 
     53     self._stacksourcefile_pattern = None
     54     if stacksourcefile_pattern:
     55       self._stacksourcefile_pattern = re.compile(
     56           stacksourcefile_pattern + r'\Z')
     57 
     58     self._typeinfo_pattern = None
     59     if typeinfo_pattern:
     60       self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z')
     61 
     62     self._mappedpathname_pattern = None
     63     if mappedpathname_pattern:
     64       self._mappedpathname_pattern = re.compile(mappedpathname_pattern + r'\Z')
     65 
     66     self._mappedpermission_pattern = None
     67     if mappedpermission_pattern:
     68       self._mappedpermission_pattern = re.compile(
     69           mappedpermission_pattern + r'\Z')
     70 
     71     self._sharedwith = []
     72     if sharedwith:
     73       self._sharedwith = sharedwith
     74 
     75   @property
     76   def name(self):
     77     return self._name
     78 
     79   @property
     80   def allocator_type(self):
     81     return self._allocator_type
     82 
     83   @property
     84   def stackfunction_pattern(self):
     85     return self._stackfunction_pattern
     86 
     87   @property
     88   def stacksourcefile_pattern(self):
     89     return self._stacksourcefile_pattern
     90 
     91   @property
     92   def typeinfo_pattern(self):
     93     return self._typeinfo_pattern
     94 
     95   @property
     96   def mappedpathname_pattern(self):
     97     return self._mappedpathname_pattern
     98 
     99   @property
    100   def mappedpermission_pattern(self):
    101     return self._mappedpermission_pattern
    102 
    103   @property
    104   def sharedwith(self):
    105     return self._sharedwith
    106 
    107 
    108 class Policy(object):
    109   """Represents a policy, a content of a policy file."""
    110 
    111   def __init__(self, rules, version, components):
    112     self._rules = rules
    113     self._version = version
    114     self._components = components
    115 
    116   @property
    117   def rules(self):
    118     return self._rules
    119 
    120   @property
    121   def version(self):
    122     return self._version
    123 
    124   @property
    125   def components(self):
    126     return self._components
    127 
    128   def find_rule(self, component_name):
    129     """Finds a rule whose name is |component_name|. """
    130     for rule in self._rules:
    131       if rule.name == component_name:
    132         return rule
    133     return None
    134 
    135   def find_malloc(self, bucket):
    136     """Finds a matching component name which a given |bucket| belongs to.
    137 
    138     Args:
    139         bucket: A Bucket object to be searched for.
    140 
    141     Returns:
    142         A string representing a component name.
    143     """
    144     assert not bucket or bucket.allocator_type == 'malloc'
    145 
    146     if not bucket:
    147       return 'no-bucket'
    148     if bucket.component_cache:
    149       return bucket.component_cache
    150 
    151     stackfunction = bucket.symbolized_joined_stackfunction
    152     stacksourcefile = bucket.symbolized_joined_stacksourcefile
    153     typeinfo = bucket.symbolized_typeinfo
    154     if typeinfo.startswith('0x'):
    155       typeinfo = bucket.typeinfo_name
    156 
    157     for rule in self._rules:
    158       if (rule.allocator_type == 'malloc' and
    159           (not rule.stackfunction_pattern or
    160            rule.stackfunction_pattern.match(stackfunction)) and
    161           (not rule.stacksourcefile_pattern or
    162            rule.stacksourcefile_pattern.match(stacksourcefile)) and
    163           (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))):
    164         bucket.component_cache = rule.name
    165         return rule.name
    166 
    167     assert False
    168 
    169   def find_mmap(self, region, bucket_set,
    170                 pageframe=None, group_pfn_counts=None):
    171     """Finds a matching component which a given mmap |region| belongs to.
    172 
    173     It uses |bucket_set| to match with backtraces.  If |pageframe| is given,
    174     it considers memory sharing among processes.
    175 
    176     NOTE: Don't use Bucket's |component_cache| for mmap regions because they're
    177     classified not only with bucket information (mappedpathname for example).
    178 
    179     Args:
    180         region: A tuple representing a memory region.
    181         bucket_set: A BucketSet object to look up backtraces.
    182         pageframe: A PageFrame object representing a pageframe maybe including
    183             a pagecount.
    184         group_pfn_counts: A dict mapping a PFN to the number of times the
    185             the pageframe is mapped by the known "group (Chrome)" processes.
    186 
    187     Returns:
    188         A string representing a component name.
    189     """
    190     assert region[0] == 'hooked'
    191     bucket = bucket_set.get(region[1]['bucket_id'])
    192     assert not bucket or bucket.allocator_type == 'mmap'
    193 
    194     if not bucket:
    195       return 'no-bucket', None
    196 
    197     stackfunction = bucket.symbolized_joined_stackfunction
    198     stacksourcefile = bucket.symbolized_joined_stacksourcefile
    199     sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)
    200 
    201     for rule in self._rules:
    202       if (rule.allocator_type == 'mmap' and
    203           (not rule.stackfunction_pattern or
    204            rule.stackfunction_pattern.match(stackfunction)) and
    205           (not rule.stacksourcefile_pattern or
    206            rule.stacksourcefile_pattern.match(stacksourcefile)) and
    207           (not rule.mappedpathname_pattern or
    208            rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and
    209           (not rule.mappedpermission_pattern or
    210            rule.mappedpermission_pattern.match(
    211                region[1]['vma']['readable'] +
    212                region[1]['vma']['writable'] +
    213                region[1]['vma']['executable'] +
    214                region[1]['vma']['private'])) and
    215           (not rule.sharedwith or
    216            not pageframe or sharedwith in rule.sharedwith)):
    217         return rule.name, bucket
    218 
    219     assert False
    220 
    221   def find_unhooked(self, region, pageframe=None, group_pfn_counts=None):
    222     """Finds a matching component which a given unhooked |region| belongs to.
    223 
    224     If |pageframe| is given, it considers memory sharing among processes.
    225 
    226     Args:
    227         region: A tuple representing a memory region.
    228         pageframe: A PageFrame object representing a pageframe maybe including
    229             a pagecount.
    230         group_pfn_counts: A dict mapping a PFN to the number of times the
    231             the pageframe is mapped by the known "group (Chrome)" processes.
    232 
    233     Returns:
    234         A string representing a component name.
    235     """
    236     assert region[0] == 'unhooked'
    237     sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)
    238 
    239     for rule in self._rules:
    240       if (rule.allocator_type == 'unhooked' and
    241           (not rule.mappedpathname_pattern or
    242            rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and
    243           (not rule.mappedpermission_pattern or
    244            rule.mappedpermission_pattern.match(
    245                region[1]['vma']['readable'] +
    246                region[1]['vma']['writable'] +
    247                region[1]['vma']['executable'] +
    248                region[1]['vma']['private'])) and
    249           (not rule.sharedwith or
    250            not pageframe or sharedwith in rule.sharedwith)):
    251         return rule.name
    252 
    253     assert False
    254 
    255   @staticmethod
    256   def load(filename, filetype):
    257     """Loads a policy file of |filename| in a |format|.
    258 
    259     Args:
    260         filename: A filename to be loaded.
    261         filetype: A string to specify a type of the file.  Only 'json' is
    262             supported for now.
    263 
    264     Returns:
    265         A loaded Policy object.
    266     """
    267     with open(os.path.join(BASE_PATH, filename)) as policy_f:
    268       return Policy.parse(policy_f, filetype)
    269 
    270   @staticmethod
    271   def parse(policy_f, filetype):
    272     """Parses a policy file content in a |format|.
    273 
    274     Args:
    275         policy_f: An IO object to be loaded.
    276         filetype: A string to specify a type of the file.  Only 'json' is
    277             supported for now.
    278 
    279     Returns:
    280         A loaded Policy object.
    281     """
    282     if filetype == 'json':
    283       return Policy._parse_json(policy_f)
    284     else:
    285       return None
    286 
    287   @staticmethod
    288   def _parse_json(policy_f):
    289     """Parses policy file in json format.
    290 
    291     A policy file contains component's names and their stacktrace pattern
    292     written in regular expression.  Those patterns are matched against each
    293     symbols of each stacktraces in the order written in the policy file
    294 
    295     Args:
    296          policy_f: A File/IO object to read.
    297 
    298     Returns:
    299          A loaded policy object.
    300     """
    301     policy = json.load(policy_f)
    302 
    303     rules = []
    304     for rule in policy['rules']:
    305       stackfunction = rule.get('stackfunction') or rule.get('stacktrace')
    306       stacksourcefile = rule.get('stacksourcefile')
    307       rules.append(Rule(
    308           rule['name'],
    309           rule['allocator'],  # allocator_type
    310           stackfunction,
    311           stacksourcefile,
    312           rule['typeinfo'] if 'typeinfo' in rule else None,
    313           rule.get('mappedpathname'),
    314           rule.get('mappedpermission'),
    315           rule.get('sharedwith')))
    316 
    317     return Policy(rules, policy['version'], policy['components'])
    318 
    319   @staticmethod
    320   def _categorize_pageframe(pageframe, group_pfn_counts):
    321     """Categorizes a pageframe based on its sharing status.
    322 
    323     Returns:
    324         'private' if |pageframe| is not shared with other processes.  'group'
    325         if |pageframe| is shared only with group (Chrome-related) processes.
    326         'others' if |pageframe| is shared with non-group processes.
    327     """
    328     if not pageframe:
    329       return 'private'
    330 
    331     if pageframe.pagecount:
    332       if pageframe.pagecount == 1:
    333         return 'private'
    334       elif pageframe.pagecount <= group_pfn_counts.get(pageframe.pfn, 0) + 1:
    335         return 'group'
    336       else:
    337         return 'others'
    338     else:
    339       if pageframe.pfn in group_pfn_counts:
    340         return 'group'
    341       else:
    342         return 'private'
    343 
    344 
    345 class PolicySet(object):
    346   """Represents a set of policies."""
    347 
    348   def __init__(self, policy_directory):
    349     self._policy_directory = policy_directory
    350 
    351   @staticmethod
    352   def load(labels=None):
    353     """Loads a set of policies via the "default policy directory".
    354 
    355     The "default policy directory" contains pairs of policies and their labels.
    356     For example, a policy "policy.l0.json" is labeled "l0" in the default
    357     policy directory "policies.json".
    358 
    359     All policies in the directory are loaded by default.  Policies can be
    360     limited by |labels|.
    361 
    362     Args:
    363         labels: An array that contains policy labels to be loaded.
    364 
    365     Returns:
    366         A PolicySet object.
    367     """
    368     default_policy_directory = PolicySet._load_default_policy_directory()
    369     if labels:
    370       specified_policy_directory = {}
    371       for label in labels:
    372         if label in default_policy_directory:
    373           specified_policy_directory[label] = default_policy_directory[label]
    374         # TODO(dmikurube): Load an un-labeled policy file.
    375       return PolicySet._load_policies(specified_policy_directory)
    376     else:
    377       return PolicySet._load_policies(default_policy_directory)
    378 
    379   def __len__(self):
    380     return len(self._policy_directory)
    381 
    382   def __iter__(self):
    383     for label in self._policy_directory:
    384       yield label
    385 
    386   def __getitem__(self, label):
    387     return self._policy_directory[label]
    388 
    389   @staticmethod
    390   def _load_default_policy_directory():
    391     with open(POLICIES_JSON_PATH, mode='r') as policies_f:
    392       default_policy_directory = json.load(policies_f)
    393     return default_policy_directory
    394 
    395   @staticmethod
    396   def _load_policies(directory):
    397     LOGGER.info('Loading policy files.')
    398     policies = {}
    399     for label in directory:
    400       LOGGER.info('  %s: %s' % (label, directory[label]['file']))
    401       loaded = Policy.load(directory[label]['file'], directory[label]['format'])
    402       if loaded:
    403         policies[label] = loaded
    404     return PolicySet(policies)
    405