Home | History | Annotate | Download | only in lib
      1 # Copyright 2013 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import json
      6 import logging
      7 import os
      8 import re
      9 
     10 
# Module-wide logger, registered under the name 'dmprof'.
LOGGER = logging.getLogger('dmprof')

# BASE_PATH is the parent of the directory that contains this file; policy
# files and the policy directory file are resolved relative to it.
BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json')

# Heap Profile Policy versions

# POLICY_DEEP_1 DOES NOT include allocation_type columns.
# mmap regions are distinguished by mmap frames in the pattern column.
POLICY_DEEP_1 = 'POLICY_DEEP_1'

# POLICY_DEEP_2 DOES include allocation_type columns.
# mmap regions are distinguished by the allocation_type column.
POLICY_DEEP_2 = 'POLICY_DEEP_2'

# POLICY_DEEP_3 is in JSON format.
POLICY_DEEP_3 = 'POLICY_DEEP_3'

# POLICY_DEEP_4 contains typeinfo.
POLICY_DEEP_4 = 'POLICY_DEEP_4'
     31 
     32 
     33 class Rule(object):
     34   """Represents one matching rule in a policy file."""
     35 
     36   def __init__(self,
     37                name,
     38                allocator_type,
     39                stackfunction_pattern=None,
     40                stacksourcefile_pattern=None,
     41                typeinfo_pattern=None,
     42                mappedpathname_pattern=None,
     43                mappedpermission_pattern=None,
     44                sharedwith=None):
     45     self._name = name
     46     self._allocator_type = allocator_type
     47 
     48     self._stackfunction_pattern = None
     49     if stackfunction_pattern:
     50       self._stackfunction_pattern = re.compile(
     51           stackfunction_pattern + r'\Z')
     52 
     53     self._stacksourcefile_pattern = None
     54     if stacksourcefile_pattern:
     55       self._stacksourcefile_pattern = re.compile(
     56           stacksourcefile_pattern + r'\Z')
     57 
     58     self._typeinfo_pattern = None
     59     if typeinfo_pattern:
     60       self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z')
     61 
     62     self._mappedpathname_pattern = None
     63     if mappedpathname_pattern:
     64       self._mappedpathname_pattern = re.compile(mappedpathname_pattern + r'\Z')
     65 
     66     self._mappedpermission_pattern = None
     67     if mappedpermission_pattern:
     68       self._mappedpermission_pattern = re.compile(
     69           mappedpermission_pattern + r'\Z')
     70 
     71     self._sharedwith = []
     72     if sharedwith:
     73       self._sharedwith = sharedwith
     74 
     75   @property
     76   def name(self):
     77     return self._name
     78 
     79   @property
     80   def allocator_type(self):
     81     return self._allocator_type
     82 
     83   @property
     84   def stackfunction_pattern(self):
     85     return self._stackfunction_pattern
     86 
     87   @property
     88   def stacksourcefile_pattern(self):
     89     return self._stacksourcefile_pattern
     90 
     91   @property
     92   def typeinfo_pattern(self):
     93     return self._typeinfo_pattern
     94 
     95   @property
     96   def mappedpathname_pattern(self):
     97     return self._mappedpathname_pattern
     98 
     99   @property
    100   def mappedpermission_pattern(self):
    101     return self._mappedpermission_pattern
    102 
    103   @property
    104   def sharedwith(self):
    105     return self._sharedwith
    106 
    107 
    108 class Policy(object):
    109   """Represents a policy, a content of a policy file."""
    110 
    111   def __init__(self, rules, version, components):
    112     self._rules = rules
    113     self._version = version
    114     self._components = components
    115 
    116   @property
    117   def rules(self):
    118     return self._rules
    119 
    120   @property
    121   def version(self):
    122     return self._version
    123 
    124   @property
    125   def components(self):
    126     return self._components
    127 
    128   def find_rule(self, component_name):
    129     """Finds a rule whose name is |component_name|. """
    130     for rule in self._rules:
    131       if rule.name == component_name:
    132         return rule
    133     return None
    134 
    135   def find_malloc(self, bucket):
    136     """Finds a matching component name which a given |bucket| belongs to.
    137 
    138     Args:
    139         bucket: A Bucket object to be searched for.
    140 
    141     Returns:
    142         A string representing a component name.
    143     """
    144     assert not bucket or bucket.allocator_type == 'malloc'
    145 
    146     if not bucket:
    147       return 'no-bucket'
    148     if bucket.component_cache:
    149       return bucket.component_cache
    150 
    151     stackfunction = bucket.symbolized_joined_stackfunction
    152     stacksourcefile = bucket.symbolized_joined_stacksourcefile
    153     typeinfo = bucket.symbolized_typeinfo
    154     if typeinfo.startswith('0x'):
    155       typeinfo = bucket.typeinfo_name
    156 
    157     for rule in self._rules:
    158       if (rule.allocator_type == 'malloc' and
    159           (not rule.stackfunction_pattern or
    160            rule.stackfunction_pattern.match(stackfunction)) and
    161           (not rule.stacksourcefile_pattern or
    162            rule.stacksourcefile_pattern.match(stacksourcefile)) and
    163           (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))):
    164         bucket.component_cache = rule.name
    165         return rule.name
    166 
    167     assert False
    168 
    169   def find_mmap(self, region, bucket_set,
    170                 pageframe=None, group_pfn_counts=None):
    171     """Finds a matching component which a given mmap |region| belongs to.
    172 
    173     It uses |bucket_set| to match with backtraces.  If |pageframe| is given,
    174     it considers memory sharing among processes.
    175 
    176     NOTE: Don't use Bucket's |component_cache| for mmap regions because they're
    177     classified not only with bucket information (mappedpathname for example).
    178 
    179     Args:
    180         region: A tuple representing a memory region.
    181         bucket_set: A BucketSet object to look up backtraces.
    182         pageframe: A PageFrame object representing a pageframe maybe including
    183             a pagecount.
    184         group_pfn_counts: A dict mapping a PFN to the number of times the
    185             the pageframe is mapped by the known "group (Chrome)" processes.
    186 
    187     Returns:
    188         A string representing a component name.
    189     """
    190     assert region[0] == 'hooked'
    191     bucket = bucket_set.get(region[1]['bucket_id'])
    192     assert not bucket or bucket.allocator_type == 'mmap'
    193 
    194     if not bucket:
    195       return 'no-bucket', None
    196 
    197     stackfunction = bucket.symbolized_joined_stackfunction
    198     stacksourcefile = bucket.symbolized_joined_stacksourcefile
    199     sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)
    200 
    201     for rule in self._rules:
    202       if (rule.allocator_type == 'mmap' and
    203           (not rule.stackfunction_pattern or
    204            rule.stackfunction_pattern.match(stackfunction)) and
    205           (not rule.stacksourcefile_pattern or
    206            rule.stacksourcefile_pattern.match(stacksourcefile)) and
    207           (not rule.mappedpathname_pattern or
    208            rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and
    209           (not rule.mappedpermission_pattern or
    210            rule.mappedpermission_pattern.match(
    211                region[1]['vma']['readable'] +
    212                region[1]['vma']['writable'] +
    213                region[1]['vma']['executable'] +
    214                region[1]['vma']['private'])) and
    215           (not rule.sharedwith or
    216            not pageframe or sharedwith in rule.sharedwith)):
    217         return rule.name, bucket
    218 
    219     assert False
    220 
    221   def find_unhooked(self, region, pageframe=None, group_pfn_counts=None):
    222     """Finds a matching component which a given unhooked |region| belongs to.
    223 
    224     If |pageframe| is given, it considers memory sharing among processes.
    225 
    226     Args:
    227         region: A tuple representing a memory region.
    228         pageframe: A PageFrame object representing a pageframe maybe including
    229             a pagecount.
    230         group_pfn_counts: A dict mapping a PFN to the number of times the
    231             the pageframe is mapped by the known "group (Chrome)" processes.
    232 
    233     Returns:
    234         A string representing a component name.
    235     """
    236     assert region[0] == 'unhooked'
    237     sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)
    238 
    239     for rule in self._rules:
    240       if (rule.allocator_type == 'unhooked' and
    241           (not rule.mappedpathname_pattern or
    242            rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and
    243           (not rule.mappedpermission_pattern or
    244            rule.mappedpermission_pattern.match(
    245                region[1]['vma']['readable'] +
    246                region[1]['vma']['writable'] +
    247                region[1]['vma']['executable'] +
    248                region[1]['vma']['private'])) and
    249           (not rule.sharedwith or
    250            not pageframe or sharedwith in rule.sharedwith)):
    251         return rule.name
    252 
    253     assert False
    254 
    255   @staticmethod
    256   def load(filename, filetype):
    257     """Loads a policy file of |filename| in a |format|.
    258 
    259     Args:
    260         filename: A filename to be loaded.
    261         filetype: A string to specify a type of the file.  Only 'json' is
    262             supported for now.
    263 
    264     Returns:
    265         A loaded Policy object.
    266     """
    267     with open(os.path.join(BASE_PATH, filename)) as policy_f:
    268       return Policy.parse(policy_f, filetype)
    269 
    270   @staticmethod
    271   def parse(policy_f, filetype):
    272     """Parses a policy file content in a |format|.
    273 
    274     Args:
    275         policy_f: An IO object to be loaded.
    276         filetype: A string to specify a type of the file.  Only 'json' is
    277             supported for now.
    278 
    279     Returns:
    280         A loaded Policy object.
    281     """
    282     if filetype == 'json':
    283       return Policy._parse_json(policy_f)
    284     else:
    285       return None
    286 
    287   JSON_COMMENT_REGEX = re.compile(r'//.*')
    288 
    289   @staticmethod
    290   def _parse_json(policy_f):
    291     """Parses policy file in json format.
    292 
    293     A policy file contains component's names and their stacktrace pattern
    294     written in regular expression.  Those patterns are matched against each
    295     symbols of each stacktraces in the order written in the policy file
    296 
    297     Args:
    298          policy_f: A File/IO object to read.
    299 
    300     Returns:
    301          A loaded policy object.
    302     """
    303     policy_json = policy_f.read()
    304     policy_json = re.sub(Policy.JSON_COMMENT_REGEX, '', policy_json)
    305     policy = json.loads(policy_json)
    306 
    307     rules = []
    308     for rule in policy['rules']:
    309       stackfunction = rule.get('stackfunction') or rule.get('stacktrace')
    310       stacksourcefile = rule.get('stacksourcefile')
    311       rules.append(Rule(
    312           rule['name'],
    313           rule['allocator'],  # allocator_type
    314           stackfunction,
    315           stacksourcefile,
    316           rule['typeinfo'] if 'typeinfo' in rule else None,
    317           rule.get('mappedpathname'),
    318           rule.get('mappedpermission'),
    319           rule.get('sharedwith')))
    320 
    321     return Policy(rules, policy['version'], policy['components'])
    322 
    323   @staticmethod
    324   def _categorize_pageframe(pageframe, group_pfn_counts):
    325     """Categorizes a pageframe based on its sharing status.
    326 
    327     Returns:
    328         'private' if |pageframe| is not shared with other processes.  'group'
    329         if |pageframe| is shared only with group (Chrome-related) processes.
    330         'others' if |pageframe| is shared with non-group processes.
    331     """
    332     if not pageframe:
    333       return 'private'
    334 
    335     if pageframe.pagecount:
    336       if pageframe.pagecount == 1:
    337         return 'private'
    338       elif pageframe.pagecount <= group_pfn_counts.get(pageframe.pfn, 0) + 1:
    339         return 'group'
    340       else:
    341         return 'others'
    342     else:
    343       if pageframe.pfn in group_pfn_counts:
    344         return 'group'
    345       else:
    346         return 'private'
    347 
    348 
    349 class PolicySet(object):
    350   """Represents a set of policies."""
    351 
    352   def __init__(self, policy_directory):
    353     self._policy_directory = policy_directory
    354 
    355   @staticmethod
    356   def load(labels=None):
    357     """Loads a set of policies via the "default policy directory".
    358 
    359     The "default policy directory" contains pairs of policies and their labels.
    360     For example, a policy "policy.l0.json" is labeled "l0" in the default
    361     policy directory "policies.json".
    362 
    363     All policies in the directory are loaded by default.  Policies can be
    364     limited by |labels|.
    365 
    366     Args:
    367         labels: An array that contains policy labels to be loaded.
    368 
    369     Returns:
    370         A PolicySet object.
    371     """
    372     default_policy_directory = PolicySet._load_default_policy_directory()
    373     if labels:
    374       specified_policy_directory = {}
    375       for label in labels:
    376         if label in default_policy_directory:
    377           specified_policy_directory[label] = default_policy_directory[label]
    378         # TODO(dmikurube): Load an un-labeled policy file.
    379       return PolicySet._load_policies(specified_policy_directory)
    380     else:
    381       return PolicySet._load_policies(default_policy_directory)
    382 
    383   def __len__(self):
    384     return len(self._policy_directory)
    385 
    386   def __iter__(self):
    387     for label in self._policy_directory:
    388       yield label
    389 
    390   def __getitem__(self, label):
    391     return self._policy_directory[label]
    392 
    393   @staticmethod
    394   def _load_default_policy_directory():
    395     with open(POLICIES_JSON_PATH, mode='r') as policies_f:
    396       default_policy_directory = json.load(policies_f)
    397     return default_policy_directory
    398 
    399   @staticmethod
    400   def _load_policies(directory):
    401     LOGGER.info('Loading policy files.')
    402     policies = {}
    403     for label in directory:
    404       LOGGER.info('  %s: %s' % (label, directory[label]['file']))
    405       loaded = Policy.load(directory[label]['file'], directory[label]['format'])
    406       if loaded:
    407         policies[label] = loaded
    408     return PolicySet(policies)
    409