Home | History | Annotate | Download | only in histograms
      1 # Copyright 2013 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 """Extract histogram names from the description XML file.
      6 
      7 For more information on the format of the XML file, which is self-documenting,
      8 see histograms.xml; however, here is a simple example to get you started. The
      9 XML below will generate the following five histograms:
     10 
     11     HistogramTime
     12     HistogramEnum
     13     HistogramEnum_Chrome
     14     HistogramEnum_IE
     15     HistogramEnum_Firefox
     16 
     17 <histogram-configuration>
     18 
     19 <histograms>
     20 
     21 <histogram name="HistogramTime" units="milliseconds">
     22   <summary>A brief description.</summary>
     23   <details>This is a more thorough description of this histogram.</details>
     24 </histogram>
     25 
     26 <histogram name="HistogramEnum" enum="MyEnumType">
     27   <summary>This histogram sports an enum value type.</summary>
     28 </histogram>
     29 
     30 </histograms>
     31 
     32 <enums>
     33 
     34 <enum name="MyEnumType" type="int">
     35   <summary>This is an example enum type, where the values mean little.</summary>
     36   <int value="1" label="FIRST_VALUE">This is the first value.</int>
     37   <int value="2" label="SECOND_VALUE">This is the second value.</int>
     38 </enum>
     39 
     40 </enums>
     41 
     42 <histogram_suffixes_list>
     43 
     44 <histogram_suffixes name="BrowserType">
     45   <suffix name="Chrome"/>
     46   <suffix name="IE"/>
     47   <suffix name="Firefox"/>
     48   <affected-histogram name="HistogramEnum"/>
     49 </histogram_suffixes>
     50 
     51 </histogram_suffixes_list>
     52 
     53 </histogram-configuration>
     54 
     55 """
     56 
     57 import copy
     58 import logging
     59 import xml.dom.minidom
     60 
     61 OWNER_FIELD_PLACEHOLDER = (
     62     'Please list the metric\'s owners. Add more owner tags as needed.')
     63 
     64 MAX_HISTOGRAM_SUFFIX_DEPENDENCY_DEPTH = 5
     65 
     66 
     67 class Error(Exception):
     68   pass
     69 
     70 
     71 def _JoinChildNodes(tag):
     72   """Join child nodes into a single text.
     73 
     74   Applicable to leafs like 'summary' and 'detail'.
     75 
     76   Args:
     77     tag: parent node
     78 
     79   Returns:
     80     a string with concatenated nodes' text representation.
     81   """
     82   return ''.join(c.toxml() for c in tag.childNodes).strip()
     83 
     84 
     85 def _NormalizeString(s):
     86   """Replaces all whitespace sequences with a single space.
     87 
     88   The function properly handles multi-line strings.
     89 
     90   Args:
     91     s: The string to normalize, ('  \\n a  b c\\n d  ').
     92 
     93   Returns:
     94     The normalized string (a b c d).
     95   """
     96   return ' '.join(s.split())
     97 
     98 
     99 def _NormalizeAllAttributeValues(node):
    100   """Recursively normalizes all tag attribute values in the given tree.
    101 
    102   Args:
    103     node: The minidom node to be normalized.
    104 
    105   Returns:
    106     The normalized minidom node.
    107   """
    108   if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
    109     for a in node.attributes.keys():
    110       node.attributes[a].value = _NormalizeString(node.attributes[a].value)
    111 
    112   for c in node.childNodes:
    113     _NormalizeAllAttributeValues(c)
    114   return node
    115 
    116 
    117 def _ExpandHistogramNameWithSuffixes(suffix_name, histogram_name,
    118                                      histogram_suffixes_node):
    119   """Creates a new histogram name based on a histogram suffix.
    120 
    121   Args:
    122     suffix_name: The suffix string to apply to the histogram name. May be empty.
    123     histogram_name: The name of the histogram. May be of the form
    124       Group.BaseName or BaseName.
    125     histogram_suffixes_node: The histogram_suffixes XML node.
    126 
    127   Returns:
    128     A string with the expanded histogram name.
    129 
    130   Raises:
    131     Error: if the expansion can't be done.
    132   """
    133   if histogram_suffixes_node.hasAttribute('separator'):
    134     separator = histogram_suffixes_node.getAttribute('separator')
    135   else:
    136     separator = '_'
    137 
    138   if histogram_suffixes_node.hasAttribute('ordering'):
    139     ordering = histogram_suffixes_node.getAttribute('ordering')
    140   else:
    141     ordering = 'suffix'
    142   if ordering not in ['prefix', 'suffix']:
    143     logging.error('ordering needs to be prefix or suffix, value is %s',
    144                   ordering)
    145     raise Error()
    146 
    147   if not suffix_name:
    148     return histogram_name
    149 
    150   if ordering == 'suffix':
    151     return histogram_name + separator + suffix_name
    152 
    153   # For prefixes, the suffix_name is inserted between the "cluster" and the
    154   # "remainder", e.g. Foo.BarHist expanded with gamma becomes Foo.gamma_BarHist.
    155   sections = histogram_name.split('.')
    156   if len(sections) <= 1:
    157     logging.error(
    158         'Prefix Field Trial expansions require histogram names which include a '
    159         'dot separator. Histogram name is %s, and Field Trial is %s',
    160         histogram_name, histogram_suffixes_node.getAttribute('name'))
    161     raise Error()
    162 
    163   cluster = sections[0] + '.'
    164   remainder = '.'.join(sections[1:])
    165   return cluster + suffix_name + separator + remainder
    166 
    167 
    168 def _ExtractEnumsFromXmlTree(tree):
    169   """Extract all <enum> nodes in the tree into a dictionary."""
    170 
    171   enums = {}
    172   have_errors = False
    173 
    174   last_name = None
    175   for enum in tree.getElementsByTagName('enum'):
    176     if enum.getAttribute('type') != 'int':
    177       logging.error('Unknown enum type %s', enum.getAttribute('type'))
    178       have_errors = True
    179       continue
    180 
    181     name = enum.getAttribute('name')
    182     if last_name is not None and name.lower() < last_name.lower():
    183       logging.error('Enums %s and %s are not in alphabetical order',
    184                     last_name, name)
    185       have_errors = True
    186     last_name = name
    187 
    188     if name in enums:
    189       logging.error('Duplicate enum %s', name)
    190       have_errors = True
    191       continue
    192 
    193     last_int_value = None
    194     enum_dict = {}
    195     enum_dict['name'] = name
    196     enum_dict['values'] = {}
    197 
    198     for int_tag in enum.getElementsByTagName('int'):
    199       value_dict = {}
    200       int_value = int(int_tag.getAttribute('value'))
    201       if last_int_value is not None and int_value < last_int_value:
    202         logging.error('Enum %s int values %d and %d are not in numerical order',
    203                       name, last_int_value, int_value)
    204         have_errors = True
    205       last_int_value = int_value
    206       if int_value in enum_dict['values']:
    207         logging.error('Duplicate enum value %d for enum %s', int_value, name)
    208         have_errors = True
    209         continue
    210       value_dict['label'] = int_tag.getAttribute('label')
    211       value_dict['summary'] = _JoinChildNodes(int_tag)
    212       enum_dict['values'][int_value] = value_dict
    213 
    214     summary_nodes = enum.getElementsByTagName('summary')
    215     if summary_nodes:
    216       enum_dict['summary'] = _NormalizeString(_JoinChildNodes(summary_nodes[0]))
    217 
    218     enums[name] = enum_dict
    219 
    220   return enums, have_errors
    221 
    222 
    223 def _ExtractOwners(xml_node):
    224   """Extract all owners into a list from owner tag under |xml_node|."""
    225   owners = []
    226   for owner_node in xml_node.getElementsByTagName('owner'):
    227     owner_entry = _NormalizeString(_JoinChildNodes(owner_node))
    228     if OWNER_FIELD_PLACEHOLDER not in owner_entry:
    229       owners.append(owner_entry)
    230   return owners
    231 
    232 
    233 def _ExtractHistogramsFromXmlTree(tree, enums):
    234   """Extract all <histogram> nodes in the tree into a dictionary."""
    235 
    236   # Process the histograms. The descriptions can include HTML tags.
    237   histograms = {}
    238   have_errors = False
    239   last_name = None
    240   for histogram in tree.getElementsByTagName('histogram'):
    241     name = histogram.getAttribute('name')
    242     if last_name is not None and name.lower() < last_name.lower():
    243       logging.error('Histograms %s and %s are not in alphabetical order',
    244                     last_name, name)
    245       have_errors = True
    246     last_name = name
    247     if name in histograms:
    248       logging.error('Duplicate histogram definition %s', name)
    249       have_errors = True
    250       continue
    251     histograms[name] = histogram_entry = {}
    252 
    253     # Find <owner> tag.
    254     owners = _ExtractOwners(histogram)
    255     if owners:
    256       histogram_entry['owners'] = owners
    257 
    258     # Find <summary> tag.
    259     summary_nodes = histogram.getElementsByTagName('summary')
    260     if summary_nodes:
    261       histogram_entry['summary'] = _NormalizeString(
    262           _JoinChildNodes(summary_nodes[0]))
    263     else:
    264       histogram_entry['summary'] = 'TBD'
    265 
    266     # Find <obsolete> tag.
    267     obsolete_nodes = histogram.getElementsByTagName('obsolete')
    268     if obsolete_nodes:
    269       reason = _JoinChildNodes(obsolete_nodes[0])
    270       histogram_entry['obsolete'] = reason
    271 
    272     # Handle units.
    273     if histogram.hasAttribute('units'):
    274       histogram_entry['units'] = histogram.getAttribute('units')
    275 
    276     # Find <details> tag.
    277     details_nodes = histogram.getElementsByTagName('details')
    278     if details_nodes:
    279       histogram_entry['details'] = _NormalizeString(
    280           _JoinChildNodes(details_nodes[0]))
    281 
    282     # Handle enum types.
    283     if histogram.hasAttribute('enum'):
    284       enum_name = histogram.getAttribute('enum')
    285       if enum_name not in enums:
    286         logging.error('Unknown enum %s in histogram %s', enum_name, name)
    287         have_errors = True
    288       else:
    289         histogram_entry['enum'] = enums[enum_name]
    290 
    291   return histograms, have_errors
    292 
    293 
    294 def _UpdateHistogramsWithSuffixes(tree, histograms):
    295   """Process <histogram_suffixes> tags and combine with affected histograms.
    296 
    297   The histograms dictionary will be updated in-place by adding new histograms
    298   created by combining histograms themselves with histogram_suffixes targeting
    299   these histograms.
    300 
    301   Args:
    302     tree: XML dom tree.
    303     histograms: a dictionary of histograms previously extracted from the tree;
    304 
    305   Returns:
    306     True if any errors were found.
    307   """
    308   have_errors = False
    309 
    310   histogram_suffix_tag = 'histogram_suffixes'
    311   suffix_tag = 'suffix'
    312   with_tag = 'with-suffix'
    313 
    314   # Verify order of histogram_suffixes fields first.
    315   last_name = None
    316   for histogram_suffixes in tree.getElementsByTagName(histogram_suffix_tag):
    317     name = histogram_suffixes.getAttribute('name')
    318     if last_name is not None and name.lower() < last_name.lower():
    319       logging.error('histogram_suffixes %s and %s are not in alphabetical '
    320                     'order', last_name, name)
    321       have_errors = True
    322     last_name = name
    323 
    324   # histogram_suffixes can depend on other histogram_suffixes, so we need to be
    325   # careful. Make a temporary copy of the list of histogram_suffixes to use as a
    326   # queue. histogram_suffixes whose dependencies have not yet been processed
    327   # will get relegated to the back of the queue to be processed later.
    328   reprocess_queue = []
    329   def GenerateHistogramSuffixes():
    330     for f in tree.getElementsByTagName(histogram_suffix_tag):
    331       yield 0, f
    332     for r, f in reprocess_queue:
    333       yield r, f
    334 
    335   for reprocess_count, histogram_suffixes in GenerateHistogramSuffixes():
    336     # Check dependencies first
    337     dependencies_valid = True
    338     affected_histograms = histogram_suffixes.getElementsByTagName(
    339         'affected-histogram')
    340     for affected_histogram in affected_histograms:
    341       histogram_name = affected_histogram.getAttribute('name')
    342       if histogram_name not in histograms:
    343         # Base histogram is missing
    344         dependencies_valid = False
    345         missing_dependency = histogram_name
    346         break
    347     if not dependencies_valid:
    348       if reprocess_count < MAX_HISTOGRAM_SUFFIX_DEPENDENCY_DEPTH:
    349         reprocess_queue.append((reprocess_count + 1, histogram_suffixes))
    350         continue
    351       else:
    352         logging.error('histogram_suffixes %s is missing its dependency %s',
    353                       histogram_suffixes.getAttribute('name'),
    354                       missing_dependency)
    355         have_errors = True
    356         continue
    357 
    358     name = histogram_suffixes.getAttribute('name')
    359     suffix_nodes = histogram_suffixes.getElementsByTagName(suffix_tag)
    360     suffix_labels = {}
    361     for suffix in suffix_nodes:
    362       suffix_labels[suffix.getAttribute('name')] = suffix.getAttribute('label')
    363     # Find owners list under current histogram_suffixes tag.
    364     owners = _ExtractOwners(histogram_suffixes)
    365 
    366     last_histogram_name = None
    367     for affected_histogram in affected_histograms:
    368       histogram_name = affected_histogram.getAttribute('name')
    369       if (last_histogram_name is not None
    370           and histogram_name.lower() < last_histogram_name.lower()):
    371         logging.error('Affected histograms %s and %s of histogram_suffixes %s '
    372                       'are not in alphabetical order',
    373                       last_histogram_name, histogram_name, name)
    374         have_errors = True
    375       last_histogram_name = histogram_name
    376       with_suffixes = affected_histogram.getElementsByTagName(with_tag)
    377       if with_suffixes:
    378         suffixes_to_add = with_suffixes
    379       else:
    380         suffixes_to_add = suffix_nodes
    381       for suffix in suffixes_to_add:
    382         suffix_name = suffix.getAttribute('name')
    383         try:
    384           new_histogram_name = _ExpandHistogramNameWithSuffixes(
    385               suffix_name, histogram_name, histogram_suffixes)
    386           if new_histogram_name != histogram_name:
    387             histograms[new_histogram_name] = copy.deepcopy(
    388                 histograms[histogram_name])
    389 
    390           suffix_label = suffix_labels.get(suffix_name, '')
    391 
    392           # TODO(yiyaoliu): Rename these to be consistent with the new naming.
    393           # It is kept unchanged for now to be it's used by dashboards.
    394           if 'fieldtrial_groups' not in histograms[new_histogram_name]:
    395             histograms[new_histogram_name]['fieldtrial_groups'] = []
    396           histograms[new_histogram_name]['fieldtrial_groups'].append(
    397               suffix_name)
    398 
    399           if 'fieldtrial_names' not in histograms[new_histogram_name]:
    400             histograms[new_histogram_name]['fieldtrial_names'] = []
    401           histograms[new_histogram_name]['fieldtrial_names'].append(name)
    402 
    403           if 'fieldtrial_labels' not in histograms[new_histogram_name]:
    404             histograms[new_histogram_name]['fieldtrial_labels'] = []
    405           histograms[new_histogram_name]['fieldtrial_labels'].append(
    406               suffix_label)
    407 
    408           # If no owners are added for this histogram-suffixes, it inherits the
    409           # owners of its parents.
    410           if owners:
    411             histograms[new_histogram_name]['owners'] = owners
    412 
    413         except Error:
    414           have_errors = True
    415 
    416   return have_errors
    417 
    418 
    419 def ExtractHistogramsFromFile(file_handle):
    420   """Compute the histogram names and descriptions from the XML representation.
    421 
    422   Args:
    423     file_handle: A file or file-like with XML content.
    424 
    425   Returns:
    426     a tuple of (histograms, status) where histograms is a dictionary mapping
    427     histogram names to dictionaries containing histogram descriptions and status
    428     is a boolean indicating if errros were encoutered in processing.
    429   """
    430   tree = xml.dom.minidom.parse(file_handle)
    431   _NormalizeAllAttributeValues(tree)
    432 
    433   enums, enum_errors = _ExtractEnumsFromXmlTree(tree)
    434   histograms, histogram_errors = _ExtractHistogramsFromXmlTree(tree, enums)
    435   update_errors = _UpdateHistogramsWithSuffixes(tree, histograms)
    436 
    437   return histograms, enum_errors or histogram_errors or update_errors
    438 
    439 
    440 def ExtractHistograms(filename):
    441   """Load histogram definitions from a disk file.
    442 
    443   Args:
    444     filename: a file path to load data from.
    445 
    446   Returns:
    447     a dictionary of histogram descriptions.
    448 
    449   Raises:
    450     Error: if the file is not well-formatted.
    451   """
    452   with open(filename, 'r') as f:
    453     histograms, had_errors = ExtractHistogramsFromFile(f)
    454     if had_errors:
    455       logging.error('Error parsing %s', filename)
    456       raise Error()
    457     return histograms
    458 
    459 
    460 def ExtractNames(histograms):
    461   return sorted(histograms.keys())