Home | History | Annotate | Download | only in histograms
      1 # Copyright 2013 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 """Extract histogram names from the description XML file.
      6 
      7 For more information on the format of the XML file, which is self-documenting,
      8 see histograms.xml; however, here is a simple example to get you started. The
      9 XML below will generate the following five histograms:
     10 
     11     HistogramTime
     12     HistogramEnum
     13     HistogramEnum_Chrome
     14     HistogramEnum_IE
     15     HistogramEnum_Firefox
     16 
     17 <histogram-configuration>
     18 
     19 <histograms>
     20 
     21 <histogram name="HistogramTime" units="milliseconds">
     22   <summary>A brief description.</summary>
     23   <details>This is a more thorough description of this histogram.</details>
     24 </histogram>
     25 
     26 <histogram name="HistogramEnum" enum="MyEnumType">
     27   <summary>This histogram sports an enum value type.</summary>
     28 </histogram>
     29 
     30 </histograms>
     31 
     32 <enums>
     33 
     34 <enum name="MyEnumType" type="int">
     35   <summary>This is an example enum type, where the values mean little.</summary>
     36   <int value="1" label="FIRST_VALUE">This is the first value.</int>
     37   <int value="2" label="SECOND_VALUE">This is the second value.</int>
     38 </enum>
     39 
     40 </enums>
     41 
     42 <fieldtrials>
     43 
     44 <fieldtrial name="BrowserType">
     45   <group name="Chrome"/>
     46   <group name="IE"/>
     47   <group name="Firefox"/>
     48   <affected-histogram name="HistogramEnum"/>
     49 </fieldtrial>
     50 
     51 </fieldtrials>
     52 
     53 </histogram-configuration>
     54 
     55 """
     56 
     57 import copy
     58 import logging
     59 import xml.dom.minidom
     60 
     61 
# Maximum number of times ExtractHistograms pushes a field trial back onto its
# reprocessing queue because one of its affected histograms is not known yet.
# Once this count is reached the missing dependency is reported as an error.
MAX_FIELDTRIAL_DEPENDENCY_DEPTH = 5
     63 
     64 
     65 class Error(Exception):
     66   pass
     67 
     68 
     69 def JoinChildNodes(tag):
     70   return ''.join([c.toxml() for c in tag.childNodes]).strip()
     71 
     72 
     73 def NormalizeAttributeValue(s):
     74   """Normalizes an attribute value (which might be wrapped over multiple lines)
     75   by replacing each whitespace sequence with a single space.
     76 
     77   Args:
     78     s: The string to normalize, e.g. '  \n a  b c\n d  '
     79 
     80   Returns:
     81     The normalized string, e.g. 'a b c d'
     82   """
     83   return ' '.join(s.split())
     84 
     85 
     86 def NormalizeAllAttributeValues(node):
     87   """Recursively normalizes all tag attribute values in the given tree.
     88 
     89   Args:
     90     node: The minidom node to be normalized.
     91 
     92   Returns:
     93     The normalized minidom node.
     94   """
     95   if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
     96     for a in node.attributes.keys():
     97       node.attributes[a].value = NormalizeAttributeValue(
     98         node.attributes[a].value)
     99 
    100   for c in node.childNodes: NormalizeAllAttributeValues(c)
    101   return node
    102 
    103 
    104 def _ExpandHistogramNameWithFieldTrial(group_name, histogram_name, fieldtrial):
    105   """Creates a new histogram name based on the field trial group.
    106 
    107   Args:
    108     group_name: The name of the field trial group. May be empty.
    109     histogram_name: The name of the histogram. May be of the form
    110       Group.BaseName or BaseName
    111     field_trial: The FieldTrial XML element.
    112 
    113   Returns:
    114     A string with the expanded histogram name.
    115 
    116   Raises:
    117     Error if the expansion can't be done.
    118   """
    119   if fieldtrial.hasAttribute('separator'):
    120     separator = fieldtrial.getAttribute('separator')
    121   else:
    122     separator = '_'
    123 
    124   if fieldtrial.hasAttribute('ordering'):
    125     ordering = fieldtrial.getAttribute('ordering')
    126   else:
    127     ordering = 'suffix'
    128   if ordering not in ['prefix', 'suffix']:
    129     logging.error('ordering needs to be prefix or suffix, value is %s' %
    130                   ordering)
    131     raise Error()
    132 
    133   if not group_name:
    134     return histogram_name
    135 
    136   if ordering == 'suffix':
    137     return histogram_name + separator + group_name
    138 
    139   # For prefixes, the group_name is inserted between the "cluster" and the
    140   # "remainder", e.g. Foo.BarHist expanded with gamma becomes Foo.gamma_BarHist.
    141   sections = histogram_name.split('.')
    142   if len(sections) <= 1:
    143     logging.error(
    144       'Prefix Field Trial expansions require histogram names which include a '
    145       'dot separator. Histogram name is %s, and Field Trial is %s' %
    146       (histogram_name, fieldtrial.getAttribute('name')))
    147     raise Error()
    148 
    149   cluster = sections[0] + '.'
    150   remainder = '.'.join(sections[1:])
    151   return cluster + group_name + separator + remainder
    152 
    153 
    154 def ExtractHistograms(filename):
    155   """Compute the histogram names and descriptions from the XML representation.
    156 
    157   Args:
    158     filename: The path to the histograms XML file.
    159 
    160   Returns:
    161     { 'histogram_name': 'histogram_description', ... }
    162 
    163   Raises:
    164     Error if the file is not well-formatted.
    165   """
    166   # Slurp in histograms.xml
    167   raw_xml = ''
    168   with open(filename, 'r') as f:
    169     raw_xml = f.read()
    170 
    171   # Parse the XML into a tree
    172   tree = xml.dom.minidom.parseString(raw_xml)
    173   NormalizeAllAttributeValues(tree)
    174 
    175   histograms = {}
    176   have_errors = False
    177 
    178   # Load the enums.
    179   enums = {}
    180   last_name = None
    181   for enum in tree.getElementsByTagName("enum"):
    182     if enum.getAttribute('type') != 'int':
    183       logging.error('Unknown enum type %s' % enum.getAttribute('type'))
    184       have_errors = True
    185       continue
    186 
    187     name = enum.getAttribute('name')
    188     if last_name is not None and name.lower() < last_name.lower():
    189       logging.error('Enums %s and %s are not in alphabetical order'
    190                     % (last_name, name))
    191       have_errors = True
    192     last_name = name
    193 
    194     if name in enums:
    195       logging.error('Duplicate enum %s' % name)
    196       have_errors = True
    197       continue
    198 
    199     last_int_value = None
    200     enum_dict = {}
    201     enum_dict['name'] = name
    202     enum_dict['values'] = {}
    203 
    204     for int_tag in enum.getElementsByTagName("int"):
    205       value_dict = {}
    206       int_value = int(int_tag.getAttribute('value'))
    207       if last_int_value is not None and int_value < last_int_value:
    208         logging.error('Enum %s int values %d and %d are not in numerical order'
    209                       % (name, last_int_value, int_value))
    210         have_errors = True
    211       last_int_value = int_value
    212       if int_value in enum_dict['values']:
    213         logging.error('Duplicate enum value %d for enum %s' % (int_value, name))
    214         have_errors = True
    215         continue
    216       value_dict['label'] = int_tag.getAttribute('label')
    217       value_dict['summary'] = JoinChildNodes(int_tag)
    218       enum_dict['values'][int_value] = value_dict
    219 
    220     summary_nodes = enum.getElementsByTagName("summary")
    221     if len(summary_nodes) > 0:
    222       enum_dict['summary'] = JoinChildNodes(summary_nodes[0])
    223 
    224     enums[name] = enum_dict
    225 
    226   # Process the histograms. The descriptions can include HTML tags.
    227   last_name = None
    228   for histogram in tree.getElementsByTagName("histogram"):
    229     name = histogram.getAttribute('name')
    230     if last_name is not None and name.lower() < last_name.lower():
    231       logging.error('Histograms %s and %s are not in alphabetical order'
    232                     % (last_name, name))
    233       have_errors = True
    234     last_name = name
    235     if name in histograms:
    236       logging.error('Duplicate histogram definition %s' % name)
    237       have_errors = True
    238       continue
    239     histograms[name] = {}
    240 
    241     # Find <summary> tag.
    242     summary_nodes = histogram.getElementsByTagName("summary")
    243     if len(summary_nodes) > 0:
    244       histograms[name]['summary'] = JoinChildNodes(summary_nodes[0])
    245     else:
    246       histograms[name]['summary'] = 'TBD'
    247 
    248     # Find <obsolete> tag.
    249     obsolete_nodes = histogram.getElementsByTagName("obsolete")
    250     if len(obsolete_nodes) > 0:
    251       reason = JoinChildNodes(obsolete_nodes[0])
    252       histograms[name]['obsolete'] = reason
    253 
    254     # Handle units.
    255     if histogram.hasAttribute('units'):
    256       histograms[name]['units'] = histogram.getAttribute('units')
    257 
    258     # Find <details> tag.
    259     details_nodes = histogram.getElementsByTagName("details")
    260     if len(details_nodes) > 0:
    261       histograms[name]['details'] = JoinChildNodes(details_nodes[0])
    262 
    263     # Handle enum types.
    264     if histogram.hasAttribute('enum'):
    265       enum_name = histogram.getAttribute('enum')
    266       if not enum_name in enums:
    267         logging.error('Unknown enum %s in histogram %s' % (enum_name, name))
    268         have_errors = True
    269       else:
    270         histograms[name]['enum'] = enums[enum_name]
    271 
    272   # Process the field trials and compute the combinations with their affected
    273   # histograms.
    274   last_name = None
    275   for fieldtrial in tree.getElementsByTagName("fieldtrial"):
    276     name = fieldtrial.getAttribute('name')
    277     if last_name is not None and name.lower() < last_name.lower():
    278       logging.error('Field trials %s and %s are not in alphabetical order'
    279                     % (last_name, name))
    280       have_errors = True
    281     last_name = name
    282   # Field trials can depend on other field trials, so we need to be careful.
    283   # Make a temporary copy of the list of field trials to use as a queue.
    284   # Field trials whose dependencies have not yet been processed will get
    285   # relegated to the back of the queue to be processed later.
    286   reprocess_queue = []
    287   def GenerateFieldTrials():
    288     for f in tree.getElementsByTagName("fieldtrial"): yield 0, f
    289     for r, f in reprocess_queue: yield r, f
    290   for reprocess_count, fieldtrial in GenerateFieldTrials():
    291     # Check dependencies first
    292     dependencies_valid = True
    293     affected_histograms = fieldtrial.getElementsByTagName('affected-histogram')
    294     for affected_histogram in affected_histograms:
    295       histogram_name = affected_histogram.getAttribute('name')
    296       if not histogram_name in histograms:
    297         # Base histogram is missing
    298         dependencies_valid = False
    299         missing_dependency = histogram_name
    300         break
    301     if not dependencies_valid:
    302       if reprocess_count < MAX_FIELDTRIAL_DEPENDENCY_DEPTH:
    303         reprocess_queue.append( (reprocess_count + 1, fieldtrial) )
    304         continue
    305       else:
    306         logging.error('Field trial %s is missing its dependency %s'
    307                       % (fieldtrial.getAttribute('name'),
    308                          missing_dependency))
    309         have_errors = True
    310         continue
    311 
    312     name = fieldtrial.getAttribute('name')
    313     groups = fieldtrial.getElementsByTagName('group')
    314     group_labels = {}
    315     for group in groups:
    316       group_labels[group.getAttribute('name')] = group.getAttribute('label')
    317     last_histogram_name = None
    318     for affected_histogram in affected_histograms:
    319       histogram_name = affected_histogram.getAttribute('name')
    320       if (last_histogram_name is not None
    321           and histogram_name.lower() < last_histogram_name.lower()):
    322         logging.error('Affected histograms %s and %s of field trial %s are not '
    323                       'in alphabetical order'
    324                       % (last_histogram_name, histogram_name, name))
    325         have_errors = True
    326       last_histogram_name = histogram_name
    327       base_description = histograms[histogram_name]
    328       with_groups = affected_histogram.getElementsByTagName('with-group')
    329       if len(with_groups) > 0:
    330         histogram_groups = with_groups
    331       else:
    332         histogram_groups = groups
    333       for group in histogram_groups:
    334         group_name = group.getAttribute('name')
    335         try:
    336           new_histogram_name = _ExpandHistogramNameWithFieldTrial(
    337             group_name, histogram_name, fieldtrial)
    338           if new_histogram_name != histogram_name:
    339             histograms[new_histogram_name] = copy.deepcopy(
    340               histograms[histogram_name])
    341 
    342           group_label = group_labels.get(group_name, '')
    343 
    344           if not 'fieldtrial_groups' in histograms[new_histogram_name]:
    345             histograms[new_histogram_name]['fieldtrial_groups'] = []
    346           histograms[new_histogram_name]['fieldtrial_groups'].append(group_name)
    347 
    348           if not 'fieldtrial_names' in histograms[new_histogram_name]:
    349             histograms[new_histogram_name]['fieldtrial_names'] = []
    350           histograms[new_histogram_name]['fieldtrial_names'].append(name)
    351 
    352           if not 'fieldtrial_labels' in histograms[new_histogram_name]:
    353             histograms[new_histogram_name]['fieldtrial_labels'] = []
    354           histograms[new_histogram_name]['fieldtrial_labels'].append(
    355             group_label)
    356 
    357         except Error:
    358           have_errors = True
    359 
    360   if have_errors:
    361     logging.error('Error parsing %s' % filename)
    362     raise Error()
    363 
    364   return histograms
    365 
    366 
    367 def ExtractNames(histograms):
    368   return sorted(histograms.keys())
    369