Home | History | Annotate | Download | only in histograms
      1 # Copyright 2013 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 """Extract histogram names from the description XML file.
      6 
      7 For more information on the format of the XML file, which is self-documenting,
      8 see histograms.xml; however, here is a simple example to get you started. The
      9 XML below will generate the following five histograms:
     10 
     11     HistogramTime
     12     HistogramEnum
     13     HistogramEnum_Chrome
     14     HistogramEnum_IE
     15     HistogramEnum_Firefox
     16 
     17 <histogram-configuration>
     18 
     19 <histograms>
     20 
     21 <histogram name="HistogramTime" units="milliseconds">
     22   <summary>A brief description.</summary>
     23   <details>This is a more thorough description of this histogram.</details>
     24 </histogram>
     25 
     26 <histogram name="HistogramEnum" enum="MyEnumType">
     27   <summary>This histogram sports an enum value type.</summary>
     28 </histogram>
     29 
     30 </histograms>
     31 
     32 <enums>
     33 
     34 <enum name="MyEnumType" type="int">
     35   <summary>This is an example enum type, where the values mean little.</summary>
     36   <int value="1" label="FIRST_VALUE">This is the first value.</int>
     37   <int value="2" label="SECOND_VALUE">This is the second value.</int>
     38 </enum>
     39 
     40 </enums>
     41 
     42 <fieldtrials>
     43 
     44 <fieldtrial name="BrowserType">
     45   <group name="Chrome"/>
     46   <group name="IE"/>
     47   <group name="Firefox"/>
     48   <affected-histogram name="HistogramEnum"/>
     49 </fieldtrial>
     50 
     51 </fieldtrials>
     52 
     53 </histogram-configuration>
     54 
     55 """
     56 
     57 import copy
     58 import logging
     59 import xml.dom.minidom
     60 
     61 
# Maximum number of times _UpdateHistogramsWithFieldTrialInformation re-queues
# a field trial whose affected histogram is not defined yet (it may be created
# later by another field trial) before reporting the missing dependency.
MAX_FIELDTRIAL_DEPENDENCY_DEPTH = 5
     63 
     64 
     65 class Error(Exception):
     66   pass
     67 
     68 
     69 def _JoinChildNodes(tag):
     70   """Join child nodes into a single text.
     71 
     72   Applicable to leafs like 'summary' and 'detail'.
     73 
     74   Args:
     75     tag: parent node
     76 
     77   Returns:
     78     a string with concatenated nodes' text representation.
     79   """
     80   return ''.join(c.toxml() for c in tag.childNodes).strip()
     81 
     82 
     83 def _NormalizeString(s):
     84   """Normalizes a string (possibly of multiple lines) by replacing each
     85   whitespace sequence with a single space.
     86 
     87   Args:
     88     s: The string to normalize, e.g. '  \n a  b c\n d  '
     89 
     90   Returns:
     91     The normalized string, e.g. 'a b c d'
     92   """
     93   return ' '.join(s.split())
     94 
     95 
     96 def _NormalizeAllAttributeValues(node):
     97   """Recursively normalizes all tag attribute values in the given tree.
     98 
     99   Args:
    100     node: The minidom node to be normalized.
    101 
    102   Returns:
    103     The normalized minidom node.
    104   """
    105   if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
    106     for a in node.attributes.keys():
    107       node.attributes[a].value = _NormalizeString(node.attributes[a].value)
    108 
    109   for c in node.childNodes: _NormalizeAllAttributeValues(c)
    110   return node
    111 
    112 
    113 def _ExpandHistogramNameWithFieldTrial(group_name, histogram_name, fieldtrial):
    114   """Creates a new histogram name based on the field trial group.
    115 
    116   Args:
    117     group_name: The name of the field trial group. May be empty.
    118     histogram_name: The name of the histogram. May be of the form
    119       Group.BaseName or BaseName
    120     field_trial: The FieldTrial XML element.
    121 
    122   Returns:
    123     A string with the expanded histogram name.
    124 
    125   Raises:
    126     Error if the expansion can't be done.
    127   """
    128   if fieldtrial.hasAttribute('separator'):
    129     separator = fieldtrial.getAttribute('separator')
    130   else:
    131     separator = '_'
    132 
    133   if fieldtrial.hasAttribute('ordering'):
    134     ordering = fieldtrial.getAttribute('ordering')
    135   else:
    136     ordering = 'suffix'
    137   if ordering not in ['prefix', 'suffix']:
    138     logging.error('ordering needs to be prefix or suffix, value is %s' %
    139                   ordering)
    140     raise Error()
    141 
    142   if not group_name:
    143     return histogram_name
    144 
    145   if ordering == 'suffix':
    146     return histogram_name + separator + group_name
    147 
    148   # For prefixes, the group_name is inserted between the "cluster" and the
    149   # "remainder", e.g. Foo.BarHist expanded with gamma becomes Foo.gamma_BarHist.
    150   sections = histogram_name.split('.')
    151   if len(sections) <= 1:
    152     logging.error(
    153       'Prefix Field Trial expansions require histogram names which include a '
    154       'dot separator. Histogram name is %s, and Field Trial is %s' %
    155       (histogram_name, fieldtrial.getAttribute('name')))
    156     raise Error()
    157 
    158   cluster = sections[0] + '.'
    159   remainder = '.'.join(sections[1:])
    160   return cluster + group_name + separator + remainder
    161 
    162 
    163 def _ExtractEnumsFromXmlTree(tree):
    164   """Extract all <enum> nodes in the tree into a dictionary."""
    165 
    166   enums = {}
    167   have_errors = False
    168 
    169   last_name = None
    170   for enum in tree.getElementsByTagName("enum"):
    171     if enum.getAttribute('type') != 'int':
    172       logging.error('Unknown enum type %s' % enum.getAttribute('type'))
    173       have_errors = True
    174       continue
    175 
    176     name = enum.getAttribute('name')
    177     if last_name is not None and name.lower() < last_name.lower():
    178       logging.error('Enums %s and %s are not in alphabetical order'
    179                     % (last_name, name))
    180       have_errors = True
    181     last_name = name
    182 
    183     if name in enums:
    184       logging.error('Duplicate enum %s' % name)
    185       have_errors = True
    186       continue
    187 
    188     last_int_value = None
    189     enum_dict = {}
    190     enum_dict['name'] = name
    191     enum_dict['values'] = {}
    192 
    193     for int_tag in enum.getElementsByTagName("int"):
    194       value_dict = {}
    195       int_value = int(int_tag.getAttribute('value'))
    196       if last_int_value is not None and int_value < last_int_value:
    197         logging.error('Enum %s int values %d and %d are not in numerical order'
    198                       % (name, last_int_value, int_value))
    199         have_errors = True
    200       last_int_value = int_value
    201       if int_value in enum_dict['values']:
    202         logging.error('Duplicate enum value %d for enum %s' % (int_value, name))
    203         have_errors = True
    204         continue
    205       value_dict['label'] = int_tag.getAttribute('label')
    206       value_dict['summary'] = _JoinChildNodes(int_tag)
    207       enum_dict['values'][int_value] = value_dict
    208 
    209     summary_nodes = enum.getElementsByTagName("summary")
    210     if len(summary_nodes) > 0:
    211       enum_dict['summary'] = _NormalizeString(_JoinChildNodes(summary_nodes[0]))
    212 
    213     enums[name] = enum_dict
    214 
    215   return enums, have_errors
    216 
    217 
    218 def _ExtractHistogramsFromXmlTree(tree, enums):
    219   """Extract all <histogram> nodes in the tree into a dictionary."""
    220 
    221   # Process the histograms. The descriptions can include HTML tags.
    222   histograms = {}
    223   have_errors = False
    224   last_name = None
    225   for histogram in tree.getElementsByTagName("histogram"):
    226     name = histogram.getAttribute('name')
    227     if last_name is not None and name.lower() < last_name.lower():
    228       logging.error('Histograms %s and %s are not in alphabetical order'
    229                     % (last_name, name))
    230       have_errors = True
    231     last_name = name
    232     if name in histograms:
    233       logging.error('Duplicate histogram definition %s' % name)
    234       have_errors = True
    235       continue
    236     histograms[name] = histogram_entry = {}
    237 
    238     # Find <summary> tag.
    239     summary_nodes = histogram.getElementsByTagName("summary")
    240     if len(summary_nodes) > 0:
    241       histogram_entry['summary'] = _NormalizeString(
    242           _JoinChildNodes(summary_nodes[0]))
    243     else:
    244       histogram_entry['summary'] = 'TBD'
    245 
    246     # Find <obsolete> tag.
    247     obsolete_nodes = histogram.getElementsByTagName("obsolete")
    248     if len(obsolete_nodes) > 0:
    249       reason = _JoinChildNodes(obsolete_nodes[0])
    250       histogram_entry['obsolete'] = reason
    251 
    252     # Handle units.
    253     if histogram.hasAttribute('units'):
    254       histogram_entry['units'] = histogram.getAttribute('units')
    255 
    256     # Find <details> tag.
    257     details_nodes = histogram.getElementsByTagName("details")
    258     if len(details_nodes) > 0:
    259       histogram_entry['details'] = _NormalizeString(
    260           _JoinChildNodes(details_nodes[0]))
    261 
    262     # Handle enum types.
    263     if histogram.hasAttribute('enum'):
    264       enum_name = histogram.getAttribute('enum')
    265       if not enum_name in enums:
    266         logging.error('Unknown enum %s in histogram %s' % (enum_name, name))
    267         have_errors = True
    268       else:
    269         histogram_entry['enum'] = enums[enum_name]
    270 
    271   return histograms, have_errors
    272 
    273 
    274 def _UpdateHistogramsWithFieldTrialInformation(tree, histograms):
    275   """Process field trials' tags and combine with affected histograms.
    276 
    277   The histograms dictionary will be updated in-place by adding new histograms
    278   created by combining histograms themselves with field trials targetting these
    279   histograms.
    280 
    281   Args:
    282     tree: XML dom tree.
    283     histograms: a dictinary of histograms previously extracted from the tree;
    284 
    285   Returns:
    286     True if any errors were found.
    287   """
    288   have_errors = False
    289 
    290   # Verify order of fieldtrial fields first.
    291   last_name = None
    292   for fieldtrial in tree.getElementsByTagName("fieldtrial"):
    293     name = fieldtrial.getAttribute('name')
    294     if last_name is not None and name.lower() < last_name.lower():
    295       logging.error('Field trials %s and %s are not in alphabetical order'
    296                     % (last_name, name))
    297       have_errors = True
    298     last_name = name
    299 
    300   # Field trials can depend on other field trials, so we need to be careful.
    301   # Make a temporary copy of the list of field trials to use as a queue.
    302   # Field trials whose dependencies have not yet been processed will get
    303   # relegated to the back of the queue to be processed later.
    304   reprocess_queue = []
    305   def GenerateFieldTrials():
    306     for f in tree.getElementsByTagName("fieldtrial"): yield 0, f
    307     for r, f in reprocess_queue: yield r, f
    308 
    309   for reprocess_count, fieldtrial in GenerateFieldTrials():
    310     # Check dependencies first
    311     dependencies_valid = True
    312     affected_histograms = fieldtrial.getElementsByTagName('affected-histogram')
    313     for affected_histogram in affected_histograms:
    314       histogram_name = affected_histogram.getAttribute('name')
    315       if not histogram_name in histograms:
    316         # Base histogram is missing
    317         dependencies_valid = False
    318         missing_dependency = histogram_name
    319         break
    320     if not dependencies_valid:
    321       if reprocess_count < MAX_FIELDTRIAL_DEPENDENCY_DEPTH:
    322         reprocess_queue.append( (reprocess_count + 1, fieldtrial) )
    323         continue
    324       else:
    325         logging.error('Field trial %s is missing its dependency %s'
    326                       % (fieldtrial.getAttribute('name'),
    327                          missing_dependency))
    328         have_errors = True
    329         continue
    330 
    331     name = fieldtrial.getAttribute('name')
    332     groups = fieldtrial.getElementsByTagName('group')
    333     group_labels = {}
    334     for group in groups:
    335       group_labels[group.getAttribute('name')] = group.getAttribute('label')
    336 
    337     last_histogram_name = None
    338     for affected_histogram in affected_histograms:
    339       histogram_name = affected_histogram.getAttribute('name')
    340       if (last_histogram_name is not None
    341           and histogram_name.lower() < last_histogram_name.lower()):
    342         logging.error('Affected histograms %s and %s of field trial %s are not '
    343                       'in alphabetical order'
    344                       % (last_histogram_name, histogram_name, name))
    345         have_errors = True
    346       last_histogram_name = histogram_name
    347       base_description = histograms[histogram_name]
    348       with_groups = affected_histogram.getElementsByTagName('with-group')
    349       if len(with_groups) > 0:
    350         histogram_groups = with_groups
    351       else:
    352         histogram_groups = groups
    353       for group in histogram_groups:
    354         group_name = group.getAttribute('name')
    355         try:
    356           new_histogram_name = _ExpandHistogramNameWithFieldTrial(
    357             group_name, histogram_name, fieldtrial)
    358           if new_histogram_name != histogram_name:
    359             histograms[new_histogram_name] = copy.deepcopy(
    360               histograms[histogram_name])
    361 
    362           group_label = group_labels.get(group_name, '')
    363 
    364           if not 'fieldtrial_groups' in histograms[new_histogram_name]:
    365             histograms[new_histogram_name]['fieldtrial_groups'] = []
    366           histograms[new_histogram_name]['fieldtrial_groups'].append(group_name)
    367 
    368           if not 'fieldtrial_names' in histograms[new_histogram_name]:
    369             histograms[new_histogram_name]['fieldtrial_names'] = []
    370           histograms[new_histogram_name]['fieldtrial_names'].append(name)
    371 
    372           if not 'fieldtrial_labels' in histograms[new_histogram_name]:
    373             histograms[new_histogram_name]['fieldtrial_labels'] = []
    374           histograms[new_histogram_name]['fieldtrial_labels'].append(
    375             group_label)
    376 
    377         except Error:
    378           have_errors = True
    379 
    380   return have_errors
    381 
    382 
    383 def ExtractHistogramsFromFile(file_handle):
    384   """Compute the histogram names and descriptions from the XML representation.
    385 
    386   Args:
    387     file_handle: A file or file-like with XML content.
    388 
    389   Returns:
    390     a tuple of (histograms, status) where histograms is a dictionary mapping
    391     histogram names to dictionaries containing histogram descriptions and status
    392     is a boolean indicating if errros were encoutered in processing.
    393   """
    394   tree = xml.dom.minidom.parse(file_handle)
    395   _NormalizeAllAttributeValues(tree)
    396 
    397   enums, enum_errors = _ExtractEnumsFromXmlTree(tree)
    398   histograms, histogram_errors = _ExtractHistogramsFromXmlTree(tree, enums)
    399   update_errors = _UpdateHistogramsWithFieldTrialInformation(tree, histograms)
    400 
    401   return histograms, enum_errors or histogram_errors or update_errors
    402 
    403 
    404 def ExtractHistograms(filename):
    405   """Load histogram definitions from a disk file.
    406   Args:
    407     filename: a file path to load data from.
    408 
    409   Raises:
    410     Error if the file is not well-formatted.
    411   """
    412   with open(filename, 'r') as f:
    413     histograms, had_errors = ExtractHistogramsFromFile(f)
    414     if had_errors:
    415       logging.error('Error parsing %s' % filename)
    416       raise Error()
    417     return histograms
    418 
    419 
    420 def ExtractNames(histograms):
    421   return sorted(histograms.keys())
    422