# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Extract histogram names from the description XML file.

For more information on the format of the XML file, which is self-documenting,
see histograms.xml; however, here is a simple example to get you started. The
XML below will generate the following five histograms:

    HistogramTime
    HistogramEnum
    HistogramEnum_Chrome
    HistogramEnum_IE
    HistogramEnum_Firefox

Note that <enum>, <histogram> and <fieldtrial> entries (and the
<affected-histogram> entries inside a field trial) must be listed in
case-insensitive alphabetical order, and every <enum> must carry type="int";
anything else is reported as an error.

<histogram-configuration>

<histograms>

<histogram name="HistogramEnum" enum="MyEnumType">
  <summary>This histogram sports an enum value type.</summary>
</histogram>

<histogram name="HistogramTime" units="milliseconds">
  <summary>A brief description.</summary>
  <details>This is a more thorough description of this histogram.</details>
</histogram>

</histograms>

<enums>

<enum name="MyEnumType" type="int">
  <summary>This is an example enum type, where the values mean little.</summary>
  <int value="1" label="FIRST_VALUE">This is the first value.</int>
  <int value="2" label="SECOND_VALUE">This is the second value.</int>
</enum>

</enums>

<fieldtrials>

<fieldtrial name="BrowserType">
  <group name="Chrome"/>
  <group name="IE"/>
  <group name="Firefox"/>
  <affected-histogram name="HistogramEnum"/>
</fieldtrial>

</fieldtrials>

</histogram-configuration>

"""

import copy
import logging
import xml.dom.minidom


# Maximum number of times a field trial is pushed back onto the reprocess
# queue while waiting for the histograms it depends on to be generated.
MAX_FIELDTRIAL_DEPENDENCY_DEPTH = 5


class Error(Exception):
  """Raised when the histogram description XML cannot be processed."""
  pass


def JoinChildNodes(tag):
  """Serializes all children of a node into one string.

  Args:
    tag: The minidom node whose children should be serialized.

  Returns:
    The concatenated XML of every child node, with leading and trailing
    whitespace stripped.
  """
  return ''.join(c.toxml() for c in tag.childNodes).strip()


def NormalizeAttributeValue(s):
  """Normalizes an attribute value (which might be wrapped over multiple lines)
  by replacing each whitespace sequence with a single space.

  Args:
    s: The string to normalize, e.g. '  \\n a  b c\\n d  '

  Returns:
    The normalized string, e.g. 'a b c d'
  """
  return ' '.join(s.split())


def NormalizeAllAttributeValues(node):
  """Recursively normalizes all tag attribute values in the given tree.

  Args:
    node: The minidom node to be normalized.

  Returns:
    The normalized minidom node.
  """
  if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
    for a in node.attributes.keys():
      node.attributes[a].value = NormalizeAttributeValue(
          node.attributes[a].value)

  # Non-element nodes (e.g. the document itself) still have children that
  # need to be visited.
  for c in node.childNodes:
    NormalizeAllAttributeValues(c)
  return node


def _ExpandHistogramNameWithFieldTrial(group_name, histogram_name, fieldtrial):
  """Creates a new histogram name based on the field trial group.

  Args:
    group_name: The name of the field trial group. May be empty.
    histogram_name: The name of the histogram. May be of the form
      Group.BaseName or BaseName
    fieldtrial: The FieldTrial XML element.

  Returns:
    A string with the expanded histogram name.

  Raises:
    Error if the expansion can't be done.
  """
  if fieldtrial.hasAttribute('separator'):
    separator = fieldtrial.getAttribute('separator')
  else:
    separator = '_'

  if fieldtrial.hasAttribute('ordering'):
    ordering = fieldtrial.getAttribute('ordering')
  else:
    ordering = 'suffix'
  # The ordering is validated even when the group name is empty.
  if ordering not in ('prefix', 'suffix'):
    logging.error('ordering needs to be prefix or suffix, value is %s',
                  ordering)
    raise Error()

  if not group_name:
    return histogram_name

  if ordering == 'suffix':
    return histogram_name + separator + group_name

  # For prefixes, the group_name is inserted between the "cluster" and the
  # "remainder", e.g. Foo.BarHist expanded with gamma becomes Foo.gamma_BarHist.
  cluster, dot, remainder = histogram_name.partition('.')
  if not dot:
    logging.error(
        'Prefix Field Trial expansions require histogram names which include a '
        'dot separator. Histogram name is %s, and Field Trial is %s',
        histogram_name, fieldtrial.getAttribute('name'))
    raise Error()

  return cluster + dot + group_name + separator + remainder


def _ExtractEnumsFromXmlTree(tree):
  """Collects every <enum> element of the tree.

  Args:
    tree: The parsed (and attribute-normalized) minidom tree.

  Returns:
    A tuple (enums, have_errors). enums maps each enum name to a dict with the
    keys 'name', 'values' (int value -> {'label', 'summary'}) and, when a
    <summary> child exists, 'summary'. have_errors is True if any problem was
    logged.
  """
  enums = {}
  have_errors = False
  last_name = None
  for enum in tree.getElementsByTagName('enum'):
    if enum.getAttribute('type') != 'int':
      logging.error('Unknown enum type %s', enum.getAttribute('type'))
      have_errors = True
      continue

    name = enum.getAttribute('name')
    if last_name is not None and name.lower() < last_name.lower():
      logging.error('Enums %s and %s are not in alphabetical order',
                    last_name, name)
      have_errors = True
    last_name = name

    if name in enums:
      logging.error('Duplicate enum %s', name)
      have_errors = True
      continue

    last_int_value = None
    enum_dict = {'name': name, 'values': {}}

    for int_tag in enum.getElementsByTagName('int'):
      raw_value = int_tag.getAttribute('value')
      try:
        int_value = int(raw_value)
      except ValueError:
        # Report malformed values like every other problem in the file instead
        # of letting a bare ValueError escape to the caller.
        logging.error('Enum %s has non-integer value "%s"', name, raw_value)
        have_errors = True
        continue
      if last_int_value is not None and int_value < last_int_value:
        logging.error('Enum %s int values %d and %d are not in numerical order',
                      name, last_int_value, int_value)
        have_errors = True
      last_int_value = int_value
      if int_value in enum_dict['values']:
        logging.error('Duplicate enum value %d for enum %s', int_value, name)
        have_errors = True
        continue
      enum_dict['values'][int_value] = {
          'label': int_tag.getAttribute('label'),
          'summary': JoinChildNodes(int_tag),
      }

    summary_nodes = enum.getElementsByTagName('summary')
    if summary_nodes:
      enum_dict['summary'] = JoinChildNodes(summary_nodes[0])

    enums[name] = enum_dict

  return enums, have_errors


def _ExtractHistogramsFromXmlTree(tree, enums):
  """Collects every <histogram> element of the tree.

  Args:
    tree: The parsed (and attribute-normalized) minidom tree.
    enums: The enum dictionary produced by _ExtractEnumsFromXmlTree.

  Returns:
    A tuple (histograms, have_errors). histograms maps each histogram name to
    its description dict; have_errors is True if any problem was logged.
  """
  histograms = {}
  have_errors = False
  last_name = None
  for histogram in tree.getElementsByTagName('histogram'):
    name = histogram.getAttribute('name')
    if last_name is not None and name.lower() < last_name.lower():
      logging.error('Histograms %s and %s are not in alphabetical order',
                    last_name, name)
      have_errors = True
    last_name = name
    if name in histograms:
      logging.error('Duplicate histogram definition %s', name)
      have_errors = True
      continue
    description = histograms[name] = {}

    # Find <summary> tag. The descriptions can include HTML tags.
    summary_nodes = histogram.getElementsByTagName('summary')
    if summary_nodes:
      description['summary'] = JoinChildNodes(summary_nodes[0])
    else:
      description['summary'] = 'TBD'

    # Find <obsolete> tag.
    obsolete_nodes = histogram.getElementsByTagName('obsolete')
    if obsolete_nodes:
      description['obsolete'] = JoinChildNodes(obsolete_nodes[0])

    # Handle units.
    if histogram.hasAttribute('units'):
      description['units'] = histogram.getAttribute('units')

    # Find <details> tag.
    details_nodes = histogram.getElementsByTagName('details')
    if details_nodes:
      description['details'] = JoinChildNodes(details_nodes[0])

    # Handle enum types.
    if histogram.hasAttribute('enum'):
      enum_name = histogram.getAttribute('enum')
      if enum_name not in enums:
        logging.error('Unknown enum %s in histogram %s', enum_name, name)
        have_errors = True
      else:
        description['enum'] = enums[enum_name]

  return histograms, have_errors


def _UpdateHistogramsWithFieldTrialInformation(tree, histograms):
  """Expands the histograms with the combinations defined by field trials.

  Args:
    tree: The parsed (and attribute-normalized) minidom tree.
    histograms: The histogram dictionary to update in place. Expanded
      histograms are added as deep copies of their base histogram, annotated
      with 'fieldtrial_groups', 'fieldtrial_names' and 'fieldtrial_labels'.

  Returns:
    True if any problem was logged, False otherwise.
  """
  have_errors = False

  # Verify order of fieldtrial fields first.
  last_name = None
  for fieldtrial in tree.getElementsByTagName('fieldtrial'):
    name = fieldtrial.getAttribute('name')
    if last_name is not None and name.lower() < last_name.lower():
      logging.error('Field trials %s and %s are not in alphabetical order',
                    last_name, name)
      have_errors = True
    last_name = name

  # Field trials can depend on other field trials, so we need to be careful.
  # Field trials whose dependencies have not yet been processed get relegated
  # to the back of a queue to be processed later. The generator deliberately
  # iterates the queue while it is still being appended to, so that re-queued
  # entries are picked up in FIFO order.
  reprocess_queue = []
  def GenerateFieldTrials():
    for f in tree.getElementsByTagName('fieldtrial'):
      yield 0, f
    for r, f in reprocess_queue:
      yield r, f

  for reprocess_count, fieldtrial in GenerateFieldTrials():
    # Check dependencies first: every affected histogram must already exist.
    affected_histograms = fieldtrial.getElementsByTagName('affected-histogram')
    missing_dependency = None
    for affected_histogram in affected_histograms:
      histogram_name = affected_histogram.getAttribute('name')
      if histogram_name not in histograms:
        missing_dependency = histogram_name
        break
    if missing_dependency is not None:
      if reprocess_count < MAX_FIELDTRIAL_DEPENDENCY_DEPTH:
        reprocess_queue.append((reprocess_count + 1, fieldtrial))
      else:
        logging.error('Field trial %s is missing its dependency %s',
                      fieldtrial.getAttribute('name'), missing_dependency)
        have_errors = True
      continue

    name = fieldtrial.getAttribute('name')
    groups = fieldtrial.getElementsByTagName('group')
    group_labels = {}
    for group in groups:
      group_labels[group.getAttribute('name')] = group.getAttribute('label')

    last_histogram_name = None
    for affected_histogram in affected_histograms:
      histogram_name = affected_histogram.getAttribute('name')
      if (last_histogram_name is not None
          and histogram_name.lower() < last_histogram_name.lower()):
        logging.error('Affected histograms %s and %s of field trial %s are not '
                      'in alphabetical order',
                      last_histogram_name, histogram_name, name)
        have_errors = True
      last_histogram_name = histogram_name
      base_description = histograms[histogram_name]

      # An affected histogram may restrict the expansion to specific groups.
      with_groups = affected_histogram.getElementsByTagName('with-group')
      histogram_groups = with_groups if with_groups else groups

      for group in histogram_groups:
        group_name = group.getAttribute('name')
        try:
          new_histogram_name = _ExpandHistogramNameWithFieldTrial(
              group_name, histogram_name, fieldtrial)
        except Error:
          have_errors = True
          continue

        # An empty group name leaves the name unchanged; in that case the base
        # histogram itself is annotated rather than copied.
        if new_histogram_name != histogram_name:
          histograms[new_histogram_name] = copy.deepcopy(base_description)

        expanded = histograms[new_histogram_name]
        expanded.setdefault('fieldtrial_groups', []).append(group_name)
        expanded.setdefault('fieldtrial_names', []).append(name)
        expanded.setdefault('fieldtrial_labels', []).append(
            group_labels.get(group_name, ''))

  return have_errors


def ExtractHistograms(filename):
  """Compute the histogram names and descriptions from the XML representation.

  Args:
    filename: The path to the histograms XML file.

  Returns:
    A dictionary mapping each histogram name (including names expanded from
    field trials) to a description dictionary. A description always has a
    'summary' and may have 'obsolete', 'units', 'details', 'enum',
    'fieldtrial_groups', 'fieldtrial_names' and 'fieldtrial_labels'.

  Raises:
    Error if the file is not well-formatted.
  """
  # Slurp in histograms.xml. Read raw bytes so that the XML parser, not the
  # platform's default text encoding, decides how to decode the document.
  with open(filename, 'rb') as f:
    raw_xml = f.read()

  # Parse the XML into a tree.
  tree = xml.dom.minidom.parseString(raw_xml)
  NormalizeAllAttributeValues(tree)

  # All three passes always run so that every problem in the file is logged
  # before the error is raised.
  enums, enum_errors = _ExtractEnumsFromXmlTree(tree)
  histograms, histogram_errors = _ExtractHistogramsFromXmlTree(tree, enums)
  fieldtrial_errors = _UpdateHistogramsWithFieldTrialInformation(
      tree, histograms)

  if enum_errors or histogram_errors or fieldtrial_errors:
    logging.error('Error parsing %s', filename)
    raise Error()

  return histograms


def ExtractNames(histograms):
  """Returns the sorted list of histogram names.

  Args:
    histograms: A dictionary as returned by ExtractHistograms.

  Returns:
    A sorted list of the dictionary's keys.
  """
  return sorted(histograms.keys())