# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Extract histogram names from the description XML file.

For more information on the format of the XML file, which is self-documenting,
see histograms.xml; however, here is a simple example to get you started. The
XML below will generate the following five histograms:

    HistogramTime
    HistogramEnum
    HistogramEnum_Chrome
    HistogramEnum_IE
    HistogramEnum_Firefox

<histogram-configuration>

<histograms>

<histogram name="HistogramEnum" enum="MyEnumType">
  <summary>This histogram sports an enum value type.</summary>
</histogram>

<histogram name="HistogramTime" units="milliseconds">
  <summary>A brief description.</summary>
  <details>This is a more thorough description of this histogram.</details>
</histogram>

</histograms>

<enums>

<enum name="MyEnumType" type="int">
  <summary>This is an example enum type, where the values mean little.</summary>
  <int value="1" label="FIRST_VALUE">This is the first value.</int>
  <int value="2" label="SECOND_VALUE">This is the second value.</int>
</enum>

</enums>

<fieldtrials>

<fieldtrial name="BrowserType">
  <group name="Chrome"/>
  <group name="IE"/>
  <group name="Firefox"/>
  <affected-histogram name="HistogramEnum"/>
</fieldtrial>

</fieldtrials>

</histogram-configuration>

Validation rules enforced by this module (violations are logged and reported
as errors):
  * <histogram>, <enum> and <fieldtrial> elements, as well as the
    <affected-histogram> elements of one field trial, must be listed in
    case-insensitive alphabetical order of their names.
  * Every <enum> must carry type="int", and its <int> values must be unique
    integers listed in non-decreasing order.
"""

import copy
import logging
import xml.dom.minidom


# Number of times a field trial whose affected histograms do not exist yet is
# pushed back onto the processing queue before it is reported as an error.
MAX_FIELDTRIAL_DEPENDENCY_DEPTH = 5


class Error(Exception):
    """Raised when the histogram description cannot be processed."""


def _IsOutOfOrder(previous_name, name):
    """Tell whether |name| sorts before |previous_name|, ignoring case.

    Args:
      previous_name: The name seen just before |name|, or None if |name| is
          the first one.
      name: The name being checked.

    Returns:
      True if there is a previous name and |name| compares lower than it once
      both are lower-cased.
    """
    return previous_name is not None and name.lower() < previous_name.lower()


def _JoinChildNodes(tag):
    """Join child nodes into a single text.

    Applicable to leaves like 'summary' and 'details'.

    Args:
      tag: parent node

    Returns:
      a string with concatenated nodes' text representation.
    """
    return ''.join(c.toxml() for c in tag.childNodes).strip()


def _NormalizeString(s):
    """Normalizes a string (possibly of multiple lines) by replacing each
    whitespace sequence with a single space.

    Args:
      s: The string to normalize, e.g. '  \n a  b c\n d  '

    Returns:
      The normalized string, e.g. 'a b c d'
    """
    return ' '.join(s.split())


def _NormalizeAllAttributeValues(node):
    """Recursively normalizes all tag attribute values in the given tree.

    Args:
      node: The minidom node to be normalized.

    Returns:
      The normalized minidom node.
    """
    if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
        for a in node.attributes.keys():
            attribute = node.attributes[a]
            attribute.value = _NormalizeString(attribute.value)

    for c in node.childNodes:
        _NormalizeAllAttributeValues(c)
    return node


def _ExpandHistogramNameWithFieldTrial(group_name, histogram_name, fieldtrial):
    """Creates a new histogram name based on the field trial group.

    Args:
      group_name: The name of the field trial group. May be empty.
      histogram_name: The name of the histogram. May be of the form
          Group.BaseName or BaseName
      fieldtrial: The FieldTrial XML element.

    Returns:
      A string with the expanded histogram name.

    Raises:
      Error if the expansion can't be done.
    """
    # hasAttribute() is used (rather than testing for an empty value) so that
    # an explicitly empty separator="" is honored.
    if fieldtrial.hasAttribute('separator'):
        separator = fieldtrial.getAttribute('separator')
    else:
        separator = '_'

    if fieldtrial.hasAttribute('ordering'):
        ordering = fieldtrial.getAttribute('ordering')
    else:
        ordering = 'suffix'
    if ordering not in ('prefix', 'suffix'):
        message = ('ordering needs to be prefix or suffix, value is %s' %
                   ordering)
        logging.error(message)
        raise Error(message)

    if not group_name:
        return histogram_name

    if ordering == 'suffix':
        return histogram_name + separator + group_name

    # For prefixes, the group_name is inserted between the "cluster" and the
    # "remainder", e.g. Foo.BarHist expanded with gamma becomes
    # Foo.gamma_BarHist.
    cluster, dot, remainder = histogram_name.partition('.')
    if not dot:
        message = (
            'Prefix Field Trial expansions require histogram names which '
            'include a dot separator. Histogram name is %s, and Field Trial '
            'is %s' % (histogram_name, fieldtrial.getAttribute('name')))
        logging.error(message)
        raise Error(message)

    return cluster + dot + group_name + separator + remainder


def _ExtractEnumsFromXmlTree(tree):
    """Extract all <enum> nodes in the tree into a dictionary.

    Args:
      tree: XML dom tree.

    Returns:
      A tuple (enums, have_errors). |enums| maps each enum name to a
      dictionary with the keys 'name', 'values' (int value -> dictionary with
      'label' and 'summary') and, when the enum has a <summary> child,
      'summary'. |have_errors| is True if any validation error was logged.
    """
    enums = {}
    have_errors = False

    last_name = None
    for enum in tree.getElementsByTagName('enum'):
        if enum.getAttribute('type') != 'int':
            logging.error('Unknown enum type %s', enum.getAttribute('type'))
            have_errors = True
            continue

        name = enum.getAttribute('name')
        if _IsOutOfOrder(last_name, name):
            logging.error('Enums %s and %s are not in alphabetical order',
                          last_name, name)
            have_errors = True
        last_name = name

        if name in enums:
            logging.error('Duplicate enum %s', name)
            have_errors = True
            continue

        values = {}
        enum_dict = {'name': name, 'values': values}

        last_int_value = None
        for int_tag in enum.getElementsByTagName('int'):
            raw_value = int_tag.getAttribute('value')
            try:
                int_value = int(raw_value)
            except ValueError:
                # A missing or malformed value is a description error like
                # any other: report it and keep validating the rest.
                logging.error("Enum %s has invalid int value '%s'",
                              name, raw_value)
                have_errors = True
                continue
            if last_int_value is not None and int_value < last_int_value:
                logging.error(
                    'Enum %s int values %d and %d are not in numerical order',
                    name, last_int_value, int_value)
                have_errors = True
            last_int_value = int_value
            if int_value in values:
                logging.error('Duplicate enum value %d for enum %s',
                              int_value, name)
                have_errors = True
                continue
            values[int_value] = {
                'label': int_tag.getAttribute('label'),
                'summary': _JoinChildNodes(int_tag),
            }

        summary_nodes = enum.getElementsByTagName('summary')
        if summary_nodes:
            enum_dict['summary'] = _NormalizeString(
                _JoinChildNodes(summary_nodes[0]))

        enums[name] = enum_dict

    return enums, have_errors


def _ExtractHistogramsFromXmlTree(tree, enums):
    """Extract all <histogram> nodes in the tree into a dictionary.

    Args:
      tree: XML dom tree.
      enums: Dictionary of enums as returned by _ExtractEnumsFromXmlTree.

    Returns:
      A tuple (histograms, have_errors). |histograms| maps each histogram
      name to a dictionary that always has 'summary' ('TBD' when the XML has
      none) and may have 'obsolete', 'units', 'details' and 'enum'.
      |have_errors| is True if any validation error was logged.
    """
    # Process the histograms. The descriptions can include HTML tags.
    histograms = {}
    have_errors = False
    last_name = None
    for histogram in tree.getElementsByTagName('histogram'):
        name = histogram.getAttribute('name')
        if _IsOutOfOrder(last_name, name):
            logging.error('Histograms %s and %s are not in alphabetical order',
                          last_name, name)
            have_errors = True
        last_name = name
        if name in histograms:
            logging.error('Duplicate histogram definition %s', name)
            have_errors = True
            continue
        histograms[name] = histogram_entry = {}

        # Find <summary> tag.
        summary_nodes = histogram.getElementsByTagName('summary')
        if summary_nodes:
            histogram_entry['summary'] = _NormalizeString(
                _JoinChildNodes(summary_nodes[0]))
        else:
            histogram_entry['summary'] = 'TBD'

        # Find <obsolete> tag. The reason is kept as written (not normalized).
        obsolete_nodes = histogram.getElementsByTagName('obsolete')
        if obsolete_nodes:
            histogram_entry['obsolete'] = _JoinChildNodes(obsolete_nodes[0])

        # Handle units.
        if histogram.hasAttribute('units'):
            histogram_entry['units'] = histogram.getAttribute('units')

        # Find <details> tag.
        details_nodes = histogram.getElementsByTagName('details')
        if details_nodes:
            histogram_entry['details'] = _NormalizeString(
                _JoinChildNodes(details_nodes[0]))

        # Handle enum types.
        if histogram.hasAttribute('enum'):
            enum_name = histogram.getAttribute('enum')
            if enum_name not in enums:
                logging.error('Unknown enum %s in histogram %s',
                              enum_name, name)
                have_errors = True
            else:
                histogram_entry['enum'] = enums[enum_name]

    return histograms, have_errors


def _UpdateHistogramsWithFieldTrialInformation(tree, histograms):
    """Process field trials' tags and combine with affected histograms.

    The histograms dictionary will be updated in-place by adding new
    histograms created by combining histograms themselves with field trials
    targeting these histograms.

    Args:
      tree: XML dom tree.
      histograms: a dictionary of histograms previously extracted from the
          tree;

    Returns:
      True if any errors were found.
    """
    have_errors = False
    fieldtrials = tree.getElementsByTagName('fieldtrial')

    # Verify order of fieldtrial fields first.
    last_name = None
    for fieldtrial in fieldtrials:
        name = fieldtrial.getAttribute('name')
        if _IsOutOfOrder(last_name, name):
            logging.error(
                'Field trials %s and %s are not in alphabetical order',
                last_name, name)
            have_errors = True
        last_name = name

    # Field trials can depend on other field trials, so we need to be careful.
    # Field trials whose dependencies have not yet been processed get
    # relegated to the back of a queue to be processed later. The generator
    # below keeps picking up entries appended to the queue while it is being
    # iterated.
    reprocess_queue = []

    def GenerateFieldTrials():
        for f in fieldtrials:
            yield 0, f
        for r, f in reprocess_queue:
            yield r, f

    for reprocess_count, fieldtrial in GenerateFieldTrials():
        name = fieldtrial.getAttribute('name')
        affected_histograms = fieldtrial.getElementsByTagName(
            'affected-histogram')

        # Check dependencies first: every base histogram must already exist.
        missing_dependency = None
        for affected_histogram in affected_histograms:
            histogram_name = affected_histogram.getAttribute('name')
            if histogram_name not in histograms:
                missing_dependency = histogram_name
                break
        if missing_dependency is not None:
            if reprocess_count < MAX_FIELDTRIAL_DEPENDENCY_DEPTH:
                reprocess_queue.append((reprocess_count + 1, fieldtrial))
            else:
                logging.error('Field trial %s is missing its dependency %s',
                              name, missing_dependency)
                have_errors = True
            continue

        groups = fieldtrial.getElementsByTagName('group')
        group_labels = {}
        for group in groups:
            group_labels[group.getAttribute('name')] = (
                group.getAttribute('label'))

        last_histogram_name = None
        for affected_histogram in affected_histograms:
            histogram_name = affected_histogram.getAttribute('name')
            if _IsOutOfOrder(last_histogram_name, histogram_name):
                logging.error(
                    'Affected histograms %s and %s of field trial %s are not '
                    'in alphabetical order',
                    last_histogram_name, histogram_name, name)
                have_errors = True
            last_histogram_name = histogram_name

            # <with-group> children restrict the expansion to those groups;
            # without any, every group of the field trial applies.
            with_groups = affected_histogram.getElementsByTagName('with-group')
            histogram_groups = with_groups or groups
            for group in histogram_groups:
                group_name = group.getAttribute('name')
                try:
                    new_histogram_name = _ExpandHistogramNameWithFieldTrial(
                        group_name, histogram_name, fieldtrial)
                except Error:
                    have_errors = True
                    continue

                # An empty group name expands to the base name itself; in
                # that case the base entry is annotated rather than copied.
                if new_histogram_name != histogram_name:
                    histograms[new_histogram_name] = copy.deepcopy(
                        histograms[histogram_name])

                entry = histograms[new_histogram_name]
                entry.setdefault('fieldtrial_groups', []).append(group_name)
                entry.setdefault('fieldtrial_names', []).append(name)
                entry.setdefault('fieldtrial_labels', []).append(
                    group_labels.get(group_name, ''))

    return have_errors


def ExtractHistogramsFromFile(file_handle):
    """Compute the histogram names and descriptions from the XML representation.

    Args:
      file_handle: A file or file-like with XML content.

    Returns:
      a tuple of (histograms, status) where histograms is a dictionary mapping
      histogram names to dictionaries containing histogram descriptions and
      status is a boolean indicating if errors were encountered in processing.
    """
    tree = xml.dom.minidom.parse(file_handle)
    _NormalizeAllAttributeValues(tree)

    enums, enum_errors = _ExtractEnumsFromXmlTree(tree)
    histograms, histogram_errors = _ExtractHistogramsFromXmlTree(tree, enums)
    update_errors = _UpdateHistogramsWithFieldTrialInformation(
        tree, histograms)

    return histograms, enum_errors or histogram_errors or update_errors


def ExtractHistograms(filename):
    """Load histogram definitions from a disk file.

    Args:
      filename: a file path to load data from.

    Returns:
      A dictionary mapping histogram names to their descriptions, as produced
      by ExtractHistogramsFromFile.

    Raises:
      Error if the file is not well-formatted.
    """
    # Binary mode hands the raw bytes to the XML parser, so the encoding
    # declared in the document is used instead of the locale's encoding.
    with open(filename, 'rb') as f:
        histograms, had_errors = ExtractHistogramsFromFile(f)
    if had_errors:
        message = 'Error parsing %s' % filename
        logging.error(message)
        raise Error(message)
    return histograms


def ExtractNames(histograms):
    """Return the histogram names of |histograms| as a sorted list."""
    return sorted(histograms)