# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Extract histogram names from the description XML file.

For more information on the format of the XML file, which is self-documenting,
see histograms.xml; however, here is a simple example to get you started. The
XML below will generate the following five histograms:

    HistogramEnum
    HistogramEnum_Chrome
    HistogramEnum_IE
    HistogramEnum_Firefox
    HistogramTime

Note that this module validates its input: <histogram>, <enum> and
<histogram_suffixes> entries (and the <affected-histogram> entries inside a
<histogram_suffixes>) must be listed in case-insensitive alphabetical order of
their names, the <int> values of an enum must be in numerical order, and every
<enum> must carry type="int".

<histogram-configuration>

<histograms>

<histogram name="HistogramEnum" enum="MyEnumType">
  <summary>This histogram sports an enum value type.</summary>
</histogram>

<histogram name="HistogramTime" units="milliseconds">
  <summary>A brief description.</summary>
  <details>This is a more thorough description of this histogram.</details>
</histogram>

</histograms>

<enums>

<enum name="MyEnumType" type="int">
  <summary>This is an example enum type, where the values mean little.</summary>
  <int value="1" label="FIRST_VALUE">This is the first value.</int>
  <int value="2" label="SECOND_VALUE">This is the second value.</int>
</enum>

</enums>

<histogram_suffixes_list>

<histogram_suffixes name="BrowserType">
  <suffix name="Chrome"/>
  <suffix name="IE"/>
  <suffix name="Firefox"/>
  <affected-histogram name="HistogramEnum"/>
</histogram_suffixes>

</histogram_suffixes_list>

</histogram-configuration>

"""

import copy
import logging
import xml.dom.minidom

OWNER_FIELD_PLACEHOLDER = (
    'Please list the metric\'s owners. Add more owner tags as needed.')

MAX_HISTOGRAM_SUFFIX_DEPENDENCY_DEPTH = 5


class Error(Exception):
  """Raised when the histogram description cannot be processed."""


def _JoinChildNodes(tag):
  """Join child nodes into a single text.

  Applicable to leafs like 'summary' and 'details'.

  Args:
    tag: parent node

  Returns:
    a string with concatenated nodes' text representation, stripped of
    leading and trailing whitespace.
  """
  return ''.join(c.toxml() for c in tag.childNodes).strip()


def _NormalizeString(s):
  """Replaces all whitespace sequences with a single space.

  The function properly handles multi-line strings.

  Args:
    s: The string to normalize, (' \\n a b c\\n d ').

  Returns:
    The normalized string (a b c d).
  """
  return ' '.join(s.split())


def _NormalizeAllAttributeValues(node):
  """Recursively normalizes all tag attribute values in the given tree.

  The tree is modified in place.

  Args:
    node: The minidom node to be normalized.

  Returns:
    The normalized minidom node (the same object that was passed in).
  """
  if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
    for a in node.attributes.keys():
      node.attributes[a].value = _NormalizeString(node.attributes[a].value)

  for c in node.childNodes:
    _NormalizeAllAttributeValues(c)
  return node


def _ExpandHistogramNameWithSuffixes(suffix_name, histogram_name,
                                     histogram_suffixes_node):
  """Creates a new histogram name based on a histogram suffix.

  Args:
    suffix_name: The suffix string to apply to the histogram name. May be empty.
    histogram_name: The name of the histogram. May be of the form
      Group.BaseName or BaseName.
    histogram_suffixes_node: The histogram_suffixes XML node. Its optional
      'separator' attribute (default '_') and 'ordering' attribute (default
      'suffix') control how the name is built.

  Returns:
    A string with the expanded histogram name. If |suffix_name| is empty the
    histogram name is returned unchanged.

  Raises:
    Error: if the expansion can't be done.
  """
  if histogram_suffixes_node.hasAttribute('separator'):
    separator = histogram_suffixes_node.getAttribute('separator')
  else:
    separator = '_'

  if histogram_suffixes_node.hasAttribute('ordering'):
    ordering = histogram_suffixes_node.getAttribute('ordering')
  else:
    ordering = 'suffix'
  # The ordering is validated even when the suffix is empty.
  if ordering not in ('prefix', 'suffix'):
    logging.error('ordering needs to be prefix or suffix, value is %s',
                  ordering)
    raise Error()

  if not suffix_name:
    return histogram_name

  if ordering == 'suffix':
    return histogram_name + separator + suffix_name

  # For prefixes, the suffix_name is inserted between the "cluster" and the
  # "remainder", e.g. Foo.BarHist expanded with gamma becomes Foo.gamma_BarHist.
  sections = histogram_name.split('.')
  if len(sections) <= 1:
    logging.error(
        'Prefix Field Trial expansions require histogram names which include a '
        'dot separator. Histogram name is %s, and Field Trial is %s',
        histogram_name, histogram_suffixes_node.getAttribute('name'))
    raise Error()

  cluster = sections[0] + '.'
  remainder = '.'.join(sections[1:])
  return cluster + suffix_name + separator + remainder


def _ExtractEnumsFromXmlTree(tree):
  """Extract all <enum> nodes in the tree into a dictionary.

  Args:
    tree: XML dom tree.

  Returns:
    A tuple (enums, have_errors). |enums| maps each enum name to a dictionary
    with the keys 'name', 'values' (a dictionary mapping each int value to a
    {'label': ..., 'summary': ...} dictionary) and, when a <summary> tag is
    present, 'summary'. |have_errors| is True if any validation error was
    logged.
  """

  enums = {}
  have_errors = False

  last_name = None
  for enum in tree.getElementsByTagName('enum'):
    if enum.getAttribute('type') != 'int':
      logging.error('Unknown enum type %s', enum.getAttribute('type'))
      have_errors = True
      continue

    name = enum.getAttribute('name')
    if last_name is not None and name.lower() < last_name.lower():
      logging.error('Enums %s and %s are not in alphabetical order',
                    last_name, name)
      have_errors = True
    last_name = name

    if name in enums:
      logging.error('Duplicate enum %s', name)
      have_errors = True
      continue

    last_int_value = None
    enum_dict = {}
    enum_dict['name'] = name
    enum_dict['values'] = {}

    for int_tag in enum.getElementsByTagName('int'):
      raw_value = int_tag.getAttribute('value')
      try:
        int_value = int(raw_value)
      except ValueError:
        # A missing or non-numeric value is reported like every other
        # validation failure instead of aborting with a bare ValueError.
        logging.error('Enum %s has non-integer value %r', name, raw_value)
        have_errors = True
        continue
      if last_int_value is not None and int_value < last_int_value:
        logging.error('Enum %s int values %d and %d are not in numerical order',
                      name, last_int_value, int_value)
        have_errors = True
      last_int_value = int_value
      if int_value in enum_dict['values']:
        logging.error('Duplicate enum value %d for enum %s', int_value, name)
        have_errors = True
        continue
      value_dict = {}
      value_dict['label'] = int_tag.getAttribute('label')
      value_dict['summary'] = _JoinChildNodes(int_tag)
      enum_dict['values'][int_value] = value_dict

    summary_nodes = enum.getElementsByTagName('summary')
    if summary_nodes:
      enum_dict['summary'] = _NormalizeString(_JoinChildNodes(summary_nodes[0]))

    enums[name] = enum_dict

  return enums, have_errors


def _ExtractOwners(xml_node):
  """Extract all owners into a list from owner tag under |xml_node|.

  Entries that contain the OWNER_FIELD_PLACEHOLDER text are skipped.

  Args:
    xml_node: The minidom node whose <owner> descendants are collected.

  Returns:
    A list of whitespace-normalized owner strings.
  """
  owners = []
  for owner_node in xml_node.getElementsByTagName('owner'):
    owner_entry = _NormalizeString(_JoinChildNodes(owner_node))
    if OWNER_FIELD_PLACEHOLDER not in owner_entry:
      owners.append(owner_entry)
  return owners


def _ExtractHistogramsFromXmlTree(tree, enums):
  """Extract all <histogram> nodes in the tree into a dictionary.

  Args:
    tree: XML dom tree.
    enums: a dictionary of enums, as returned by _ExtractEnumsFromXmlTree.

  Returns:
    A tuple (histograms, have_errors). |histograms| maps each histogram name to
    a dictionary which always has a 'summary' key ('TBD' when the tag is
    missing) and may have 'owners', 'obsolete', 'units', 'details' and 'enum'
    keys. |have_errors| is True if any validation error was logged.
  """

  # Process the histograms. The descriptions can include HTML tags.
  histograms = {}
  have_errors = False
  last_name = None
  for histogram in tree.getElementsByTagName('histogram'):
    name = histogram.getAttribute('name')
    if last_name is not None and name.lower() < last_name.lower():
      logging.error('Histograms %s and %s are not in alphabetical order',
                    last_name, name)
      have_errors = True
    last_name = name
    if name in histograms:
      logging.error('Duplicate histogram definition %s', name)
      have_errors = True
      continue
    histograms[name] = histogram_entry = {}

    # Find <owner> tag.
    owners = _ExtractOwners(histogram)
    if owners:
      histogram_entry['owners'] = owners

    # Find <summary> tag.
    summary_nodes = histogram.getElementsByTagName('summary')
    if summary_nodes:
      histogram_entry['summary'] = _NormalizeString(
          _JoinChildNodes(summary_nodes[0]))
    else:
      histogram_entry['summary'] = 'TBD'

    # Find <obsolete> tag.
    obsolete_nodes = histogram.getElementsByTagName('obsolete')
    if obsolete_nodes:
      reason = _JoinChildNodes(obsolete_nodes[0])
      histogram_entry['obsolete'] = reason

    # Handle units.
    if histogram.hasAttribute('units'):
      histogram_entry['units'] = histogram.getAttribute('units')

    # Find <details> tag.
    details_nodes = histogram.getElementsByTagName('details')
    if details_nodes:
      histogram_entry['details'] = _NormalizeString(
          _JoinChildNodes(details_nodes[0]))

    # Handle enum types.
    if histogram.hasAttribute('enum'):
      enum_name = histogram.getAttribute('enum')
      if enum_name not in enums:
        logging.error('Unknown enum %s in histogram %s', enum_name, name)
        have_errors = True
      else:
        histogram_entry['enum'] = enums[enum_name]

  return histograms, have_errors


def _UpdateHistogramsWithSuffixes(tree, histograms):
  """Process <histogram_suffixes> tags and combine with affected histograms.

  The histograms dictionary will be updated in-place by adding new histograms
  created by combining histograms themselves with histogram_suffixes targeting
  these histograms.

  Args:
    tree: XML dom tree.
    histograms: a dictionary of histograms previously extracted from the tree;

  Returns:
    True if any errors were found.
  """
  have_errors = False

  histogram_suffix_tag = 'histogram_suffixes'
  suffix_tag = 'suffix'
  with_tag = 'with-suffix'

  # Verify order of histogram_suffixes fields first.
  last_name = None
  for histogram_suffixes in tree.getElementsByTagName(histogram_suffix_tag):
    name = histogram_suffixes.getAttribute('name')
    if last_name is not None and name.lower() < last_name.lower():
      logging.error('histogram_suffixes %s and %s are not in alphabetical '
                    'order', last_name, name)
      have_errors = True
    last_name = name

  # histogram_suffixes can depend on other histogram_suffixes, so we need to be
  # careful. Make a temporary copy of the list of histogram_suffixes to use as a
  # queue. histogram_suffixes whose dependencies have not yet been processed
  # will get relegated to the back of the queue to be processed later.
  reprocess_queue = []
  def GenerateHistogramSuffixes():
    for f in tree.getElementsByTagName(histogram_suffix_tag):
      yield 0, f
    # |reprocess_queue| may grow while it is being iterated here; entries
    # appended by the loop below are picked up by this same iteration.
    for r, f in reprocess_queue:
      yield r, f

  for reprocess_count, histogram_suffixes in GenerateHistogramSuffixes():
    # Check dependencies first
    dependencies_valid = True
    affected_histograms = histogram_suffixes.getElementsByTagName(
        'affected-histogram')
    for affected_histogram in affected_histograms:
      histogram_name = affected_histogram.getAttribute('name')
      if histogram_name not in histograms:
        # Base histogram is missing
        dependencies_valid = False
        missing_dependency = histogram_name
        break
    if not dependencies_valid:
      if reprocess_count < MAX_HISTOGRAM_SUFFIX_DEPENDENCY_DEPTH:
        reprocess_queue.append((reprocess_count + 1, histogram_suffixes))
        continue
      else:
        logging.error('histogram_suffixes %s is missing its dependency %s',
                      histogram_suffixes.getAttribute('name'),
                      missing_dependency)
        have_errors = True
        continue

    name = histogram_suffixes.getAttribute('name')
    suffix_nodes = histogram_suffixes.getElementsByTagName(suffix_tag)
    suffix_labels = {}
    for suffix in suffix_nodes:
      suffix_labels[suffix.getAttribute('name')] = suffix.getAttribute('label')
    # Find owners list under current histogram_suffixes tag.
    owners = _ExtractOwners(histogram_suffixes)

    last_histogram_name = None
    for affected_histogram in affected_histograms:
      histogram_name = affected_histogram.getAttribute('name')
      if (last_histogram_name is not None
          and histogram_name.lower() < last_histogram_name.lower()):
        logging.error('Affected histograms %s and %s of histogram_suffixes %s '
                      'are not in alphabetical order',
                      last_histogram_name, histogram_name, name)
        have_errors = True
      last_histogram_name = histogram_name
      # <with-suffix> children restrict the expansion of this particular
      # histogram to the listed suffixes; otherwise all suffixes apply.
      with_suffixes = affected_histogram.getElementsByTagName(with_tag)
      if with_suffixes:
        suffixes_to_add = with_suffixes
      else:
        suffixes_to_add = suffix_nodes
      for suffix in suffixes_to_add:
        suffix_name = suffix.getAttribute('name')
        try:
          new_histogram_name = _ExpandHistogramNameWithSuffixes(
              suffix_name, histogram_name, histogram_suffixes)
        except Error:
          have_errors = True
          continue

        # An empty suffix expands to the base name itself; in that case the
        # base histogram entry is annotated directly instead of being copied.
        if new_histogram_name != histogram_name:
          histograms[new_histogram_name] = copy.deepcopy(
              histograms[histogram_name])
        new_histogram = histograms[new_histogram_name]

        suffix_label = suffix_labels.get(suffix_name, '')

        # TODO(yiyaoliu): Rename these to be consistent with the new naming.
        # They are kept unchanged for now because they are used by dashboards.
        new_histogram.setdefault('fieldtrial_groups', []).append(suffix_name)
        new_histogram.setdefault('fieldtrial_names', []).append(name)
        new_histogram.setdefault('fieldtrial_labels', []).append(suffix_label)

        # If no owners are added for this histogram-suffixes, it inherits the
        # owners of its parents.
        if owners:
          # Store a copy so that expanded histograms do not all share (and
          # accidentally co-mutate) one list object.
          new_histogram['owners'] = list(owners)

  return have_errors


def ExtractHistogramsFromFile(file_handle):
  """Compute the histogram names and descriptions from the XML representation.

  Args:
    file_handle: A file or file-like with XML content.

  Returns:
    a tuple of (histograms, status) where histograms is a dictionary mapping
    histogram names to dictionaries containing histogram descriptions and status
    is a boolean indicating if errors were encountered in processing.
  """
  tree = xml.dom.minidom.parse(file_handle)
  _NormalizeAllAttributeValues(tree)

  enums, enum_errors = _ExtractEnumsFromXmlTree(tree)
  histograms, histogram_errors = _ExtractHistogramsFromXmlTree(tree, enums)
  update_errors = _UpdateHistogramsWithSuffixes(tree, histograms)

  return histograms, enum_errors or histogram_errors or update_errors


def ExtractHistograms(filename):
  """Load histogram definitions from a disk file.

  Args:
    filename: a file path to load data from.

  Returns:
    a dictionary of histogram descriptions.

  Raises:
    Error: if the file is not well-formatted.
  """
  # Open in binary mode so that the XML parser, not the locale's default text
  # encoding, decides how to decode the document.
  with open(filename, 'rb') as f:
    histograms, had_errors = ExtractHistogramsFromFile(f)
  if had_errors:
    logging.error('Error parsing %s', filename)
    raise Error()
  return histograms


def ExtractNames(histograms):
  """Returns the sorted list of histogram names in |histograms|."""
  return sorted(histograms)