#!/usr/bin/env python
# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Report whether DUTs are working or broken.

usage: dut_status [ <options> ] [hostname ...]

Reports on the history and status of selected DUT hosts, to
determine whether they're "working" or "broken".  For purposes of
the script, "broken" means "the DUT requires manual intervention
before it can be used for further testing", and "working" means "not
broken".  The status determination is based on the history of
completed jobs for the DUT in a given time interval; still-running
jobs are not considered.

Time Interval Selection
~~~~~~~~~~~~~~~~~~~~~~~
A DUT's reported status is based on the DUT's job history in a time
interval determined by command line options.  The interval is
specified with up to two of three options:
  --until/-u DATE/TIME - Specifies an end time for the search
      range.  (default: now)
  --since/-s DATE/TIME - Specifies a start time for the search
      range.  (no default)
  --duration/-d HOURS - Specifies the length of the search interval
      in hours.  (default: 24 hours)

Any two time options completely specify the time interval.  If only
one option is provided, these defaults are used:
  --until - Use the given end time with the default duration.
  --since - Use the given start time with the default end time.
  --duration - Use the given duration with the default end time.

If no time options are given, use the default end time and duration.

DATE/TIME values are of the form '2014-11-06 17:21:34'.

DUT Selection
~~~~~~~~~~~~~
By default, information is reported for DUTs named as command-line
arguments.  Options are also available for selecting groups of
hosts:
  --board/-b BOARD - Only include hosts with the given board.
  --pool/-p POOL - Only include hosts in the given pool.  The user
      might be interested in the following pools: bvt, cq,
      continuous, cts, or suites.

Hostname arguments may not be combined with --board or --pool.

The selected hosts may also be filtered based on status:
  -w/--working - Only include hosts in a working state.
  -n/--broken - Only include hosts in a non-working state.  Hosts
      with no job history are considered non-working.

Output Formats
~~~~~~~~~~~~~~
There are four available output formats:
  * A simple list of host names.
  * A status summary showing one line per host.
  * A detailed job history for all selected DUTs, sorted by
    time of execution.
  * A job history for all selected DUTs showing only the history
    surrounding the DUT's last change from working to broken,
    or vice versa.

The default format depends on whether hosts are filtered by
status:
  * With the --working or --broken options, the list of host names
    is the default format.
  * Without those options, the default format is the one-line status
    summary.

These options override the default formats:
  -o/--oneline - Use the one-line summary with the --working or
      --broken options.
  -f/--full_history - Print detailed per-host job history.
  -g/--diagnosis - Print the job history surrounding a status
      change.

Other Options
~~~~~~~~~~~~~
  --web HOSTNAME - Master autotest frontend hostname.  If no value
      is given, the one in global config will be used.

Examples
~~~~~~~~
    $ dut_status chromeos2-row4-rack2-host12
    hostname                       S  last checked        URL
    chromeos2-row4-rack2-host12    NO 2014-11-06 15:25:29 http://...

'NO' means the DUT is broken.  That diagnosis is based on a job that
failed:  'last checked' is the time of the failed job, and the URL
points to the job's logs.

    $ dut_status.py -u '2014-11-06 15:30:00' -d 1 -f chromeos2-row4-rack2-host12
    chromeos2-row4-rack2-host12
     2014-11-06 15:25:29 NO http://...
     2014-11-06 14:44:07 -- http://...
     2014-11-06 14:42:56 OK http://...

The times are the start times of the jobs; the URL points to the
job's logs.  The status indicates the working or broken status after
the job:
  'NO' Indicates that the DUT was believed broken after the job.
  'OK' Indicates that the DUT was believed working after the job.
  '--' Indicates that the job probably didn't change the DUT's
      status.
Typically, logs of the actual failure will be found at the last job
to report 'OK', or the first job to report '--'.

"""

# Must precede every other import.  Gives `print` function semantics
# so the same source is valid on Python 2.6+ and Python 3.
from __future__ import print_function

import argparse
import sys
import time

import common
from autotest_lib.client.common_lib import time_utils
from autotest_lib.server import frontend
from autotest_lib.server.lib import status_history
from autotest_lib.site_utils import lab_inventory

# The fully qualified name makes for lines that are too long, so
# shorten it locally.
HostJobHistory = status_history.HostJobHistory

# _DIAGNOSIS_IDS -
#     Dictionary to map the known diagnosis codes to the short
#     string values shown in the status column of the output.

_DIAGNOSIS_IDS = {
    status_history.UNUSED: '??',
    status_history.UNKNOWN: '--',
    status_history.WORKING: 'OK',
    status_history.BROKEN: 'NO'
}


# Default time interval (in hours) for the --duration option when a
# value isn't specified on the command line.
_DEFAULT_DURATION = 24


def _include_status(status, arguments):
    """Determine whether a host with the given status should be printed.

    Checks the given `status` against the command line options in
    `arguments`.  Only `status_history.WORKING` counts as working;
    every other status is handled by the --broken option.

    @param status     Status of a host to be printed or skipped.
    @param arguments  Parsed arguments object as returned by
                      ArgumentParser.parse_args().

    @return Returns `True` if the command-line options call for
            printing hosts with the status, or `False` otherwise.

    """
    if status == status_history.WORKING:
        return arguments.working
    else:
        return arguments.broken


def _print_host_summaries(history_list, arguments):
    """Print one-line summaries of host history.

    This function handles the output format of the --oneline option.
    A header line is always printed, followed by one line for each
    host that passes the --working/--broken filter.

    @param history_list  A list of HostJobHistory objects to be
                         printed.
    @param arguments     Parsed arguments object as returned by
                         ArgumentParser.parse_args().

    """
    fmt = '%-30s %-2s %-19s %s'
    print(fmt % ('hostname', 'S', 'last checked', 'URL'))
    for history in history_list:
        status, event = history.last_diagnosis()
        if not _include_status(status, arguments):
            continue
        # Placeholders for hosts that have no event to report.
        datestr = '---'
        url = '---'
        if event is not None:
            datestr = time_utils.epoch_time_to_date_string(
                    event.start_time)
            url = event.job_url

        print(fmt % (history.hostname,
                     _DIAGNOSIS_IDS[status],
                     datestr,
                     url))


def _print_event_summary(event):
    """Print a one-line summary of a job or special task.

    @param event  Event object providing `start_time`, `diagnosis`
                  and `job_url` attributes.

    """
    start_time = time_utils.epoch_time_to_date_string(
            event.start_time)
    print(' %s %s %s' % (
            start_time,
            _DIAGNOSIS_IDS[event.diagnosis],
            event.job_url))


def _print_hosts(history_list, arguments):
    """Print hosts, optionally with a job history.

    This function handles both the default format for --working
    and --broken options, as well as the output for the
    --full_history and --diagnosis options.  The `arguments`
    parameter determines the format to use.

    @param history_list  A list of HostJobHistory objects to be
                         printed.
    @param arguments     Parsed arguments object as returned by
                         ArgumentParser.parse_args().

    """
    for history in history_list:
        status, _ = history.last_diagnosis()
        if not _include_status(status, arguments):
            continue
        print(history.hostname)
        if arguments.full_history:
            for event in history:
                _print_event_summary(event)
        elif arguments.diagnosis:
            for event in history.diagnosis_interval():
                _print_event_summary(event)


def _validate_time_range(arguments):
    """Validate the time range requested on the command line.

    Enforces the rules for the --until, --since, and --duration
    options, and calculates defaults:
      * It isn't allowed to supply all three options.
      * If only two options are supplied, they completely determine
        the time interval.
      * If only one option is supplied, or no options, then apply
        specified defaults to the arguments object.

    On return, `arguments.since` and `arguments.until` are both set.
    Exits the program with status 1 if all three options were given.

    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().

    """
    if (arguments.duration is not None and
            arguments.since is not None and arguments.until is not None):
        print('FATAL: Can specify at most two of '
              '--since, --until, and --duration',
              file=sys.stderr)
        sys.exit(1)
    # With at most one of --since/--duration given, the end time
    # defaults to "now".
    if (arguments.until is None and (arguments.since is None or
                                     arguments.duration is None)):
        arguments.until = int(time.time())
    if arguments.since is None:
        if arguments.duration is None:
            arguments.duration = _DEFAULT_DURATION
        arguments.since = (arguments.until -
                           arguments.duration * 60 * 60)
    elif arguments.until is None:
        # Only reachable when both --since and --duration were given.
        arguments.until = (arguments.since +
                           arguments.duration * 60 * 60)


def _get_host_histories(afe, arguments):
    """Return HostJobHistory objects for the requested hosts.

    Checks that individual hosts specified on the command line are
    valid.  Invalid hosts generate a warning message, and are
    omitted from further processing.

    The return value is a list of HostJobHistory objects for the
    valid requested hostnames, using the time range supplied on the
    command line.

    @param afe        Autotest frontend
    @param arguments  Parsed arguments object as returned by
                      ArgumentParser.parse_args().
    @return List of HostJobHistory objects for the hosts requested
            on the command line.

    """
    histories = []
    saw_error = False
    for hostname in arguments.hostnames:
        try:
            h = HostJobHistory.get_host_history(
                    afe, hostname, arguments.since, arguments.until)
            histories.append(h)
        except Exception:
            # Deliberately best-effort: a failed lookup skips this
            # host only.  `Exception` rather than a bare `except` so
            # that Ctrl-C and sys.exit() still abort the command.
            print('WARNING: Ignoring unknown host %s' % hostname,
                  file=sys.stderr)
            saw_error = True
    if saw_error:
        # Create separation from the output that follows
        print(file=sys.stderr)
    return histories


def _validate_host_list(afe, arguments):
    """Validate the user-specified list of hosts.

    Hosts may be specified implicitly with --board or --pool, or
    explicitly as command line arguments.  This enforces these
    rules:
      * If --board or --pool, or both are specified, individual
        hosts may not be specified.
      * However specified, there must be at least one host.

    The return value is a list of HostJobHistory objects for the
    requested hosts, using the time range supplied on the command
    line.  Exits the program with status 1 if either rule is
    violated.

    @param afe        Autotest frontend
    @param arguments  Parsed arguments object as returned by
                      ArgumentParser.parse_args().
    @return List of HostJobHistory objects for the hosts requested
            on the command line.

    """
    if arguments.board or arguments.pool:
        if arguments.hostnames:
            print('FATAL: Hostname arguments provided '
                  'with --board or --pool',
                  file=sys.stderr)
            sys.exit(1)
        histories = HostJobHistory.get_multiple_histories(
                afe, arguments.since, arguments.until,
                board=arguments.board, pool=arguments.pool)
    else:
        histories = _get_host_histories(afe, arguments)
    if not histories:
        print('FATAL: no valid hosts found', file=sys.stderr)
        sys.exit(1)
    return histories


def _validate_format_options(arguments):
    """Check the options for what output format to use.

    Enforce these rules:
      * If no format option (--oneline, --full_history, --diagnosis)
        was given and neither --broken nor --working was used, then
        --oneline becomes the selected format.
      * If neither --broken nor --working was used, include both
        working and broken DUTs.

    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().

    """
    if (not arguments.oneline and not arguments.diagnosis and
            not arguments.full_history):
        arguments.oneline = (not arguments.working and
                             not arguments.broken)
    # Must run after the format default above, which depends on
    # whether the user supplied a status filter.
    if not arguments.working and not arguments.broken:
        arguments.working = True
        arguments.broken = True


def _validate_command(afe, arguments):
    """Check that the command's arguments are valid.

    This performs command line checking to enforce command line
    rules that ArgumentParser can't handle.  Additionally, this
    handles calculation of default arguments/options when a simple
    constant default won't do.

    Areas checked:
      * Check that a valid time range was provided, supplying
        defaults as necessary.
      * Select the output format and the status filters.
      * Identify invalid host names.

    @param afe        Autotest frontend
    @param arguments  Parsed arguments object as returned by
                      ArgumentParser.parse_args().
    @return List of HostJobHistory objects for the hosts requested
            on the command line.

    """
    # The time range must be settled first; the host lookup uses it.
    _validate_time_range(arguments)
    _validate_format_options(arguments)
    return _validate_host_list(afe, arguments)


def _parse_command(argv):
    """Parse the command line arguments.

    Create an argument parser for this command's syntax, parse the
    command line, and return the result of the ArgumentParser
    parse_args() method.

    @param argv Standard command line argument vector; argv[0] is
                assumed to be the command name.
    @return Result returned by ArgumentParser.parse_args().

    """
    parser = argparse.ArgumentParser(
            prog=argv[0],
            description='Report DUT status and execution history',
            epilog='You can specify one or two of --since, --until, '
                   'and --duration, but not all three.\n'
                   'The date/time format is "YYYY-MM-DD HH:MM:SS".')
    parser.add_argument('-s', '--since', type=status_history.parse_time,
                        metavar='DATE/TIME',
                        help='starting time for history display')
    parser.add_argument('-u', '--until', type=status_history.parse_time,
                        metavar='DATE/TIME',
                        help='ending time for history display'
                             ' (default: now)')
    parser.add_argument('-d', '--duration', type=int,
                        metavar='HOURS',
                        help='number of hours of history to display'
                             ' (default: %d)' % _DEFAULT_DURATION)

    # Only one output format may be requested at a time.
    format_group = parser.add_mutually_exclusive_group()
    format_group.add_argument('-f', '--full_history', action='store_true',
                              help='Display host history from most '
                                   'to least recent for each DUT')
    format_group.add_argument('-g', '--diagnosis', action='store_true',
                              help='Display host history for the '
                                   'most recent DUT status change')
    format_group.add_argument('-o', '--oneline', action='store_true',
                              help='Display host status summary')

    parser.add_argument('-w', '--working', action='store_true',
                        help='List working devices by name only')
    parser.add_argument('-n', '--broken', action='store_true',
                        help='List non-working devices by name only')

    parser.add_argument('-b', '--board',
                        help='Display history for all DUTs '
                             'of the given board')
    parser.add_argument('-p', '--pool',
                        help='Display history for all DUTs '
                             'in the given pool. You might '
                             'be interested in the following pools: '
                             + ', '.join(lab_inventory.MANAGED_POOLS[:-1])
                             + ', or ' + lab_inventory.MANAGED_POOLS[-1]
                             + '.')
    parser.add_argument('hostnames',
                        nargs='*',
                        help='host names of DUTs to report on')
    parser.add_argument('--web',
                        help='Master autotest frontend hostname. If no value '
                             'is given, the one in global config will be used.',
                        default=None)
    arguments = parser.parse_args(argv[1:])
    return arguments


def main(argv):
    """Standard main() for command line processing.

    @param argv Command line arguments (normally sys.argv).

    """
    arguments = _parse_command(argv)
    afe = frontend.AFE(server=arguments.web)
    history_list = _validate_command(afe, arguments)
    if arguments.oneline:
        _print_host_summaries(history_list, arguments)
    else:
        _print_hosts(history_list, arguments)


if __name__ == '__main__':
    main(sys.argv)