Home | History | Annotate | Download | only in site_utils
      1 #!/usr/bin/env python
      2 # Copyright 2014 The Chromium OS Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Report whether DUTs are working or broken.
      7 
      8 usage: dut_status [ <options> ] [hostname ...]
      9 
     10 Reports on the history and status of selected DUT hosts, to
     11 determine whether they're "working" or "broken".  For purposes of
     12 the script, "broken" means "the DUT requires manual intervention
     13 before it can be used for further testing", and "working" means "not
     14 broken".  The status determination is based on the history of
     15 completed jobs for the DUT in a given time interval; still-running
     16 jobs are not considered.
     17 
     18 Time Interval Selection
     19 ~~~~~~~~~~~~~~~~~~~~~~~
     20 A DUT's reported status is based on the DUT's job history in a time
     21 interval determined by command line options.  The interval is
     22 specified with up to two of three options:
     23   --until/-u DATE/TIME - Specifies an end time for the search
     24       range.  (default: now)
     25   --since/-s DATE/TIME - Specifies a start time for the search
     26       range. (no default)
     27   --duration/-d HOURS - Specifies the length of the search interval
     28       in hours. (default: 24 hours)
     29 
     30 Any two time options completely specify the time interval.  If only
     31 one option is provided, these defaults are used:
     32   --until - Use the given end time with the default duration.
     33   --since - Use the given start time with the default end time.
     34   --duration - Use the given duration with the default end time.
     35 
     36 If no time options are given, use the default end time and duration.
     37 
     38 DATE/TIME values are of the form '2014-11-06 17:21:34'.
     39 
     40 DUT Selection
     41 ~~~~~~~~~~~~~
     42 By default, information is reported for DUTs named as command-line
     43 arguments.  Options are also available for selecting groups of
     44 hosts:
     45   --board/-b BOARD - Only include hosts with the given board.
     46   --pool/-p POOL - Only include hosts in the given pool.
     47 
     48 The selected hosts may also be filtered based on status:
     49   -w/--working - Only include hosts in a working state.
     50   -n/--broken - Only include hosts in a non-working state.  Hosts
     51       with no job history are considered non-working.
     52 
     53 Output Formats
     54 ~~~~~~~~~~~~~~
     55 There are four available output formats:
     56   * A simple list of host names.
     57   * A status summary showing one line per host.
     58   * A detailed job history for all selected DUTs, sorted by
     59     time of execution.
     60   * A job history for all selected DUTs showing only the history
     61     surrounding the DUT's last change from working to broken,
     62     or vice versa.
     63 
     64 The default format depends on whether hosts are filtered by
     65 status:
     66   * With the --working or --broken options, the list of host names
     67     is the default format.
     68   * Without those options, the default format is the one-line status
     69     summary.
     70 
     71 These options override the default formats:
     72   -o/--oneline - Use the one-line summary with the --working or
     73       --broken options.
     74   -f/--full_history - Print detailed per-host job history.
     75   -g/--diagnosis - Print the job history surrounding a status
     76       change.
     77 
     78 Examples
     79 ~~~~~~~~
     80     $ dut_status chromeos2-row4-rack2-host12
     81     hostname                     S   last checked         URL
     82     chromeos2-row4-rack2-host12  NO  2014-11-06 15:25:29  http://...
     83 
     84 'NO' means the DUT is broken.  That diagnosis is based on a job that
     85 failed:  'last checked' is the time of the failed job, and the URL
     86 points to the job's logs.
     87 
     88     $ dut_status.py -u '2014-11-06 15:30:00' -d 1 -f chromeos2-row4-rack2-host12
     89     chromeos2-row4-rack2-host12
     90         2014-11-06 15:25:29  NO http://...
     91         2014-11-06 14:44:07  -- http://...
     92         2014-11-06 14:42:56  OK http://...
     93 
     94 The times are the start times of the jobs; the URL points to the
     95 job's logs.  The status indicates the working or broken status after
     96 the job:
     97   'NO' Indicates that the DUT was believed broken after the job.
     98   'OK' Indicates that the DUT was believed working after the job.
     99   '--' Indicates that the job probably didn't change the DUT's
    100        status.
    101 Typically, logs of the actual failure will be found at the last job
    102 to report 'OK', or the first job to report '--'.
    103 
    104 """
    105 
    106 import argparse
    107 import sys
    108 import time
    109 
    110 import common
    111 from autotest_lib.client.common_lib import time_utils
    112 from autotest_lib.server import frontend
    113 from autotest_lib.site_utils import status_history
    114 
    115 
    116 # The fully qualified name makes for lines that are too long, so
    117 # shorten it locally.
    118 HostJobHistory = status_history.HostJobHistory
    119 
    120 # _DIAGNOSIS_IDS -
    121 #     Dictionary to map the known diagnosis codes to string values.
    122 
    123 _DIAGNOSIS_IDS = {
    124     status_history.UNUSED: '??',
    125     status_history.UNKNOWN: '--',
    126     status_history.WORKING: 'OK',
    127     status_history.BROKEN: 'NO'
    128 }
    129 
    130 
    131 # Default time interval for the --duration option when a value isn't
    132 # specified on the command line.
    133 _DEFAULT_DURATION = 24
    134 
    135 
    136 def _include_status(status, arguments):
    137     """Determine whether the given status should be filtered.
    138 
    139     Checks the given `status` against the command line options in
    140     `arguments`.  Return whether a host with that status should be
    141     printed based on the options.
    142 
    143     @param status Status of a host to be printed or skipped.
    144     @param arguments Parsed arguments object as returned by
    145                      ArgumentParser.parse_args().
    146 
    147     @return Returns `True` if the command-line options call for
    148             printing hosts with the status, or `False` otherwise.
    149 
    150     """
    151     if status == status_history.WORKING:
    152         return arguments.working
    153     else:
    154         return arguments.broken
    155 
    156 
    157 def _print_host_summaries(history_list, arguments):
    158     """Print one-line summaries of host history.
    159 
    160     This function handles the output format of the --oneline option.
    161 
    162     @param history_list A list of HostHistory objects to be printed.
    163     @param arguments    Parsed arguments object as returned by
    164                         ArgumentParser.parse_args().
    165 
    166     """
    167     fmt = '%-30s %-2s  %-19s  %s'
    168     print fmt % ('hostname', 'S', 'last checked', 'URL')
    169     for history in history_list:
    170         status, event = history.last_diagnosis()
    171         if not _include_status(status, arguments):
    172             continue
    173         datestr = '---'
    174         url = '---'
    175         if event is not None:
    176             datestr = time_utils.epoch_time_to_date_string(
    177                     event.start_time)
    178             url = event.job_url
    179 
    180         print fmt % (history.hostname,
    181                      _DIAGNOSIS_IDS[status],
    182                      datestr,
    183                      url)
    184 
    185 
    186 def _print_event_summary(event):
    187     """Print a one-line summary of a job or special task."""
    188     start_time = time_utils.epoch_time_to_date_string(
    189             event.start_time)
    190     print '    %s  %s %s' % (
    191             start_time,
    192             _DIAGNOSIS_IDS[event.diagnosis],
    193             event.job_url)
    194 
    195 
    196 def _print_hosts(history_list, arguments):
    197     """Print hosts, optionally with a job history.
    198 
    199     This function handles both the default format for --working
    200     and --broken options, as well as the output for the
    201     --full_history and --diagnosis options.  The `arguments`
    202     parameter determines the format to use.
    203 
    204     @param history_list A list of HostHistory objects to be printed.
    205     @param arguments    Parsed arguments object as returned by
    206                         ArgumentParser.parse_args().
    207 
    208     """
    209     for history in history_list:
    210         status, _ = history.last_diagnosis()
    211         if not _include_status(status, arguments):
    212             continue
    213         print history.hostname
    214         if arguments.full_history:
    215             for event in history:
    216                 _print_event_summary(event)
    217         elif arguments.diagnosis:
    218             for event in history.diagnosis_interval():
    219                 _print_event_summary(event)
    220 
    221 
    222 def _validate_time_range(arguments):
    223     """Validate the time range requested on the command line.
    224 
    225     Enforces the rules for the --until, --since, and --duration
    226     options are followed, and calculates defaults:
    227       * It isn't allowed to supply all three options.
    228       * If only two options are supplied, they completely determine
    229         the time interval.
    230       * If only one option is supplied, or no options, then apply
    231         specified defaults to the arguments object.
    232 
    233     @param arguments Parsed arguments object as returned by
    234                      ArgumentParser.parse_args().
    235 
    236     """
    237     if (arguments.duration is not None and
    238             arguments.since is not None and arguments.until is not None):
    239         print >>sys.stderr, ('FATAL: Can specify at most two of '
    240                              '--since, --until, and --duration')
    241         sys.exit(1)
    242     if (arguments.until is None and (arguments.since is None or
    243                                      arguments.duration is None)):
    244         arguments.until = int(time.time())
    245     if arguments.since is None:
    246         if arguments.duration is None:
    247             arguments.duration = _DEFAULT_DURATION
    248         arguments.since = (arguments.until -
    249                            arguments.duration * 60 * 60)
    250     elif arguments.until is None:
    251         arguments.until = (arguments.since +
    252                            arguments.duration * 60 * 60)
    253 
    254 
    255 def _get_host_histories(afe, arguments):
    256     """Return HostJobHistory objects for the requested hosts.
    257 
    258     Checks that individual hosts specified on the command line are
    259     valid.  Invalid hosts generate a warning message, and are
    260     omitted from futher processing.
    261 
    262     The return value is a list of HostJobHistory objects for the
    263     valid requested hostnames, using the time range supplied on the
    264     command line.
    265 
    266     @param afe       Autotest frontend
    267     @param arguments Parsed arguments object as returned by
    268                      ArgumentParser.parse_args().
    269     @return List of HostJobHistory objects for the hosts requested
    270             on the command line.
    271 
    272     """
    273     histories = []
    274     saw_error = False
    275     for hostname in arguments.hostnames:
    276         try:
    277             h = HostJobHistory.get_host_history(
    278                     afe, hostname, arguments.since, arguments.until)
    279             histories.append(h)
    280         except:
    281             print >>sys.stderr, ('WARNING: Ignoring unknown host %s' %
    282                                   hostname)
    283             saw_error = True
    284     if saw_error:
    285         # Create separation from the output that follows
    286         print >>sys.stderr
    287     return histories
    288 
    289 
    290 def _validate_host_list(afe, arguments):
    291     """Validate the user-specified list of hosts.
    292 
    293     Hosts may be specified implicitly with --board or --pool, or
    294     explictly as command line arguments.  This enforces these
    295     rules:
    296       * If --board or --pool, or both are specified, individual
    297         hosts may not be specified.
    298       * However specified, there must be at least one host.
    299 
    300     The return value is a list of HostJobHistory objects for the
    301     requested hosts, using the time range supplied on the command
    302     line.
    303 
    304     @param afe       Autotest frontend
    305     @param arguments Parsed arguments object as returned by
    306                      ArgumentParser.parse_args().
    307     @return List of HostJobHistory objects for the hosts requested
    308             on the command line.
    309 
    310     """
    311     if arguments.board or arguments.pool:
    312         if arguments.hostnames:
    313             print >>sys.stderr, ('FATAL: Hostname arguments provided '
    314                                  'with --board or --pool')
    315             sys.exit(1)
    316         histories = HostJobHistory.get_multiple_histories(
    317                 afe, arguments.since, arguments.until,
    318                 board=arguments.board, pool=arguments.pool)
    319     else:
    320         histories = _get_host_histories(afe, arguments)
    321     if not histories:
    322         print >>sys.stderr, 'FATAL: no valid hosts found'
    323         sys.exit(1)
    324     return histories
    325 
    326 
    327 def _validate_format_options(arguments):
    328     """Check the options for what output format to use.
    329 
    330     Enforce these rules:
    331       * If neither --broken nor --working was used, then --oneline
    332         becomes the selected format.
    333       * If neither --broken nor --working was used, included both
    334         working and broken DUTs.
    335 
    336     @param arguments Parsed arguments object as returned by
    337                      ArgumentParser.parse_args().
    338 
    339     """
    340     if (not arguments.oneline and not arguments.diagnosis and
    341             not arguments.full_history):
    342         arguments.oneline = (not arguments.working and
    343                              not arguments.broken)
    344     if not arguments.working and not arguments.broken:
    345         arguments.working = True
    346         arguments.broken = True
    347 
    348 
    349 def _validate_command(afe, arguments):
    350     """Check that the command's arguments are valid.
    351 
    352     This performs command line checking to enforce command line
    353     rules that ArgumentParser can't handle.  Additionally, this
    354     handles calculation of default arguments/options when a simple
    355     constant default won't do.
    356 
    357     Areas checked:
    358       * Check that a valid time range was provided, supplying
    359         defaults as necessary.
    360       * Identify invalid host names.
    361 
    362     @param afe       Autotest frontend
    363     @param arguments Parsed arguments object as returned by
    364                      ArgumentParser.parse_args().
    365     @return List of HostJobHistory objects for the hosts requested
    366             on the command line.
    367 
    368     """
    369     _validate_time_range(arguments)
    370     _validate_format_options(arguments)
    371     return _validate_host_list(afe, arguments)
    372 
    373 
    374 def _parse_command(argv):
    375     """Parse the command line arguments.
    376 
    377     Create an argument parser for this command's syntax, parse the
    378     command line, and return the result of the ArgumentParser
    379     parse_args() method.
    380 
    381     @param argv Standard command line argument vector; argv[0] is
    382                 assumed to be the command name.
    383     @return Result returned by ArgumentParser.parse_args().
    384 
    385     """
    386     parser = argparse.ArgumentParser(
    387             prog=argv[0],
    388             description='Report DUT status and execution history',
    389             epilog='You can specify one or two of --since, --until, '
    390                    'and --duration, but not all three.\n'
    391                    'The date/time format is "YYYY-MM-DD HH:MM:SS".')
    392     parser.add_argument('-s', '--since', type=status_history.parse_time,
    393                         metavar='DATE/TIME',
    394                         help='starting time for history display')
    395     parser.add_argument('-u', '--until', type=status_history.parse_time,
    396                         metavar='DATE/TIME',
    397                         help='ending time for history display'
    398                              ' (default: now)')
    399     parser.add_argument('-d', '--duration', type=int,
    400                         metavar='HOURS',
    401                         help='number of hours of history to display'
    402                              ' (default: %d)' % _DEFAULT_DURATION)
    403 
    404     format_group = parser.add_mutually_exclusive_group()
    405     format_group.add_argument('-f', '--full_history', action='store_true',
    406                               help='Display host history from most '
    407                                    'to least recent for each DUT')
    408     format_group.add_argument('-g', '--diagnosis', action='store_true',
    409                               help='Display host history for the '
    410                                    'most recent DUT status change')
    411     format_group.add_argument('-o', '--oneline', action='store_true',
    412                               help='Display host status summary')
    413 
    414     parser.add_argument('-w', '--working', action='store_true',
    415                         help='List working devices by name only')
    416     parser.add_argument('-n', '--broken', action='store_true',
    417                         help='List non-working devices by name only')
    418 
    419     parser.add_argument('-b', '--board',
    420                         help='Display history for all DUTs '
    421                              'of the given board')
    422     parser.add_argument('-p', '--pool',
    423                         help='Display history for all DUTs '
    424                              'in the given pool')
    425     parser.add_argument('hostnames',
    426                         nargs='*',
    427                         help='host names of DUTs to report on')
    428     parser.add_argument('--web',
    429                         help='Master autotest frontend hostname. If no value '
    430                              'is given, the one in global config will be used.',
    431                         default=None)
    432     arguments = parser.parse_args(argv[1:])
    433     return arguments
    434 
    435 
    436 def main(argv):
    437     """Standard main() for command line processing.
    438 
    439     @param argv Command line arguments (normally sys.argv).
    440 
    441     """
    442     arguments = _parse_command(argv)
    443     afe = frontend.AFE(server=arguments.web)
    444     history_list = _validate_command(afe, arguments)
    445     if arguments.oneline:
    446         _print_host_summaries(history_list, arguments)
    447     else:
    448         _print_hosts(history_list, arguments)
    449 
    450 
    451 if __name__ == '__main__':
    452     main(sys.argv)
    453