Home | History | Annotate | Download | only in site_utils
      1 #!/usr/bin/env python
      2 # Copyright 2014 The Chromium OS Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Report whether DUTs are working or broken.
      7 
      8 usage: dut_status [ <options> ] [hostname ...]
      9 
     10 Reports on the history and status of selected DUT hosts, to
     11 determine whether they're "working" or "broken".  For purposes of
     12 the script, "broken" means "the DUT requires manual intervention
     13 before it can be used for further testing", and "working" means "not
     14 broken".  The status determination is based on the history of
     15 completed jobs for the DUT in a given time interval; still-running
     16 jobs are not considered.
     17 
     18 Time Interval Selection
     19 ~~~~~~~~~~~~~~~~~~~~~~~
     20 A DUT's reported status is based on the DUT's job history in a time
     21 interval determined by command line options.  The interval is
     22 specified with up to two of three options:
     23   --until/-u DATE/TIME - Specifies an end time for the search
     24       range.  (default: now)
     25   --since/-s DATE/TIME - Specifies a start time for the search
     26       range. (no default)
     27   --duration/-d HOURS - Specifies the length of the search interval
     28       in hours. (default: 24 hours)
     29 
     30 Any two time options completely specify the time interval.  If only
     31 one option is provided, these defaults are used:
     32   --until - Use the given end time with the default duration.
     33   --since - Use the given start time with the default end time.
     34   --duration - Use the given duration with the default end time.
     35 
     36 If no time options are given, use the default end time and duration.
     37 
     38 DATE/TIME values are of the form '2014-11-06 17:21:34'.
     39 
     40 DUT Selection
     41 ~~~~~~~~~~~~~
     42 By default, information is reported for DUTs named as command-line
     43 arguments.  Options are also available for selecting groups of
     44 hosts:
  --board/-b BOARD - Only include hosts with the given board.
  --model/-m MODEL - Only include hosts with the given model.
  --pool/-p POOL - Only include hosts in the given pool. The user
      might be interested in the following pools: bvt, cq,
      continuous, cts, or suites.
     49 
     50 
     51 The selected hosts may also be filtered based on status:
     52   -w/--working - Only include hosts in a working state.
     53   -n/--broken - Only include hosts in a non-working state.  Hosts
     54       with no job history are considered non-working.
     55 
     56 Output Formats
     57 ~~~~~~~~~~~~~~
     58 There are four available output formats:
     59   * A simple list of host names.
     60   * A status summary showing one line per host.
     61   * A detailed job history for all selected DUTs, sorted by
     62     time of execution.
     63   * A job history for all selected DUTs showing only the history
     64     surrounding the DUT's last change from working to broken,
     65     or vice versa.
     66 
     67 The default format depends on whether hosts are filtered by
     68 status:
     69   * With the --working or --broken options, the list of host names
     70     is the default format.
     71   * Without those options, the default format is the one-line status
     72     summary.
     73 
     74 These options override the default formats:
     75   -o/--oneline - Use the one-line summary with the --working or
     76       --broken options.
     77   -f/--full_history - Print detailed per-host job history.
     78   -g/--diagnosis - Print the job history surrounding a status
     79       change.
     80 
     81 Examples
     82 ~~~~~~~~
     83     $ dut_status chromeos2-row4-rack2-host12
     84     hostname                     S   last checked         URL
     85     chromeos2-row4-rack2-host12  NO  2014-11-06 15:25:29  http://...
     86 
     87 'NO' means the DUT is broken.  That diagnosis is based on a job that
     88 failed:  'last checked' is the time of the failed job, and the URL
     89 points to the job's logs.
     90 
    $ dut_status -u '2014-11-06 15:30:00' -d 1 -f chromeos2-row4-rack2-host12
     92     chromeos2-row4-rack2-host12
     93         2014-11-06 15:25:29  NO http://...
     94         2014-11-06 14:44:07  -- http://...
     95         2014-11-06 14:42:56  OK http://...
     96 
     97 The times are the start times of the jobs; the URL points to the
     98 job's logs.  The status indicates the working or broken status after
     99 the job:
    100   'NO' Indicates that the DUT was believed broken after the job.
    101   'OK' Indicates that the DUT was believed working after the job.
    102   '--' Indicates that the job probably didn't change the DUT's
    103        status.
    104 Typically, logs of the actual failure will be found at the last job
    105 to report 'OK', or the first job to report '--'.
    106 
    107 """
    108 
    109 import argparse
    110 import sys
    111 import time
    112 
    113 import common
    114 from autotest_lib.client.common_lib import time_utils
    115 from autotest_lib.server import constants
    116 from autotest_lib.server import frontend
    117 from autotest_lib.server.lib import status_history
    118 from autotest_lib.utils import labellib
    119 
# The fully qualified name makes for lines that are too long, so
# shorten it locally.
HostJobHistory = status_history.HostJobHistory

# _DIAGNOSIS_IDS -
#     Dictionary to map the known diagnosis codes to the two-character
#     strings printed in the status column of this command's output.
#     'OK' and 'NO' mark a DUT as working or broken; '--' marks a job
#     that probably didn't change the DUT's status.
#     NOTE(review): '??' presumably marks a host with no usable job
#     history in the selected interval - confirm against
#     status_history.

_DIAGNOSIS_IDS = {
    status_history.UNUSED: '??',
    status_history.UNKNOWN: '--',
    status_history.WORKING: 'OK',
    status_history.BROKEN: 'NO'
}


# Default time interval for the --duration option when a value isn't
# specified on the command line.  The value is in hours.
_DEFAULT_DURATION = 24
    138 
    139 
    140 def _include_status(status, arguments):
    141     """Determine whether the given status should be filtered.
    142 
    143     Checks the given `status` against the command line options in
    144     `arguments`.  Return whether a host with that status should be
    145     printed based on the options.
    146 
    147     @param status Status of a host to be printed or skipped.
    148     @param arguments Parsed arguments object as returned by
    149                      ArgumentParser.parse_args().
    150 
    151     @return Returns `True` if the command-line options call for
    152             printing hosts with the status, or `False` otherwise.
    153 
    154     """
    155     if status == status_history.WORKING:
    156         return arguments.working
    157     else:
    158         return arguments.broken
    159 
    160 
    161 def _print_host_summaries(history_list, arguments):
    162     """Print one-line summaries of host history.
    163 
    164     This function handles the output format of the --oneline option.
    165 
    166     @param history_list A list of HostHistory objects to be printed.
    167     @param arguments    Parsed arguments object as returned by
    168                         ArgumentParser.parse_args().
    169 
    170     """
    171     fmt = '%-30s %-2s  %-19s  %s'
    172     print fmt % ('hostname', 'S', 'last checked', 'URL')
    173     for history in history_list:
    174         status, event = history.last_diagnosis()
    175         if not _include_status(status, arguments):
    176             continue
    177         datestr = '---'
    178         url = '---'
    179         if event is not None:
    180             datestr = time_utils.epoch_time_to_date_string(
    181                     event.start_time)
    182             url = event.job_url
    183 
    184         print fmt % (history.hostname,
    185                      _DIAGNOSIS_IDS[status],
    186                      datestr,
    187                      url)
    188 
    189 
    190 def _print_event_summary(event):
    191     """Print a one-line summary of a job or special task."""
    192     start_time = time_utils.epoch_time_to_date_string(
    193             event.start_time)
    194     print '    %s  %s %s' % (
    195             start_time,
    196             _DIAGNOSIS_IDS[event.diagnosis],
    197             event.job_url)
    198 
    199 
    200 def _print_hosts(history_list, arguments):
    201     """Print hosts, optionally with a job history.
    202 
    203     This function handles both the default format for --working
    204     and --broken options, as well as the output for the
    205     --full_history and --diagnosis options.  The `arguments`
    206     parameter determines the format to use.
    207 
    208     @param history_list A list of HostHistory objects to be printed.
    209     @param arguments    Parsed arguments object as returned by
    210                         ArgumentParser.parse_args().
    211 
    212     """
    213     for history in history_list:
    214         status, _ = history.last_diagnosis()
    215         if not _include_status(status, arguments):
    216             continue
    217         print history.hostname
    218         if arguments.full_history:
    219             for event in history:
    220                 _print_event_summary(event)
    221         elif arguments.diagnosis:
    222             for event in history.diagnosis_interval():
    223                 _print_event_summary(event)
    224 
    225 
    226 def _validate_time_range(arguments):
    227     """Validate the time range requested on the command line.
    228 
    229     Enforces the rules for the --until, --since, and --duration
    230     options are followed, and calculates defaults:
    231       * It isn't allowed to supply all three options.
    232       * If only two options are supplied, they completely determine
    233         the time interval.
    234       * If only one option is supplied, or no options, then apply
    235         specified defaults to the arguments object.
    236 
    237     @param arguments Parsed arguments object as returned by
    238                      ArgumentParser.parse_args().
    239 
    240     """
    241     if (arguments.duration is not None and
    242             arguments.since is not None and arguments.until is not None):
    243         print >>sys.stderr, ('FATAL: Can specify at most two of '
    244                              '--since, --until, and --duration')
    245         sys.exit(1)
    246     if (arguments.until is None and (arguments.since is None or
    247                                      arguments.duration is None)):
    248         arguments.until = int(time.time())
    249     if arguments.since is None:
    250         if arguments.duration is None:
    251             arguments.duration = _DEFAULT_DURATION
    252         arguments.since = (arguments.until -
    253                            arguments.duration * 60 * 60)
    254     elif arguments.until is None:
    255         arguments.until = (arguments.since +
    256                            arguments.duration * 60 * 60)
    257 
    258 
    259 def _get_host_histories(afe, arguments):
    260     """Return HostJobHistory objects for the requested hosts.
    261 
    262     Checks that individual hosts specified on the command line are
    263     valid.  Invalid hosts generate a warning message, and are
    264     omitted from futher processing.
    265 
    266     The return value is a list of HostJobHistory objects for the
    267     valid requested hostnames, using the time range supplied on the
    268     command line.
    269 
    270     @param afe       Autotest frontend
    271     @param arguments Parsed arguments object as returned by
    272                      ArgumentParser.parse_args().
    273     @return List of HostJobHistory objects for the hosts requested
    274             on the command line.
    275 
    276     """
    277     histories = []
    278     saw_error = False
    279     for hostname in arguments.hostnames:
    280         try:
    281             h = HostJobHistory.get_host_history(
    282                     afe, hostname, arguments.since, arguments.until)
    283             histories.append(h)
    284         except:
    285             print >>sys.stderr, ('WARNING: Ignoring unknown host %s' %
    286                                   hostname)
    287             saw_error = True
    288     if saw_error:
    289         # Create separation from the output that follows
    290         print >>sys.stderr
    291     return histories
    292 
    293 
    294 def _validate_host_list(afe, arguments):
    295     """Validate the user-specified list of hosts.
    296 
    297     Hosts may be specified implicitly with --board or --pool, or
    298     explictly as command line arguments.  This enforces these
    299     rules:
    300       * If --board or --pool, or both are specified, individual
    301         hosts may not be specified.
    302       * However specified, there must be at least one host.
    303 
    304     The return value is a list of HostJobHistory objects for the
    305     requested hosts, using the time range supplied on the command
    306     line.
    307 
    308     @param afe       Autotest frontend
    309     @param arguments Parsed arguments object as returned by
    310                      ArgumentParser.parse_args().
    311     @return List of HostJobHistory objects for the hosts requested
    312             on the command line.
    313 
    314     """
    315     if arguments.board or arguments.pool or arguments.model:
    316         if arguments.hostnames:
    317             print >>sys.stderr, ('FATAL: Hostname arguments provided '
    318                                  'with --board or --pool')
    319             sys.exit(1)
    320 
    321         labels = labellib.LabelsMapping()
    322         labels['board'] = arguments.board
    323         labels['pool'] = arguments.pool
    324         labels['model'] = arguments.model
    325         histories = HostJobHistory.get_multiple_histories(
    326             afe, arguments.since, arguments.until, labels.getlabels())
    327     else:
    328         histories = _get_host_histories(afe, arguments)
    329     if not histories:
    330         print >>sys.stderr, 'FATAL: no valid hosts found'
    331         sys.exit(1)
    332     return histories
    333 
    334 
    335 def _validate_format_options(arguments):
    336     """Check the options for what output format to use.
    337 
    338     Enforce these rules:
    339       * If neither --broken nor --working was used, then --oneline
    340         becomes the selected format.
    341       * If neither --broken nor --working was used, included both
    342         working and broken DUTs.
    343 
    344     @param arguments Parsed arguments object as returned by
    345                      ArgumentParser.parse_args().
    346 
    347     """
    348     if (not arguments.oneline and not arguments.diagnosis and
    349             not arguments.full_history):
    350         arguments.oneline = (not arguments.working and
    351                              not arguments.broken)
    352     if not arguments.working and not arguments.broken:
    353         arguments.working = True
    354         arguments.broken = True
    355 
    356 
    357 def _validate_command(afe, arguments):
    358     """Check that the command's arguments are valid.
    359 
    360     This performs command line checking to enforce command line
    361     rules that ArgumentParser can't handle.  Additionally, this
    362     handles calculation of default arguments/options when a simple
    363     constant default won't do.
    364 
    365     Areas checked:
    366       * Check that a valid time range was provided, supplying
    367         defaults as necessary.
    368       * Identify invalid host names.
    369 
    370     @param afe       Autotest frontend
    371     @param arguments Parsed arguments object as returned by
    372                      ArgumentParser.parse_args().
    373     @return List of HostJobHistory objects for the hosts requested
    374             on the command line.
    375 
    376     """
    377     _validate_time_range(arguments)
    378     _validate_format_options(arguments)
    379     return _validate_host_list(afe, arguments)
    380 
    381 
    382 def _parse_command(argv):
    383     """Parse the command line arguments.
    384 
    385     Create an argument parser for this command's syntax, parse the
    386     command line, and return the result of the ArgumentParser
    387     parse_args() method.
    388 
    389     @param argv Standard command line argument vector; argv[0] is
    390                 assumed to be the command name.
    391     @return Result returned by ArgumentParser.parse_args().
    392 
    393     """
    394     parser = argparse.ArgumentParser(
    395             prog=argv[0],
    396             description='Report DUT status and execution history',
    397             epilog='You can specify one or two of --since, --until, '
    398                    'and --duration, but not all three.')
    399     parser.add_argument('-s', '--since', type=status_history.parse_time,
    400                         metavar='DATE/TIME',
    401                         help=('Starting time for history display. '
    402                               'Format: "YYYY-MM-DD HH:MM:SS"'))
    403     parser.add_argument('-u', '--until', type=status_history.parse_time,
    404                         metavar='DATE/TIME',
    405                         help=('Ending time for history display. '
    406                               'Format: "YYYY-MM-DD HH:MM:SS" '
    407                               'Default: now'))
    408     parser.add_argument('-d', '--duration', type=int,
    409                         metavar='HOURS',
    410                         help='Number of hours of history to display'
    411                              ' (default: %d)' % _DEFAULT_DURATION)
    412 
    413     format_group = parser.add_mutually_exclusive_group()
    414     format_group.add_argument('-f', '--full_history', action='store_true',
    415                               help='Display host history from most '
    416                                    'to least recent for each DUT')
    417     format_group.add_argument('-g', '--diagnosis', action='store_true',
    418                               help='Display host history for the '
    419                                    'most recent DUT status change')
    420     format_group.add_argument('-o', '--oneline', action='store_true',
    421                               help='Display host status summary')
    422 
    423     parser.add_argument('-w', '--working', action='store_true',
    424                         help='List working devices by name only')
    425     parser.add_argument('-n', '--broken', action='store_true',
    426                         help='List non-working devices by name only')
    427 
    428     parser.add_argument('-b', '--board',
    429                         help='Display history for all DUTs '
    430                              'of the given board')
    431     parser.add_argument('-m', '--model',
    432                         help='Display history for all DUTs of the given model.')
    433     parser.add_argument('-p', '--pool',
    434                         help='Display history for all DUTs '
    435                              'in the given pool. You might '
    436                              'be interested in the following pools: '
    437                              + ', '.join(constants.Pools.MANAGED_POOLS[:-1])
    438                              +', or '+ constants.Pools.MANAGED_POOLS[-1] +'.')
    439     parser.add_argument('hostnames',
    440                         nargs='*',
    441                         help='Host names of DUTs to report on')
    442     parser.add_argument('--web',
    443                         help='Master autotest frontend hostname. If no value '
    444                              'is given, the one in global config will be used.',
    445                         default=None)
    446     arguments = parser.parse_args(argv[1:])
    447     return arguments
    448 
    449 
    450 def main(argv):
    451     """Standard main() for command line processing.
    452 
    453     @param argv Command line arguments (normally sys.argv).
    454 
    455     """
    456     arguments = _parse_command(argv)
    457     afe = frontend.AFE(server=arguments.web)
    458     history_list = _validate_command(afe, arguments)
    459     if arguments.oneline:
    460         _print_host_summaries(history_list, arguments)
    461     else:
    462         _print_hosts(history_list, arguments)
    463 
    464 
    465 if __name__ == '__main__':
    466     main(sys.argv)
    467