Home | History | Annotate | Download | only in site_utils
      1 #!/usr/bin/env python
      2 # Copyright 2014 The Chromium OS Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Report whether DUTs are working or broken.
      7 
      8 usage: dut_status [ <options> ] [hostname ...]
      9 
     10 Reports on the history and status of selected DUT hosts, to
     11 determine whether they're "working" or "broken".  For purposes of
     12 the script, "broken" means "the DUT requires manual intervention
     13 before it can be used for further testing", and "working" means "not
     14 broken".  The status determination is based on the history of
     15 completed jobs for the DUT in a given time interval; still-running
     16 jobs are not considered.
     17 
     18 Time Interval Selection
     19 ~~~~~~~~~~~~~~~~~~~~~~~
     20 A DUT's reported status is based on the DUT's job history in a time
     21 interval determined by command line options.  The interval is
     22 specified with up to two of three options:
     23   --until/-u DATE/TIME - Specifies an end time for the search
     24       range.  (default: now)
     25   --since/-s DATE/TIME - Specifies a start time for the search
     26       range. (no default)
     27   --duration/-d HOURS - Specifies the length of the search interval
     28       in hours. (default: 24 hours)
     29 
     30 Any two time options completely specify the time interval.  If only
     31 one option is provided, these defaults are used:
     32   --until - Use the given end time with the default duration.
     33   --since - Use the given start time with the default end time.
     34   --duration - Use the given duration with the default end time.
     35 
     36 If no time options are given, use the default end time and duration.
     37 
     38 DATE/TIME values are of the form '2014-11-06 17:21:34'.
     39 
     40 DUT Selection
     41 ~~~~~~~~~~~~~
     42 By default, information is reported for DUTs named as command-line
     43 arguments.  Options are also available for selecting groups of
     44 hosts:
     45   --board/-b BOARD - Only include hosts with the given board.
     46   --pool/-p POOL - Only include hosts in the given pool. The user
     47       might be interested in the following pools: bvt, cq,
     48       continuous, cts, or suites.
     49 
     50 
     51 The selected hosts may also be filtered based on status:
     52   -w/--working - Only include hosts in a working state.
     53   -n/--broken - Only include hosts in a non-working state.  Hosts
     54       with no job history are considered non-working.
     55 
     56 Output Formats
     57 ~~~~~~~~~~~~~~
     58 There are four available output formats:
     59   * A simple list of host names.
     60   * A status summary showing one line per host.
     61   * A detailed job history for all selected DUTs, sorted by
     62     time of execution.
     63   * A job history for all selected DUTs showing only the history
     64     surrounding the DUT's last change from working to broken,
     65     or vice versa.
     66 
     67 The default format depends on whether hosts are filtered by
     68 status:
     69   * With the --working or --broken options, the list of host names
     70     is the default format.
     71   * Without those options, the default format is the one-line status
     72     summary.
     73 
     74 These options override the default formats:
     75   -o/--oneline - Use the one-line summary with the --working or
     76       --broken options.
     77   -f/--full_history - Print detailed per-host job history.
     78   -g/--diagnosis - Print the job history surrounding a status
     79       change.
     80 
     81 Examples
     82 ~~~~~~~~
     83     $ dut_status chromeos2-row4-rack2-host12
     84     hostname                     S   last checked         URL
     85     chromeos2-row4-rack2-host12  NO  2014-11-06 15:25:29  http://...
     86 
     87 'NO' means the DUT is broken.  That diagnosis is based on a job that
     88 failed:  'last checked' is the time of the failed job, and the URL
     89 points to the job's logs.
     90 
     91     $ dut_status.py -u '2014-11-06 15:30:00' -d 1 -f chromeos2-row4-rack2-host12
     92     chromeos2-row4-rack2-host12
     93         2014-11-06 15:25:29  NO http://...
     94         2014-11-06 14:44:07  -- http://...
     95         2014-11-06 14:42:56  OK http://...
     96 
     97 The times are the start times of the jobs; the URL points to the
     98 job's logs.  The status indicates the working or broken status after
     99 the job:
    100   'NO' Indicates that the DUT was believed broken after the job.
    101   'OK' Indicates that the DUT was believed working after the job.
    102   '--' Indicates that the job probably didn't change the DUT's
    103        status.
    104 Typically, logs of the actual failure will be found at the last job
    105 to report 'OK', or the first job to report '--'.
    106 
    107 """
    108 
    109 import argparse
    110 import sys
    111 import time
    112 
    113 import common
    114 from autotest_lib.client.common_lib import time_utils
    115 from autotest_lib.server import frontend
    116 from autotest_lib.server.lib import status_history
    117 from autotest_lib.site_utils import lab_inventory
    118 
# The fully qualified name makes for lines that are too long, so
# shorten it locally.
HostJobHistory = status_history.HostJobHistory

# _DIAGNOSIS_IDS -
#     Dictionary mapping the diagnosis codes defined in
#     `status_history` to the two-character strings printed in the
#     status column of this command's output.

_DIAGNOSIS_IDS = {
    status_history.UNUSED: '??',
    status_history.UNKNOWN: '--',
    status_history.WORKING: 'OK',
    status_history.BROKEN: 'NO'
}


# Default length of the search interval, in hours, for the --duration
# option when a value isn't specified on the command line.
_DEFAULT_DURATION = 24
    137 
    138 
    139 def _include_status(status, arguments):
    140     """Determine whether the given status should be filtered.
    141 
    142     Checks the given `status` against the command line options in
    143     `arguments`.  Return whether a host with that status should be
    144     printed based on the options.
    145 
    146     @param status Status of a host to be printed or skipped.
    147     @param arguments Parsed arguments object as returned by
    148                      ArgumentParser.parse_args().
    149 
    150     @return Returns `True` if the command-line options call for
    151             printing hosts with the status, or `False` otherwise.
    152 
    153     """
    154     if status == status_history.WORKING:
    155         return arguments.working
    156     else:
    157         return arguments.broken
    158 
    159 
    160 def _print_host_summaries(history_list, arguments):
    161     """Print one-line summaries of host history.
    162 
    163     This function handles the output format of the --oneline option.
    164 
    165     @param history_list A list of HostHistory objects to be printed.
    166     @param arguments    Parsed arguments object as returned by
    167                         ArgumentParser.parse_args().
    168 
    169     """
    170     fmt = '%-30s %-2s  %-19s  %s'
    171     print fmt % ('hostname', 'S', 'last checked', 'URL')
    172     for history in history_list:
    173         status, event = history.last_diagnosis()
    174         if not _include_status(status, arguments):
    175             continue
    176         datestr = '---'
    177         url = '---'
    178         if event is not None:
    179             datestr = time_utils.epoch_time_to_date_string(
    180                     event.start_time)
    181             url = event.job_url
    182 
    183         print fmt % (history.hostname,
    184                      _DIAGNOSIS_IDS[status],
    185                      datestr,
    186                      url)
    187 
    188 
    189 def _print_event_summary(event):
    190     """Print a one-line summary of a job or special task."""
    191     start_time = time_utils.epoch_time_to_date_string(
    192             event.start_time)
    193     print '    %s  %s %s' % (
    194             start_time,
    195             _DIAGNOSIS_IDS[event.diagnosis],
    196             event.job_url)
    197 
    198 
    199 def _print_hosts(history_list, arguments):
    200     """Print hosts, optionally with a job history.
    201 
    202     This function handles both the default format for --working
    203     and --broken options, as well as the output for the
    204     --full_history and --diagnosis options.  The `arguments`
    205     parameter determines the format to use.
    206 
    207     @param history_list A list of HostHistory objects to be printed.
    208     @param arguments    Parsed arguments object as returned by
    209                         ArgumentParser.parse_args().
    210 
    211     """
    212     for history in history_list:
    213         status, _ = history.last_diagnosis()
    214         if not _include_status(status, arguments):
    215             continue
    216         print history.hostname
    217         if arguments.full_history:
    218             for event in history:
    219                 _print_event_summary(event)
    220         elif arguments.diagnosis:
    221             for event in history.diagnosis_interval():
    222                 _print_event_summary(event)
    223 
    224 
    225 def _validate_time_range(arguments):
    226     """Validate the time range requested on the command line.
    227 
    228     Enforces the rules for the --until, --since, and --duration
    229     options are followed, and calculates defaults:
    230       * It isn't allowed to supply all three options.
    231       * If only two options are supplied, they completely determine
    232         the time interval.
    233       * If only one option is supplied, or no options, then apply
    234         specified defaults to the arguments object.
    235 
    236     @param arguments Parsed arguments object as returned by
    237                      ArgumentParser.parse_args().
    238 
    239     """
    240     if (arguments.duration is not None and
    241             arguments.since is not None and arguments.until is not None):
    242         print >>sys.stderr, ('FATAL: Can specify at most two of '
    243                              '--since, --until, and --duration')
    244         sys.exit(1)
    245     if (arguments.until is None and (arguments.since is None or
    246                                      arguments.duration is None)):
    247         arguments.until = int(time.time())
    248     if arguments.since is None:
    249         if arguments.duration is None:
    250             arguments.duration = _DEFAULT_DURATION
    251         arguments.since = (arguments.until -
    252                            arguments.duration * 60 * 60)
    253     elif arguments.until is None:
    254         arguments.until = (arguments.since +
    255                            arguments.duration * 60 * 60)
    256 
    257 
    258 def _get_host_histories(afe, arguments):
    259     """Return HostJobHistory objects for the requested hosts.
    260 
    261     Checks that individual hosts specified on the command line are
    262     valid.  Invalid hosts generate a warning message, and are
    263     omitted from futher processing.
    264 
    265     The return value is a list of HostJobHistory objects for the
    266     valid requested hostnames, using the time range supplied on the
    267     command line.
    268 
    269     @param afe       Autotest frontend
    270     @param arguments Parsed arguments object as returned by
    271                      ArgumentParser.parse_args().
    272     @return List of HostJobHistory objects for the hosts requested
    273             on the command line.
    274 
    275     """
    276     histories = []
    277     saw_error = False
    278     for hostname in arguments.hostnames:
    279         try:
    280             h = HostJobHistory.get_host_history(
    281                     afe, hostname, arguments.since, arguments.until)
    282             histories.append(h)
    283         except:
    284             print >>sys.stderr, ('WARNING: Ignoring unknown host %s' %
    285                                   hostname)
    286             saw_error = True
    287     if saw_error:
    288         # Create separation from the output that follows
    289         print >>sys.stderr
    290     return histories
    291 
    292 
    293 def _validate_host_list(afe, arguments):
    294     """Validate the user-specified list of hosts.
    295 
    296     Hosts may be specified implicitly with --board or --pool, or
    297     explictly as command line arguments.  This enforces these
    298     rules:
    299       * If --board or --pool, or both are specified, individual
    300         hosts may not be specified.
    301       * However specified, there must be at least one host.
    302 
    303     The return value is a list of HostJobHistory objects for the
    304     requested hosts, using the time range supplied on the command
    305     line.
    306 
    307     @param afe       Autotest frontend
    308     @param arguments Parsed arguments object as returned by
    309                      ArgumentParser.parse_args().
    310     @return List of HostJobHistory objects for the hosts requested
    311             on the command line.
    312 
    313     """
    314     if arguments.board or arguments.pool:
    315         if arguments.hostnames:
    316             print >>sys.stderr, ('FATAL: Hostname arguments provided '
    317                                  'with --board or --pool')
    318             sys.exit(1)
    319         histories = HostJobHistory.get_multiple_histories(
    320                 afe, arguments.since, arguments.until,
    321                 board=arguments.board, pool=arguments.pool)
    322     else:
    323         histories = _get_host_histories(afe, arguments)
    324     if not histories:
    325         print >>sys.stderr, 'FATAL: no valid hosts found'
    326         sys.exit(1)
    327     return histories
    328 
    329 
    330 def _validate_format_options(arguments):
    331     """Check the options for what output format to use.
    332 
    333     Enforce these rules:
    334       * If neither --broken nor --working was used, then --oneline
    335         becomes the selected format.
    336       * If neither --broken nor --working was used, included both
    337         working and broken DUTs.
    338 
    339     @param arguments Parsed arguments object as returned by
    340                      ArgumentParser.parse_args().
    341 
    342     """
    343     if (not arguments.oneline and not arguments.diagnosis and
    344             not arguments.full_history):
    345         arguments.oneline = (not arguments.working and
    346                              not arguments.broken)
    347     if not arguments.working and not arguments.broken:
    348         arguments.working = True
    349         arguments.broken = True
    350 
    351 
    352 def _validate_command(afe, arguments):
    353     """Check that the command's arguments are valid.
    354 
    355     This performs command line checking to enforce command line
    356     rules that ArgumentParser can't handle.  Additionally, this
    357     handles calculation of default arguments/options when a simple
    358     constant default won't do.
    359 
    360     Areas checked:
    361       * Check that a valid time range was provided, supplying
    362         defaults as necessary.
    363       * Identify invalid host names.
    364 
    365     @param afe       Autotest frontend
    366     @param arguments Parsed arguments object as returned by
    367                      ArgumentParser.parse_args().
    368     @return List of HostJobHistory objects for the hosts requested
    369             on the command line.
    370 
    371     """
    372     _validate_time_range(arguments)
    373     _validate_format_options(arguments)
    374     return _validate_host_list(afe, arguments)
    375 
    376 
    377 def _parse_command(argv):
    378     """Parse the command line arguments.
    379 
    380     Create an argument parser for this command's syntax, parse the
    381     command line, and return the result of the ArgumentParser
    382     parse_args() method.
    383 
    384     @param argv Standard command line argument vector; argv[0] is
    385                 assumed to be the command name.
    386     @return Result returned by ArgumentParser.parse_args().
    387 
    388     """
    389     parser = argparse.ArgumentParser(
    390             prog=argv[0],
    391             description='Report DUT status and execution history',
    392             epilog='You can specify one or two of --since, --until, '
    393                    'and --duration, but not all three.\n'
    394                    'The date/time format is "YYYY-MM-DD HH:MM:SS".')
    395     parser.add_argument('-s', '--since', type=status_history.parse_time,
    396                         metavar='DATE/TIME',
    397                         help='starting time for history display')
    398     parser.add_argument('-u', '--until', type=status_history.parse_time,
    399                         metavar='DATE/TIME',
    400                         help='ending time for history display'
    401                              ' (default: now)')
    402     parser.add_argument('-d', '--duration', type=int,
    403                         metavar='HOURS',
    404                         help='number of hours of history to display'
    405                              ' (default: %d)' % _DEFAULT_DURATION)
    406 
    407     format_group = parser.add_mutually_exclusive_group()
    408     format_group.add_argument('-f', '--full_history', action='store_true',
    409                               help='Display host history from most '
    410                                    'to least recent for each DUT')
    411     format_group.add_argument('-g', '--diagnosis', action='store_true',
    412                               help='Display host history for the '
    413                                    'most recent DUT status change')
    414     format_group.add_argument('-o', '--oneline', action='store_true',
    415                               help='Display host status summary')
    416 
    417     parser.add_argument('-w', '--working', action='store_true',
    418                         help='List working devices by name only')
    419     parser.add_argument('-n', '--broken', action='store_true',
    420                         help='List non-working devices by name only')
    421 
    422     parser.add_argument('-b', '--board',
    423                         help='Display history for all DUTs '
    424                              'of the given board')
    425     parser.add_argument('-p', '--pool',
    426                         help='Display history for all DUTs '
    427                              'in the given pool. You might '
    428                              'be interested in the following pools: '
    429                              + ', '.join(lab_inventory.MANAGED_POOLS[:-1])
    430                              +', or '+ lab_inventory.MANAGED_POOLS[-1] +'.')
    431     parser.add_argument('hostnames',
    432                         nargs='*',
    433                         help='host names of DUTs to report on')
    434     parser.add_argument('--web',
    435                         help='Master autotest frontend hostname. If no value '
    436                              'is given, the one in global config will be used.',
    437                         default=None)
    438     arguments = parser.parse_args(argv[1:])
    439     return arguments
    440 
    441 
    442 def main(argv):
    443     """Standard main() for command line processing.
    444 
    445     @param argv Command line arguments (normally sys.argv).
    446 
    447     """
    448     arguments = _parse_command(argv)
    449     afe = frontend.AFE(server=arguments.web)
    450     history_list = _validate_command(afe, arguments)
    451     if arguments.oneline:
    452         _print_host_summaries(history_list, arguments)
    453     else:
    454         _print_hosts(history_list, arguments)
    455 
    456 
    457 if __name__ == '__main__':
    458     main(sys.argv)
    459