Home | History | Annotate | Download | only in server
      1 # Copyright 2007 Google Inc. Released under the GPL v2
      2 #pylint: disable-msg=C0111
      3 
      4 import glob
      5 import logging
      6 import os
      7 import re
      8 import sys
      9 import tempfile
     10 import time
     11 import traceback
     12 
     13 import common
     14 from autotest_lib.client.bin.result_tools import runner as result_tools_runner
     15 from autotest_lib.client.common_lib import autotemp
     16 from autotest_lib.client.common_lib import base_job
     17 from autotest_lib.client.common_lib import error
     18 from autotest_lib.client.common_lib import packages
     19 from autotest_lib.client.common_lib import global_config
     20 from autotest_lib.client.common_lib import utils as client_utils
     21 from autotest_lib.server import installable_object
     22 from autotest_lib.server import prebuild
     23 from autotest_lib.server import utils
     24 
     25 try:
     26     from chromite.lib import metrics
     27 except ImportError:
     28     metrics = client_utils.metrics_mock
     29 
     30 
     31 AUTOTEST_SVN = 'svn://test.kernel.org/autotest/trunk/client'
     32 AUTOTEST_HTTP = 'http://test.kernel.org/svn/autotest/trunk/client'
     33 
     34 CONFIG = global_config.global_config
     35 AUTOSERV_PREBUILD = CONFIG.get_config_value(
     36         'AUTOSERV', 'enable_server_prebuild', type=bool, default=False)
     37 
     38 ENABLE_RESULT_THROTTLING = CONFIG.get_config_value(
     39         'AUTOSERV', 'enable_result_throttling', type=bool, default=False)
     40 
     41 class AutodirNotFoundError(Exception):
     42     """No Autotest installation could be found."""
     43 
     44 
     45 class AutotestFailure(Exception):
     46     """Gereric exception class for failures during a test run."""
     47 
     48 
     49 class AutotestAbort(AutotestFailure):
     50     """
     51     AutotestAborts are thrown when the DUT seems fine,
     52     and the test doesn't give us an explicit reason for
     53     failure; In this case we have no choice but to abort.
     54     """
     55 
     56 
     57 class AutotestDeviceError(AutotestFailure):
     58     """
     59     Exceptions that inherit from AutotestDeviceError
     60     are thrown when we can determine the current
     61     state of the DUT and conclude that it probably
     62     lead to the test failing; these exceptions lead
     63     to failures instead of aborts.
     64     """
     65 
     66 
     67 class AutotestDeviceNotPingable(AutotestDeviceError):
     68     """Error for when a DUT becomes unpingable."""
     69 
     70 
     71 class AutotestDeviceNotSSHable(AutotestDeviceError):
     72     """Error for when a DUT is pingable but not SSHable."""
     73 
     74 
     75 class AutotestDeviceRebooted(AutotestDeviceError):
     76     """Error for when a DUT rebooted unexpectedly."""
     77 
     78 
     79 class BaseAutotest(installable_object.InstallableObject):
     80     """
     81     This class represents the Autotest program.
     82 
     83     Autotest is used to run tests automatically and collect the results.
     84     It also supports profilers.
     85 
     86     Implementation details:
     87     This is a leaf class in an abstract class hierarchy, it must
     88     implement the unimplemented methods in parent classes.
     89     """
     90 
     91     def __init__(self, host=None):
     92         self.host = host
     93         self.got = False
     94         self.installed = False
     95         self.serverdir = utils.get_server_dir()
     96         super(BaseAutotest, self).__init__()
     97 
     98 
     99     install_in_tmpdir = False
    100     @classmethod
    101     def set_install_in_tmpdir(cls, flag):
    102         """ Sets a flag that controls whether or not Autotest should by
    103         default be installed in a "standard" directory (e.g.
    104         /home/autotest, /usr/local/autotest) or a temporary directory. """
    105         cls.install_in_tmpdir = flag
    106 
    107 
    108     @classmethod
    109     def get_client_autodir_paths(cls, host):
    110         return global_config.global_config.get_config_value(
    111                 'AUTOSERV', 'client_autodir_paths', type=list)
    112 
    113 
    114     @classmethod
    115     def get_installed_autodir(cls, host):
    116         """
    117         Find where the Autotest client is installed on the host.
    118         @returns an absolute path to an installed Autotest client root.
    119         @raises AutodirNotFoundError if no Autotest installation can be found.
    120         """
    121         autodir = host.get_autodir()
    122         if autodir:
    123             logging.debug('Using existing host autodir: %s', autodir)
    124             return autodir
    125 
    126         for path in Autotest.get_client_autodir_paths(host):
    127             try:
    128                 autotest_binary = os.path.join(path, 'bin', 'autotest')
    129                 host.run('test -x %s' % utils.sh_escape(autotest_binary))
    130                 host.run('test -w %s' % utils.sh_escape(path))
    131                 logging.debug('Found existing autodir at %s', path)
    132                 return path
    133             except error.GenericHostRunError:
    134                 logging.debug('%s does not exist on %s', autotest_binary,
    135                               host.hostname)
    136         raise AutodirNotFoundError
    137 
    138 
    139     @classmethod
    140     def get_install_dir(cls, host):
    141         """
    142         Determines the location where autotest should be installed on
    143         host. If self.install_in_tmpdir is set, it will return a unique
    144         temporary directory that autotest can be installed in. Otherwise, looks
    145         for an existing installation to use; if none is found, looks for a
    146         usable directory in the global config client_autodir_paths.
    147         """
    148         try:
    149             install_dir = cls.get_installed_autodir(host)
    150         except AutodirNotFoundError:
    151             install_dir = cls._find_installable_dir(host)
    152 
    153         if cls.install_in_tmpdir:
    154             return host.get_tmp_dir(parent=install_dir)
    155         return install_dir
    156 
    157 
    158     @classmethod
    159     def _find_installable_dir(cls, host):
    160         client_autodir_paths = cls.get_client_autodir_paths(host)
    161         for path in client_autodir_paths:
    162             try:
    163                 host.run('mkdir -p %s' % utils.sh_escape(path))
    164                 host.run('test -w %s' % utils.sh_escape(path))
    165                 return path
    166             except error.AutoservRunError:
    167                 logging.debug('Failed to create %s', path)
    168         metrics.Counter(
    169             'chromeos/autotest/errors/no_autotest_install_path').increment(
    170                 fields={'dut_host_name': host.hostname})
    171         raise error.AutoservInstallError(
    172                 'Unable to find a place to install Autotest; tried %s' %
    173                 ', '.join(client_autodir_paths))
    174 
    175 
    176     def get_fetch_location(self):
    177         c = global_config.global_config
    178         repos = c.get_config_value("PACKAGES", 'fetch_location', type=list,
    179                                    default=[])
    180         repos.reverse()
    181         return repos
    182 
    183 
    184     def install(self, host=None, autodir=None, use_packaging=True):
    185         self._install(host=host, autodir=autodir, use_packaging=use_packaging)
    186 
    187 
    188     def install_full_client(self, host=None, autodir=None):
    189         self._install(host=host, autodir=autodir, use_autoserv=False,
    190                       use_packaging=False)
    191 
    192 
    193     def install_no_autoserv(self, host=None, autodir=None):
    194         self._install(host=host, autodir=autodir, use_autoserv=False)
    195 
    196 
    197     def _install_using_packaging(self, host, autodir):
    198         repos = self.get_fetch_location()
    199         if not repos:
    200             raise error.PackageInstallError("No repos to install an "
    201                                             "autotest client from")
    202         pkgmgr = packages.PackageManager(autodir, hostname=host.hostname,
    203                                          repo_urls=repos,
    204                                          do_locking=False,
    205                                          run_function=host.run,
    206                                          run_function_dargs=dict(timeout=600))
    207         # The packages dir is used to store all the packages that
    208         # are fetched on that client. (for the tests,deps etc.
    209         # too apart from the client)
    210         pkg_dir = os.path.join(autodir, 'packages')
    211         # clean up the autodir except for the packages directory
    212         host.run('cd %s && ls | grep -v "^packages$"'
    213                  ' | xargs rm -rf && rm -rf .[!.]*' % autodir)
    214         pkgmgr.install_pkg('autotest', 'client', pkg_dir, autodir,
    215                            preserve_install_dir=True)
    216         self.installed = True
    217 
    218 
    219     def _install_using_send_file(self, host, autodir):
    220         dirs_to_exclude = set(["tests", "site_tests", "deps", "profilers",
    221                                "packages"])
    222         light_files = [os.path.join(self.source_material, f)
    223                        for f in os.listdir(self.source_material)
    224                        if f not in dirs_to_exclude]
    225         host.send_file(light_files, autodir, delete_dest=True)
    226 
    227         # create empty dirs for all the stuff we excluded
    228         commands = []
    229         for path in dirs_to_exclude:
    230             abs_path = os.path.join(autodir, path)
    231             abs_path = utils.sh_escape(abs_path)
    232             commands.append("mkdir -p '%s'" % abs_path)
    233             commands.append("touch '%s'/__init__.py" % abs_path)
    234         host.run(';'.join(commands))
    235 
    236 
    237     def _install(self, host=None, autodir=None, use_autoserv=True,
    238                  use_packaging=True):
    239         """
    240         Install autotest.  If get() was not called previously, an
    241         attempt will be made to install from the autotest svn
    242         repository.
    243 
    244         @param host A Host instance on which autotest will be installed
    245         @param autodir Location on the remote host to install to
    246         @param use_autoserv Enable install modes that depend on the client
    247             running with the autoserv harness
    248         @param use_packaging Enable install modes that use the packaging system
    249 
    250         @exception AutoservError if a tarball was not specified and
    251             the target host does not have svn installed in its path
    252         """
    253         if not host:
    254             host = self.host
    255         if not self.got:
    256             self.get()
    257         host.wait_up(timeout=30)
    258         host.setup()
    259         logging.info("Installing autotest on %s", host.hostname)
    260 
    261         # set up the autotest directory on the remote machine
    262         if not autodir:
    263             autodir = self.get_install_dir(host)
    264         logging.info('Using installation dir %s', autodir)
    265         host.set_autodir(autodir)
    266         host.run('mkdir -p %s' % utils.sh_escape(autodir))
    267 
    268         # make sure there are no files in $AUTODIR/results
    269         results_path = os.path.join(autodir, 'results')
    270         host.run('rm -rf %s/*' % utils.sh_escape(results_path),
    271                  ignore_status=True)
    272 
    273         # Fetch the autotest client from the nearest repository
    274         if use_packaging:
    275             try:
    276                 self._install_using_packaging(host, autodir)
    277                 logging.info("Installation of autotest completed using the "
    278                              "packaging system.")
    279                 return
    280             except (error.PackageInstallError, error.AutoservRunError,
    281                     global_config.ConfigError), e:
    282                 logging.info("Could not install autotest using the packaging "
    283                              "system: %s. Trying other methods", e)
    284         else:
    285             # Delete the package checksum file to force dut updating local
    286             # packages.
    287             command = ('rm -f "%s"' %
    288                        (os.path.join(autodir, packages.CHECKSUM_FILE)))
    289             host.run(command)
    290 
    291         # try to install from file or directory
    292         if self.source_material:
    293             c = global_config.global_config
    294             supports_autoserv_packaging = c.get_config_value(
    295                 "PACKAGES", "serve_packages_from_autoserv", type=bool)
    296             # Copy autotest recursively
    297             if supports_autoserv_packaging and use_autoserv:
    298                 self._install_using_send_file(host, autodir)
    299             else:
    300                 host.send_file(self.source_material, autodir, delete_dest=True)
    301             logging.info("Installation of autotest completed from %s",
    302                          self.source_material)
    303             self.installed = True
    304             return
    305 
    306         # if that fails try to install using svn
    307         if utils.run('which svn').exit_status:
    308             raise error.AutoservError('svn not found on target machine: %s' %
    309                                       host.hostname)
    310         try:
    311             host.run('svn checkout %s %s' % (AUTOTEST_SVN, autodir))
    312         except error.AutoservRunError, e:
    313             host.run('svn checkout %s %s' % (AUTOTEST_HTTP, autodir))
    314         logging.info("Installation of autotest completed using SVN.")
    315         self.installed = True
    316 
    317 
    318     def uninstall(self, host=None):
    319         """
    320         Uninstall (i.e. delete) autotest. Removes the autotest client install
    321         from the specified host.
    322 
    323         @params host a Host instance from which the client will be removed
    324         """
    325         if not self.installed:
    326             return
    327         if not host:
    328             host = self.host
    329         autodir = host.get_autodir()
    330         if not autodir:
    331             return
    332 
    333         # perform the actual uninstall
    334         host.run("rm -rf %s" % utils.sh_escape(autodir), ignore_status=True)
    335         host.set_autodir(None)
    336         self.installed = False
    337 
    338 
    339     def get(self, location=None):
    340         if not location:
    341             location = os.path.join(self.serverdir, '../client')
    342             location = os.path.abspath(location)
    343         # If there's stuff run on our client directory already, it
    344         # can cause problems. Try giving it a quick clean first.
    345         cwd = os.getcwd()
    346         os.chdir(location)
    347         try:
    348             utils.system('tools/make_clean', ignore_status=True)
    349         finally:
    350             os.chdir(cwd)
    351         super(BaseAutotest, self).get(location)
    352         self.got = True
    353 
    354 
    355     def run(self, control_file, results_dir='.', host=None, timeout=None,
    356             tag=None, parallel_flag=False, client_disconnect_timeout=None,
    357             use_packaging=True):
    358         """
    359         Run an autotest job on the remote machine.
    360 
    361         @param control_file: An open file-like-obj of the control file.
    362         @param results_dir: A str path where the results should be stored
    363                 on the local filesystem.
    364         @param host: A Host instance on which the control file should
    365                 be run.
    366         @param timeout: Maximum number of seconds to wait for the run or None.
    367         @param tag: Tag name for the client side instance of autotest.
    368         @param parallel_flag: Flag set when multiple jobs are run at the
    369                 same time.
    370         @param client_disconnect_timeout: Seconds to wait for the remote host
    371                 to come back after a reboot. Defaults to the host setting for
    372                 DEFAULT_REBOOT_TIMEOUT.
    373 
    374         @raises AutotestRunError: If there is a problem executing
    375                 the control file.
    376         """
    377         host = self._get_host_and_setup(host, use_packaging=use_packaging)
    378         logging.debug('Autotest job starts on remote host: %s',
    379                       host.hostname)
    380         results_dir = os.path.abspath(results_dir)
    381 
    382         if client_disconnect_timeout is None:
    383             client_disconnect_timeout = host.DEFAULT_REBOOT_TIMEOUT
    384 
    385         if tag:
    386             results_dir = os.path.join(results_dir, tag)
    387 
    388         atrun = _Run(host, results_dir, tag, parallel_flag)
    389         self._do_run(control_file, results_dir, host, atrun, timeout,
    390                      client_disconnect_timeout, use_packaging=use_packaging)
    391 
    392 
    393     def _get_host_and_setup(self, host, use_packaging=True):
    394         if not host:
    395             host = self.host
    396         if not self.installed:
    397             self.install(host, use_packaging=use_packaging)
    398 
    399         host.wait_up(timeout=30)
    400         return host
    401 
    402 
    403     def _do_run(self, control_file, results_dir, host, atrun, timeout,
    404                 client_disconnect_timeout, use_packaging=True):
    405         try:
    406             atrun.verify_machine()
    407         except:
    408             logging.error("Verify failed on %s. Reinstalling autotest",
    409                           host.hostname)
    410             self.install(host)
    411             atrun.verify_machine()
    412         debug = os.path.join(results_dir, 'debug')
    413         try:
    414             os.makedirs(debug)
    415         except Exception:
    416             pass
    417 
    418         delete_file_list = [atrun.remote_control_file,
    419                             atrun.remote_control_file + '.state',
    420                             atrun.manual_control_file,
    421                             atrun.manual_control_file + '.state']
    422         cmd = ';'.join('rm -f ' + control for control in delete_file_list)
    423         host.run(cmd, ignore_status=True)
    424 
    425         tmppath = utils.get(control_file, local_copy=True)
    426 
    427         # build up the initialization prologue for the control file
    428         prologue_lines = []
    429 
    430         # Add the additional user arguments
    431         prologue_lines.append("args = %r\n" % self.job.args)
    432 
    433         # If the packaging system is being used, add the repository list.
    434         repos = None
    435         try:
    436             if use_packaging:
    437                 repos = self.get_fetch_location()
    438                 prologue_lines.append('job.add_repository(%s)\n' % repos)
    439             else:
    440                 logging.debug('use_packaging is set to False, do not add any '
    441                               'repository.')
    442         except global_config.ConfigError, e:
    443             # If repos is defined packaging is enabled so log the error
    444             if repos:
    445                 logging.error(e)
    446 
    447         # on full-size installs, turn on any profilers the server is using
    448         running_profilers = host.job.profilers.add_log.iteritems()
    449         for profiler, (args, dargs) in running_profilers:
    450             call_args = [repr(profiler)]
    451             call_args += [repr(arg) for arg in args]
    452             call_args += ["%s=%r" % item for item in dargs.iteritems()]
    453             prologue_lines.append("job.profilers.add(%s)\n"
    454                                     % ", ".join(call_args))
    455         cfile = "".join(prologue_lines)
    456 
    457         cfile += open(tmppath).read()
    458         open(tmppath, "w").write(cfile)
    459 
    460         # Create and copy state file to remote_control_file + '.state'
    461         state_file = host.job.preprocess_client_state()
    462         host.send_file(state_file, atrun.remote_control_file + '.init.state')
    463         os.remove(state_file)
    464 
    465         # Copy control_file to remote_control_file on the host
    466         host.send_file(tmppath, atrun.remote_control_file)
    467         if os.path.abspath(tmppath) != os.path.abspath(control_file):
    468             os.remove(tmppath)
    469 
    470         atrun.execute_control(
    471                 timeout=timeout,
    472                 client_disconnect_timeout=client_disconnect_timeout)
    473 
    474 
    475     @classmethod
    476     def _check_client_test_result(cls, host, test_name):
    477         """
    478         Check result of client test.
    479         Autotest will store results in the file name status.
    480         We check that second to last line in that file begins with 'END GOOD'
    481 
    482         @raises TestFail: If client test does not pass.
    483         """
    484         client_result_dir = '%s/results/default' % host.autodir
    485         command = 'tail -2 %s/status | head -1' % client_result_dir
    486         status = host.run(command).stdout.strip()
    487         logging.info(status)
    488         if status[:8] != 'END GOOD':
    489             raise error.TestFail('%s client test did not pass.' % test_name)
    490 
    491 
    492     def run_timed_test(self, test_name, results_dir='.', host=None,
    493                        timeout=None, parallel_flag=False,
    494                        client_disconnect_timeout=None, *args, **dargs):
    495         """
    496         Assemble a tiny little control file to just run one test,
    497         and run it as an autotest client-side test
    498         """
    499         if not host:
    500             host = self.host
    501         if not self.installed:
    502             self.install(host)
    503         opts = ["%s=%s" % (o[0], repr(o[1])) for o in dargs.items()]
    504         cmd = ", ".join([repr(test_name)] + map(repr, args) + opts)
    505         control = "job.run_test(%s)\n" % cmd
    506         self.run(control, results_dir, host, timeout=timeout,
    507                  parallel_flag=parallel_flag,
    508                  client_disconnect_timeout=client_disconnect_timeout)
    509 
    510         if dargs.get('check_client_result', False):
    511             self._check_client_test_result(host, test_name)
    512 
    513 
    514     def run_test(self, test_name, results_dir='.', host=None,
    515                  parallel_flag=False,
    516                  client_disconnect_timeout=None, *args, **dargs):
    517         self.run_timed_test(test_name, results_dir, host, timeout=None,
    518                             parallel_flag=parallel_flag,
    519                             client_disconnect_timeout=client_disconnect_timeout,
    520                             *args, **dargs)
    521 
    522 
    523 class _BaseRun(object):
    524     """
    525     Represents a run of autotest control file.  This class maintains
    526     all the state necessary as an autotest control file is executed.
    527 
    528     It is not intended to be used directly, rather control files
    529     should be run using the run method in Autotest.
    530     """
    531     def __init__(self, host, results_dir, tag, parallel_flag):
    532         self.host = host
    533         self.results_dir = results_dir
    534         self.env = host.env
    535         self.tag = tag
    536         self.parallel_flag = parallel_flag
    537         self.autodir = Autotest.get_installed_autodir(self.host)
    538         control = os.path.join(self.autodir, 'control')
    539         if tag:
    540             control += '.' + tag
    541         self.manual_control_file = control
    542         self.remote_control_file = control + '.autoserv'
    543         self.config_file = os.path.join(self.autodir, 'global_config.ini')
    544 
    545 
    546     def verify_machine(self):
    547         binary = os.path.join(self.autodir, 'bin/autotest')
    548         try:
    549             self.host.run('ls %s > /dev/null 2>&1' % binary)
    550         except:
    551             raise error.AutoservInstallError(
    552                 "Autotest does not appear to be installed")
    553 
    554         if not self.parallel_flag:
    555             tmpdir = os.path.join(self.autodir, 'tmp')
    556             download = os.path.join(self.autodir, 'tests/download')
    557             self.host.run('umount %s' % tmpdir, ignore_status=True)
    558             self.host.run('umount %s' % download, ignore_status=True)
    559 
    560 
    561     def get_base_cmd_args(self):
    562         args = ['--verbose']
    563         if self.tag:
    564             args.append('-t %s' % self.tag)
    565         if self.host.job.use_external_logging():
    566             args.append('-l')
    567         if self.host.hostname:
    568             args.append('--hostname=%s' % self.host.hostname)
    569         args.append('--user=%s' % self.host.job.user)
    570 
    571         args.append(self.remote_control_file)
    572         return args
    573 
    574 
    575     def get_daemon_cmd(self, monitor_dir):
    576         cmd = ['nohup', os.path.join(self.autodir, 'bin/autotestd'),
    577                monitor_dir, '-H autoserv']
    578         cmd += self.get_base_cmd_args()
    579         cmd += ['>/dev/null', '2>/dev/null', '&']
    580         return ' '.join(cmd)
    581 
    582 
    583     def get_monitor_cmd(self, monitor_dir, stdout_read, stderr_read):
    584         cmd = [os.path.join(self.autodir, 'bin', 'autotestd_monitor'),
    585                monitor_dir, str(stdout_read), str(stderr_read)]
    586         return ' '.join(cmd)
    587 
    588 
    589     def get_client_log(self):
    590         """Find what the "next" client.* prefix should be
    591 
    592         @returns A string of the form client.INTEGER that should be prefixed
    593             to all client debug log files.
    594         """
    595         max_digit = -1
    596         debug_dir = os.path.join(self.results_dir, 'debug')
    597         client_logs = glob.glob(os.path.join(debug_dir, 'client.*.*'))
    598         for log in client_logs:
    599             _, number, _ = log.split('.', 2)
    600             if number.isdigit():
    601                 max_digit = max(max_digit, int(number))
    602         return 'client.%d' % (max_digit + 1)
    603 
    604 
    605     def copy_client_config_file(self, client_log_prefix=None):
    606         """
    607         Create and copy the client config file based on the server config.
    608 
    609         @param client_log_prefix: Optional prefix to prepend to log files.
    610         """
    611         client_config_file = self._create_client_config_file(client_log_prefix)
    612         self.host.send_file(client_config_file, self.config_file)
    613         os.remove(client_config_file)
    614 
    615 
    616     def _create_client_config_file(self, client_log_prefix=None):
    617         """
    618         Create a temporary file with the [CLIENT] section configuration values
    619         taken from the server global_config.ini.
    620 
    621         @param client_log_prefix: Optional prefix to prepend to log files.
    622 
    623         @return: Path of the temporary file generated.
    624         """
    625         config = global_config.global_config.get_section_values('CLIENT')
    626         if client_log_prefix:
    627             config.set('CLIENT', 'default_logging_name', client_log_prefix)
    628         return self._create_aux_file(config.write)
    629 
    630 
    631     def _create_aux_file(self, func, *args):
    632         """
    633         Creates a temporary file and writes content to it according to a
    634         content creation function. The file object is appended to *args, which
    635         is then passed to the content creation function
    636 
    637         @param func: Function that will be used to write content to the
    638                 temporary file.
    639         @param *args: List of parameters that func takes.
    640         @return: Path to the temporary file that was created.
    641         """
    642         fd, path = tempfile.mkstemp(dir=self.host.job.tmpdir)
    643         aux_file = os.fdopen(fd, "w")
    644         try:
    645             list_args = list(args)
    646             list_args.append(aux_file)
    647             func(*list_args)
    648         finally:
    649             aux_file.close()
    650         return path
    651 
    652 
    653     @staticmethod
    654     def is_client_job_finished(last_line):
    655         return bool(re.match(r'^END .*\t----\t----\t.*$', last_line))
    656 
    657 
    658     @staticmethod
    659     def is_client_job_rebooting(last_line):
    660         return bool(re.match(r'^\t*GOOD\t----\treboot\.start.*$', last_line))
    661 
    662 
    663     def _diagnose_dut(self, old_boot_id=None):
    664         """
    665         Run diagnostic checks on a DUT.
    666 
    667         1. ping: A dead host will not respond to pings.
    668         2. ssh (happens with 3.): DUT hangs usually fail in authentication
    669             but respond to pings.
    670         3. Check if a reboot occured: A healthy but unexpected reboot leaves the
    671             host running with a new boot id.
    672 
    673         This method will always raise an exception from the AutotestFailure
    674         family and should only get called when the reason for a test failing
    675         is ambiguous.
    676 
    677         @raises AutotestDeviceNotPingable: If the DUT doesn't respond to ping.
    678         @raises AutotestDeviceNotSSHable: If we cannot SSH into the DUT.
    679         @raises AutotestDeviceRebooted: If the boot id changed.
    680         @raises AutotestAbort: If none of the above exceptions were raised.
    681             Since we have no recourse we must abort at this stage.
    682         """
    683         msg = 'Autotest client terminated unexpectedly: '
    684         if utils.ping(self.host.hostname, tries=1, deadline=1) != 0:
    685             msg += 'DUT is no longer pingable, it may have rebooted or hung.\n'
    686             raise AutotestDeviceNotPingable(msg)
    687 
    688         if old_boot_id:
    689             try:
    690                 new_boot_id = self.host.get_boot_id(timeout=60)
    691             except Exception as e:
    692                 msg += ('DUT is pingable but not SSHable, it most likely'
    693                         ' sporadically rebooted during testing. %s\n' % str(e))
    694                 raise AutotestDeviceNotSSHable(msg)
    695             else:
    696                 if new_boot_id != old_boot_id:
    697                     msg += 'DUT rebooted during the test run.\n'
    698                     raise AutotestDeviceRebooted(msg)
    699 
    700             msg += ('DUT is pingable, SSHable and did NOT restart '
    701                     'un-expectedly. We probably lost connectivity during the '
    702                     'test.')
    703         else:
    704             msg += ('DUT is pingable, could not determine if an un-expected '
    705                     'reboot occured during the test.')
    706 
    707         raise AutotestAbort(msg)
    708 
    709 
    710     def log_unexpected_abort(self, stderr_redirector, old_boot_id=None):
    711         """
    712         Logs that something unexpected happened, then tries to diagnose the
    713         failure. The purpose of this function is only to close out the status
    714         log with the appropriate error message, not to critically terminate
    715         the program.
    716 
    717         @param stderr_redirector: log stream.
    718         @param old_boot_id: boot id used to infer if a reboot occured.
    719         """
    720         stderr_redirector.flush_all_buffers()
    721         try:
    722             self._diagnose_dut(old_boot_id)
    723         except AutotestFailure as e:
    724             self.host.job.record('END ABORT', None, None, str(e))
    725 
    726 
    727     @staticmethod
    728     def _strip_stderr_prologue(stderr):
    729         """Strips the 'standard' prologue that get pre-pended to every
    730         remote command and returns the text that was actually written to
    731         stderr by the remote command."""
    732         stderr_lines = stderr.split("\n")[1:]
    733         if not stderr_lines:
    734             return ""
    735         elif stderr_lines[0].startswith("NOTE: autotestd_monitor"):
    736             del stderr_lines[0]
    737         return "\n".join(stderr_lines)
    738 
    739 
    740     def _execute_daemon(self, timeout, stderr_redirector,
    741                         client_disconnect_timeout):
    742         monitor_dir = self.host.get_tmp_dir()
    743         daemon_cmd = self.get_daemon_cmd(monitor_dir)
    744 
    745         # grab the location for the server-side client log file
    746         client_log_prefix = self.get_client_log()
    747         client_log_path = os.path.join(self.results_dir, 'debug',
    748                                        client_log_prefix + '.log')
    749         client_log = open(client_log_path, 'w', 0)
    750         self.copy_client_config_file(client_log_prefix)
    751 
    752         stdout_read = stderr_read = 0
    753         self.host.job.push_execution_context(self.results_dir)
    754         try:
    755             self.host.run(daemon_cmd, ignore_status=True, timeout=timeout)
    756             disconnect_warnings = []
    757             while True:
    758                 monitor_cmd = self.get_monitor_cmd(monitor_dir, stdout_read,
    759                                                    stderr_read)
    760                 try:
    761                     result = self.host.run(monitor_cmd, ignore_status=True,
    762                                            timeout=timeout,
    763                                            stdout_tee=client_log,
    764                                            stderr_tee=stderr_redirector)
    765                 except error.AutoservRunError, e:
    766                     result = e.result_obj
    767                     result.exit_status = None
    768                     disconnect_warnings.append(e.description)
    769 
    770                     stderr_redirector.log_warning(
    771                         "Autotest client was disconnected: %s" % e.description,
    772                         "NETWORK")
    773                 except error.AutoservSSHTimeout:
    774                     result = utils.CmdResult(monitor_cmd, "", "", None, 0)
    775                     stderr_redirector.log_warning(
    776                         "Attempt to connect to Autotest client timed out",
    777                         "NETWORK")
    778 
    779                 stdout_read += len(result.stdout)
    780                 stderr_read += len(self._strip_stderr_prologue(result.stderr))
    781 
    782                 if result.exit_status is not None:
    783                     # TODO (crosbug.com/38224)- sbasi: Remove extra logging.
    784                     logging.debug('Result exit status is %d.',
    785                                   result.exit_status)
    786                     return result
    787                 elif not self.host.wait_up(client_disconnect_timeout):
    788                     raise error.AutoservSSHTimeout(
    789                         "client was disconnected, reconnect timed out")
    790         finally:
    791             client_log.close()
    792             self.host.job.pop_execution_context()
    793 
    794 
    795     def _really_execute_control(self, timeout, stderr_redirector,
    796                                 client_disconnect_timeout):
    797         logging.info("Executing %s/bin/autotest %s/controt",
    798                      self.autodir, self.autodir)
    799 
    800         result = self._execute_daemon(timeout, stderr_redirector,
    801                                         client_disconnect_timeout)
    802 
    803         last_line = stderr_redirector.last_line
    804 
    805         # check if we failed hard enough to warrant an exception
    806         if result.exit_status == 1:
    807             err = error.AutotestRunError("client job was aborted")
    808         elif not result.stderr:
    809             err = error.AutotestRunError(
    810                 "_really_execute_control failed to return anything\n"
    811                 "stdout:%s\n" % result.stdout)
    812         else:
    813             err = None
    814 
    815         # log something if the client failed AND never finished logging
    816         if err and not self.is_client_job_finished(last_line):
    817             self.log_unexpected_abort(stderr_redirector)
    818 
    819         if err:
    820             raise err
    821         else:
    822             return stderr_redirector.last_line
    823 
    824 
    825     def execute_control(self, timeout=None, client_disconnect_timeout=None):
    826         collector = log_collector(self.host, self.tag, self.results_dir)
    827         hostname = self.host.hostname
    828         remote_results = collector.client_results_dir
    829         local_results = collector.server_results_dir
    830         self.host.job.add_client_log(hostname, remote_results,
    831                                         local_results)
    832         job_record_context = self.host.job.get_record_context()
    833         logger = client_logger(self.host, self.tag, self.results_dir)
    834 
    835         try:
    836             boot_id = self.host.get_boot_id()
    837             last = self._really_execute_control(timeout, logger,
    838                                                 client_disconnect_timeout)
    839             if self.is_client_job_finished(last):
    840                 logging.info("Client complete")
    841                 return
    842             elif self.is_client_job_rebooting(last):
    843                 # TODO(crbug.com/684311) This feature is never used. Validate
    844                 # and drop this case.
    845                 m = 'chromeos/autotest/errors/client_test_triggered_reboot'
    846                 metrics.Counter(m).increment()
    847                 self.host.job.record("ABORT", None, "reboot",
    848                                      'client triggered reboot is unsupported')
    849                 self.host.job.record("END ABORT", None, None,
    850                                      'client triggered reboot is unsupported')
    851                 return
    852 
    853             # If a test fails without probable cause we try to bucket it's
    854             # failure into one of 2 categories. If we can determine the
    855             # current state of the device and it is suspicious, we close the
    856             # status lines indicating a failure. If we either cannot
    857             # determine the state of the device, or it appears totally
    858             # healthy, we give up and abort.
    859             try:
    860                 self._diagnose_dut(boot_id)
    861             except AutotestDeviceError as e:
    862                 # The status lines of the test are pretty much tailed to
    863                 # our log, with indentation, from the client job on the DUT.
    864                 # So if the DUT goes down unexpectedly we'll end up with a
    865                 # malformed status log unless we manually unwind the status
    866                 # stack. Ideally we would want to write a nice wrapper like
    867                 # server_job methods run_reboot, run_group but they expect
    868                 # reboots and we don't.
    869                 self.host.job.record('FAIL', None, None, str(e))
    870                 self.host.job.record('END FAIL', None, None)
    871                 self.host.job.record('END GOOD', None, None)
    872                 self.host.job.failed_with_device_error = True
    873                 return
    874             except AutotestAbort as e:
    875                 self.host.job.record('ABORT', None, None, str(e))
    876                 self.host.job.record('END ABORT', None, None)
    877 
    878                 # give the client machine a chance to recover from a crash
    879                 self.host.wait_up(
    880                     self.host.HOURS_TO_WAIT_FOR_RECOVERY * 3600)
    881                 logging.debug('Unexpected final status message from '
    882                                 'client %s: %s', self.host.hostname, last)
    883                 # The line 'last' may have sensitive phrases, like
    884                 # 'END GOOD', which breaks the tko parser. So the error
    885                 # message will exclude it, since it will be recorded to
    886                 # status.log.
    887                 msg = ("Aborting - unexpected final status message from "
    888                         "client on %s\n") % self.host.hostname
    889                 raise error.AutotestRunError(msg)
    890         finally:
    891             logging.debug('Autotest job finishes running. Below is the '
    892                           'post-processing operations.')
    893             logger.close()
    894             collector.collect_client_job_results()
    895             collector.remove_redundant_client_logs()
    896             state_file = os.path.basename(self.remote_control_file
    897                                             + '.state')
    898             state_path = os.path.join(self.results_dir, state_file)
    899             self.host.job.postprocess_client_state(state_path)
    900             self.host.job.remove_client_log(hostname, remote_results,
    901                                             local_results)
    902             job_record_context.restore()
    903 
    904             logging.debug('Autotest job finishes.')
    905 
    906         # should only get here if we timed out
    907         assert timeout
    908         raise error.AutotestTimeoutError()
    909 
    910 
    911 class log_collector(object):
    912     def __init__(self, host, client_tag, results_dir):
    913         self.host = host
    914         if not client_tag:
    915             client_tag = "default"
    916         self.client_results_dir = os.path.join(host.get_autodir(), "results",
    917                                                client_tag)
    918         self.server_results_dir = results_dir
    919 
    920 
    921     def collect_client_job_results(self):
    922         """ A method that collects all the current results of a running
    923         client job into the results dir. By default does nothing as no
    924         client job is running, but when running a client job you can override
    925         this with something that will actually do something. """
    926         # make an effort to wait for the machine to come up
    927         try:
    928             self.host.wait_up(timeout=30)
    929         except error.AutoservError:
    930             # don't worry about any errors, we'll try and
    931             # get the results anyway
    932             pass
    933 
    934         # Copy all dirs in default to results_dir
    935         try:
    936             # Build test result directory summary
    937             result_tools_runner.run_on_client(
    938                     self.host, self.client_results_dir,
    939                     ENABLE_RESULT_THROTTLING)
    940 
    941             with metrics.SecondsTimer(
    942                     'chromeos/autotest/job/log_collection_duration',
    943                     fields={'dut_host_name': self.host.hostname}):
    944                 self.host.get_file(
    945                         self.client_results_dir + '/',
    946                         self.server_results_dir,
    947                         preserve_symlinks=True)
    948         except Exception:
    949             # well, don't stop running just because we couldn't get logs
    950             e_msg = "Unexpected error copying test result logs, continuing ..."
    951             logging.error(e_msg)
    952             traceback.print_exc(file=sys.stdout)
    953 
    954 
    955     def remove_redundant_client_logs(self):
    956         """Remove client.*.log files in favour of client.*.DEBUG files."""
    957         debug_dir = os.path.join(self.server_results_dir, 'debug')
    958         debug_files = [f for f in os.listdir(debug_dir)
    959                        if re.search(r'^client\.\d+\.DEBUG$', f)]
    960         for debug_file in debug_files:
    961             log_file = debug_file.replace('DEBUG', 'log')
    962             log_file = os.path.join(debug_dir, log_file)
    963             if os.path.exists(log_file):
    964                 os.remove(log_file)
    965 
    966 
    967 # a file-like object for catching stderr from an autotest client and
    968 # extracting status logs from it
    969 class BaseClientLogger(object):
    970     """Partial file object to write to both stdout and
    971     the status log file.  We only implement those methods
    972     utils.run() actually calls.
    973     """
    974     status_parser = re.compile(r"^AUTOTEST_STATUS:([^:]*):(.*)$")
    975     test_complete_parser = re.compile(r"^AUTOTEST_TEST_COMPLETE:(.*)$")
    976     fetch_package_parser = re.compile(
    977         r"^AUTOTEST_FETCH_PACKAGE:([^:]*):([^:]*):(.*)$")
    978     extract_indent = re.compile(r"^(\t*).*$")
    979     extract_timestamp = re.compile(r".*\ttimestamp=(\d+)\t.*$")
    980 
    981     def __init__(self, host, tag, server_results_dir):
    982         self.host = host
    983         self.job = host.job
    984         self.log_collector = log_collector(host, tag, server_results_dir)
    985         self.leftover = ""
    986         self.last_line = ""
    987         self.logs = {}
    988 
    989 
    990     def _process_log_dict(self, log_dict):
    991         log_list = log_dict.pop("logs", [])
    992         for key in sorted(log_dict.iterkeys()):
    993             log_list += self._process_log_dict(log_dict.pop(key))
    994         return log_list
    995 
    996 
    997     def _process_logs(self):
    998         """Go through the accumulated logs in self.log and print them
    999         out to stdout and the status log. Note that this processes
   1000         logs in an ordering where:
   1001 
   1002         1) logs to different tags are never interleaved
   1003         2) logs to x.y come before logs to x.y.z for all z
   1004         3) logs to x.y come before x.z whenever y < z
   1005 
   1006         Note that this will in general not be the same as the
   1007         chronological ordering of the logs. However, if a chronological
   1008         ordering is desired that one can be reconstructed from the
   1009         status log by looking at timestamp lines."""
   1010         log_list = self._process_log_dict(self.logs)
   1011         for entry in log_list:
   1012             self.job.record_entry(entry, log_in_subdir=False)
   1013         if log_list:
   1014             self.last_line = log_list[-1].render()
   1015 
   1016 
   1017     def _process_quoted_line(self, tag, line):
   1018         """Process a line quoted with an AUTOTEST_STATUS flag. If the
   1019         tag is blank then we want to push out all the data we've been
   1020         building up in self.logs, and then the newest line. If the
   1021         tag is not blank, then push the line into the logs for handling
   1022         later."""
   1023         entry = base_job.status_log_entry.parse(line)
   1024         if entry is None:
   1025             return  # the line contains no status lines
   1026         if tag == "":
   1027             self._process_logs()
   1028             self.job.record_entry(entry, log_in_subdir=False)
   1029             self.last_line = line
   1030         else:
   1031             tag_parts = [int(x) for x in tag.split(".")]
   1032             log_dict = self.logs
   1033             for part in tag_parts:
   1034                 log_dict = log_dict.setdefault(part, {})
   1035             log_list = log_dict.setdefault("logs", [])
   1036             log_list.append(entry)
   1037 
   1038 
   1039     def _process_info_line(self, line):
   1040         """Check if line is an INFO line, and if it is, interpret any control
   1041         messages (e.g. enabling/disabling warnings) that it may contain."""
   1042         match = re.search(r"^\t*INFO\t----\t----(.*)\t[^\t]*$", line)
   1043         if not match:
   1044             return   # not an INFO line
   1045         for field in match.group(1).split('\t'):
   1046             if field.startswith("warnings.enable="):
   1047                 func = self.job.warning_manager.enable_warnings
   1048             elif field.startswith("warnings.disable="):
   1049                 func = self.job.warning_manager.disable_warnings
   1050             else:
   1051                 continue
   1052             warning_type = field.split("=", 1)[1]
   1053             func(warning_type)
   1054 
   1055 
   1056     def _process_line(self, line):
   1057         """Write out a line of data to the appropriate stream. Status
   1058         lines sent by autotest will be prepended with
   1059         "AUTOTEST_STATUS", and all other lines are ssh error
   1060         messages."""
   1061         status_match = self.status_parser.search(line)
   1062         test_complete_match = self.test_complete_parser.search(line)
   1063         fetch_package_match = self.fetch_package_parser.search(line)
   1064         if status_match:
   1065             tag, line = status_match.groups()
   1066             self._process_info_line(line)
   1067             self._process_quoted_line(tag, line)
   1068         elif test_complete_match:
   1069             self._process_logs()
   1070             fifo_path, = test_complete_match.groups()
   1071             try:
   1072                 self.log_collector.collect_client_job_results()
   1073                 self.host.run("echo A > %s" % fifo_path)
   1074             except Exception:
   1075                 msg = "Post-test log collection failed, continuing anyway"
   1076                 logging.exception(msg)
   1077         elif fetch_package_match:
   1078             pkg_name, dest_path, fifo_path = fetch_package_match.groups()
   1079             serve_packages = global_config.global_config.get_config_value(
   1080                 "PACKAGES", "serve_packages_from_autoserv", type=bool)
   1081             if serve_packages and pkg_name.endswith(".tar.bz2"):
   1082                 try:
   1083                     self._send_tarball(pkg_name, dest_path)
   1084                 except Exception:
   1085                     msg = "Package tarball creation failed, continuing anyway"
   1086                     logging.exception(msg)
   1087             try:
   1088                 self.host.run("echo B > %s" % fifo_path)
   1089             except Exception:
   1090                 msg = "Package tarball installation failed, continuing anyway"
   1091                 logging.exception(msg)
   1092         else:
   1093             logging.info(line)
   1094 
   1095 
   1096     def _send_tarball(self, pkg_name, remote_dest):
   1097         name, pkg_type = self.job.pkgmgr.parse_tarball_name(pkg_name)
   1098         src_dirs = []
   1099         if pkg_type == 'test':
   1100             for test_dir in ['site_tests', 'tests']:
   1101                 src_dir = os.path.join(self.job.clientdir, test_dir, name)
   1102                 if os.path.exists(src_dir):
   1103                     src_dirs += [src_dir]
   1104                     if AUTOSERV_PREBUILD:
   1105                         prebuild.setup(self.job.clientdir, src_dir)
   1106                     break
   1107         elif pkg_type == 'profiler':
   1108             src_dirs += [os.path.join(self.job.clientdir, 'profilers', name)]
   1109             if AUTOSERV_PREBUILD:
   1110                 prebuild.setup(self.job.clientdir, src_dir)
   1111         elif pkg_type == 'dep':
   1112             src_dirs += [os.path.join(self.job.clientdir, 'deps', name)]
   1113         elif pkg_type == 'client':
   1114             return  # you must already have a client to hit this anyway
   1115         else:
   1116             return  # no other types are supported
   1117 
   1118         # iterate over src_dirs until we find one that exists, then tar it
   1119         for src_dir in src_dirs:
   1120             if os.path.exists(src_dir):
   1121                 try:
   1122                     logging.info('Bundling %s into %s', src_dir, pkg_name)
   1123                     temp_dir = autotemp.tempdir(unique_id='autoserv-packager',
   1124                                                 dir=self.job.tmpdir)
   1125                     tarball_path = self.job.pkgmgr.tar_package(
   1126                         pkg_name, src_dir, temp_dir.name, " .")
   1127                     self.host.send_file(tarball_path, remote_dest)
   1128                 finally:
   1129                     temp_dir.clean()
   1130                 return
   1131 
   1132 
   1133     def log_warning(self, msg, warning_type):
   1134         """Injects a WARN message into the current status logging stream."""
   1135         timestamp = int(time.time())
   1136         if self.job.warning_manager.is_valid(timestamp, warning_type):
   1137             self.job.record('WARN', None, None, msg)
   1138 
   1139 
   1140     def write(self, data):
   1141         # now start processing the existing buffer and the new data
   1142         data = self.leftover + data
   1143         lines = data.split('\n')
   1144         processed_lines = 0
   1145         try:
   1146             # process all the buffered data except the last line
   1147             # ignore the last line since we may not have all of it yet
   1148             for line in lines[:-1]:
   1149                 self._process_line(line)
   1150                 processed_lines += 1
   1151         finally:
   1152             # save any unprocessed lines for future processing
   1153             self.leftover = '\n'.join(lines[processed_lines:])
   1154 
   1155 
   1156     def flush(self):
   1157         sys.stdout.flush()
   1158 
   1159 
   1160     def flush_all_buffers(self):
   1161         if self.leftover:
   1162             self._process_line(self.leftover)
   1163             self.leftover = ""
   1164         self._process_logs()
   1165         self.flush()
   1166 
   1167 
   1168     def close(self):
   1169         self.flush_all_buffers()
   1170 
   1171 
   1172 SiteAutotest = client_utils.import_site_class(
   1173     __file__, "autotest_lib.server.site_autotest", "SiteAutotest",
   1174     BaseAutotest)
   1175 
   1176 
   1177 _SiteRun = client_utils.import_site_class(
   1178     __file__, "autotest_lib.server.site_autotest", "_SiteRun", _BaseRun)
   1179 
   1180 
   1181 SiteClientLogger = client_utils.import_site_class(
   1182     __file__, "autotest_lib.server.site_autotest", "SiteClientLogger",
   1183     BaseClientLogger)
   1184 
   1185 
   1186 class Autotest(SiteAutotest):
   1187     pass
   1188 
   1189 
   1190 class client_logger(SiteClientLogger):
   1191     pass
   1192 
   1193 
   1194 class _Run(_SiteRun):
   1195     pass
   1196