Home | History | Annotate | Download | only in server
      1 # Copyright 2007 Google Inc. Released under the GPL v2
      2 #pylint: disable-msg=C0111
      3 
      4 import re, os, sys, traceback, time, glob, tempfile
      5 import logging
      6 
      7 import common
      8 from autotest_lib.server import installable_object, prebuild, utils
      9 from autotest_lib.client.common_lib import base_job, error, autotemp
     10 from autotest_lib.client.common_lib import packages
     11 from autotest_lib.client.common_lib import global_config
     12 from autotest_lib.client.common_lib import utils as client_utils
     13 
     14 try:
     15     from chromite.lib import metrics
     16 except ImportError:
     17     metrics = client_utils.metrics_mock
     18 
     19 
     20 AUTOTEST_SVN = 'svn://test.kernel.org/autotest/trunk/client'
     21 AUTOTEST_HTTP = 'http://test.kernel.org/svn/autotest/trunk/client'
     22 
     23 
     24 get_value = global_config.global_config.get_config_value
     25 autoserv_prebuild = get_value('AUTOSERV', 'enable_server_prebuild',
     26                               type=bool, default=False)
     27 
     28 
     29 class AutodirNotFoundError(Exception):
     30     """No Autotest installation could be found."""
     31 
     32 
     33 class AutotestFailure(Exception):
     34     """Gereric exception class for failures during a test run."""
     35 
     36 
     37 class AutotestAbort(AutotestFailure):
     38     """
     39     AutotestAborts are thrown when the DUT seems fine,
     40     and the test doesn't give us an explicit reason for
     41     failure; In this case we have no choice but to abort.
     42     """
     43 
     44 
     45 class AutotestDeviceError(AutotestFailure):
     46     """
     47     Exceptions that inherit from AutotestDeviceError
     48     are thrown when we can determine the current
     49     state of the DUT and conclude that it probably
     50     lead to the test failing; these exceptions lead
     51     to failures instead of aborts.
     52     """
     53 
     54 
     55 class AutotestDeviceNotPingable(AutotestDeviceError):
     56     """Error for when a DUT becomes unpingable."""
     57 
     58 
     59 class AutotestDeviceNotSSHable(AutotestDeviceError):
     60     """Error for when a DUT is pingable but not SSHable."""
     61 
     62 
     63 class AutotestDeviceRebooted(AutotestDeviceError):
     64     """Error for when a DUT rebooted unexpectedly."""
     65 
     66 
     67 class BaseAutotest(installable_object.InstallableObject):
     68     """
     69     This class represents the Autotest program.
     70 
     71     Autotest is used to run tests automatically and collect the results.
     72     It also supports profilers.
     73 
     74     Implementation details:
     75     This is a leaf class in an abstract class hierarchy, it must
     76     implement the unimplemented methods in parent classes.
     77     """
     78 
     79     def __init__(self, host=None):
     80         self.host = host
     81         self.got = False
     82         self.installed = False
     83         self.serverdir = utils.get_server_dir()
     84         super(BaseAutotest, self).__init__()
     85 
     86 
     87     install_in_tmpdir = False
     88     @classmethod
     89     def set_install_in_tmpdir(cls, flag):
     90         """ Sets a flag that controls whether or not Autotest should by
     91         default be installed in a "standard" directory (e.g.
     92         /home/autotest, /usr/local/autotest) or a temporary directory. """
     93         cls.install_in_tmpdir = flag
     94 
     95 
     96     @classmethod
     97     def get_client_autodir_paths(cls, host):
     98         return global_config.global_config.get_config_value(
     99                 'AUTOSERV', 'client_autodir_paths', type=list)
    100 
    101 
    102     @classmethod
    103     def get_installed_autodir(cls, host):
    104         """
    105         Find where the Autotest client is installed on the host.
    106         @returns an absolute path to an installed Autotest client root.
    107         @raises AutodirNotFoundError if no Autotest installation can be found.
    108         """
    109         autodir = host.get_autodir()
    110         if autodir:
    111             logging.debug('Using existing host autodir: %s', autodir)
    112             return autodir
    113 
    114         for path in Autotest.get_client_autodir_paths(host):
    115             try:
    116                 autotest_binary = os.path.join(path, 'bin', 'autotest')
    117                 host.run('test -x %s' % utils.sh_escape(autotest_binary))
    118                 host.run('test -w %s' % utils.sh_escape(path))
    119                 logging.debug('Found existing autodir at %s', path)
    120                 return path
    121             except error.GenericHostRunError:
    122                 logging.debug('%s does not exist on %s', autotest_binary,
    123                               host.hostname)
    124         raise AutodirNotFoundError
    125 
    126 
    127     @classmethod
    128     def get_install_dir(cls, host):
    129         """
    130         Determines the location where autotest should be installed on
    131         host. If self.install_in_tmpdir is set, it will return a unique
    132         temporary directory that autotest can be installed in. Otherwise, looks
    133         for an existing installation to use; if none is found, looks for a
    134         usable directory in the global config client_autodir_paths.
    135         """
    136         try:
    137             install_dir = cls.get_installed_autodir(host)
    138         except AutodirNotFoundError:
    139             install_dir = cls._find_installable_dir(host)
    140 
    141         if cls.install_in_tmpdir:
    142             return host.get_tmp_dir(parent=install_dir)
    143         return install_dir
    144 
    145 
    146     @classmethod
    147     def _find_installable_dir(cls, host):
    148         client_autodir_paths = cls.get_client_autodir_paths(host)
    149         for path in client_autodir_paths:
    150             try:
    151                 host.run('mkdir -p %s' % utils.sh_escape(path))
    152                 host.run('test -w %s' % utils.sh_escape(path))
    153                 return path
    154             except error.AutoservRunError:
    155                 logging.debug('Failed to create %s', path)
    156         metrics.Counter(
    157             'chromeos/autotest/errors/no_autotest_install_path').increment(
    158                 fields={'dut_host_name': host.hostname})
    159         raise error.AutoservInstallError(
    160                 'Unable to find a place to install Autotest; tried %s' %
    161                 ', '.join(client_autodir_paths))
    162 
    163 
    164     def get_fetch_location(self):
    165         c = global_config.global_config
    166         repos = c.get_config_value("PACKAGES", 'fetch_location', type=list,
    167                                    default=[])
    168         repos.reverse()
    169         return repos
    170 
    171 
    172     def install(self, host=None, autodir=None, use_packaging=True):
    173         self._install(host=host, autodir=autodir, use_packaging=use_packaging)
    174 
    175 
    176     def install_full_client(self, host=None, autodir=None):
    177         self._install(host=host, autodir=autodir, use_autoserv=False,
    178                       use_packaging=False)
    179 
    180 
    181     def install_no_autoserv(self, host=None, autodir=None):
    182         self._install(host=host, autodir=autodir, use_autoserv=False)
    183 
    184 
    185     def _install_using_packaging(self, host, autodir):
    186         repos = self.get_fetch_location()
    187         if not repos:
    188             raise error.PackageInstallError("No repos to install an "
    189                                             "autotest client from")
    190         pkgmgr = packages.PackageManager(autodir, hostname=host.hostname,
    191                                          repo_urls=repos,
    192                                          do_locking=False,
    193                                          run_function=host.run,
    194                                          run_function_dargs=dict(timeout=600))
    195         # The packages dir is used to store all the packages that
    196         # are fetched on that client. (for the tests,deps etc.
    197         # too apart from the client)
    198         pkg_dir = os.path.join(autodir, 'packages')
    199         # clean up the autodir except for the packages directory
    200         host.run('cd %s && ls | grep -v "^packages$"'
    201                  ' | xargs rm -rf && rm -rf .[!.]*' % autodir)
    202         pkgmgr.install_pkg('autotest', 'client', pkg_dir, autodir,
    203                            preserve_install_dir=True)
    204         self.installed = True
    205 
    206 
    207     def _install_using_send_file(self, host, autodir):
    208         dirs_to_exclude = set(["tests", "site_tests", "deps", "profilers",
    209                                "packages"])
    210         light_files = [os.path.join(self.source_material, f)
    211                        for f in os.listdir(self.source_material)
    212                        if f not in dirs_to_exclude]
    213         host.send_file(light_files, autodir, delete_dest=True)
    214 
    215         # create empty dirs for all the stuff we excluded
    216         commands = []
    217         for path in dirs_to_exclude:
    218             abs_path = os.path.join(autodir, path)
    219             abs_path = utils.sh_escape(abs_path)
    220             commands.append("mkdir -p '%s'" % abs_path)
    221             commands.append("touch '%s'/__init__.py" % abs_path)
    222         host.run(';'.join(commands))
    223 
    224 
    225     def _install(self, host=None, autodir=None, use_autoserv=True,
    226                  use_packaging=True):
    227         """
    228         Install autotest.  If get() was not called previously, an
    229         attempt will be made to install from the autotest svn
    230         repository.
    231 
    232         @param host A Host instance on which autotest will be installed
    233         @param autodir Location on the remote host to install to
    234         @param use_autoserv Enable install modes that depend on the client
    235             running with the autoserv harness
    236         @param use_packaging Enable install modes that use the packaging system
    237 
    238         @exception AutoservError if a tarball was not specified and
    239             the target host does not have svn installed in its path
    240         """
    241         if not host:
    242             host = self.host
    243         if not self.got:
    244             self.get()
    245         host.wait_up(timeout=30)
    246         host.setup()
    247         logging.info("Installing autotest on %s", host.hostname)
    248 
    249         # set up the autotest directory on the remote machine
    250         if not autodir:
    251             autodir = self.get_install_dir(host)
    252         logging.info('Using installation dir %s', autodir)
    253         host.set_autodir(autodir)
    254         host.run('mkdir -p %s' % utils.sh_escape(autodir))
    255 
    256         # make sure there are no files in $AUTODIR/results
    257         results_path = os.path.join(autodir, 'results')
    258         host.run('rm -rf %s/*' % utils.sh_escape(results_path),
    259                  ignore_status=True)
    260 
    261         # Fetch the autotest client from the nearest repository
    262         if use_packaging:
    263             try:
    264                 self._install_using_packaging(host, autodir)
    265                 logging.info("Installation of autotest completed using the "
    266                              "packaging system.")
    267                 return
    268             except (error.PackageInstallError, error.AutoservRunError,
    269                     global_config.ConfigError), e:
    270                 logging.info("Could not install autotest using the packaging "
    271                              "system: %s. Trying other methods", e)
    272         else:
    273             # Delete the package checksum file to force dut updating local
    274             # packages.
    275             command = ('rm -f "%s"' %
    276                        (os.path.join(autodir, packages.CHECKSUM_FILE)))
    277             host.run(command)
    278 
    279         # try to install from file or directory
    280         if self.source_material:
    281             c = global_config.global_config
    282             supports_autoserv_packaging = c.get_config_value(
    283                 "PACKAGES", "serve_packages_from_autoserv", type=bool)
    284             # Copy autotest recursively
    285             if supports_autoserv_packaging and use_autoserv:
    286                 self._install_using_send_file(host, autodir)
    287             else:
    288                 host.send_file(self.source_material, autodir, delete_dest=True)
    289             logging.info("Installation of autotest completed from %s",
    290                          self.source_material)
    291             self.installed = True
    292             return
    293 
    294         # if that fails try to install using svn
    295         if utils.run('which svn').exit_status:
    296             raise error.AutoservError('svn not found on target machine: %s' %
    297                                       host.name)
    298         try:
    299             host.run('svn checkout %s %s' % (AUTOTEST_SVN, autodir))
    300         except error.AutoservRunError, e:
    301             host.run('svn checkout %s %s' % (AUTOTEST_HTTP, autodir))
    302         logging.info("Installation of autotest completed using SVN.")
    303         self.installed = True
    304 
    305 
    306     def uninstall(self, host=None):
    307         """
    308         Uninstall (i.e. delete) autotest. Removes the autotest client install
    309         from the specified host.
    310 
    311         @params host a Host instance from which the client will be removed
    312         """
    313         if not self.installed:
    314             return
    315         if not host:
    316             host = self.host
    317         autodir = host.get_autodir()
    318         if not autodir:
    319             return
    320 
    321         # perform the actual uninstall
    322         host.run("rm -rf %s" % utils.sh_escape(autodir), ignore_status=True)
    323         host.set_autodir(None)
    324         self.installed = False
    325 
    326 
    327     def get(self, location=None):
    328         if not location:
    329             location = os.path.join(self.serverdir, '../client')
    330             location = os.path.abspath(location)
    331         # If there's stuff run on our client directory already, it
    332         # can cause problems. Try giving it a quick clean first.
    333         cwd = os.getcwd()
    334         os.chdir(location)
    335         try:
    336             utils.system('tools/make_clean', ignore_status=True)
    337         finally:
    338             os.chdir(cwd)
    339         super(BaseAutotest, self).get(location)
    340         self.got = True
    341 
    342 
    343     def run(self, control_file, results_dir='.', host=None, timeout=None,
    344             tag=None, parallel_flag=False, background=False,
    345             client_disconnect_timeout=None, use_packaging=True):
    346         """
    347         Run an autotest job on the remote machine.
    348 
    349         @param control_file: An open file-like-obj of the control file.
    350         @param results_dir: A str path where the results should be stored
    351                 on the local filesystem.
    352         @param host: A Host instance on which the control file should
    353                 be run.
    354         @param timeout: Maximum number of seconds to wait for the run or None.
    355         @param tag: Tag name for the client side instance of autotest.
    356         @param parallel_flag: Flag set when multiple jobs are run at the
    357                 same time.
    358         @param background: Indicates that the client should be launched as
    359                 a background job; the code calling run will be responsible
    360                 for monitoring the client and collecting the results.
    361         @param client_disconnect_timeout: Seconds to wait for the remote host
    362                 to come back after a reboot. Defaults to the host setting for
    363                 DEFAULT_REBOOT_TIMEOUT.
    364 
    365         @raises AutotestRunError: If there is a problem executing
    366                 the control file.
    367         """
    368         host = self._get_host_and_setup(host, use_packaging=use_packaging)
    369         logging.debug('Autotest job starts on remote host: %s',
    370                       host.hostname)
    371         results_dir = os.path.abspath(results_dir)
    372 
    373         if client_disconnect_timeout is None:
    374             client_disconnect_timeout = host.DEFAULT_REBOOT_TIMEOUT
    375 
    376         if tag:
    377             results_dir = os.path.join(results_dir, tag)
    378 
    379         atrun = _Run(host, results_dir, tag, parallel_flag, background)
    380         self._do_run(control_file, results_dir, host, atrun, timeout,
    381                      client_disconnect_timeout, use_packaging=use_packaging)
    382 
    383 
    384     def _get_host_and_setup(self, host, use_packaging=True):
    385         if not host:
    386             host = self.host
    387         if not self.installed:
    388             self.install(host, use_packaging=use_packaging)
    389 
    390         host.wait_up(timeout=30)
    391         return host
    392 
    393 
    394     def _do_run(self, control_file, results_dir, host, atrun, timeout,
    395                 client_disconnect_timeout, use_packaging=True):
    396         try:
    397             atrun.verify_machine()
    398         except:
    399             logging.error("Verify failed on %s. Reinstalling autotest",
    400                           host.hostname)
    401             self.install(host)
    402             atrun.verify_machine()
    403         debug = os.path.join(results_dir, 'debug')
    404         try:
    405             os.makedirs(debug)
    406         except Exception:
    407             pass
    408 
    409         delete_file_list = [atrun.remote_control_file,
    410                             atrun.remote_control_file + '.state',
    411                             atrun.manual_control_file,
    412                             atrun.manual_control_file + '.state']
    413         cmd = ';'.join('rm -f ' + control for control in delete_file_list)
    414         host.run(cmd, ignore_status=True)
    415 
    416         tmppath = utils.get(control_file, local_copy=True)
    417 
    418         # build up the initialization prologue for the control file
    419         prologue_lines = []
    420 
    421         # Add the additional user arguments
    422         prologue_lines.append("args = %r\n" % self.job.args)
    423 
    424         # If the packaging system is being used, add the repository list.
    425         repos = None
    426         try:
    427             if use_packaging:
    428                 repos = self.get_fetch_location()
    429                 prologue_lines.append('job.add_repository(%s)\n' % repos)
    430             else:
    431                 logging.debug('use_packaging is set to False, do not add any '
    432                               'repository.')
    433         except global_config.ConfigError, e:
    434             # If repos is defined packaging is enabled so log the error
    435             if repos:
    436                 logging.error(e)
    437 
    438         # on full-size installs, turn on any profilers the server is using
    439         if not atrun.background:
    440             running_profilers = host.job.profilers.add_log.iteritems()
    441             for profiler, (args, dargs) in running_profilers:
    442                 call_args = [repr(profiler)]
    443                 call_args += [repr(arg) for arg in args]
    444                 call_args += ["%s=%r" % item for item in dargs.iteritems()]
    445                 prologue_lines.append("job.profilers.add(%s)\n"
    446                                       % ", ".join(call_args))
    447         cfile = "".join(prologue_lines)
    448 
    449         cfile += open(tmppath).read()
    450         open(tmppath, "w").write(cfile)
    451 
    452         # Create and copy state file to remote_control_file + '.state'
    453         state_file = host.job.preprocess_client_state()
    454         host.send_file(state_file, atrun.remote_control_file + '.init.state')
    455         os.remove(state_file)
    456 
    457         # Copy control_file to remote_control_file on the host
    458         host.send_file(tmppath, atrun.remote_control_file)
    459         if os.path.abspath(tmppath) != os.path.abspath(control_file):
    460             os.remove(tmppath)
    461 
    462         atrun.execute_control(
    463                 timeout=timeout,
    464                 client_disconnect_timeout=client_disconnect_timeout)
    465 
    466 
    467     @classmethod
    468     def _check_client_test_result(cls, host, test_name):
    469         """
    470         Check result of client test.
    471         Autotest will store results in the file name status.
    472         We check that second to last line in that file begins with 'END GOOD'
    473 
    474         @raises TestFail: If client test does not pass.
    475         """
    476         client_result_dir = '%s/results/default' % host.autodir
    477         command = 'tail -2 %s/status | head -1' % client_result_dir
    478         status = host.run(command).stdout.strip()
    479         logging.info(status)
    480         if status[:8] != 'END GOOD':
    481             raise error.TestFail('%s client test did not pass.' % test_name)
    482 
    483 
    484     def run_timed_test(self, test_name, results_dir='.', host=None,
    485                        timeout=None, parallel_flag=False, background=False,
    486                        client_disconnect_timeout=None, *args, **dargs):
    487         """
    488         Assemble a tiny little control file to just run one test,
    489         and run it as an autotest client-side test
    490         """
    491         if not host:
    492             host = self.host
    493         if not self.installed:
    494             self.install(host)
    495         opts = ["%s=%s" % (o[0], repr(o[1])) for o in dargs.items()]
    496         cmd = ", ".join([repr(test_name)] + map(repr, args) + opts)
    497         control = "job.run_test(%s)\n" % cmd
    498         self.run(control, results_dir, host, timeout=timeout,
    499                  parallel_flag=parallel_flag, background=background,
    500                  client_disconnect_timeout=client_disconnect_timeout)
    501 
    502         if dargs.get('check_client_result', False):
    503             self._check_client_test_result(host, test_name)
    504 
    505 
    506     def run_test(self, test_name, results_dir='.', host=None,
    507                  parallel_flag=False, background=False,
    508                  client_disconnect_timeout=None, *args, **dargs):
    509         self.run_timed_test(test_name, results_dir, host, timeout=None,
    510                             parallel_flag=parallel_flag, background=background,
    511                             client_disconnect_timeout=client_disconnect_timeout,
    512                             *args, **dargs)
    513 
    514 
    515 class _BaseRun(object):
    516     """
    517     Represents a run of autotest control file.  This class maintains
    518     all the state necessary as an autotest control file is executed.
    519 
    520     It is not intended to be used directly, rather control files
    521     should be run using the run method in Autotest.
    522     """
    523     def __init__(self, host, results_dir, tag, parallel_flag, background):
    524         self.host = host
    525         self.results_dir = results_dir
    526         self.env = host.env
    527         self.tag = tag
    528         self.parallel_flag = parallel_flag
    529         self.background = background
    530         self.autodir = Autotest.get_installed_autodir(self.host)
    531         control = os.path.join(self.autodir, 'control')
    532         if tag:
    533             control += '.' + tag
    534         self.manual_control_file = control
    535         self.remote_control_file = control + '.autoserv'
    536         self.config_file = os.path.join(self.autodir, 'global_config.ini')
    537 
    538 
    539     def verify_machine(self):
    540         binary = os.path.join(self.autodir, 'bin/autotest')
    541         try:
    542             self.host.run('ls %s > /dev/null 2>&1' % binary)
    543         except:
    544             raise error.AutoservInstallError(
    545                 "Autotest does not appear to be installed")
    546 
    547         if not self.parallel_flag:
    548             tmpdir = os.path.join(self.autodir, 'tmp')
    549             download = os.path.join(self.autodir, 'tests/download')
    550             self.host.run('umount %s' % tmpdir, ignore_status=True)
    551             self.host.run('umount %s' % download, ignore_status=True)
    552 
    553 
    554     def get_base_cmd_args(self, section):
    555         args = ['--verbose']
    556         if section > 0:
    557             args.append('-c')
    558         if self.tag:
    559             args.append('-t %s' % self.tag)
    560         if self.host.job.use_external_logging():
    561             args.append('-l')
    562         if self.host.hostname:
    563             args.append('--hostname=%s' % self.host.hostname)
    564         args.append('--user=%s' % self.host.job.user)
    565 
    566         args.append(self.remote_control_file)
    567         return args
    568 
    569 
    570     def get_background_cmd(self, section):
    571         cmd = ['nohup', os.path.join(self.autodir, 'bin/autotest_client')]
    572         cmd += self.get_base_cmd_args(section)
    573         cmd += ['>/dev/null', '2>/dev/null', '&']
    574         return ' '.join(cmd)
    575 
    576 
    577     def get_daemon_cmd(self, section, monitor_dir):
    578         cmd = ['nohup', os.path.join(self.autodir, 'bin/autotestd'),
    579                monitor_dir, '-H autoserv']
    580         cmd += self.get_base_cmd_args(section)
    581         cmd += ['>/dev/null', '2>/dev/null', '&']
    582         return ' '.join(cmd)
    583 
    584 
    585     def get_monitor_cmd(self, monitor_dir, stdout_read, stderr_read):
    586         cmd = [os.path.join(self.autodir, 'bin', 'autotestd_monitor'),
    587                monitor_dir, str(stdout_read), str(stderr_read)]
    588         return ' '.join(cmd)
    589 
    590 
    591     def get_client_log(self):
    592         """Find what the "next" client.* prefix should be
    593 
    594         @returns A string of the form client.INTEGER that should be prefixed
    595             to all client debug log files.
    596         """
    597         max_digit = -1
    598         debug_dir = os.path.join(self.results_dir, 'debug')
    599         client_logs = glob.glob(os.path.join(debug_dir, 'client.*.*'))
    600         for log in client_logs:
    601             _, number, _ = log.split('.', 2)
    602             if number.isdigit():
    603                 max_digit = max(max_digit, int(number))
    604         return 'client.%d' % (max_digit + 1)
    605 
    606 
    607     def copy_client_config_file(self, client_log_prefix=None):
    608         """
    609         Create and copy the client config file based on the server config.
    610 
    611         @param client_log_prefix: Optional prefix to prepend to log files.
    612         """
    613         client_config_file = self._create_client_config_file(client_log_prefix)
    614         self.host.send_file(client_config_file, self.config_file)
    615         os.remove(client_config_file)
    616 
    617 
    618     def _create_client_config_file(self, client_log_prefix=None):
    619         """
    620         Create a temporary file with the [CLIENT] section configuration values
    621         taken from the server global_config.ini.
    622 
    623         @param client_log_prefix: Optional prefix to prepend to log files.
    624 
    625         @return: Path of the temporary file generated.
    626         """
    627         config = global_config.global_config.get_section_values('CLIENT')
    628         if client_log_prefix:
    629             config.set('CLIENT', 'default_logging_name', client_log_prefix)
    630         return self._create_aux_file(config.write)
    631 
    632 
    633     def _create_aux_file(self, func, *args):
    634         """
    635         Creates a temporary file and writes content to it according to a
    636         content creation function. The file object is appended to *args, which
    637         is then passed to the content creation function
    638 
    639         @param func: Function that will be used to write content to the
    640                 temporary file.
    641         @param *args: List of parameters that func takes.
    642         @return: Path to the temporary file that was created.
    643         """
    644         fd, path = tempfile.mkstemp(dir=self.host.job.tmpdir)
    645         aux_file = os.fdopen(fd, "w")
    646         try:
    647             list_args = list(args)
    648             list_args.append(aux_file)
    649             func(*list_args)
    650         finally:
    651             aux_file.close()
    652         return path
    653 
    654 
    655     @staticmethod
    656     def is_client_job_finished(last_line):
    657         return bool(re.match(r'^END .*\t----\t----\t.*$', last_line))
    658 
    659 
    660     @staticmethod
    661     def is_client_job_rebooting(last_line):
    662         return bool(re.match(r'^\t*GOOD\t----\treboot\.start.*$', last_line))
    663 
    664 
    665     def _diagnose_dut(self, old_boot_id=None):
    666         """
    667         Run diagnostic checks on a DUT.
    668 
    669         1. ping: A dead host will not respond to pings.
    670         2. ssh (happens with 3.): DUT hangs usually fail in authentication
    671             but respond to pings.
    672         3. Check if a reboot occured: A healthy but unexpected reboot leaves the
    673             host running with a new boot id.
    674 
    675         This method will always raise an exception from the AutotestFailure
    676         family and should only get called when the reason for a test failing
    677         is ambiguous.
    678 
    679         @raises AutotestDeviceNotPingable: If the DUT doesn't respond to ping.
    680         @raises AutotestDeviceNotSSHable: If we cannot SSH into the DUT.
    681         @raises AutotestDeviceRebooted: If the boot id changed.
    682         @raises AutotestAbort: If none of the above exceptions were raised.
    683             Since we have no recourse we must abort at this stage.
    684         """
    685         msg = 'Autotest client terminated unexpectedly: '
    686         if utils.ping(self.host.hostname, tries=1, deadline=1) != 0:
    687             msg += 'DUT is no longer pingable, it may have rebooted or hung.\n'
    688             raise AutotestDeviceNotPingable(msg)
    689 
    690         if old_boot_id:
    691             try:
    692                 new_boot_id = self.host.get_boot_id(timeout=60)
    693             except Exception as e:
    694                 msg += ('DUT is pingable but not SSHable, it most likely'
    695                         ' sporadically rebooted during testing. %s\n' % str(e))
    696                 raise AutotestDeviceNotSSHable(msg)
    697             else:
    698                 if new_boot_id != old_boot_id:
    699                     msg += 'DUT rebooted during the test run.\n'
    700                     raise AutotestDeviceRebooted(msg)
    701 
    702             msg += ('DUT is pingable, SSHable and did NOT restart '
    703                     'un-expectedly. We probably lost connectivity during the '
    704                     'test.')
    705         else:
    706             msg += ('DUT is pingable, could not determine if an un-expected '
    707                     'reboot occured during the test.')
    708 
    709         raise AutotestAbort(msg)
    710 
    711 
    712     def log_unexpected_abort(self, stderr_redirector, old_boot_id=None):
    713         """
    714         Logs that something unexpected happened, then tries to diagnose the
    715         failure. The purpose of this function is only to close out the status
    716         log with the appropriate error message, not to critically terminate
    717         the program.
    718 
    719         @param stderr_redirector: log stream.
    720         @param old_boot_id: boot id used to infer if a reboot occured.
    721         """
    722         stderr_redirector.flush_all_buffers()
    723         try:
    724             self._diagnose_dut(old_boot_id)
    725         except AutotestFailure as e:
    726             self.host.job.record('END ABORT', None, None, str(e))
    727 
    728 
    729     def _execute_in_background(self, section, timeout):
    730         full_cmd = self.get_background_cmd(section)
    731         devnull = open(os.devnull, "w")
    732 
    733         self.copy_client_config_file(self.get_client_log())
    734 
    735         self.host.job.push_execution_context(self.results_dir)
    736         try:
    737             result = self.host.run(full_cmd, ignore_status=True,
    738                                    timeout=timeout,
    739                                    stdout_tee=devnull,
    740                                    stderr_tee=devnull)
    741         finally:
    742             self.host.job.pop_execution_context()
    743 
    744         return result
    745 
    746 
    747     @staticmethod
    748     def _strip_stderr_prologue(stderr):
    749         """Strips the 'standard' prologue that get pre-pended to every
    750         remote command and returns the text that was actually written to
    751         stderr by the remote command."""
    752         stderr_lines = stderr.split("\n")[1:]
    753         if not stderr_lines:
    754             return ""
    755         elif stderr_lines[0].startswith("NOTE: autotestd_monitor"):
    756             del stderr_lines[0]
    757         return "\n".join(stderr_lines)
    758 
    759 
    760     def _execute_daemon(self, section, timeout, stderr_redirector,
    761                         client_disconnect_timeout):
    762         monitor_dir = self.host.get_tmp_dir()
    763         daemon_cmd = self.get_daemon_cmd(section, monitor_dir)
    764 
    765         # grab the location for the server-side client log file
    766         client_log_prefix = self.get_client_log()
    767         client_log_path = os.path.join(self.results_dir, 'debug',
    768                                        client_log_prefix + '.log')
    769         client_log = open(client_log_path, 'w', 0)
    770         self.copy_client_config_file(client_log_prefix)
    771 
    772         stdout_read = stderr_read = 0
    773         self.host.job.push_execution_context(self.results_dir)
    774         try:
    775             self.host.run(daemon_cmd, ignore_status=True, timeout=timeout)
    776             disconnect_warnings = []
    777             while True:
    778                 monitor_cmd = self.get_monitor_cmd(monitor_dir, stdout_read,
    779                                                    stderr_read)
    780                 try:
    781                     result = self.host.run(monitor_cmd, ignore_status=True,
    782                                            timeout=timeout,
    783                                            stdout_tee=client_log,
    784                                            stderr_tee=stderr_redirector)
    785                 except error.AutoservRunError, e:
    786                     result = e.result_obj
    787                     result.exit_status = None
    788                     disconnect_warnings.append(e.description)
    789 
    790                     stderr_redirector.log_warning(
    791                         "Autotest client was disconnected: %s" % e.description,
    792                         "NETWORK")
    793                 except error.AutoservSSHTimeout:
    794                     result = utils.CmdResult(monitor_cmd, "", "", None, 0)
    795                     stderr_redirector.log_warning(
    796                         "Attempt to connect to Autotest client timed out",
    797                         "NETWORK")
    798 
    799                 stdout_read += len(result.stdout)
    800                 stderr_read += len(self._strip_stderr_prologue(result.stderr))
    801 
    802                 if result.exit_status is not None:
    803                     # TODO (crosbug.com/38224)- sbasi: Remove extra logging.
    804                     logging.debug('Result exit status is %d.',
    805                                   result.exit_status)
    806                     return result
    807                 elif not self.host.wait_up(client_disconnect_timeout):
    808                     raise error.AutoservSSHTimeout(
    809                         "client was disconnected, reconnect timed out")
    810         finally:
    811             client_log.close()
    812             self.host.job.pop_execution_context()
    813 
    814 
    815     def execute_section(self, section, timeout, stderr_redirector,
    816                         client_disconnect_timeout):
    817         logging.info("Executing %s/bin/autotest %s/control phase %d",
    818                      self.autodir, self.autodir, section)
    819 
    820         if self.background:
    821             result = self._execute_in_background(section, timeout)
    822         else:
    823             result = self._execute_daemon(section, timeout, stderr_redirector,
    824                                           client_disconnect_timeout)
    825 
    826         last_line = stderr_redirector.last_line
    827 
    828         # check if we failed hard enough to warrant an exception
    829         if result.exit_status == 1:
    830             err = error.AutotestRunError("client job was aborted")
    831         elif not self.background and not result.stderr:
    832             err = error.AutotestRunError(
    833                 "execute_section %s failed to return anything\n"
    834                 "stdout:%s\n" % (section, result.stdout))
    835         else:
    836             err = None
    837 
    838         # log something if the client failed AND never finished logging
    839         if err and not self.is_client_job_finished(last_line):
    840             self.log_unexpected_abort(stderr_redirector)
    841 
    842         if err:
    843             raise err
    844         else:
    845             return stderr_redirector.last_line
    846 
    847 
    848     def _wait_for_reboot(self, old_boot_id):
    849         logging.info("Client is rebooting")
    850         logging.info("Waiting for client to halt")
    851         if not self.host.wait_down(self.host.WAIT_DOWN_REBOOT_TIMEOUT,
    852                                    old_boot_id=old_boot_id):
    853             err = "%s failed to shutdown after %d"
    854             err %= (self.host.hostname, self.host.WAIT_DOWN_REBOOT_TIMEOUT)
    855             raise error.AutotestRunError(err)
    856         logging.info("Client down, waiting for restart")
    857         if not self.host.wait_up(self.host.DEFAULT_REBOOT_TIMEOUT):
    858             # since reboot failed
    859             # hardreset the machine once if possible
    860             # before failing this control file
    861             warning = "%s did not come back up, hard resetting"
    862             warning %= self.host.hostname
    863             logging.warning(warning)
    864             try:
    865                 self.host.hardreset(wait=False)
    866             except (AttributeError, error.AutoservUnsupportedError):
    867                 warning = "Hard reset unsupported on %s"
    868                 warning %= self.host.hostname
    869                 logging.warning(warning)
    870             raise error.AutotestRunError("%s failed to boot after %ds" %
    871                                          (self.host.hostname,
    872                                           self.host.DEFAULT_REBOOT_TIMEOUT))
    873         self.host.reboot_followup()
    874 
    875 
    876     def execute_control(self, timeout=None, client_disconnect_timeout=None):
    877         if not self.background:
    878             collector = log_collector(self.host, self.tag, self.results_dir)
    879             hostname = self.host.hostname
    880             remote_results = collector.client_results_dir
    881             local_results = collector.server_results_dir
    882             self.host.job.add_client_log(hostname, remote_results,
    883                                          local_results)
    884             job_record_context = self.host.job.get_record_context()
    885 
    886         section = 0
    887         start_time = time.time()
    888 
    889         logger = client_logger(self.host, self.tag, self.results_dir)
    890         try:
    891             while not timeout or time.time() < start_time + timeout:
    892                 if timeout:
    893                     section_timeout = start_time + timeout - time.time()
    894                 else:
    895                     section_timeout = None
    896                 boot_id = self.host.get_boot_id()
    897                 last = self.execute_section(section, section_timeout,
    898                                             logger, client_disconnect_timeout)
    899                 if self.background:
    900                     return
    901                 section += 1
    902                 if self.is_client_job_finished(last):
    903                     logging.info("Client complete")
    904                     return
    905                 elif self.is_client_job_rebooting(last):
    906                     try:
    907                         self._wait_for_reboot(boot_id)
    908                     except error.AutotestRunError, e:
    909                         self.host.job.record("ABORT", None, "reboot", str(e))
    910                         self.host.job.record("END ABORT", None, None, str(e))
    911                         raise
    912                     continue
    913 
    914                 # If a test fails without probable cause we try to bucket it's
    915                 # failure into one of 2 categories. If we can determine the
    916                 # current state of the device and it is suspicious, we close the
    917                 # status lines indicating a failure. If we either cannot
    918                 # determine the state of the device, or it appears totally
    919                 # healthy, we give up and abort.
    920                 try:
    921                     self._diagnose_dut(boot_id)
    922                 except AutotestDeviceError as e:
    923                     # The status lines of the test are pretty much tailed to
    924                     # our log, with indentation, from the client job on the DUT.
    925                     # So if the DUT goes down unexpectedly we'll end up with a
    926                     # malformed status log unless we manually unwind the status
    927                     # stack. Ideally we would want to write a nice wrapper like
    928                     # server_job methods run_reboot, run_group but they expect
    929                     # reboots and we don't.
    930                     self.host.job.record('FAIL', None, None, str(e))
    931                     self.host.job.record('END FAIL', None, None)
    932                     self.host.job.record('END GOOD', None, None)
    933                     self.host.job.failed_with_device_error = True
    934                     return
    935                 except AutotestAbort as e:
    936                     self.host.job.record('ABORT', None, None, str(e))
    937                     self.host.job.record('END ABORT', None, None)
    938 
    939                     # give the client machine a chance to recover from a crash
    940                     self.host.wait_up(
    941                         self.host.HOURS_TO_WAIT_FOR_RECOVERY * 3600)
    942                     msg = ("Aborting - unexpected final status message from "
    943                            "client on %s: %s\n") % (self.host.hostname, last)
    944                     raise error.AutotestRunError(msg)
    945         finally:
    946             logging.debug('Autotest job finishes running. Below is the '
    947                           'post-processing operations.')
    948             logger.close()
    949             if not self.background:
    950                 collector.collect_client_job_results()
    951                 collector.remove_redundant_client_logs()
    952                 state_file = os.path.basename(self.remote_control_file
    953                                               + '.state')
    954                 state_path = os.path.join(self.results_dir, state_file)
    955                 self.host.job.postprocess_client_state(state_path)
    956                 self.host.job.remove_client_log(hostname, remote_results,
    957                                                 local_results)
    958                 job_record_context.restore()
    959 
    960             logging.debug('Autotest job finishes.')
    961 
    962         # should only get here if we timed out
    963         assert timeout
    964         raise error.AutotestTimeoutError()
    965 
    966 
    967 class log_collector(object):
    968     def __init__(self, host, client_tag, results_dir):
    969         self.host = host
    970         if not client_tag:
    971             client_tag = "default"
    972         self.client_results_dir = os.path.join(host.get_autodir(), "results",
    973                                                client_tag)
    974         self.server_results_dir = results_dir
    975 
    976 
    977     def collect_client_job_results(self):
    978         """ A method that collects all the current results of a running
    979         client job into the results dir. By default does nothing as no
    980         client job is running, but when running a client job you can override
    981         this with something that will actually do something. """
    982 
    983         # make an effort to wait for the machine to come up
    984         try:
    985             self.host.wait_up(timeout=30)
    986         except error.AutoservError:
    987             # don't worry about any errors, we'll try and
    988             # get the results anyway
    989             pass
    990 
    991         # Copy all dirs in default to results_dir
    992         try:
    993             with metrics.SecondsTimer(
    994                     'chromeos/autotest/job/log_collection_duration',
    995                     fields={'dut_host_name': self.host.hostname}):
    996                 self.host.get_file(
    997                         self.client_results_dir + '/',
    998                         self.server_results_dir,
    999                         preserve_symlinks=True)
   1000         except Exception:
   1001             # well, don't stop running just because we couldn't get logs
   1002             e_msg = "Unexpected error copying test result logs, continuing ..."
   1003             logging.error(e_msg)
   1004             traceback.print_exc(file=sys.stdout)
   1005 
   1006 
   1007     def remove_redundant_client_logs(self):
   1008         """Remove client.*.log files in favour of client.*.DEBUG files."""
   1009         debug_dir = os.path.join(self.server_results_dir, 'debug')
   1010         debug_files = [f for f in os.listdir(debug_dir)
   1011                        if re.search(r'^client\.\d+\.DEBUG$', f)]
   1012         for debug_file in debug_files:
   1013             log_file = debug_file.replace('DEBUG', 'log')
   1014             log_file = os.path.join(debug_dir, log_file)
   1015             if os.path.exists(log_file):
   1016                 os.remove(log_file)
   1017 
   1018 
   1019 # a file-like object for catching stderr from an autotest client and
   1020 # extracting status logs from it
   1021 class BaseClientLogger(object):
   1022     """Partial file object to write to both stdout and
   1023     the status log file.  We only implement those methods
   1024     utils.run() actually calls.
   1025     """
   1026     status_parser = re.compile(r"^AUTOTEST_STATUS:([^:]*):(.*)$")
   1027     test_complete_parser = re.compile(r"^AUTOTEST_TEST_COMPLETE:(.*)$")
   1028     fetch_package_parser = re.compile(
   1029         r"^AUTOTEST_FETCH_PACKAGE:([^:]*):([^:]*):(.*)$")
   1030     extract_indent = re.compile(r"^(\t*).*$")
   1031     extract_timestamp = re.compile(r".*\ttimestamp=(\d+)\t.*$")
   1032 
   1033     def __init__(self, host, tag, server_results_dir):
   1034         self.host = host
   1035         self.job = host.job
   1036         self.log_collector = log_collector(host, tag, server_results_dir)
   1037         self.leftover = ""
   1038         self.last_line = ""
   1039         self.logs = {}
   1040 
   1041 
   1042     def _process_log_dict(self, log_dict):
   1043         log_list = log_dict.pop("logs", [])
   1044         for key in sorted(log_dict.iterkeys()):
   1045             log_list += self._process_log_dict(log_dict.pop(key))
   1046         return log_list
   1047 
   1048 
   1049     def _process_logs(self):
   1050         """Go through the accumulated logs in self.log and print them
   1051         out to stdout and the status log. Note that this processes
   1052         logs in an ordering where:
   1053 
   1054         1) logs to different tags are never interleaved
   1055         2) logs to x.y come before logs to x.y.z for all z
   1056         3) logs to x.y come before x.z whenever y < z
   1057 
   1058         Note that this will in general not be the same as the
   1059         chronological ordering of the logs. However, if a chronological
   1060         ordering is desired that one can be reconstructed from the
   1061         status log by looking at timestamp lines."""
   1062         log_list = self._process_log_dict(self.logs)
   1063         for entry in log_list:
   1064             self.job.record_entry(entry, log_in_subdir=False)
   1065         if log_list:
   1066             self.last_line = log_list[-1].render()
   1067 
   1068 
   1069     def _process_quoted_line(self, tag, line):
   1070         """Process a line quoted with an AUTOTEST_STATUS flag. If the
   1071         tag is blank then we want to push out all the data we've been
   1072         building up in self.logs, and then the newest line. If the
   1073         tag is not blank, then push the line into the logs for handling
   1074         later."""
   1075         entry = base_job.status_log_entry.parse(line)
   1076         if entry is None:
   1077             return  # the line contains no status lines
   1078         if tag == "":
   1079             self._process_logs()
   1080             self.job.record_entry(entry, log_in_subdir=False)
   1081             self.last_line = line
   1082         else:
   1083             tag_parts = [int(x) for x in tag.split(".")]
   1084             log_dict = self.logs
   1085             for part in tag_parts:
   1086                 log_dict = log_dict.setdefault(part, {})
   1087             log_list = log_dict.setdefault("logs", [])
   1088             log_list.append(entry)
   1089 
   1090 
   1091     def _process_info_line(self, line):
   1092         """Check if line is an INFO line, and if it is, interpret any control
   1093         messages (e.g. enabling/disabling warnings) that it may contain."""
   1094         match = re.search(r"^\t*INFO\t----\t----(.*)\t[^\t]*$", line)
   1095         if not match:
   1096             return   # not an INFO line
   1097         for field in match.group(1).split('\t'):
   1098             if field.startswith("warnings.enable="):
   1099                 func = self.job.warning_manager.enable_warnings
   1100             elif field.startswith("warnings.disable="):
   1101                 func = self.job.warning_manager.disable_warnings
   1102             else:
   1103                 continue
   1104             warning_type = field.split("=", 1)[1]
   1105             func(warning_type)
   1106 
   1107 
   1108     def _process_line(self, line):
   1109         """Write out a line of data to the appropriate stream. Status
   1110         lines sent by autotest will be prepended with
   1111         "AUTOTEST_STATUS", and all other lines are ssh error
   1112         messages."""
   1113         status_match = self.status_parser.search(line)
   1114         test_complete_match = self.test_complete_parser.search(line)
   1115         fetch_package_match = self.fetch_package_parser.search(line)
   1116         if status_match:
   1117             tag, line = status_match.groups()
   1118             self._process_info_line(line)
   1119             self._process_quoted_line(tag, line)
   1120         elif test_complete_match:
   1121             self._process_logs()
   1122             fifo_path, = test_complete_match.groups()
   1123             try:
   1124                 self.log_collector.collect_client_job_results()
   1125                 self.host.run("echo A > %s" % fifo_path)
   1126             except Exception:
   1127                 msg = "Post-test log collection failed, continuing anyway"
   1128                 logging.exception(msg)
   1129         elif fetch_package_match:
   1130             pkg_name, dest_path, fifo_path = fetch_package_match.groups()
   1131             serve_packages = global_config.global_config.get_config_value(
   1132                 "PACKAGES", "serve_packages_from_autoserv", type=bool)
   1133             if serve_packages and pkg_name.endswith(".tar.bz2"):
   1134                 try:
   1135                     self._send_tarball(pkg_name, dest_path)
   1136                 except Exception:
   1137                     msg = "Package tarball creation failed, continuing anyway"
   1138                     logging.exception(msg)
   1139             try:
   1140                 self.host.run("echo B > %s" % fifo_path)
   1141             except Exception:
   1142                 msg = "Package tarball installation failed, continuing anyway"
   1143                 logging.exception(msg)
   1144         else:
   1145             logging.info(line)
   1146 
   1147 
   1148     def _send_tarball(self, pkg_name, remote_dest):
   1149         name, pkg_type = self.job.pkgmgr.parse_tarball_name(pkg_name)
   1150         src_dirs = []
   1151         if pkg_type == 'test':
   1152             for test_dir in ['site_tests', 'tests']:
   1153                 src_dir = os.path.join(self.job.clientdir, test_dir, name)
   1154                 if os.path.exists(src_dir):
   1155                     src_dirs += [src_dir]
   1156                     if autoserv_prebuild:
   1157                         prebuild.setup(self.job.clientdir, src_dir)
   1158                     break
   1159         elif pkg_type == 'profiler':
   1160             src_dirs += [os.path.join(self.job.clientdir, 'profilers', name)]
   1161             if autoserv_prebuild:
   1162                 prebuild.setup(self.job.clientdir, src_dir)
   1163         elif pkg_type == 'dep':
   1164             src_dirs += [os.path.join(self.job.clientdir, 'deps', name)]
   1165         elif pkg_type == 'client':
   1166             return  # you must already have a client to hit this anyway
   1167         else:
   1168             return  # no other types are supported
   1169 
   1170         # iterate over src_dirs until we find one that exists, then tar it
   1171         for src_dir in src_dirs:
   1172             if os.path.exists(src_dir):
   1173                 try:
   1174                     logging.info('Bundling %s into %s', src_dir, pkg_name)
   1175                     temp_dir = autotemp.tempdir(unique_id='autoserv-packager',
   1176                                                 dir=self.job.tmpdir)
   1177                     tarball_path = self.job.pkgmgr.tar_package(
   1178                         pkg_name, src_dir, temp_dir.name, " .")
   1179                     self.host.send_file(tarball_path, remote_dest)
   1180                 finally:
   1181                     temp_dir.clean()
   1182                 return
   1183 
   1184 
   1185     def log_warning(self, msg, warning_type):
   1186         """Injects a WARN message into the current status logging stream."""
   1187         timestamp = int(time.time())
   1188         if self.job.warning_manager.is_valid(timestamp, warning_type):
   1189             self.job.record('WARN', None, None, msg)
   1190 
   1191 
   1192     def write(self, data):
   1193         # now start processing the existing buffer and the new data
   1194         data = self.leftover + data
   1195         lines = data.split('\n')
   1196         processed_lines = 0
   1197         try:
   1198             # process all the buffered data except the last line
   1199             # ignore the last line since we may not have all of it yet
   1200             for line in lines[:-1]:
   1201                 self._process_line(line)
   1202                 processed_lines += 1
   1203         finally:
   1204             # save any unprocessed lines for future processing
   1205             self.leftover = '\n'.join(lines[processed_lines:])
   1206 
   1207 
   1208     def flush(self):
   1209         sys.stdout.flush()
   1210 
   1211 
   1212     def flush_all_buffers(self):
   1213         if self.leftover:
   1214             self._process_line(self.leftover)
   1215             self.leftover = ""
   1216         self._process_logs()
   1217         self.flush()
   1218 
   1219 
   1220     def close(self):
   1221         self.flush_all_buffers()
   1222 
   1223 
   1224 SiteAutotest = client_utils.import_site_class(
   1225     __file__, "autotest_lib.server.site_autotest", "SiteAutotest",
   1226     BaseAutotest)
   1227 
   1228 
   1229 _SiteRun = client_utils.import_site_class(
   1230     __file__, "autotest_lib.server.site_autotest", "_SiteRun", _BaseRun)
   1231 
   1232 
   1233 SiteClientLogger = client_utils.import_site_class(
   1234     __file__, "autotest_lib.server.site_autotest", "SiteClientLogger",
   1235     BaseClientLogger)
   1236 
   1237 
   1238 class Autotest(SiteAutotest):
   1239     pass
   1240 
   1241 
   1242 class client_logger(SiteClientLogger):
   1243     pass
   1244 
   1245 
   1246 class _Run(_SiteRun):
   1247     pass
   1248