Home | History | Annotate | Download | only in server
      1 # Copyright 2007 Google Inc. Released under the GPL v2
      2 #pylint: disable-msg=C0111
      3 
      4 import re, os, sys, traceback, time, glob, tempfile
      5 import logging
      6 from autotest_lib.server import installable_object, prebuild, utils
      7 from autotest_lib.client.common_lib import base_job, error, autotemp
      8 from autotest_lib.client.common_lib import base_packages, packages
      9 from autotest_lib.client.common_lib import global_config
     10 from autotest_lib.client.common_lib import utils as client_utils
     11 from autotest_lib.client.common_lib.cros.graphite import autotest_stats
     12 
     13 AUTOTEST_SVN = 'svn://test.kernel.org/autotest/trunk/client'
     14 AUTOTEST_HTTP = 'http://test.kernel.org/svn/autotest/trunk/client'
     15 
     16 
     17 get_value = global_config.global_config.get_config_value
     18 autoserv_prebuild = get_value('AUTOSERV', 'enable_server_prebuild',
     19                               type=bool, default=False)
     20 
     21 
     22 class AutodirNotFoundError(Exception):
     23     """No Autotest installation could be found."""
     24 
     25 
     26 class AutotestFailure(Exception):
     27     """Gereric exception class for failures during a test run."""
     28 
     29 
     30 class AutotestAbort(AutotestFailure):
     31     """
     32     AutotestAborts are thrown when the DUT seems fine,
     33     and the test doesn't give us an explicit reason for
     34     failure; In this case we have no choice but to abort.
     35     """
     36 
     37 
     38 class AutotestDeviceError(AutotestFailure):
     39     """
     40     Exceptions that inherit from AutotestDeviceError
     41     are thrown when we can determine the current
     42     state of the DUT and conclude that it probably
     43     lead to the test failing; these exceptions lead
     44     to failures instead of aborts.
     45     """
     46 
     47 
     48 class AutotestDeviceNotPingable(AutotestDeviceError):
     49     """Error for when a DUT becomes unpingable."""
     50 
     51 
     52 class AutotestDeviceNotSSHable(AutotestDeviceError):
     53     """Error for when a DUT is pingable but not SSHable."""
     54 
     55 
     56 class AutotestDeviceRebooted(AutotestDeviceError):
     57     """Error for when a DUT rebooted unexpectedly."""
     58 
     59 
     60 class BaseAutotest(installable_object.InstallableObject):
     61     """
     62     This class represents the Autotest program.
     63 
     64     Autotest is used to run tests automatically and collect the results.
     65     It also supports profilers.
     66 
     67     Implementation details:
     68     This is a leaf class in an abstract class hierarchy, it must
     69     implement the unimplemented methods in parent classes.
     70     """
     71 
     72     def __init__(self, host=None):
     73         self.host = host
     74         self.got = False
     75         self.installed = False
     76         self.serverdir = utils.get_server_dir()
     77         super(BaseAutotest, self).__init__()
     78 
     79 
     80     install_in_tmpdir = False
     81     @classmethod
     82     def set_install_in_tmpdir(cls, flag):
     83         """ Sets a flag that controls whether or not Autotest should by
     84         default be installed in a "standard" directory (e.g.
     85         /home/autotest, /usr/local/autotest) or a temporary directory. """
     86         cls.install_in_tmpdir = flag
     87 
     88 
     89     @classmethod
     90     def get_client_autodir_paths(cls, host):
     91         return global_config.global_config.get_config_value(
     92                 'AUTOSERV', 'client_autodir_paths', type=list)
     93 
     94 
     95     @classmethod
     96     def get_installed_autodir(cls, host):
     97         """
     98         Find where the Autotest client is installed on the host.
     99         @returns an absolute path to an installed Autotest client root.
    100         @raises AutodirNotFoundError if no Autotest installation can be found.
    101         """
    102         autodir = host.get_autodir()
    103         if autodir:
    104             logging.debug('Using existing host autodir: %s', autodir)
    105             return autodir
    106 
    107         for path in Autotest.get_client_autodir_paths(host):
    108             try:
    109                 autotest_binary = os.path.join(path, 'bin', 'autotest')
    110                 host.run('test -x %s' % utils.sh_escape(autotest_binary))
    111                 host.run('test -w %s' % utils.sh_escape(path))
    112                 logging.debug('Found existing autodir at %s', path)
    113                 return path
    114             except error.AutoservRunError:
    115                 logging.debug('%s does not exist on %s', autotest_binary,
    116                               host.hostname)
    117         raise AutodirNotFoundError
    118 
    119 
    120     @classmethod
    121     def get_install_dir(cls, host):
    122         """
    123         Determines the location where autotest should be installed on
    124         host. If self.install_in_tmpdir is set, it will return a unique
    125         temporary directory that autotest can be installed in. Otherwise, looks
    126         for an existing installation to use; if none is found, looks for a
    127         usable directory in the global config client_autodir_paths.
    128         """
    129         try:
    130             install_dir = cls.get_installed_autodir(host)
    131         except AutodirNotFoundError:
    132             install_dir = cls._find_installable_dir(host)
    133 
    134         if cls.install_in_tmpdir:
    135             return host.get_tmp_dir(parent=install_dir)
    136         return install_dir
    137 
    138 
    139     @classmethod
    140     def _find_installable_dir(cls, host):
    141         client_autodir_paths = cls.get_client_autodir_paths(host)
    142         for path in client_autodir_paths:
    143             try:
    144                 host.run('mkdir -p %s' % utils.sh_escape(path))
    145                 host.run('test -w %s' % utils.sh_escape(path))
    146                 return path
    147             except error.AutoservRunError:
    148                 logging.debug('Failed to create %s', path)
    149         metadata = {'_type': 'AutoservInstallError',
    150                     'hostname': host.hostname}
    151         autotest_stats.Counter('AutoservInstallError',
    152                                metadata=metadata).increment()
    153         raise error.AutoservInstallError(
    154                 'Unable to find a place to install Autotest; tried %s' %
    155                 ', '.join(client_autodir_paths))
    156 
    157 
    158     def get_fetch_location(self):
    159         c = global_config.global_config
    160         repos = c.get_config_value("PACKAGES", 'fetch_location', type=list,
    161                                    default=[])
    162         repos.reverse()
    163         return repos
    164 
    165 
    166     def install(self, host=None, autodir=None, use_packaging=True):
    167         self._install(host=host, autodir=autodir, use_packaging=use_packaging)
    168 
    169 
    170     def install_full_client(self, host=None, autodir=None):
    171         self._install(host=host, autodir=autodir, use_autoserv=False,
    172                       use_packaging=False)
    173 
    174 
    175     def install_no_autoserv(self, host=None, autodir=None):
    176         self._install(host=host, autodir=autodir, use_autoserv=False)
    177 
    178 
    179     def _install_using_packaging(self, host, autodir):
    180         repos = self.get_fetch_location()
    181         if not repos:
    182             raise error.PackageInstallError("No repos to install an "
    183                                             "autotest client from")
    184         pkgmgr = packages.PackageManager(autodir, hostname=host.hostname,
    185                                          repo_urls=repos,
    186                                          do_locking=False,
    187                                          run_function=host.run,
    188                                          run_function_dargs=dict(timeout=600))
    189         # The packages dir is used to store all the packages that
    190         # are fetched on that client. (for the tests,deps etc.
    191         # too apart from the client)
    192         pkg_dir = os.path.join(autodir, 'packages')
    193         # clean up the autodir except for the packages directory
    194         host.run('cd %s && ls | grep -v "^packages$"'
    195                  ' | xargs rm -rf && rm -rf .[!.]*' % autodir)
    196         pkgmgr.install_pkg('autotest', 'client', pkg_dir, autodir,
    197                            preserve_install_dir=True)
    198         self.installed = True
    199 
    200 
    201     def _install_using_send_file(self, host, autodir):
    202         dirs_to_exclude = set(["tests", "site_tests", "deps", "profilers",
    203                                "packages"])
    204         light_files = [os.path.join(self.source_material, f)
    205                        for f in os.listdir(self.source_material)
    206                        if f not in dirs_to_exclude]
    207         host.send_file(light_files, autodir, delete_dest=True)
    208 
    209         # create empty dirs for all the stuff we excluded
    210         commands = []
    211         for path in dirs_to_exclude:
    212             abs_path = os.path.join(autodir, path)
    213             abs_path = utils.sh_escape(abs_path)
    214             commands.append("mkdir -p '%s'" % abs_path)
    215             commands.append("touch '%s'/__init__.py" % abs_path)
    216         host.run(';'.join(commands))
    217 
    218 
    219     def _install(self, host=None, autodir=None, use_autoserv=True,
    220                  use_packaging=True):
    221         """
    222         Install autotest.  If get() was not called previously, an
    223         attempt will be made to install from the autotest svn
    224         repository.
    225 
    226         @param host A Host instance on which autotest will be installed
    227         @param autodir Location on the remote host to install to
    228         @param use_autoserv Enable install modes that depend on the client
    229             running with the autoserv harness
    230         @param use_packaging Enable install modes that use the packaging system
    231 
    232         @exception AutoservError if a tarball was not specified and
    233             the target host does not have svn installed in its path
    234         """
    235         if not host:
    236             host = self.host
    237         if not self.got:
    238             self.get()
    239         host.wait_up(timeout=30)
    240         host.setup()
    241         logging.info("Installing autotest on %s", host.hostname)
    242 
    243         # set up the autotest directory on the remote machine
    244         if not autodir:
    245             autodir = self.get_install_dir(host)
    246         logging.info('Using installation dir %s', autodir)
    247         host.set_autodir(autodir)
    248         host.run('mkdir -p %s' % utils.sh_escape(autodir))
    249 
    250         # make sure there are no files in $AUTODIR/results
    251         results_path = os.path.join(autodir, 'results')
    252         host.run('rm -rf %s/*' % utils.sh_escape(results_path),
    253                  ignore_status=True)
    254 
    255         # Fetch the autotest client from the nearest repository
    256         if use_packaging:
    257             try:
    258                 self._install_using_packaging(host, autodir)
    259                 logging.info("Installation of autotest completed using the "
    260                              "packaging system.")
    261                 return
    262             except (error.PackageInstallError, error.AutoservRunError,
    263                     global_config.ConfigError), e:
    264                 logging.info("Could not install autotest using the packaging "
    265                              "system: %s. Trying other methods", e)
    266         else:
    267             # Delete the package checksum file to force dut updating local
    268             # packages.
    269             command = ('rm -f "%s"' %
    270                        (os.path.join(autodir, base_packages.CHECKSUM_FILE)))
    271             host.run(command)
    272 
    273         # try to install from file or directory
    274         if self.source_material:
    275             c = global_config.global_config
    276             supports_autoserv_packaging = c.get_config_value(
    277                 "PACKAGES", "serve_packages_from_autoserv", type=bool)
    278             # Copy autotest recursively
    279             if supports_autoserv_packaging and use_autoserv:
    280                 self._install_using_send_file(host, autodir)
    281             else:
    282                 host.send_file(self.source_material, autodir, delete_dest=True)
    283             logging.info("Installation of autotest completed from %s",
    284                          self.source_material)
    285             self.installed = True
    286             return
    287 
    288         # if that fails try to install using svn
    289         if utils.run('which svn').exit_status:
    290             raise error.AutoservError('svn not found on target machine: %s' %
    291                                       host.name)
    292         try:
    293             host.run('svn checkout %s %s' % (AUTOTEST_SVN, autodir))
    294         except error.AutoservRunError, e:
    295             host.run('svn checkout %s %s' % (AUTOTEST_HTTP, autodir))
    296         logging.info("Installation of autotest completed using SVN.")
    297         self.installed = True
    298 
    299 
    300     def uninstall(self, host=None):
    301         """
    302         Uninstall (i.e. delete) autotest. Removes the autotest client install
    303         from the specified host.
    304 
    305         @params host a Host instance from which the client will be removed
    306         """
    307         if not self.installed:
    308             return
    309         if not host:
    310             host = self.host
    311         autodir = host.get_autodir()
    312         if not autodir:
    313             return
    314 
    315         # perform the actual uninstall
    316         host.run("rm -rf %s" % utils.sh_escape(autodir), ignore_status=True)
    317         host.set_autodir(None)
    318         self.installed = False
    319 
    320 
    321     def get(self, location=None):
    322         if not location:
    323             location = os.path.join(self.serverdir, '../client')
    324             location = os.path.abspath(location)
    325         # If there's stuff run on our client directory already, it
    326         # can cause problems. Try giving it a quick clean first.
    327         cwd = os.getcwd()
    328         os.chdir(location)
    329         try:
    330             utils.system('tools/make_clean', ignore_status=True)
    331         finally:
    332             os.chdir(cwd)
    333         super(BaseAutotest, self).get(location)
    334         self.got = True
    335 
    336 
    337     def run(self, control_file, results_dir='.', host=None, timeout=None,
    338             tag=None, parallel_flag=False, background=False,
    339             client_disconnect_timeout=None, use_packaging=True):
    340         """
    341         Run an autotest job on the remote machine.
    342 
    343         @param control_file: An open file-like-obj of the control file.
    344         @param results_dir: A str path where the results should be stored
    345                 on the local filesystem.
    346         @param host: A Host instance on which the control file should
    347                 be run.
    348         @param timeout: Maximum number of seconds to wait for the run or None.
    349         @param tag: Tag name for the client side instance of autotest.
    350         @param parallel_flag: Flag set when multiple jobs are run at the
    351                 same time.
    352         @param background: Indicates that the client should be launched as
    353                 a background job; the code calling run will be responsible
    354                 for monitoring the client and collecting the results.
    355         @param client_disconnect_timeout: Seconds to wait for the remote host
    356                 to come back after a reboot. Defaults to the host setting for
    357                 DEFAULT_REBOOT_TIMEOUT.
    358 
    359         @raises AutotestRunError: If there is a problem executing
    360                 the control file.
    361         """
    362         host = self._get_host_and_setup(host, use_packaging=use_packaging)
    363         results_dir = os.path.abspath(results_dir)
    364 
    365         if client_disconnect_timeout is None:
    366             client_disconnect_timeout = host.DEFAULT_REBOOT_TIMEOUT
    367 
    368         if tag:
    369             results_dir = os.path.join(results_dir, tag)
    370 
    371         atrun = _Run(host, results_dir, tag, parallel_flag, background)
    372         self._do_run(control_file, results_dir, host, atrun, timeout,
    373                      client_disconnect_timeout, use_packaging=use_packaging)
    374 
    375 
    376     def _get_host_and_setup(self, host, use_packaging=True):
    377         if not host:
    378             host = self.host
    379         if not self.installed:
    380             self.install(host, use_packaging=use_packaging)
    381 
    382         host.wait_up(timeout=30)
    383         return host
    384 
    385 
    386     def _do_run(self, control_file, results_dir, host, atrun, timeout,
    387                 client_disconnect_timeout, use_packaging=True):
    388         try:
    389             atrun.verify_machine()
    390         except:
    391             logging.error("Verify failed on %s. Reinstalling autotest",
    392                           host.hostname)
    393             self.install(host)
    394             atrun.verify_machine()
    395         debug = os.path.join(results_dir, 'debug')
    396         try:
    397             os.makedirs(debug)
    398         except Exception:
    399             pass
    400 
    401         delete_file_list = [atrun.remote_control_file,
    402                             atrun.remote_control_file + '.state',
    403                             atrun.manual_control_file,
    404                             atrun.manual_control_file + '.state']
    405         cmd = ';'.join('rm -f ' + control for control in delete_file_list)
    406         host.run(cmd, ignore_status=True)
    407 
    408         tmppath = utils.get(control_file, local_copy=True)
    409 
    410         # build up the initialization prologue for the control file
    411         prologue_lines = []
    412 
    413         # Add the additional user arguments
    414         prologue_lines.append("args = %r\n" % self.job.args)
    415 
    416         # If the packaging system is being used, add the repository list.
    417         repos = None
    418         try:
    419             if use_packaging:
    420                 repos = self.get_fetch_location()
    421                 prologue_lines.append('job.add_repository(%s)\n' % repos)
    422             else:
    423                 logging.debug('use_packaging is set to False, do not add any '
    424                               'repository.')
    425         except global_config.ConfigError, e:
    426             # If repos is defined packaging is enabled so log the error
    427             if repos:
    428                 logging.error(e)
    429 
    430         # on full-size installs, turn on any profilers the server is using
    431         if not atrun.background:
    432             running_profilers = host.job.profilers.add_log.iteritems()
    433             for profiler, (args, dargs) in running_profilers:
    434                 call_args = [repr(profiler)]
    435                 call_args += [repr(arg) for arg in args]
    436                 call_args += ["%s=%r" % item for item in dargs.iteritems()]
    437                 prologue_lines.append("job.profilers.add(%s)\n"
    438                                       % ", ".join(call_args))
    439         cfile = "".join(prologue_lines)
    440 
    441         cfile += open(tmppath).read()
    442         open(tmppath, "w").write(cfile)
    443 
    444         # Create and copy state file to remote_control_file + '.state'
    445         state_file = host.job.preprocess_client_state()
    446         host.send_file(state_file, atrun.remote_control_file + '.init.state')
    447         os.remove(state_file)
    448 
    449         # Copy control_file to remote_control_file on the host
    450         host.send_file(tmppath, atrun.remote_control_file)
    451         if os.path.abspath(tmppath) != os.path.abspath(control_file):
    452             os.remove(tmppath)
    453 
    454         atrun.execute_control(
    455                 timeout=timeout,
    456                 client_disconnect_timeout=client_disconnect_timeout)
    457 
    458 
    459     def run_timed_test(self, test_name, results_dir='.', host=None,
    460                        timeout=None, parallel_flag=False, background=False,
    461                        client_disconnect_timeout=None, *args, **dargs):
    462         """
    463         Assemble a tiny little control file to just run one test,
    464         and run it as an autotest client-side test
    465         """
    466         if not host:
    467             host = self.host
    468         if not self.installed:
    469             self.install(host)
    470         opts = ["%s=%s" % (o[0], repr(o[1])) for o in dargs.items()]
    471         cmd = ", ".join([repr(test_name)] + map(repr, args) + opts)
    472         control = "job.run_test(%s)\n" % cmd
    473         self.run(control, results_dir, host, timeout=timeout,
    474                  parallel_flag=parallel_flag, background=background,
    475                  client_disconnect_timeout=client_disconnect_timeout)
    476 
    477 
    478     def run_test(self, test_name, results_dir='.', host=None,
    479                  parallel_flag=False, background=False,
    480                  client_disconnect_timeout=None, *args, **dargs):
    481         self.run_timed_test(test_name, results_dir, host, timeout=None,
    482                             parallel_flag=parallel_flag, background=background,
    483                             client_disconnect_timeout=client_disconnect_timeout,
    484                             *args, **dargs)
    485 
    486 
    487 class _BaseRun(object):
    488     """
    489     Represents a run of autotest control file.  This class maintains
    490     all the state necessary as an autotest control file is executed.
    491 
    492     It is not intended to be used directly, rather control files
    493     should be run using the run method in Autotest.
    494     """
    495     def __init__(self, host, results_dir, tag, parallel_flag, background):
    496         self.host = host
    497         self.results_dir = results_dir
    498         self.env = host.env
    499         self.tag = tag
    500         self.parallel_flag = parallel_flag
    501         self.background = background
    502         self.autodir = Autotest.get_installed_autodir(self.host)
    503         control = os.path.join(self.autodir, 'control')
    504         if tag:
    505             control += '.' + tag
    506         self.manual_control_file = control
    507         self.remote_control_file = control + '.autoserv'
    508         self.config_file = os.path.join(self.autodir, 'global_config.ini')
    509 
    510 
    511     def verify_machine(self):
    512         binary = os.path.join(self.autodir, 'bin/autotest')
    513         try:
    514             self.host.run('ls %s > /dev/null 2>&1' % binary)
    515         except:
    516             raise error.AutoservInstallError(
    517                 "Autotest does not appear to be installed")
    518 
    519         if not self.parallel_flag:
    520             tmpdir = os.path.join(self.autodir, 'tmp')
    521             download = os.path.join(self.autodir, 'tests/download')
    522             self.host.run('umount %s' % tmpdir, ignore_status=True)
    523             self.host.run('umount %s' % download, ignore_status=True)
    524 
    525 
    526     def get_base_cmd_args(self, section):
    527         args = ['--verbose']
    528         if section > 0:
    529             args.append('-c')
    530         if self.tag:
    531             args.append('-t %s' % self.tag)
    532         if self.host.job.use_external_logging():
    533             args.append('-l')
    534         if self.host.hostname:
    535             args.append('--hostname=%s' % self.host.hostname)
    536         args.append('--user=%s' % self.host.job.user)
    537 
    538         args.append(self.remote_control_file)
    539         return args
    540 
    541 
    542     def get_background_cmd(self, section):
    543         cmd = ['nohup', os.path.join(self.autodir, 'bin/autotest_client')]
    544         cmd += self.get_base_cmd_args(section)
    545         cmd += ['>/dev/null', '2>/dev/null', '&']
    546         return ' '.join(cmd)
    547 
    548 
    549     def get_daemon_cmd(self, section, monitor_dir):
    550         cmd = ['nohup', os.path.join(self.autodir, 'bin/autotestd'),
    551                monitor_dir, '-H autoserv']
    552         cmd += self.get_base_cmd_args(section)
    553         cmd += ['>/dev/null', '2>/dev/null', '&']
    554         return ' '.join(cmd)
    555 
    556 
    557     def get_monitor_cmd(self, monitor_dir, stdout_read, stderr_read):
    558         cmd = [os.path.join(self.autodir, 'bin', 'autotestd_monitor'),
    559                monitor_dir, str(stdout_read), str(stderr_read)]
    560         return ' '.join(cmd)
    561 
    562 
    563     def get_client_log(self):
    564         """Find what the "next" client.* prefix should be
    565 
    566         @returns A string of the form client.INTEGER that should be prefixed
    567             to all client debug log files.
    568         """
    569         max_digit = -1
    570         debug_dir = os.path.join(self.results_dir, 'debug')
    571         client_logs = glob.glob(os.path.join(debug_dir, 'client.*.*'))
    572         for log in client_logs:
    573             _, number, _ = log.split('.', 2)
    574             if number.isdigit():
    575                 max_digit = max(max_digit, int(number))
    576         return 'client.%d' % (max_digit + 1)
    577 
    578 
    579     def copy_client_config_file(self, client_log_prefix=None):
    580         """
    581         Create and copy the client config file based on the server config.
    582 
    583         @param client_log_prefix: Optional prefix to prepend to log files.
    584         """
    585         client_config_file = self._create_client_config_file(client_log_prefix)
    586         self.host.send_file(client_config_file, self.config_file)
    587         os.remove(client_config_file)
    588 
    589 
    590     def _create_client_config_file(self, client_log_prefix=None):
    591         """
    592         Create a temporary file with the [CLIENT] section configuration values
    593         taken from the server global_config.ini.
    594 
    595         @param client_log_prefix: Optional prefix to prepend to log files.
    596 
    597         @return: Path of the temporary file generated.
    598         """
    599         config = global_config.global_config.get_section_values('CLIENT')
    600         if client_log_prefix:
    601             config.set('CLIENT', 'default_logging_name', client_log_prefix)
    602         return self._create_aux_file(config.write)
    603 
    604 
    605     def _create_aux_file(self, func, *args):
    606         """
    607         Creates a temporary file and writes content to it according to a
    608         content creation function. The file object is appended to *args, which
    609         is then passed to the content creation function
    610 
    611         @param func: Function that will be used to write content to the
    612                 temporary file.
    613         @param *args: List of parameters that func takes.
    614         @return: Path to the temporary file that was created.
    615         """
    616         fd, path = tempfile.mkstemp(dir=self.host.job.tmpdir)
    617         aux_file = os.fdopen(fd, "w")
    618         try:
    619             list_args = list(args)
    620             list_args.append(aux_file)
    621             func(*list_args)
    622         finally:
    623             aux_file.close()
    624         return path
    625 
    626 
    627     @staticmethod
    628     def is_client_job_finished(last_line):
    629         return bool(re.match(r'^END .*\t----\t----\t.*$', last_line))
    630 
    631 
    632     @staticmethod
    633     def is_client_job_rebooting(last_line):
    634         return bool(re.match(r'^\t*GOOD\t----\treboot\.start.*$', last_line))
    635 
    636 
    637     def _diagnose_dut(self, old_boot_id=None):
    638         """
    639         Run diagnostic checks on a DUT.
    640 
    641         1. ping: A dead host will not respond to pings.
    642         2. ssh (happens with 3.): DUT hangs usually fail in authentication
    643             but respond to pings.
    644         3. Check if a reboot occured: A healthy but unexpected reboot leaves the
    645             host running with a new boot id.
    646 
    647         This method will always raise an exception from the AutotestFailure
    648         family and should only get called when the reason for a test failing
    649         is ambiguous.
    650 
    651         @raises AutotestDeviceNotPingable: If the DUT doesn't respond to ping.
    652         @raises AutotestDeviceNotSSHable: If we cannot SSH into the DUT.
    653         @raises AutotestDeviceRebooted: If the boot id changed.
    654         @raises AutotestAbort: If none of the above exceptions were raised.
    655             Since we have no recourse we must abort at this stage.
    656         """
    657         msg = 'Autotest client terminated unexpectedly: '
    658         if utils.ping(self.host.hostname, tries=1, deadline=1) != 0:
    659             msg += 'DUT is no longer pingable, it may have rebooted or hung.\n'
    660             raise AutotestDeviceNotPingable(msg)
    661 
    662         if old_boot_id:
    663             try:
    664                 new_boot_id = self.host.get_boot_id(timeout=60)
    665             except Exception as e:
    666                 msg += ('DUT is pingable but not SSHable, it most likely'
    667                         ' sporadically rebooted during testing. %s\n' % str(e))
    668                 raise AutotestDeviceNotSSHable(msg)
    669             else:
    670                 if new_boot_id != old_boot_id:
    671                     msg += 'DUT rebooted during the test run.\n'
    672                     raise AutotestDeviceRebooted(msg)
    673 
    674             msg += ('DUT is pingable, SSHable and did NOT restart '
    675                     'un-expectedly. We probably lost connectivity during the '
    676                     'test.')
    677         else:
    678             msg += ('DUT is pingable, could not determine if an un-expected '
    679                     'reboot occured during the test.')
    680 
    681         raise AutotestAbort(msg)
    682 
    683 
    684     def log_unexpected_abort(self, stderr_redirector, old_boot_id=None):
    685         """
    686         Logs that something unexpected happened, then tries to diagnose the
    687         failure. The purpose of this function is only to close out the status
    688         log with the appropriate error message, not to critically terminate
    689         the program.
    690 
    691         @param stderr_redirector: log stream.
    692         @param old_boot_id: boot id used to infer if a reboot occured.
    693         """
    694         stderr_redirector.flush_all_buffers()
    695         try:
    696             self._diagnose_dut(old_boot_id)
    697         except AutotestFailure as e:
    698             self.host.job.record('END ABORT', None, None, str(e))
    699 
    700 
    701     def _execute_in_background(self, section, timeout):
    702         full_cmd = self.get_background_cmd(section)
    703         devnull = open(os.devnull, "w")
    704 
    705         self.copy_client_config_file(self.get_client_log())
    706 
    707         self.host.job.push_execution_context(self.results_dir)
    708         try:
    709             result = self.host.run(full_cmd, ignore_status=True,
    710                                    timeout=timeout,
    711                                    stdout_tee=devnull,
    712                                    stderr_tee=devnull)
    713         finally:
    714             self.host.job.pop_execution_context()
    715 
    716         return result
    717 
    718 
    719     @staticmethod
    720     def _strip_stderr_prologue(stderr):
    721         """Strips the 'standard' prologue that get pre-pended to every
    722         remote command and returns the text that was actually written to
    723         stderr by the remote command."""
    724         stderr_lines = stderr.split("\n")[1:]
    725         if not stderr_lines:
    726             return ""
    727         elif stderr_lines[0].startswith("NOTE: autotestd_monitor"):
    728             del stderr_lines[0]
    729         return "\n".join(stderr_lines)
    730 
    731 
    732     def _execute_daemon(self, section, timeout, stderr_redirector,
    733                         client_disconnect_timeout):
    734         monitor_dir = self.host.get_tmp_dir()
    735         daemon_cmd = self.get_daemon_cmd(section, monitor_dir)
    736 
    737         # grab the location for the server-side client log file
    738         client_log_prefix = self.get_client_log()
    739         client_log_path = os.path.join(self.results_dir, 'debug',
    740                                        client_log_prefix + '.log')
    741         client_log = open(client_log_path, 'w', 0)
    742         self.copy_client_config_file(client_log_prefix)
    743 
    744         stdout_read = stderr_read = 0
    745         self.host.job.push_execution_context(self.results_dir)
    746         try:
    747             self.host.run(daemon_cmd, ignore_status=True, timeout=timeout)
    748             disconnect_warnings = []
    749             while True:
    750                 monitor_cmd = self.get_monitor_cmd(monitor_dir, stdout_read,
    751                                                    stderr_read)
    752                 try:
    753                     result = self.host.run(monitor_cmd, ignore_status=True,
    754                                            timeout=timeout,
    755                                            stdout_tee=client_log,
    756                                            stderr_tee=stderr_redirector)
    757                 except error.AutoservRunError, e:
    758                     result = e.result_obj
    759                     result.exit_status = None
    760                     disconnect_warnings.append(e.description)
    761 
    762                     stderr_redirector.log_warning(
    763                         "Autotest client was disconnected: %s" % e.description,
    764                         "NETWORK")
    765                 except error.AutoservSSHTimeout:
    766                     result = utils.CmdResult(monitor_cmd, "", "", None, 0)
    767                     stderr_redirector.log_warning(
    768                         "Attempt to connect to Autotest client timed out",
    769                         "NETWORK")
    770 
    771                 stdout_read += len(result.stdout)
    772                 stderr_read += len(self._strip_stderr_prologue(result.stderr))
    773 
    774                 if result.exit_status is not None:
    775                     # TODO (crosbug.com/38224)- sbasi: Remove extra logging.
    776                     logging.debug('Result exit status is %d.',
    777                                   result.exit_status)
    778                     return result
    779                 elif not self.host.wait_up(client_disconnect_timeout):
    780                     raise error.AutoservSSHTimeout(
    781                         "client was disconnected, reconnect timed out")
    782         finally:
    783             client_log.close()
    784             self.host.job.pop_execution_context()
    785 
    786 
    787     def execute_section(self, section, timeout, stderr_redirector,
    788                         client_disconnect_timeout):
    789         logging.info("Executing %s/bin/autotest %s/control phase %d",
    790                      self.autodir, self.autodir, section)
    791 
    792         if self.background:
    793             result = self._execute_in_background(section, timeout)
    794         else:
    795             result = self._execute_daemon(section, timeout, stderr_redirector,
    796                                           client_disconnect_timeout)
    797 
    798         last_line = stderr_redirector.last_line
    799 
    800         # check if we failed hard enough to warrant an exception
    801         if result.exit_status == 1:
    802             err = error.AutotestRunError("client job was aborted")
    803         elif not self.background and not result.stderr:
    804             err = error.AutotestRunError(
    805                 "execute_section %s failed to return anything\n"
    806                 "stdout:%s\n" % (section, result.stdout))
    807         else:
    808             err = None
    809 
    810         # log something if the client failed AND never finished logging
    811         if err and not self.is_client_job_finished(last_line):
    812             self.log_unexpected_abort(stderr_redirector)
    813 
    814         if err:
    815             raise err
    816         else:
    817             return stderr_redirector.last_line
    818 
    819 
    820     def _wait_for_reboot(self, old_boot_id):
    821         logging.info("Client is rebooting")
    822         logging.info("Waiting for client to halt")
    823         if not self.host.wait_down(self.host.WAIT_DOWN_REBOOT_TIMEOUT,
    824                                    old_boot_id=old_boot_id):
    825             err = "%s failed to shutdown after %d"
    826             err %= (self.host.hostname, self.host.WAIT_DOWN_REBOOT_TIMEOUT)
    827             raise error.AutotestRunError(err)
    828         logging.info("Client down, waiting for restart")
    829         if not self.host.wait_up(self.host.DEFAULT_REBOOT_TIMEOUT):
    830             # since reboot failed
    831             # hardreset the machine once if possible
    832             # before failing this control file
    833             warning = "%s did not come back up, hard resetting"
    834             warning %= self.host.hostname
    835             logging.warning(warning)
    836             try:
    837                 self.host.hardreset(wait=False)
    838             except (AttributeError, error.AutoservUnsupportedError):
    839                 warning = "Hard reset unsupported on %s"
    840                 warning %= self.host.hostname
    841                 logging.warning(warning)
    842             raise error.AutotestRunError("%s failed to boot after %ds" %
    843                                          (self.host.hostname,
    844                                           self.host.DEFAULT_REBOOT_TIMEOUT))
    845         self.host.reboot_followup()
    846 
    847 
    848     def execute_control(self, timeout=None, client_disconnect_timeout=None):
    849         if not self.background:
    850             collector = log_collector(self.host, self.tag, self.results_dir)
    851             hostname = self.host.hostname
    852             remote_results = collector.client_results_dir
    853             local_results = collector.server_results_dir
    854             self.host.job.add_client_log(hostname, remote_results,
    855                                          local_results)
    856             job_record_context = self.host.job.get_record_context()
    857 
    858         section = 0
    859         start_time = time.time()
    860 
    861         logger = client_logger(self.host, self.tag, self.results_dir)
    862         try:
    863             while not timeout or time.time() < start_time + timeout:
    864                 if timeout:
    865                     section_timeout = start_time + timeout - time.time()
    866                 else:
    867                     section_timeout = None
    868                 boot_id = self.host.get_boot_id()
    869                 last = self.execute_section(section, section_timeout,
    870                                             logger, client_disconnect_timeout)
    871                 if self.background:
    872                     return
    873                 section += 1
    874                 if self.is_client_job_finished(last):
    875                     logging.info("Client complete")
    876                     return
    877                 elif self.is_client_job_rebooting(last):
    878                     try:
    879                         self._wait_for_reboot(boot_id)
    880                     except error.AutotestRunError, e:
    881                         self.host.job.record("ABORT", None, "reboot", str(e))
    882                         self.host.job.record("END ABORT", None, None, str(e))
    883                         raise
    884                     continue
    885 
    886                 # If a test fails without probable cause we try to bucket it's
    887                 # failure into one of 2 categories. If we can determine the
    888                 # current state of the device and it is suspicious, we close the
    889                 # status lines indicating a failure. If we either cannot
    890                 # determine the state of the device, or it appears totally
    891                 # healthy, we give up and abort.
    892                 try:
    893                     self._diagnose_dut(boot_id)
    894                 except AutotestDeviceError as e:
    895                     # The status lines of the test are pretty much tailed to
    896                     # our log, with indentation, from the client job on the DUT.
    897                     # So if the DUT goes down unexpectedly we'll end up with a
    898                     # malformed status log unless we manually unwind the status
    899                     # stack. Ideally we would want to write a nice wrapper like
    900                     # server_job methods run_reboot, run_group but they expect
    901                     # reboots and we don't.
    902                     self.host.job.record('FAIL', None, None, str(e))
    903                     self.host.job.record('END FAIL', None, None)
    904                     self.host.job.record('END GOOD', None, None)
    905                     self.host.job.failed_with_device_error = True
    906                     return
    907                 except AutotestAbort as e:
    908                     self.host.job.record('ABORT', None, None, str(e))
    909                     self.host.job.record('END ABORT', None, None)
    910 
    911                     # give the client machine a chance to recover from a crash
    912                     self.host.wait_up(
    913                         self.host.HOURS_TO_WAIT_FOR_RECOVERY * 3600)
    914                     msg = ("Aborting - unexpected final status message from "
    915                            "client on %s: %s\n") % (self.host.hostname, last)
    916                     raise error.AutotestRunError(msg)
    917         finally:
    918             logger.close()
    919             if not self.background:
    920                 collector.collect_client_job_results()
    921                 collector.remove_redundant_client_logs()
    922                 state_file = os.path.basename(self.remote_control_file
    923                                               + '.state')
    924                 state_path = os.path.join(self.results_dir, state_file)
    925                 self.host.job.postprocess_client_state(state_path)
    926                 self.host.job.remove_client_log(hostname, remote_results,
    927                                                 local_results)
    928                 job_record_context.restore()
    929 
    930         # should only get here if we timed out
    931         assert timeout
    932         raise error.AutotestTimeoutError()
    933 
    934 
    935 class log_collector(object):
    936     def __init__(self, host, client_tag, results_dir):
    937         self.host = host
    938         if not client_tag:
    939             client_tag = "default"
    940         self.client_results_dir = os.path.join(host.get_autodir(), "results",
    941                                                client_tag)
    942         self.server_results_dir = results_dir
    943 
    944 
    945     def collect_client_job_results(self):
    946         """ A method that collects all the current results of a running
    947         client job into the results dir. By default does nothing as no
    948         client job is running, but when running a client job you can override
    949         this with something that will actually do something. """
    950 
    951         # make an effort to wait for the machine to come up
    952         try:
    953             self.host.wait_up(timeout=30)
    954         except error.AutoservError:
    955             # don't worry about any errors, we'll try and
    956             # get the results anyway
    957             pass
    958 
    959         # Copy all dirs in default to results_dir
    960         timer = autotest_stats.Timer('collect_client_job_results')
    961         timer.start()
    962         try:
    963             self.host.get_file(self.client_results_dir + '/',
    964                                self.server_results_dir, preserve_symlinks=True)
    965 
    966             # Only report time used for successful get_file calls.
    967             timer.stop();
    968         except Exception:
    969             # well, don't stop running just because we couldn't get logs
    970             e_msg = "Unexpected error copying test result logs, continuing ..."
    971             logging.error(e_msg)
    972             traceback.print_exc(file=sys.stdout)
    973 
    974 
    975     def remove_redundant_client_logs(self):
    976         """Remove client.*.log files in favour of client.*.DEBUG files."""
    977         debug_dir = os.path.join(self.server_results_dir, 'debug')
    978         debug_files = [f for f in os.listdir(debug_dir)
    979                        if re.search(r'^client\.\d+\.DEBUG$', f)]
    980         for debug_file in debug_files:
    981             log_file = debug_file.replace('DEBUG', 'log')
    982             log_file = os.path.join(debug_dir, log_file)
    983             if os.path.exists(log_file):
    984                 os.remove(log_file)
    985 
    986 
    987 # a file-like object for catching stderr from an autotest client and
    988 # extracting status logs from it
    989 class BaseClientLogger(object):
    990     """Partial file object to write to both stdout and
    991     the status log file.  We only implement those methods
    992     utils.run() actually calls.
    993     """
    994     status_parser = re.compile(r"^AUTOTEST_STATUS:([^:]*):(.*)$")
    995     test_complete_parser = re.compile(r"^AUTOTEST_TEST_COMPLETE:(.*)$")
    996     fetch_package_parser = re.compile(
    997         r"^AUTOTEST_FETCH_PACKAGE:([^:]*):([^:]*):(.*)$")
    998     extract_indent = re.compile(r"^(\t*).*$")
    999     extract_timestamp = re.compile(r".*\ttimestamp=(\d+)\t.*$")
   1000 
   1001     def __init__(self, host, tag, server_results_dir):
   1002         self.host = host
   1003         self.job = host.job
   1004         self.log_collector = log_collector(host, tag, server_results_dir)
   1005         self.leftover = ""
   1006         self.last_line = ""
   1007         self.logs = {}
   1008 
   1009 
   1010     def _process_log_dict(self, log_dict):
   1011         log_list = log_dict.pop("logs", [])
   1012         for key in sorted(log_dict.iterkeys()):
   1013             log_list += self._process_log_dict(log_dict.pop(key))
   1014         return log_list
   1015 
   1016 
   1017     def _process_logs(self):
   1018         """Go through the accumulated logs in self.log and print them
   1019         out to stdout and the status log. Note that this processes
   1020         logs in an ordering where:
   1021 
   1022         1) logs to different tags are never interleaved
   1023         2) logs to x.y come before logs to x.y.z for all z
   1024         3) logs to x.y come before x.z whenever y < z
   1025 
   1026         Note that this will in general not be the same as the
   1027         chronological ordering of the logs. However, if a chronological
   1028         ordering is desired that one can be reconstructed from the
   1029         status log by looking at timestamp lines."""
   1030         log_list = self._process_log_dict(self.logs)
   1031         for entry in log_list:
   1032             self.job.record_entry(entry, log_in_subdir=False)
   1033         if log_list:
   1034             self.last_line = log_list[-1].render()
   1035 
   1036 
   1037     def _process_quoted_line(self, tag, line):
   1038         """Process a line quoted with an AUTOTEST_STATUS flag. If the
   1039         tag is blank then we want to push out all the data we've been
   1040         building up in self.logs, and then the newest line. If the
   1041         tag is not blank, then push the line into the logs for handling
   1042         later."""
   1043         entry = base_job.status_log_entry.parse(line)
   1044         if entry is None:
   1045             return  # the line contains no status lines
   1046         if tag == "":
   1047             self._process_logs()
   1048             self.job.record_entry(entry, log_in_subdir=False)
   1049             self.last_line = line
   1050         else:
   1051             tag_parts = [int(x) for x in tag.split(".")]
   1052             log_dict = self.logs
   1053             for part in tag_parts:
   1054                 log_dict = log_dict.setdefault(part, {})
   1055             log_list = log_dict.setdefault("logs", [])
   1056             log_list.append(entry)
   1057 
   1058 
   1059     def _process_info_line(self, line):
   1060         """Check if line is an INFO line, and if it is, interpret any control
   1061         messages (e.g. enabling/disabling warnings) that it may contain."""
   1062         match = re.search(r"^\t*INFO\t----\t----(.*)\t[^\t]*$", line)
   1063         if not match:
   1064             return   # not an INFO line
   1065         for field in match.group(1).split('\t'):
   1066             if field.startswith("warnings.enable="):
   1067                 func = self.job.warning_manager.enable_warnings
   1068             elif field.startswith("warnings.disable="):
   1069                 func = self.job.warning_manager.disable_warnings
   1070             else:
   1071                 continue
   1072             warning_type = field.split("=", 1)[1]
   1073             func(warning_type)
   1074 
   1075 
   1076     def _process_line(self, line):
   1077         """Write out a line of data to the appropriate stream. Status
   1078         lines sent by autotest will be prepended with
   1079         "AUTOTEST_STATUS", and all other lines are ssh error
   1080         messages."""
   1081         status_match = self.status_parser.search(line)
   1082         test_complete_match = self.test_complete_parser.search(line)
   1083         fetch_package_match = self.fetch_package_parser.search(line)
   1084         if status_match:
   1085             tag, line = status_match.groups()
   1086             self._process_info_line(line)
   1087             self._process_quoted_line(tag, line)
   1088         elif test_complete_match:
   1089             self._process_logs()
   1090             fifo_path, = test_complete_match.groups()
   1091             try:
   1092                 self.log_collector.collect_client_job_results()
   1093                 self.host.run("echo A > %s" % fifo_path)
   1094             except Exception:
   1095                 msg = "Post-test log collection failed, continuing anyway"
   1096                 logging.exception(msg)
   1097         elif fetch_package_match:
   1098             pkg_name, dest_path, fifo_path = fetch_package_match.groups()
   1099             serve_packages = global_config.global_config.get_config_value(
   1100                 "PACKAGES", "serve_packages_from_autoserv", type=bool)
   1101             if serve_packages and pkg_name.endswith(".tar.bz2"):
   1102                 try:
   1103                     self._send_tarball(pkg_name, dest_path)
   1104                 except Exception:
   1105                     msg = "Package tarball creation failed, continuing anyway"
   1106                     logging.exception(msg)
   1107             try:
   1108                 self.host.run("echo B > %s" % fifo_path)
   1109             except Exception:
   1110                 msg = "Package tarball installation failed, continuing anyway"
   1111                 logging.exception(msg)
   1112         else:
   1113             logging.info(line)
   1114 
   1115 
   1116     def _send_tarball(self, pkg_name, remote_dest):
   1117         name, pkg_type = self.job.pkgmgr.parse_tarball_name(pkg_name)
   1118         src_dirs = []
   1119         if pkg_type == 'test':
   1120             for test_dir in ['site_tests', 'tests']:
   1121                 src_dir = os.path.join(self.job.clientdir, test_dir, name)
   1122                 if os.path.exists(src_dir):
   1123                     src_dirs += [src_dir]
   1124                     if autoserv_prebuild:
   1125                         prebuild.setup(self.job.clientdir, src_dir)
   1126                     break
   1127         elif pkg_type == 'profiler':
   1128             src_dirs += [os.path.join(self.job.clientdir, 'profilers', name)]
   1129             if autoserv_prebuild:
   1130                 prebuild.setup(self.job.clientdir, src_dir)
   1131         elif pkg_type == 'dep':
   1132             src_dirs += [os.path.join(self.job.clientdir, 'deps', name)]
   1133         elif pkg_type == 'client':
   1134             return  # you must already have a client to hit this anyway
   1135         else:
   1136             return  # no other types are supported
   1137 
   1138         # iterate over src_dirs until we find one that exists, then tar it
   1139         for src_dir in src_dirs:
   1140             if os.path.exists(src_dir):
   1141                 try:
   1142                     logging.info('Bundling %s into %s', src_dir, pkg_name)
   1143                     temp_dir = autotemp.tempdir(unique_id='autoserv-packager',
   1144                                                 dir=self.job.tmpdir)
   1145                     tarball_path = self.job.pkgmgr.tar_package(
   1146                         pkg_name, src_dir, temp_dir.name, " .")
   1147                     self.host.send_file(tarball_path, remote_dest)
   1148                 finally:
   1149                     temp_dir.clean()
   1150                 return
   1151 
   1152 
   1153     def log_warning(self, msg, warning_type):
   1154         """Injects a WARN message into the current status logging stream."""
   1155         timestamp = int(time.time())
   1156         if self.job.warning_manager.is_valid(timestamp, warning_type):
   1157             self.job.record('WARN', None, None, msg)
   1158 
   1159 
   1160     def write(self, data):
   1161         # now start processing the existing buffer and the new data
   1162         data = self.leftover + data
   1163         lines = data.split('\n')
   1164         processed_lines = 0
   1165         try:
   1166             # process all the buffered data except the last line
   1167             # ignore the last line since we may not have all of it yet
   1168             for line in lines[:-1]:
   1169                 self._process_line(line)
   1170                 processed_lines += 1
   1171         finally:
   1172             # save any unprocessed lines for future processing
   1173             self.leftover = '\n'.join(lines[processed_lines:])
   1174 
   1175 
   1176     def flush(self):
   1177         sys.stdout.flush()
   1178 
   1179 
   1180     def flush_all_buffers(self):
   1181         if self.leftover:
   1182             self._process_line(self.leftover)
   1183             self.leftover = ""
   1184         self._process_logs()
   1185         self.flush()
   1186 
   1187 
   1188     def close(self):
   1189         self.flush_all_buffers()
   1190 
   1191 
   1192 SiteAutotest = client_utils.import_site_class(
   1193     __file__, "autotest_lib.server.site_autotest", "SiteAutotest",
   1194     BaseAutotest)
   1195 
   1196 
   1197 _SiteRun = client_utils.import_site_class(
   1198     __file__, "autotest_lib.server.site_autotest", "_SiteRun", _BaseRun)
   1199 
   1200 
   1201 SiteClientLogger = client_utils.import_site_class(
   1202     __file__, "autotest_lib.server.site_autotest", "SiteClientLogger",
   1203     BaseClientLogger)
   1204 
   1205 
   1206 class Autotest(SiteAutotest):
   1207     pass
   1208 
   1209 
   1210 class client_logger(SiteClientLogger):
   1211     pass
   1212 
   1213 
   1214 class _Run(_SiteRun):
   1215     pass
   1216