"""This module defines the RemoteHost class."""
      2 
      3 import os, logging, urllib, time
      4 from autotest_lib.client.common_lib import error
      5 from autotest_lib.server import utils
      6 from autotest_lib.server.hosts import base_classes
      7 
      8 
      9 class RemoteHost(base_classes.Host):
     10     """
     11     This class represents a remote machine on which you can run
     12     programs.
     13 
     14     It may be accessed through a network, a serial line, ...
     15     It is not the machine autoserv is running on.
     16 
     17     Implementation details:
     18     This is an abstract class, leaf subclasses must implement the methods
     19     listed here and in parent classes which have no implementation. They
     20     may reimplement methods which already have an implementation. You
     21     must not instantiate this class but should instantiate one of those
     22     leaf subclasses.
     23     """
     24 
     25     DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
     26     DEFAULT_HALT_TIMEOUT = 2 * 60
     27     _LABEL_FUNCTIONS = []
     28     _DETECTABLE_LABELS = []
     29 
     30     VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"
     31 
     32 
     33     def _initialize(self, hostname, autodir=None, *args, **dargs):
     34         super(RemoteHost, self)._initialize(*args, **dargs)
     35 
     36         self.hostname = hostname
     37         self.autodir = autodir
     38         self.tmp_dirs = []
     39 
     40 
     41     def __repr__(self):
     42         return "<remote host: %s>" % self.hostname
     43 
     44 
     45     def close(self):
     46         super(RemoteHost, self).close()
     47         self.stop_loggers()
     48 
     49         if hasattr(self, 'tmp_dirs'):
     50             for dir in self.tmp_dirs:
     51                 try:
     52                     self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
     53                 except error.AutoservRunError:
     54                     pass
     55 
     56 
     57     def job_start(self):
     58         """
     59         Abstract method, called the first time a remote host object
     60         is created for a specific host after a job starts.
     61 
     62         This method depends on the create_host factory being used to
     63         construct your host object. If you directly construct host objects
     64         you will need to call this method yourself (and enforce the
     65         single-call rule).
     66         """
     67         try:
     68             cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
     69                    '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
     70             self.run(cmd)
     71         except Exception, e:
     72             # Non-fatal error
     73             logging.info('Failed to copy /var/log/messages at startup: %s', e)
     74 
     75 
     76     def get_autodir(self):
     77         return self.autodir
     78 
     79 
     80     def set_autodir(self, autodir):
     81         """
     82         This method is called to make the host object aware of the
     83         where autotest is installed. Called in server/autotest.py
     84         after a successful install
     85         """
     86         self.autodir = autodir
     87 
     88 
     89     def sysrq_reboot(self):
     90         self.run_background('echo b > /proc/sysrq-trigger')
     91 
     92 
     93     def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
     94         """
     95         Shut down the remote host.
     96 
     97         N.B.  This method makes no provision to bring the target back
     98         up.  The target will be offline indefinitely if there's no
     99         independent hardware (servo, RPM, etc.) to force the target to
    100         power on.
    101 
    102         @param timeout  Maximum time to wait for host down, in seconds.
    103         @param wait  Whether to wait for the host to go offline.
    104         """
    105         self.run_background('sleep 1 ; halt')
    106         if wait:
    107             self.wait_down(timeout=timeout)
    108 
    109 
    110     def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
    111                fastsync=False, reboot_cmd=None, **dargs):
    112         """
    113         Reboot the remote host.
    114 
    115         Args:
    116                 timeout - How long to wait for the reboot.
    117                 wait - Should we wait to see if the machine comes back up.
    118                        If this is set to True, ignores reboot_cmd's error
    119                        even if occurs.
    120                 fastsync - Don't wait for the sync to complete, just start one
    121                         and move on. This is for cases where rebooting prompty
    122                         is more important than data integrity and/or the
    123                         machine may have disks that cause sync to never return.
    124                 reboot_cmd - Reboot command to execute.
    125         """
    126         self.reboot_setup(**dargs)
    127         if not reboot_cmd:
    128             reboot_cmd = ('sync & sleep 5; '
    129                           'reboot & sleep 60; '
    130                           'reboot -f & sleep 10; '
    131                           'reboot -nf & sleep 10; '
    132                           'telinit 6')
    133 
    134         def reboot():
    135             # pylint: disable=missing-docstring
    136             self.record("GOOD", None, "reboot.start")
    137             try:
    138                 current_boot_id = self.get_boot_id()
    139 
    140                 # sync before starting the reboot, so that a long sync during
    141                 # shutdown isn't timed out by wait_down's short timeout
    142                 if not fastsync:
    143                     self.run('sync; sync', timeout=timeout, ignore_status=True)
    144 
    145                 self.run_background(reboot_cmd)
    146             except error.AutoservRunError:
    147                 # If wait is set, ignore the error here, and rely on the
    148                 # wait_for_restart() for stability, instead.
    149                 # reboot_cmd sometimes causes an error even if reboot is
    150                 # successfully in progress. This is difficult to be avoided,
    151                 # because we have no much control on remote machine after
    152                 # "reboot" starts.
    153                 if not wait:
    154                     # TODO(b/37652392): Revisit no-wait case, later.
    155                     self.record("ABORT", None, "reboot.start",
    156                                 "reboot command failed")
    157                     raise
    158             if wait:
    159                 self.wait_for_restart(timeout, old_boot_id=current_boot_id,
    160                                       **dargs)
    161 
    162         # if this is a full reboot-and-wait, run the reboot inside a group
    163         if wait:
    164             self.log_op(self.OP_REBOOT, reboot)
    165         else:
    166             reboot()
    167 
    168     def suspend(self, timeout, suspend_cmd, **dargs):
    169         """
    170         Suspend the remote host.
    171 
    172         Args:
    173                 timeout - How long to wait for the suspend.
    174                 susped_cmd - suspend command to execute.
    175         """
    176         # define a function for the supend and run it in a group
    177         def suspend():
    178             # pylint: disable=missing-docstring
    179             self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
    180             try:
    181                 self.run_background(suspend_cmd)
    182             except error.AutoservRunError:
    183                 self.record("ABORT", None, "suspend.start",
    184                             "suspend command failed")
    185                 raise error.AutoservSuspendError("suspend command failed")
    186 
    187             # Wait for some time, to ensure the machine is going to sleep.
    188             # Not too long to check if the machine really suspended.
    189             time_slice = min(timeout / 2, 300)
    190             time.sleep(time_slice)
    191             time_counter = time_slice
    192             while time_counter < timeout + 60:
    193                 # Check if the machine is back. We check regularely to
    194                 # ensure the machine was suspended long enough.
    195                 if utils.ping(self.hostname, tries=1, deadline=1) == 0:
    196                     return
    197                 else:
    198                     if time_counter > timeout - 10:
    199                         time_slice = 5
    200                     time.sleep(time_slice)
    201                     time_counter += time_slice
    202 
    203             if utils.ping(self.hostname, tries=1, deadline=1) != 0:
    204                 raise error.AutoservSuspendError(
    205                     "DUT is not responding after %d seconds" % (time_counter))
    206 
    207         start_time = time.time()
    208         self.log_op(self.OP_SUSPEND, suspend)
    209         lasted = time.time() - start_time
    210         if (lasted < timeout):
    211             raise error.AutoservSuspendError(
    212                 "Suspend did not last long enough: %d instead of %d" % (
    213                     lasted, timeout))
    214 
    215     def reboot_followup(self, *args, **dargs):
    216         super(RemoteHost, self).reboot_followup(*args, **dargs)
    217         if self.job:
    218             self.job.profilers.handle_reboot(self)
    219 
    220 
    221     def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
    222         """
    223         Wait for the host to come back from a reboot. This wraps the
    224         generic wait_for_restart implementation in a reboot group.
    225         """
    226         def op_func():
    227             # pylint: disable=missing-docstring
    228             super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
    229         self.log_op(self.OP_REBOOT, op_func)
    230 
    231 
    232     def cleanup(self):
    233         super(RemoteHost, self).cleanup()
    234         self.reboot()
    235 
    236 
    237     def get_tmp_dir(self, parent='/tmp'):
    238         """
    239         Return the pathname of a directory on the host suitable
    240         for temporary file storage.
    241 
    242         The directory and its content will be deleted automatically
    243         on the destruction of the Host object that was used to obtain
    244         it.
    245         """
    246         self.run("mkdir -p %s" % parent)
    247         template = os.path.join(parent, 'autoserv-XXXXXX')
    248         dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
    249         self.tmp_dirs.append(dir_name)
    250         return dir_name
    251 
    252 
    253     def get_platform_label(self):
    254         """
    255         Return the platform label, or None if platform label is not set.
    256         """
    257 
    258         if self.job:
    259             keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
    260                                        self.hostname)
    261             keyvals = utils.read_keyval(keyval_path)
    262             return keyvals.get('platform', None)
    263         else:
    264             return None
    265 
    266 
    267     def get_all_labels(self):
    268         """
    269         Return all labels, or empty list if label is not set.
    270         """
    271         if self.job:
    272             keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
    273                                        self.hostname)
    274             keyvals = utils.read_keyval(keyval_path)
    275             all_labels = keyvals.get('labels', '')
    276             if all_labels:
    277                 all_labels = all_labels.split(',')
    278                 return [urllib.unquote(label) for label in all_labels]
    279         return []
    280 
    281 
    282     def delete_tmp_dir(self, tmpdir):
    283         """
    284         Delete the given temporary directory on the remote machine.
    285 
    286         @param tmpdir The directory to delete.
    287         """
    288         self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
    289         self.tmp_dirs.remove(tmpdir)
    290 
    291 
    292     def check_uptime(self):
    293         """
    294         Check that uptime is available and monotonically increasing.
    295         """
    296         if not self.is_up():
    297             raise error.AutoservHostError('Client does not appear to be up')
    298         result = self.run("/bin/cat /proc/uptime", 30)
    299         return result.stdout.strip().split()[0]
    300 
    301 
    302     def check_for_lkdtm(self):
    303         """
    304         Check for kernel dump test module. return True if exist.
    305         """
    306         cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
    307         return self.run(cmd, ignore_status=True).exit_status == 0
    308 
    309 
    310     def are_wait_up_processes_up(self):
    311         """
    312         Checks if any HOSTS waitup processes are running yet on the
    313         remote host.
    314 
    315         Returns True if any the waitup processes are running, False
    316         otherwise.
    317         """
    318         processes = self.get_wait_up_processes()
    319         if len(processes) == 0:
    320             return True # wait up processes aren't being used
    321         for procname in processes:
    322             exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
    323                                    ignore_status=True).exit_status
    324             if exit_status == 0:
    325                 return True
    326         return False
    327 
    328 
    329     def get_labels(self):
    330         """Return a list of labels for this given host.
    331 
    332         This is the main way to retrieve all the automatic labels for a host
    333         as it will run through all the currently implemented label functions.
    334         """
    335         labels = []
    336         for label_function in self._LABEL_FUNCTIONS:
    337             try:
    338                 label = label_function(self)
    339             except Exception:
    340                 logging.exception('Label function %s failed; ignoring it.',
    341                                   label_function.__name__)
    342                 label = None
    343             if label:
    344                 if type(label) is str:
    345                     labels.append(label)
    346                 elif type(label) is list:
    347                     labels.extend(label)
    348         return labels
    349