Home | History | Annotate | Download | only in hosts
      1 """This class defines the Remote host class."""
      2 
      3 import os, logging, urllib, time
      4 from autotest_lib.client.common_lib import error
      5 from autotest_lib.server import utils
      6 from autotest_lib.server.hosts import base_classes
      7 
      8 
      9 class RemoteHost(base_classes.Host):
     10     """
     11     This class represents a remote machine on which you can run
     12     programs.
     13 
     14     It may be accessed through a network, a serial line, ...
     15     It is not the machine autoserv is running on.
     16 
     17     Implementation details:
     18     This is an abstract class, leaf subclasses must implement the methods
     19     listed here and in parent classes which have no implementation. They
     20     may reimplement methods which already have an implementation. You
     21     must not instantiate this class but should instantiate one of those
     22     leaf subclasses.
     23     """
     24 
     25     DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
     26     LAST_BOOT_TAG = object()
     27     DEFAULT_HALT_TIMEOUT = 2 * 60
     28     _LABEL_FUNCTIONS = []
     29     _DETECTABLE_LABELS = []
     30 
     31     VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"
     32 
     33 
     34     def _initialize(self, hostname, autodir=None, *args, **dargs):
     35         super(RemoteHost, self)._initialize(*args, **dargs)
     36 
     37         self.hostname = hostname
     38         self.autodir = autodir
     39         self.tmp_dirs = []
     40 
     41 
     42     def __repr__(self):
     43         return "<remote host: %s>" % self.hostname
     44 
     45 
     46     def close(self):
     47         super(RemoteHost, self).close()
     48         self.stop_loggers()
     49 
     50         if hasattr(self, 'tmp_dirs'):
     51             for dir in self.tmp_dirs:
     52                 try:
     53                     self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
     54                 except error.AutoservRunError:
     55                     pass
     56 
     57 
     58     def job_start(self):
     59         """
     60         Abstract method, called the first time a remote host object
     61         is created for a specific host after a job starts.
     62 
     63         This method depends on the create_host factory being used to
     64         construct your host object. If you directly construct host objects
     65         you will need to call this method yourself (and enforce the
     66         single-call rule).
     67         """
     68         try:
     69             cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
     70                    '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
     71             self.run(cmd)
     72         except Exception, e:
     73             # Non-fatal error
     74             logging.info('Failed to copy /var/log/messages at startup: %s', e)
     75 
     76 
     77     def get_autodir(self):
     78         return self.autodir
     79 
     80 
     81     def set_autodir(self, autodir):
     82         """
     83         This method is called to make the host object aware of the
     84         where autotest is installed. Called in server/autotest.py
     85         after a successful install
     86         """
     87         self.autodir = autodir
     88 
     89 
     90     def sysrq_reboot(self):
     91         self.run_background('echo b > /proc/sysrq-trigger')
     92 
     93 
     94     def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
     95         self.run_background('sleep 1 ; halt')
     96         if wait:
     97             self.wait_down(timeout=timeout)
     98 
     99 
    100     def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=LAST_BOOT_TAG,
    101                kernel_args=None, wait=True, fastsync=False,
    102                reboot_cmd=None, **dargs):
    103         """
    104         Reboot the remote host.
    105 
    106         Args:
    107                 timeout - How long to wait for the reboot.
    108                 label - The label we should boot into.  If None, we will
    109                         boot into the default kernel.  If it's LAST_BOOT_TAG,
    110                         we'll boot into whichever kernel was .boot'ed last
    111                         (or the default kernel if we haven't .boot'ed in this
    112                         job).  If it's None, we'll boot into the default kernel.
    113                         If it's something else, we'll boot into that.
    114                 wait - Should we wait to see if the machine comes back up.
    115                 fastsync - Don't wait for the sync to complete, just start one
    116                         and move on. This is for cases where rebooting prompty
    117                         is more important than data integrity and/or the
    118                         machine may have disks that cause sync to never return.
    119                 reboot_cmd - Reboot command to execute.
    120         """
    121         if self.job:
    122             if label == self.LAST_BOOT_TAG:
    123                 label = self.job.last_boot_tag
    124             else:
    125                 self.job.last_boot_tag = label
    126 
    127         self.reboot_setup(label=label, kernel_args=kernel_args, **dargs)
    128 
    129         if label or kernel_args:
    130             if not label:
    131                 label = self.bootloader.get_default_title()
    132             self.bootloader.boot_once(label)
    133             if kernel_args:
    134                 self.bootloader.add_args(label, kernel_args)
    135 
    136         if not reboot_cmd:
    137             reboot_cmd = ('sync & sleep 5; '
    138                           'reboot & sleep 60; '
    139                           'reboot -f & sleep 10; '
    140                           'reboot -nf & sleep 10; '
    141                           'telinit 6')
    142 
    143         def reboot():
    144             self.record("GOOD", None, "reboot.start")
    145             try:
    146                 current_boot_id = self.get_boot_id()
    147 
    148                 # sync before starting the reboot, so that a long sync during
    149                 # shutdown isn't timed out by wait_down's short timeout
    150                 if not fastsync:
    151                     self.run('sync; sync', timeout=timeout, ignore_status=True)
    152 
    153                 self.run_background(reboot_cmd)
    154             except error.AutoservRunError:
    155                 self.record("ABORT", None, "reboot.start",
    156                               "reboot command failed")
    157                 raise
    158             if wait:
    159                 self.wait_for_restart(timeout, old_boot_id=current_boot_id,
    160                                       **dargs)
    161 
    162         # if this is a full reboot-and-wait, run the reboot inside a group
    163         if wait:
    164             self.log_op(self.OP_REBOOT, reboot)
    165         else:
    166             reboot()
    167 
    def suspend(self, timeout, suspend_cmd, **dargs):
        """
        Suspend the remote host.

        Args:
                timeout - How long, in seconds, the suspend is expected to
                        last.
                suspend_cmd - suspend command to execute.

        Raises:
                error.AutoservSuspendError - when the suspend command
                        fails, when the host does not answer pings once
                        the wait is over, or when the whole operation
                        lasted less than timeout seconds.
        """
        # define a function for the suspend and run it in a group
        def suspend():
            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
            try:
                self.run_background(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Give the machine some time to actually go to sleep, but not
            # so long that we cannot tell whether it really suspended.
            nap = min(timeout / 2, 300)
            time.sleep(nap)
            elapsed = nap
            while elapsed < timeout + 60:
                # Poll regularly for the machine coming back, so that we
                # can tell whether it stayed suspended long enough.
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                # poll faster once the expected wake-up time gets close
                if elapsed > timeout - 10:
                    nap = 5
                time.sleep(nap)
                elapsed += nap

            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                    "DUT is not responding after %d seconds" % (elapsed))

        began = time.time()
        self.log_op(self.OP_SUSPEND, suspend)
        lasted = time.time() - began
        if lasted < timeout:
            raise error.AutoservSuspendError(
                "Suspend did not last long enough: %d instead of %d" % (
                    lasted, timeout))
    213 
    def reboot_followup(self, *args, **dargs):
        """
        Run the parent class' reboot follow-up and then, when this host
        belongs to a job, let the job's profilers handle the reboot.
        """
        parent = super(RemoteHost, self)
        parent.reboot_followup(*args, **dargs)
        if not self.job:
            return
        self.job.profilers.handle_reboot(self)
    218 
    219 
    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot.

        The generic wait_for_restart implementation of the parent class
        does the actual waiting; this wrapper only runs it through
        log_op() as an OP_REBOOT operation.

        Args:
                timeout - Passed to the parent implementation.
                dargs - Passed to the parent implementation.
        """
        def op_func():
            parent = super(RemoteHost, self)
            parent.wait_for_restart(timeout=timeout, **dargs)

        self.log_op(self.OP_REBOOT, op_func)
    228 
    229 
    def cleanup(self):
        """
        Run the parent class' cleanup, then reboot the host with the
        default reboot() arguments.
        """
        parent = super(RemoteHost, self)
        parent.cleanup()
        self.reboot()
    233 
    234 
    def get_tmp_dir(self, parent='/tmp'):
        """
        Create a temporary directory on the host and return its pathname.

        The directory is created with mktemp under parent (which is
        created first if needed) and is recorded in self.tmp_dirs, so
        that it gets deleted, together with its content, when the Host
        object used to obtain it is closed.

        Args:
                parent - Remote directory under which the temporary
                        directory is created.
        """
        self.run("mkdir -p %s" % parent)
        template = os.path.join(parent, 'autoserv-XXXXXX')
        mktemp_result = self.run("mktemp -d %s" % template)
        # mktemp prints the created path followed by a newline
        created = mktemp_result.stdout.rstrip()
        self.tmp_dirs.append(created)
        return created
    249 
    250 
    def get_platform_label(self):
        """
        Return the value of the 'platform' keyval of this host, or None
        if it is not set or if the host does not belong to a job.

        The keyvals are read from the host_keyvals/<hostname> file of the
        job's result directory.
        """
        if not self.job:
            return None

        keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                   self.hostname)
        host_keyvals = utils.read_keyval(keyval_path)
        return host_keyvals.get('platform')
    263 
    264 
    def get_all_labels(self):
        """
        Return the list of all labels of this host, or an empty list if
        no label is set or if the host does not belong to a job.

        The labels come from the comma separated, URL-quoted 'labels'
        keyval found in the host_keyvals/<hostname> file of the job's
        result directory; each label is unquoted before being returned.
        """
        if not self.job:
            return []

        keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                   self.hostname)
        quoted_labels = utils.read_keyval(keyval_path).get('labels', '')
        if not quoted_labels:
            return []
        return [urllib.unquote(quoted)
                for quoted in quoted_labels.split(',')]
    278 
    279 
    def delete_tmp_dir(self, tmpdir):
        """
        Delete the given temporary directory on the remote machine.

        The removal itself is best effort (its exit status is ignored).
        The directory is also dropped from self.tmp_dirs when it is listed
        there; a directory that is not tracked (for instance one that was
        already deleted) is not an error.

        Args:
                tmpdir - Pathname of the remote directory to delete.
        """
        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
        # list.remove() raises ValueError for a missing item; do not fail
        # after the directory has already been removed from the host
        if tmpdir in self.tmp_dirs:
            self.tmp_dirs.remove(tmpdir)
    286 
    287 
    def check_uptime(self):
        """
        Check that the host is up and return its uptime.

        Returns the first whitespace separated field of /proc/uptime, as
        a string.

        Raises:
                error.AutoservHostError - when is_up() reports the host
                        as down.
        """
        if not self.is_up():
            raise error.AutoservHostError('Client does not appear to be up')

        # NOTE(review): 30 is the second positional argument of run(),
        # presumably its timeout -- confirm against run()'s signature
        uptime_output = self.run("/bin/cat /proc/uptime", 30).stdout
        fields = uptime_output.strip().split()
        return fields[0]
    296 
    297 
    def check_for_lkdtm(self):
        """
        Check for the kernel dump test module.

        Returns True if listing /sys/kernel/debug/provoke-crash/DIRECT on
        the host succeeds, False otherwise.
        """
        probe = self.run('ls /sys/kernel/debug/provoke-crash/DIRECT',
                         ignore_status=True)
        return probe.exit_status == 0
    304 
    305 
    def are_wait_up_processes_up(self):
        """
        Checks if any of the wait up processes returned by
        get_wait_up_processes() is running yet on the remote host.

        Returns True if at least one of the wait up processes shows up in
        the host's process list, or if no wait up process is configured
        at all; False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not len(processes):
            return True # wait up processes aren't being used

        # grep exits with 0 as soon as the name matches a listed process;
        # any() stops at the first process found, like an early return
        return any(
            self.run("{ ps -e || ps; } | grep '%s'" % procname,
                     ignore_status=True).exit_status == 0
            for procname in processes)
    323 
    324 
    def get_labels(self):
        """Return a list of labels for this given host.

        This is the main way to retrieve all the automatic labels for a host
        as it will run through all the currently implemented label functions.

        Each function of _LABEL_FUNCTIONS is called with the host as its
        only argument.  A function may return a single label (a string) or
        several of them (a list or a tuple); empty results are skipped.  A
        function that raises is logged and ignored.
        """
        labels = []
        for label_function in self._LABEL_FUNCTIONS:
            try:
                label = label_function(self)
            except Exception as e:
                # not every callable has a __name__ (e.g. functools.partial)
                function_name = getattr(label_function, '__name__',
                                        repr(label_function))
                logging.error('Label function %s failed; ignoring it.',
                              function_name)
                logging.exception(e)
                label = None
            if not label:
                continue
            # isinstance() rather than an exact type match, so that
            # subclasses of str and list are not silently dropped
            if isinstance(label, str):
                labels.append(label)
            elif isinstance(label, (list, tuple)):
                labels.extend(label)
        return labels
    346