"""This class defines the Remote host class."""

import logging
import os
import time
import urllib

from autotest_lib.client.common_lib import error
from autotest_lib.server import utils
from autotest_lib.server.hosts import base_classes


class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Unique sentinel: lets reboot() tell "reuse the last booted label"
    # apart from an explicit label=None.
    LAST_BOOT_TAG = object()
    # Seconds halt() waits for the machine to go down by default.
    DEFAULT_HALT_TIMEOUT = 2 * 60
    # Callables invoked as f(host) by get_labels().
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []

    # Where job_start() stores its copy of /var/log/messages on the host.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"


    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Store the host name and autotest directory on the instance.

        Args:
            hostname - Name of the remote host.
            autodir - Autotest install directory on the host, if known.
            args, dargs - Forwarded unchanged to the parent _initialize.
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        # Remote directories handed out by get_tmp_dir(); removed in close().
        self.tmp_dirs = []


    def __repr__(self):
        return "<remote host: %s>" % self.hostname


    def close(self):
        """
        Close the host, stop its loggers and remove the temporary
        directories created through get_tmp_dir().

        Removal is best effort: a failing 'rm' is ignored.
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs is only set by _initialize(), so tolerate its absence.
        for tmp_dir in getattr(self, 'tmp_dirs', ()):
            try:
                self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
            except error.AutoservRunError:
                pass


    def job_start(self):
        """
        Abstract method, called the first time a remote host object
        is created for a specific host after a job starts.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).

        This implementation copies /var/log/messages (when it exists) to
        VAR_LOG_MESSAGES_COPY_PATH on the host; any failure is logged and
        otherwise ignored.
        """
        try:
            cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
                   '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
            self.run(cmd)
        except Exception as e:
            # Non-fatal error
            logging.info('Failed to copy /var/log/messages at startup: %s', e)


    def get_autodir(self):
        """Return the autotest directory recorded for this host."""
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of
        where autotest is installed. Called in server/autotest.py
        after a successful install.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """Write 'b' to /proc/sysrq-trigger on the host, in the background."""
        self.run_background('echo b > /proc/sysrq-trigger')


    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
        """
        Halt the remote host.

        Args:
            timeout - How long to wait for the host to go down.
            wait - Should we wait until the machine is down.
        """
        self.run_background('sleep 1 ; halt')
        if wait:
            self.wait_down(timeout=timeout)


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=LAST_BOOT_TAG,
               kernel_args=None, wait=True, fastsync=False,
               reboot_cmd=None, **dargs):
        """
        Reboot the remote host.

        Args:
                timeout - How long to wait for the reboot.
                label - The label we should boot into. If it's
                        LAST_BOOT_TAG, we'll boot into whichever kernel was
                        .boot'ed last (or the default kernel if we haven't
                        .boot'ed in this job). If it's None, we'll boot
                        into the default kernel. If it's something else,
                        we'll boot into that.
                kernel_args - Extra kernel arguments; when given they are
                        added to the bootloader entry that will be booted.
                wait - Should we wait to see if the machine comes back up.
                fastsync - Don't wait for the sync to complete, just start
                        one and move on. This is for cases where rebooting
                        promptly is more important than data integrity
                        and/or the machine may have disks that cause sync
                        to never return.
                reboot_cmd - Reboot command to execute.
                dargs - Forwarded to reboot_setup() and, when waiting, to
                        wait_for_restart().
        """
        if self.job:
            if label == self.LAST_BOOT_TAG:
                label = self.job.last_boot_tag
            else:
                self.job.last_boot_tag = label

        self.reboot_setup(label=label, kernel_args=kernel_args, **dargs)

        if label or kernel_args:
            if not label:
                label = self.bootloader.get_default_title()
            self.bootloader.boot_once(label)
            if kernel_args:
                self.bootloader.add_args(label, kernel_args)

        if not reboot_cmd:
            # Escalating sequence: each step is only reached if the
            # previous one has not taken the machine down yet.
            reboot_cmd = ('sync & sleep 5; '
                          'reboot & sleep 60; '
                          'reboot -f & sleep 10; '
                          'reboot -nf & sleep 10; '
                          'telinit 6')

        def reboot():
            self.record("GOOD", None, "reboot.start")
            try:
                current_boot_id = self.get_boot_id()

                # sync before starting the reboot, so that a long sync during
                # shutdown isn't timed out by wait_down's short timeout
                if not fastsync:
                    self.run('sync; sync', timeout=timeout, ignore_status=True)

                self.run_background(reboot_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "reboot.start",
                            "reboot command failed")
                raise
            if wait:
                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
                                      **dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_op(self.OP_REBOOT, reboot)
        else:
            reboot()


    def suspend(self, timeout, suspend_cmd, **dargs):
        """
        Suspend the remote host.

        Args:
                timeout - How long to wait for the suspend.
                suspend_cmd - suspend command to execute.

        Raises:
                error.AutoservSuspendError - if the suspend command fails,
                        if the host does not answer a ping once the wait is
                        over, or if the whole operation took less than
                        timeout seconds.
        """
        # define a function for the suspend and run it in a group
        def suspend():
            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
            try:
                self.run_background(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Wait for some time, to ensure the machine is going to sleep.
            # Not too long to check if the machine really suspended.
            time_slice = min(timeout / 2, 300)
            time.sleep(time_slice)
            time_counter = time_slice
            while time_counter < timeout + 60:
                # Check if the machine is back. We check regularly to
                # ensure the machine was suspended long enough.
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                else:
                    # Poll faster once we are close to the expected wake-up.
                    if time_counter > timeout - 10:
                        time_slice = 5
                    time.sleep(time_slice)
                    time_counter += time_slice

            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                    "DUT is not responding after %d seconds" % (time_counter))

        start_time = time.time()
        self.log_op(self.OP_SUSPEND, suspend)
        lasted = time.time() - start_time
        if lasted < timeout:
            raise error.AutoservSuspendError(
                "Suspend did not last long enough: %d instead of %d" % (
                    lasted, timeout))


    def reboot_followup(self, *args, **dargs):
        """
        Run the parent reboot_followup, then let the job's profilers
        handle the reboot when this host belongs to a job.
        """
        super(RemoteHost, self).reboot_followup(*args, **dargs)
        if self.job:
            self.job.profilers.handle_reboot(self)


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.
        """
        def op_func():
            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
        self.log_op(self.OP_REBOOT, op_func)


    def cleanup(self):
        """Run the parent cleanup, then reboot the host."""
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory and its content will be deleted automatically
        on the destruction of the Host object that was used to obtain
        it.
        """
        # NOTE: parent is interpolated into the shell command unquoted, so
        # it must not contain whitespace or shell metacharacters.
        self.run("mkdir -p %s" % parent)
        template = os.path.join(parent, 'autoserv-XXXXXX')
        dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def get_platform_label(self):
        """
        Return the platform label, or None if platform label is not set.
        """

        if self.job:
            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                       self.hostname)
            keyvals = utils.read_keyval(keyval_path)
            return keyvals.get('platform', None)
        else:
            return None


    def get_all_labels(self):
        """
        Return all labels, or empty list if label is not set.

        Labels are read from the comma separated 'labels' host keyval and
        URL-unquoted individually.
        """
        if self.job:
            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                       self.hostname)
            keyvals = utils.read_keyval(keyval_path)
            all_labels = keyvals.get('labels', '')
            if all_labels:
                all_labels = all_labels.split(',')
                return [urllib.unquote(label) for label in all_labels]
        return []


    def delete_tmp_dir(self, tmpdir):
        """
        Delete the given temporary directory on the remote machine.

        The directory is also dropped from the list of directories that
        close() removes. A directory that is not in that list (for example
        one already deleted) is tolerated.
        """
        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
        try:
            self.tmp_dirs.remove(tmpdir)
        except ValueError:
            # Not tracked (or already removed): the remote rm above was
            # best effort, so the bookkeeping is best effort as well.
            pass


    def check_uptime(self):
        """
        Check that the host is up and return its uptime.

        Returns the first field of /proc/uptime, as a string.

        Raises:
            error.AutoservHostError - if the host does not appear to be up.
        """
        if not self.is_up():
            raise error.AutoservHostError('Client does not appear to be up')
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def check_for_lkdtm(self):
        """
        Check for kernel dump test module. return True if exist.
        """
        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
        return self.run(cmd, ignore_status=True).exit_status == 0


    def are_wait_up_processes_up(self):
        """
        Checks if any HOSTS waitup processes are running yet on the
        remote host.

        Returns True if any of the waitup processes are running (or if no
        waitup processes are configured), False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False


    def get_labels(self):
        """Return a list of labels for this given host.

        This is the main way to retrieve all the automatic labels for a host
        as it will run through all the currently implemented label functions.

        A label function may return a single string or a list of strings;
        a function that raises is logged and skipped.
        """
        labels = []
        for label_function in self._LABEL_FUNCTIONS:
            try:
                label = label_function(self)
            except Exception as e:
                logging.error('Label function %s failed; ignoring it.',
                              label_function.__name__)
                logging.exception(e)
                label = None
            if label:
                if isinstance(label, str):
                    labels.append(label)
                elif isinstance(label, list):
                    labels.extend(label)
        return labels