#
# Copyright 2007 Google Inc. Released under the GPL v2

"""
This module defines the SSHHost class.

Implementation details:
You should import the "hosts" package instead of importing each type of host.

        SSHHost: a remote machine with a ssh access
"""

import inspect
import logging
import re
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import pxssh
from autotest_lib.server import utils
from autotest_lib.server.hosts import abstract_ssh

# In case cros_host is being run via SSP on an older Moblab version with an
# older chromite version.
try:
    from chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock


class SSHHost(abstract_ssh.AbstractSSHHost):
    """
    This class represents a remote machine controlled through an ssh
    session on which you can run programs.

    It is not the machine autoserv is running on. The machine must be
    configured for password-less login, for example through public key
    authentication.

    It includes support for controlling the machine through a serial
    console on which you can run programs. If such a serial console is
    set up on the machine then capabilities such as hard reset and
    boot strap monitoring are available. If the machine does not have a
    serial console available then ordinary SSH-based commands will
    still be available, but attempts to use extensions such as
    console logging or hard reset will fail silently.

    Implementation details:
    This is a leaf class in an abstract class hierarchy, it must
    implement the unimplemented methods in parent classes.
    """

    def _initialize(self, hostname, *args, **dargs):
        """
        Construct a SSHHost object and make sure ssh access is set up.

        @param hostname: network hostname or address of remote machine
        @param args: extra positional arguments forwarded to the parent
                class's _initialize().
        @param dargs: extra keyword arguments forwarded to the parent
                class's _initialize().
        """
        super(SSHHost, self)._initialize(hostname=hostname, *args, **dargs)
        self.setup_ssh()


    def ssh_command(self, connect_timeout=30, options='', alive_interval=300):
        """
        Construct an ssh command with proper args for this host.

        @param connect_timeout: connection timeout (in seconds)
        @param options: SSH options
        @param alive_interval: SSH Alive interval.

        @return the ssh command line (as a string) followed by this
                host's hostname; the remote command is appended by callers.
        """
        # Always add the master-connection option of this host to whatever
        # options the caller supplied.
        options = "%s %s" % (options, self.master_ssh_option)
        base_cmd = self.make_ssh_command(user=self.user, port=self.port,
                                         opts=options,
                                         hosts_file=self.known_hosts_file,
                                         connect_timeout=connect_timeout,
                                         alive_interval=alive_interval)
        return "%s %s" % (base_cmd, self.hostname)


    def _verbose_logger_command(self, command):
        """
        Prepend the command for the client with information about the ssh
        command to be executed and the server stack state.

        @param command: the ssh command to be executed.

        @return a shell snippet that first reports |command| (and a short
                server-side call stack) through "logger" when that
                executable exists on the remote side, and then runs
                |command| itself.
        """
        stack_frames = inspect.stack()
        stack = ''
        # inspect.stack() lists the current frame first, so entries 0 and 1
        # (this function and its direct caller) are boring. Look at no more
        # than 5 entries in total, i.e. report at most 5-2=3 stack frames.
        count = min(5, len(stack_frames))
        if count >= 3:
            stack = inspect.getframeinfo(stack_frames[2][0]).function
            # Prepend each further-out caller so the resulting string reads
            # outermost|...|innermost.
            for frame in stack_frames[3:count]:
                function_name = inspect.getframeinfo(frame[0]).function
                stack = '%s|%s' % (function_name, stack)
        # Drop the frame references as soon as they are no longer needed.
        del stack_frames
        # If "logger" executable exists on the DUT use it to respew |command|.
        # Then regardless of "logger" run |command| as usual.
        # NOTE(review): "-tag" is presumably parsed by getopt-style logger
        # implementations as "-t ag" -- confirm whether "-t autotest" was
        # intended before changing the emitted command.
        command = ('if type "logger" > /dev/null 2>&1; then'
                   ' logger -tag "autotest" "server[stack::%s] -> ssh_run(%s)";'
                   'fi; '
                   '%s' % (stack, utils.sh_escape(command), command))
        return command


    def _run(self, command, timeout, ignore_status,
             stdout, stderr, connect_timeout, env, options, stdin, args,
             ignore_timeout, ssh_failure_retry_ok):
        """
        Helper function for run().

        Builds the full ssh command line, executes it through utils.run()
        (retrying where allowed), records metrics about every ssh
        invocation and about the overall outcome, and converts well known
        ssh failures into autoserv exceptions.

        @param command: the command line string to run on the remote host.
        @param timeout: command execution timeout, passed to utils.run().
        @param ignore_status: if True, a non-zero exit status does not
                raise AutoservRunError.
        @param stdout: passed through to utils.run() as its stdout argument.
        @param stderr: passed through to utils.run() as its stderr argument.
        @param connect_timeout: ssh connection timeout (in seconds).
        @param env: string of space separated NAME=value pairs to export
                before running the command; may be empty/blank.
        @param options: string with additional ssh command options.
        @param stdin: passed through to utils.run().
        @param args: iterable of extra arguments; each one is shell-escaped,
                double-quoted and appended to |command|.
        @param ignore_timeout: if True, return None on command timeout.
        @param ssh_failure_retry_ok: if True, a failed invocation may be
                retried up to two more times (see comments below).

        @return the result object returned by utils.run(), or None when
                the command timed out and timeouts are ignored.

        @raises AutoservSSHTimeout: ssh could not connect (exit status 255
                with a "Connection timed out" message).
        @raises AutoservSshPermissionDeniedError: exit status 255 with a
                "Permission denied." message.
        @raises AutoservRunError: non-zero exit status while ignore_status
                is False.
        """
        ssh_cmd = self.ssh_command(connect_timeout, options)
        if not env.strip():
            env = ""
        else:
            env = "export %s;" % env
        for arg in args:
            command += ' "%s"' % utils.sh_escape(arg)
        full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command))

        # TODO(jrbarnette): crbug.com/484726 - When we're in an SSP
        # container, sometimes shortly after reboot we will see DNS
        # resolution errors on ssh commands; the problem never
        # occurs more than once in a row. This especially affects
        # the autoupdate_Rollback test, but other cases have been
        # affected, too.
        #
        # We work around it by detecting the first DNS resolution error
        # and retrying exactly one time.
        dns_error_retry_count = 1

        # 'call' records the outcome of each ssh invocation inside _run(),
        # including exceptions. 'run' records each call to _run() with its
        # result and how many tries were made; it is recorded when _run()
        # exits (including exiting with an exception).
        counter_paths = {'call': 'chromeos/autotest/ssh/calls',
                         'run': 'chromeos/autotest/ssh/runs'}

        def counters_inc(counter_name, failure_name):
            """Helper function to increment metrics counters.

            @param counter_name: string indicating which counter to use
                    ('call' or 'run'); any other value is ignored.
            @param failure_name: string identifying an error, or a false
                    value (e.g. None) to record 'success'.
            """
            counter_path = counter_paths.get(counter_name)
            if counter_path is None:
                return
            # ssh_call_count is read from the enclosing scope at call time,
            # so it reflects the number of attempts made so far.
            fields = {'error' : failure_name or 'success',
                      'attempt' : ssh_call_count}
            metrics.Counter(counter_path).increment(fields=fields)

        # If ssh_failure_retry_ok is True, retry twice on timeouts and generic
        # error 255: if a simple retry doesn't work, kill the ssh master
        # connection and try again.  (Note that either error could come from
        # the command running in the DUT, in which case the retry may be
        # useless but, in theory, also harmless.)
        if ssh_failure_retry_ok:
            # Ignore ssh command timeout, even though it could be a timeout due
            # to the command executing in the remote host.  Note that passing
            # ignore_timeout = True makes utils.run() return None on timeouts
            # (and only on timeouts).
            original_ignore_timeout = ignore_timeout
            ignore_timeout = True
            ssh_failure_retry_count = 2
        else:
            ssh_failure_retry_count = 0

        ssh_call_count = 0

        while True:
            try:
                # Increment call count first, in case utils.run() throws an
                # exception.
                ssh_call_count += 1
                result = utils.run(full_cmd, timeout, True, stdout, stderr,
                                   verbose=False, stdin=stdin,
                                   stderr_is_expected=ignore_status,
                                   ignore_timeout=ignore_timeout)
            except Exception:
                # No retries on exception.
                counters_inc('call', 'exception')
                counters_inc('run', 'exception')
                # A bare raise re-raises the active exception and keeps its
                # original traceback intact.
                raise

            failure_name = None

            if result:
                if result.exit_status == 255:
                    if re.search(r'^ssh: .*: Name or service not known',
                                 result.stderr):
                        failure_name = 'dns_failure'
                    else:
                        failure_name = 'error_255'
                elif result.exit_status > 0:
                    failure_name = 'nonzero_status'
            else:
                # result == None
                failure_name = 'timeout'

            # Record the outcome of the ssh invocation.
            counters_inc('call', failure_name)

            if failure_name:
                # There was a failure: decide whether to retry.
                if failure_name == 'dns_failure':
                    if dns_error_retry_count > 0:
                        logging.debug('retrying ssh because of DNS failure')
                        dns_error_retry_count -= 1
                        continue
                else:
                    # NOTE(review): this branch is taken for every failure
                    # other than 'dns_failure', i.e. also for
                    # 'nonzero_status', although the comments above only
                    # mention timeouts and error 255 -- confirm intended.
                    if ssh_failure_retry_count == 2:
                        logging.debug('retrying ssh command after %s',
                                       failure_name)
                        ssh_failure_retry_count -= 1
                        continue
                    elif ssh_failure_retry_count == 1:
                        # After two failures, restart the master connection
                        # before the final try.
                        logging.debug('retry 2: restarting master connection')
                        self.restart_master_ssh()
                        # Last retry: reinstate timeout behavior.
                        ignore_timeout = original_ignore_timeout
                        ssh_failure_retry_count -= 1
                        continue

            # No retry conditions occurred.  Exit the loop.
            break

        # The outcomes of ssh invocations have been recorded.  Now record
        # the outcome of this function.

        if ignore_timeout and not result:
            counters_inc('run', 'ignored_timeout')
            return None

        # The error messages will show up in band (indistinguishable
        # from stuff sent through the SSH connection), so we have the
        # remote computer echo the message "Connected." before running
        # any command.  Since the following 2 errors have to do with
        # connecting, it's safe to do these checks.
        if result.exit_status == 255:
            if re.search(r'^ssh: connect to host .* port .*: '
                         r'Connection timed out\r$', result.stderr):
                counters_inc('run', 'final_timeout')
                raise error.AutoservSSHTimeout("ssh timed out", result)
            if "Permission denied." in result.stderr:
                msg = "ssh permission denied"
                counters_inc('run', 'final_eperm')
                raise error.AutoservSshPermissionDeniedError(msg, result)

        if not ignore_status and result.exit_status > 0:
            counters_inc('run', 'final_run_error')
            raise error.AutoservRunError("command execution error", result)

        counters_inc('run', failure_name)
        return result


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, options='', stdin=None, verbose=True, args=(),
            ignore_timeout=False, ssh_failure_retry_ok=False):
        """
        Run a command on the remote host.
        @see common_lib.hosts.host.run()

        @param command: the command line string
        @param timeout: command execution timeout
        @param ignore_status: do not raise an exception when the command
                exits with a non-zero status.
        @param stdout_tee: where to tee the command's stdout.
        @param stderr_tee: where to tee the command's stderr.
        @param connect_timeout: ssh connection timeout (in seconds)
        @param options: string with additional ssh command options
        @param stdin: stdin to pass to the executed process.
        @param verbose: log the commands
        @param args: sequence of strings appended to the command; each is
                shell-escaped and double-quoted.
        @param ignore_timeout: bool True if SSH command timeouts should be
                ignored.  Will return None on command timeout.
        @param ssh_failure_retry_ok: True if the command may be retried on
                probable ssh failure (error 255 or timeout).  When true,
                the command may be executed up to three times, the second
                time after restarting the ssh master connection.  Use only for
                commands that are idempotent, because when a "probable
                ssh failure" occurs, we cannot tell if the command executed
                or not.

        @return the command result object, or None on an ignored timeout.

        @raises AutoservRunError: if the command failed
        @raises AutoservSSHTimeout: ssh connection has timed out
        """
        if verbose:
            logging.debug("Running (ssh) '%s'", command)
            command = self._verbose_logger_command(command)

        # Start a master SSH connection if necessary.
        self.start_master_ssh()

        # Flatten the host environment into "NAME=value NAME=value ...".
        env = " ".join("=".join(pair) for pair in self.env.items())
        try:
            return self._run(command, timeout, ignore_status,
                             stdout_tee, stderr_tee, connect_timeout, env,
                             options, stdin, args, ignore_timeout,
                             ssh_failure_retry_ok)
        except error.CmdError as cmderr:
            # We get a CmdError here only if there is timeout of that command.
            # Catch that and stuff it into AutoservRunError and raise it.
            timeout_message = str('Timeout encountered: %s' % cmderr.args[0])
            raise error.AutoservRunError(timeout_message, cmderr.args[1])


    def run_background(self, command, verbose=True):
        """Start a command on the host in the background.

        The command is started on the host in the background, and
        this method call returns immediately without waiting for the
        command's completion.  The PID of the process on the host is
        returned as a string.

        The command may redirect its stdin, stdout, or stderr as
        necessary.  Without redirection, all input and output will
        use /dev/null.

        @param command The command to run in the background
        @param verbose As for `self.run()`

        @return Returns the PID of the remote background process
                as a string.
        """
        # Redirection here isn't merely hygienic; it's a functional
        # requirement.  sshd won't terminate until stdin, stdout,
        # and stderr are all closed.
        #
        # The subshell is needed to do the right thing in case the
        # passed in command has its own I/O redirections.
        cmd_fmt = '( %s ) </dev/null >/dev/null 2>&1 & echo -n $!'
        return self.run(cmd_fmt % command, verbose=verbose).stdout


    def run_short(self, command, **kwargs):
        """
        Calls the run() command with a short default timeout.

        Takes the same arguments as does run(),
        with the exception of the timeout argument which
        here is fixed at 60 seconds.
        It returns the result of run.

        @param command: the command line string
        @param kwargs: keyword arguments forwarded unchanged to run().

        """
        return self.run(command, timeout=60, **kwargs)


    def run_grep(self, command, timeout=30, ignore_status=False,
                 stdout_ok_regexp=None, stdout_err_regexp=None,
                 stderr_ok_regexp=None, stderr_err_regexp=None,
                 connect_timeout=30):
        """
        Run a command on the remote host and look for regexp
        in stdout or stderr to determine if the command was
        successful or not.

        The checks are applied in this order: error patterns (stderr, then
        stdout), ok patterns (stderr, then stdout), and finally the exit
        status.  A pattern is only checked when both the pattern and the
        corresponding stream are non-empty.

        @param command: the command line string
        @param timeout: time limit in seconds before attempting to
                kill the running process. The run() function
                will take a few seconds longer than 'timeout'
                to complete if it has to kill the process.
        @param ignore_status: do not raise an exception, no matter
                what the exit code of the command is.
        @param stdout_ok_regexp: regexp that should be in stdout
                if the command was successful.
        @param stdout_err_regexp: regexp that should be in stdout
                if the command failed.
        @param stderr_ok_regexp: regexp that should be in stderr
                if the command was successful.
        @param stderr_err_regexp: regexp that should be in stderr
                if the command failed.
        @param connect_timeout: connection timeout (in seconds)

        @return None.  The method returns as soon as an ok pattern
                matches (the exit status is then not checked), or after
                the exit status check passes.

        @raises AutoservRunError:
                - if stderr_err_regexp is found in stderr,
                - if stdout_err_regexp is found in stdout,
                - if no ok pattern matched, ignore_status is False and
                  the exit code of the command execution was not 0.
        """

        # We ignore the status, because we will handle it at the end.
        result = self.run(command, timeout, ignore_status=True,
                          connect_timeout=connect_timeout)

        # Look for the patterns, in order
        for (regexp, stream) in ((stderr_err_regexp, result.stderr),
                                 (stdout_err_regexp, result.stdout)):
            if regexp and stream:
                if re.search(regexp, stream):
                    raise error.AutoservRunError(
                            '%s failed, found error pattern: "%s"' % (command,
                                regexp), result)

        for (regexp, stream) in ((stderr_ok_regexp, result.stderr),
                                 (stdout_ok_regexp, result.stdout)):
            if regexp and stream:
                if re.search(regexp, stream):
                    return

        if not ignore_status and result.exit_status > 0:
            raise error.AutoservRunError("command execution error", result)


    def setup_ssh_key(self):
        """
        Setup SSH Key.

        Logs in to the host with this object's password and appends the
        local public key to ~/.ssh/authorized_keys on the host.  This is
        best effort: failures are logged at debug level and not raised.
        """
        logging.debug('Performing SSH key setup on %s:%d as %s.',
                      self.hostname, self.port, self.user)

        # Pre-initialised so the failure handler can tell whether there is
        # a session to log out from.
        host = None
        try:
            host = pxssh.pxssh()
            host.login(self.hostname, self.user, self.password,
                       port=self.port)
            public_key = utils.get_public_key()

            host.sendline('mkdir -p ~/.ssh')
            host.prompt()
            host.sendline('chmod 700 ~/.ssh')
            host.prompt()
            host.sendline("echo '%s' >> ~/.ssh/authorized_keys; " %
                          public_key)
            host.prompt()
            host.sendline('chmod 600 ~/.ssh/authorized_keys')
            host.prompt()
            host.logout()

            logging.debug('SSH key setup complete.')

        except Exception:
            # Deliberately best effort, but do not swallow
            # KeyboardInterrupt/SystemExit.
            logging.debug('SSH key setup has failed.')
            if host is not None:
                try:
                    host.logout()
                except Exception:
                    # The session may never have been logged in; nothing
                    # more to clean up.
                    pass


    def setup_ssh(self):
        """
        Setup SSH.

        When a password is configured and the ssh ping of the host fails
        with AutoservSshPingHostError, try to install the ssh key.
        """
        if self.password:
            try:
                self.ssh_ping()
            except error.AutoservSshPingHostError:
                self.setup_ssh_key()