1 # 2 # Copyright 2007 Google Inc. Released under the GPL v2 3 4 """ 5 This module defines the SSHHost class. 6 7 Implementation details: 8 You should import the "hosts" package instead of importing each type of host. 9 10 SSHHost: a remote machine with a ssh access 11 """ 12 13 import inspect 14 import logging 15 import re 16 from autotest_lib.client.common_lib import error 17 from autotest_lib.client.common_lib import pxssh 18 from autotest_lib.server import utils 19 from autotest_lib.server.hosts import abstract_ssh 20 21 # In case cros_host is being ran via SSP on an older Moblab version with an 22 # older chromite version. 23 try: 24 from chromite.lib import metrics 25 except ImportError: 26 metrics = utils.metrics_mock 27 28 29 class SSHHost(abstract_ssh.AbstractSSHHost): 30 """ 31 This class represents a remote machine controlled through an ssh 32 session on which you can run programs. 33 34 It is not the machine autoserv is running on. The machine must be 35 configured for password-less login, for example through public key 36 authentication. 37 38 It includes support for controlling the machine through a serial 39 console on which you can run programs. If such a serial console is 40 set up on the machine then capabilities such as hard reset and 41 boot strap monitoring are available. If the machine does not have a 42 serial console available then ordinary SSH-based commands will 43 still be available, but attempts to use extensions such as 44 console logging or hard reset will fail silently. 45 46 Implementation details: 47 This is a leaf class in an abstract class hierarchy, it must 48 implement the unimplemented methods in parent classes. 49 """ 50 51 def _initialize(self, hostname, *args, **dargs): 52 """ 53 Construct a SSHHost object 54 55 Args: 56 hostname: network hostname or address of remote machine 57 """ 58 super(SSHHost, self)._initialize(hostname=hostname, *args, **dargs) 59 self.setup_ssh() 60 61 62 def ssh_command(self, connect_timeout=30, options='', alive_interval=300, 63 alive_count_max=3, connection_attempts=1): 64 """ 65 Construct an ssh command with proper args for this host. 66 67 @param connect_timeout: connection timeout (in seconds) 68 @param options: SSH options 69 @param alive_interval: SSH Alive interval. 70 @param alive_count_max: SSH AliveCountMax. 71 @param connection_attempts: SSH ConnectionAttempts 72 """ 73 options = " ".join([options, self._master_ssh.ssh_option]) 74 base_cmd = self.make_ssh_command(user=self.user, port=self.port, 75 opts=options, 76 hosts_file=self.known_hosts_file, 77 connect_timeout=connect_timeout, 78 alive_interval=alive_interval, 79 alive_count_max=alive_count_max, 80 connection_attempts=connection_attempts) 81 return "%s %s" % (base_cmd, self.hostname) 82 83 def _get_server_stack_state(self, lowest_frames=0, highest_frames=None): 84 """ Get the server stack frame status. 85 @param lowest_frames: the lowest frames to start printing. 86 @param highest_frames: the highest frames to print. 87 (None means no restriction) 88 """ 89 stack_frames = inspect.stack() 90 stack = '' 91 for frame in stack_frames[lowest_frames:highest_frames]: 92 function_name = inspect.getframeinfo(frame[0]).function 93 stack = '%s|%s' % (function_name, stack) 94 del stack_frames 95 return stack[:-1] # Delete the last '|' character 96 97 def _verbose_logger_command(self, command): 98 """ 99 Prepend the command for the client with information about the ssh 100 command to be executed and the server stack state. 101 102 @param command: the ssh command to be executed. 103 """ 104 # The last 3 frames on the stack are boring. Print 6-3=3 stack frames. 105 stack = self._get_server_stack_state(lowest_frames=3, highest_frames=6) 106 # If "logger" executable exists on the DUT use it to respew |command|. 107 # Then regardless of "logger" run |command| as usual. 108 command = ('if type "logger" > /dev/null 2>&1; then' 109 ' logger -tag "autotest" "server[stack::%s] -> ssh_run(%s)";' 110 'fi; ' 111 '%s' % (stack, utils.sh_escape(command), command)) 112 return command 113 114 115 def _run(self, command, timeout, ignore_status, 116 stdout, stderr, connect_timeout, env, options, stdin, args, 117 ignore_timeout, ssh_failure_retry_ok): 118 """Helper function for run().""" 119 ssh_cmd = self.ssh_command(connect_timeout, options) 120 if not env.strip(): 121 env = "" 122 else: 123 env = "export %s;" % env 124 for arg in args: 125 command += ' "%s"' % utils.sh_escape(arg) 126 full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command)) 127 128 # TODO(jrbarnette): crbug.com/484726 - When we're in an SSP 129 # container, sometimes shortly after reboot we will see DNS 130 # resolution errors on ssh commands; the problem never 131 # occurs more than once in a row. This especially affects 132 # the autoupdate_Rollback test, but other cases have been 133 # affected, too. 134 # 135 # We work around it by detecting the first DNS resolution error 136 # and retrying exactly one time. 137 dns_error_retry_count = 1 138 139 def counters_inc(counter_name, failure_name): 140 """Helper function to increment metrics counters. 141 @param counter_name: string indicating which counter to use 142 @param failure_name: string indentifying an error, or 'success' 143 """ 144 if counter_name == 'call': 145 # ssh_counter records the outcome of each ssh invocation 146 # inside _run(), including exceptions. 147 ssh_counter = metrics.Counter('chromeos/autotest/ssh/calls') 148 fields = {'error' : failure_name or 'success', 149 'attempt' : ssh_call_count} 150 ssh_counter.increment(fields=fields) 151 152 if counter_name == 'run': 153 # run_counter records each call to _run() with its result 154 # and how many tries were made. Calls are recorded when 155 # _run() exits (including exiting with an exception) 156 run_counter = metrics.Counter('chromeos/autotest/ssh/runs') 157 fields = {'error' : failure_name or 'success', 158 'attempt' : ssh_call_count} 159 run_counter.increment(fields=fields) 160 161 # If ssh_failure_retry_ok is True, retry twice on timeouts and generic 162 # error 255: if a simple retry doesn't work, kill the ssh master 163 # connection and try again. (Note that either error could come from 164 # the command running in the DUT, in which case the retry may be 165 # useless but, in theory, also harmless.) 166 if ssh_failure_retry_ok: 167 # Ignore ssh command timeout, even though it could be a timeout due 168 # to the command executing in the remote host. Note that passing 169 # ignore_timeout = True makes utils.run() return None on timeouts 170 # (and only on timeouts). 171 original_ignore_timeout = ignore_timeout 172 ignore_timeout = True 173 ssh_failure_retry_count = 2 174 else: 175 ssh_failure_retry_count = 0 176 177 ssh_call_count = 0 178 179 while True: 180 try: 181 # Increment call count first, in case utils.run() throws an 182 # exception. 183 ssh_call_count += 1 184 result = utils.run(full_cmd, timeout, True, stdout, stderr, 185 verbose=False, stdin=stdin, 186 stderr_is_expected=ignore_status, 187 ignore_timeout=ignore_timeout) 188 except Exception as e: 189 # No retries on exception. 190 counters_inc('call', 'exception') 191 counters_inc('run', 'exception') 192 raise e 193 194 failure_name = None 195 196 if result: 197 if result.exit_status == 255: 198 if re.search(r'^ssh: .*: Name or service not known', 199 result.stderr): 200 failure_name = 'dns_failure' 201 else: 202 failure_name = 'error_255' 203 elif result.exit_status > 0: 204 failure_name = 'nonzero_status' 205 else: 206 # result == None 207 failure_name = 'timeout' 208 209 # Record the outcome of the ssh invocation. 210 counters_inc('call', failure_name) 211 212 if failure_name: 213 # There was a failure: decide whether to retry. 214 if failure_name == 'dns_failure': 215 if dns_error_retry_count > 0: 216 logging.debug('retrying ssh because of DNS failure') 217 dns_error_retry_count -= 1 218 continue 219 else: 220 if ssh_failure_retry_count == 2: 221 logging.debug('retrying ssh command after %s', 222 failure_name) 223 ssh_failure_retry_count -= 1 224 continue 225 elif ssh_failure_retry_count == 1: 226 # After two failures, restart the master connection 227 # before the final try. 228 logging.debug('retry 2: restarting master connection') 229 self.restart_master_ssh() 230 # Last retry: reinstate timeout behavior. 231 ignore_timeout = original_ignore_timeout 232 ssh_failure_retry_count -= 1 233 continue 234 235 # No retry conditions occurred. Exit the loop. 236 break 237 238 # The outcomes of ssh invocations have been recorded. Now record 239 # the outcome of this function. 240 241 if ignore_timeout and not result: 242 counters_inc('run', 'ignored_timeout') 243 return None 244 245 # The error messages will show up in band (indistinguishable 246 # from stuff sent through the SSH connection), so we have the 247 # remote computer echo the message "Connected." before running 248 # any command. Since the following 2 errors have to do with 249 # connecting, it's safe to do these checks. 250 if result.exit_status == 255: 251 if re.search(r'^ssh: connect to host .* port .*: ' 252 r'Connection timed out\r$', result.stderr): 253 counters_inc('run', 'final_timeout') 254 raise error.AutoservSSHTimeout("ssh timed out", result) 255 if "Permission denied." in result.stderr: 256 msg = "ssh permission denied" 257 counters_inc('run', 'final_eperm') 258 raise error.AutoservSshPermissionDeniedError(msg, result) 259 260 if not ignore_status and result.exit_status > 0: 261 counters_inc('run', 'final_run_error') 262 raise error.AutoservRunError("command execution error", result) 263 264 counters_inc('run', failure_name) 265 return result 266 267 268 def run_very_slowly(self, command, timeout=3600, ignore_status=False, 269 stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS, 270 connect_timeout=30, options='', stdin=None, verbose=True, args=(), 271 ignore_timeout=False, ssh_failure_retry_ok=False): 272 """ 273 Run a command on the remote host. 274 This RPC call has an overhead of minimum 40ms and up to 400ms on 275 servers (crbug.com/734887). Each time a run_very_slowly is added for 276 every job - a server core dies in the lab. 277 @see common_lib.hosts.host.run() 278 279 @param timeout: command execution timeout 280 @param connect_timeout: ssh connection timeout (in seconds) 281 @param options: string with additional ssh command options 282 @param verbose: log the commands 283 @param ignore_timeout: bool True if SSH command timeouts should be 284 ignored. Will return None on command timeout. 285 @param ssh_failure_retry_ok: True if the command may be retried on 286 probable ssh failure (error 255 or timeout). When true, 287 the command may be executed up to three times, the second 288 time after restarting the ssh master connection. Use only for 289 commands that are idempotent, because when a "probable 290 ssh failure" occurs, we cannot tell if the command executed 291 or not. 292 293 @raises AutoservRunError: if the command failed 294 @raises AutoservSSHTimeout: ssh connection has timed out 295 """ 296 with metrics.SecondsTimer('chromeos/autotest/ssh/master_ssh_time', 297 scale=0.001): 298 if verbose: 299 stack = self._get_server_stack_state(lowest_frames=1, 300 highest_frames=7) 301 logging.debug("Running (ssh) '%s' from '%s'", command, stack) 302 command = self._verbose_logger_command(command) 303 304 # Start a master SSH connection if necessary. 305 self.start_master_ssh() 306 307 env = " ".join("=".join(pair) for pair in self.env.iteritems()) 308 try: 309 return self._run(command, timeout, ignore_status, 310 stdout_tee, stderr_tee, connect_timeout, env, 311 options, stdin, args, ignore_timeout, 312 ssh_failure_retry_ok) 313 except error.CmdError, cmderr: 314 # We get a CmdError here only if there is timeout of that 315 # command. Catch that and stuff it into AutoservRunError and 316 # raise it. 317 timeout_message = str('Timeout encountered: %s' % 318 cmderr.args[0]) 319 raise error.AutoservRunError(timeout_message, cmderr.args[1]) 320 321 322 def run(self, *args, **kwargs): 323 return self.run_very_slowly(*args, **kwargs) 324 325 326 def run_background(self, command, verbose=True): 327 """Start a command on the host in the background. 328 329 The command is started on the host in the background, and 330 this method call returns immediately without waiting for the 331 command's completion. The PID of the process on the host is 332 returned as a string. 333 334 The command may redirect its stdin, stdout, or stderr as 335 necessary. Without redirection, all input and output will 336 use /dev/null. 337 338 @param command The command to run in the background 339 @param verbose As for `self.run()` 340 341 @return Returns the PID of the remote background process 342 as a string. 343 """ 344 # Redirection here isn't merely hygienic; it's a functional 345 # requirement. sshd won't terminate until stdin, stdout, 346 # and stderr are all closed. 347 # 348 # The subshell is needed to do the right thing in case the 349 # passed in command has its own I/O redirections. 350 cmd_fmt = '( %s ) </dev/null >/dev/null 2>&1 & echo -n $!' 351 return self.run(cmd_fmt % command, verbose=verbose).stdout 352 353 354 def run_short(self, command, **kwargs): 355 """ 356 Calls the run() command with a short default timeout. 357 358 Takes the same arguments as does run(), 359 with the exception of the timeout argument which 360 here is fixed at 60 seconds. 361 It returns the result of run. 362 363 @param command: the command line string 364 365 """ 366 return self.run(command, timeout=60, **kwargs) 367 368 369 def run_grep(self, command, timeout=30, ignore_status=False, 370 stdout_ok_regexp=None, stdout_err_regexp=None, 371 stderr_ok_regexp=None, stderr_err_regexp=None, 372 connect_timeout=30): 373 """ 374 Run a command on the remote host and look for regexp 375 in stdout or stderr to determine if the command was 376 successul or not. 377 378 379 @param command: the command line string 380 @param timeout: time limit in seconds before attempting to 381 kill the running process. The run() function 382 will take a few seconds longer than 'timeout' 383 to complete if it has to kill the process. 384 @param ignore_status: do not raise an exception, no matter 385 what the exit code of the command is. 386 @param stdout_ok_regexp: regexp that should be in stdout 387 if the command was successul. 388 @param stdout_err_regexp: regexp that should be in stdout 389 if the command failed. 390 @param stderr_ok_regexp: regexp that should be in stderr 391 if the command was successul. 392 @param stderr_err_regexp: regexp that should be in stderr 393 if the command failed. 394 @param connect_timeout: connection timeout (in seconds) 395 396 Returns: 397 if the command was successul, raises an exception 398 otherwise. 399 400 Raises: 401 AutoservRunError: 402 - the exit code of the command execution was not 0. 403 - If stderr_err_regexp is found in stderr, 404 - If stdout_err_regexp is found in stdout, 405 - If stderr_ok_regexp is not found in stderr. 406 - If stdout_ok_regexp is not found in stdout, 407 """ 408 409 # We ignore the status, because we will handle it at the end. 410 result = self.run(command, timeout, ignore_status=True, 411 connect_timeout=connect_timeout) 412 413 # Look for the patterns, in order 414 for (regexp, stream) in ((stderr_err_regexp, result.stderr), 415 (stdout_err_regexp, result.stdout)): 416 if regexp and stream: 417 err_re = re.compile (regexp) 418 if err_re.search(stream): 419 raise error.AutoservRunError( 420 '%s failed, found error pattern: "%s"' % (command, 421 regexp), result) 422 423 for (regexp, stream) in ((stderr_ok_regexp, result.stderr), 424 (stdout_ok_regexp, result.stdout)): 425 if regexp and stream: 426 ok_re = re.compile (regexp) 427 if ok_re.search(stream): 428 if ok_re.search(stream): 429 return 430 431 if not ignore_status and result.exit_status > 0: 432 raise error.AutoservRunError("command execution error", result) 433 434 435 def setup_ssh_key(self): 436 """Setup SSH Key""" 437 logging.debug('Performing SSH key setup on %s as %s.', 438 self.host_port, self.user) 439 440 try: 441 host = pxssh.pxssh() 442 host.login(self.hostname, self.user, self.password, 443 port=self.port) 444 public_key = utils.get_public_key() 445 446 host.sendline('mkdir -p ~/.ssh') 447 host.prompt() 448 host.sendline('chmod 700 ~/.ssh') 449 host.prompt() 450 host.sendline("echo '%s' >> ~/.ssh/authorized_keys; " % 451 public_key) 452 host.prompt() 453 host.sendline('chmod 600 ~/.ssh/authorized_keys') 454 host.prompt() 455 host.logout() 456 457 logging.debug('SSH key setup complete.') 458 459 except: 460 logging.debug('SSH key setup has failed.') 461 try: 462 host.logout() 463 except: 464 pass 465 466 467 def setup_ssh(self): 468 """Setup SSH""" 469 if self.password: 470 try: 471 self.ssh_ping() 472 except error.AutoservSshPingHostError: 473 self.setup_ssh_key() 474