Home | History | Annotate | Download | only in hosts
      1 #
      2 # Copyright 2007 Google Inc. Released under the GPL v2
      3 
      4 """
      5 This module defines the SSHHost class.
      6 
      7 Implementation details:
      8 You should import the "hosts" package instead of importing each type of host.
      9 
        SSHHost: a remote machine with ssh access
     11 """
     12 
     13 import inspect
     14 import logging
     15 import re
     16 from autotest_lib.client.common_lib import error
     17 from autotest_lib.client.common_lib import pxssh
     18 from autotest_lib.server import utils
     19 from autotest_lib.server.hosts import abstract_ssh
     20 
# In case cros_host is being run via SSP on an older Moblab version with an
# older chromite version.
     23 try:
     24     from chromite.lib import metrics
     25 except ImportError:
     26     metrics = utils.metrics_mock
     27 
     28 
     29 class SSHHost(abstract_ssh.AbstractSSHHost):
     30     """
     31     This class represents a remote machine controlled through an ssh
     32     session on which you can run programs.
     33 
     34     It is not the machine autoserv is running on. The machine must be
     35     configured for password-less login, for example through public key
     36     authentication.
     37 
     38     It includes support for controlling the machine through a serial
     39     console on which you can run programs. If such a serial console is
     40     set up on the machine then capabilities such as hard reset and
     41     boot strap monitoring are available. If the machine does not have a
     42     serial console available then ordinary SSH-based commands will
     43     still be available, but attempts to use extensions such as
     44     console logging or hard reset will fail silently.
     45 
     46     Implementation details:
     47     This is a leaf class in an abstract class hierarchy, it must
     48     implement the unimplemented methods in parent classes.
     49     """
     50 
     51     def _initialize(self, hostname, *args, **dargs):
     52         """
     53         Construct a SSHHost object
     54 
     55         Args:
     56                 hostname: network hostname or address of remote machine
     57         """
     58         super(SSHHost, self)._initialize(hostname=hostname, *args, **dargs)
     59         self.setup_ssh()
     60 
     61 
     62     def ssh_command(self, connect_timeout=30, options='', alive_interval=300):
     63         """
     64         Construct an ssh command with proper args for this host.
     65 
     66         @param connect_timeout: connection timeout (in seconds)
     67         @param options: SSH options
     68         @param alive_interval: SSH Alive interval.
     69         """
     70         options = "%s %s" % (options, self.master_ssh_option)
     71         base_cmd = self.make_ssh_command(user=self.user, port=self.port,
     72                                          opts=options,
     73                                          hosts_file=self.known_hosts_file,
     74                                          connect_timeout=connect_timeout,
     75                                          alive_interval=alive_interval)
     76         return "%s %s" % (base_cmd, self.hostname)
     77 
     78 
     79     def _verbose_logger_command(self, command):
     80         """
     81         Prepend the command for the client with information about the ssh command
     82         to be executed and the server stack state.
     83 
     84         @param command: the ssh command to be executed.
     85         """
     86         stack_frames = inspect.stack()
     87         stack = ''
     88         # The last 2 frames on the stack are boring. Print 5-2=3 stack frames.
     89         count = min(5, len(stack_frames))
     90         if count >= 3:
     91             stack = inspect.getframeinfo(stack_frames[2][0]).function
     92             for frame in stack_frames[3:count]:
     93                 function_name = inspect.getframeinfo(frame[0]).function
     94                 stack = '%s|%s' % (function_name, stack)
     95         del stack_frames
     96         # If "logger" executable exists on the DUT use it to respew |command|.
     97         # Then regardless of "logger" run |command| as usual.
     98         command = ('if type "logger" > /dev/null 2>&1; then'
     99                    ' logger -tag "autotest" "server[stack::%s] -> ssh_run(%s)";'
    100                    'fi; '
    101                    '%s' % (stack, utils.sh_escape(command), command))
    102         return command
    103 
    104 
    def _run(self, command, timeout, ignore_status,
             stdout, stderr, connect_timeout, env, options, stdin, args,
             ignore_timeout, ssh_failure_retry_ok):
        """Helper function for run().

        Builds the complete ssh command line, executes it with utils.run()
        and applies the retry policy (one retry on a DNS resolution
        failure; optionally two more retries on other failures).  The
        outcome of every ssh invocation and of the call as a whole is
        recorded in metrics counters.

        @param command: command string to run on the remote host
        @param timeout: execution timeout passed to utils.run()
        @param ignore_status: when False, a positive exit status raises
                AutoservRunError; also passed to utils.run() as
                stderr_is_expected
        @param stdout: passed through to utils.run()
        @param stderr: passed through to utils.run()
        @param connect_timeout: ssh connection timeout (in seconds)
        @param env: string of environment assignments; when not blank it
                is exported before the command
        @param options: string with additional ssh command options
        @param stdin: passed through to utils.run()
        @param args: iterable of extra arguments; each one is escaped,
                double-quoted and appended to the command
        @param ignore_timeout: when True, a timeout returns None instead
                of raising
        @param ssh_failure_retry_ok: when True, a failed invocation may be
                retried twice, the last time after restarting the master
                ssh connection

        @return the result of utils.run(), or None on an ignored timeout

        @raises AutoservSSHTimeout: exit status 255 and stderr reports
                that the connection timed out
        @raises AutoservSshPermissionDeniedError: exit status 255 and
                stderr contains "Permission denied."
        @raises AutoservRunError: positive exit status and ignore_status
                is False
        """
        ssh_cmd = self.ssh_command(connect_timeout, options)
        if not env.strip():
            env = ""
        else:
            env = "export %s;" % env
        for arg in args:
            command += ' "%s"' % utils.sh_escape(arg)
        full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command))

        # TODO(jrbarnette):  crbug.com/484726 - When we're in an SSP
        # container, sometimes shortly after reboot we will see DNS
        # resolution errors on ssh commands; the problem never
        # occurs more than once in a row.  This especially affects
        # the autoupdate_Rollback test, but other cases have been
        # affected, too.
        #
        # We work around it by detecting the first DNS resolution error
        # and retrying exactly one time.
        dns_error_retry_count = 1

        # 'call' records the outcome of each ssh invocation inside _run(),
        # including exceptions.  'run' records each call to _run() with
        # its result and how many tries were made; it is recorded when
        # _run() exits (including exiting with an exception).
        counter_names = {
            'call': 'chromeos/autotest/ssh/calls',
            'run': 'chromeos/autotest/ssh/runs',
        }

        def counters_inc(counter_name, failure_name):
            """Helper function to increment metrics counters.
            @param counter_name: string indicating which counter to use
            @param failure_name: string identifying an error, or a false
                    value to record 'success'
            """
            metric_name = counter_names.get(counter_name)
            if metric_name is None:
                # Unknown counter names are ignored.
                return
            # ssh_call_count is read from the enclosing scope at call time.
            fields = {'error' : failure_name or 'success',
                      'attempt' : ssh_call_count}
            metrics.Counter(metric_name).increment(fields=fields)

        # If ssh_failure_retry_ok is True, retry twice on timeouts and generic
        # error 255: if a simple retry doesn't work, kill the ssh master
        # connection and try again.  (Note that either error could come from
        # the command running in the DUT, in which case the retry may be
        # useless but, in theory, also harmless.)
        # NOTE(review): as written below, every failure other than a DNS
        # failure -- including 'nonzero_status' -- consumes these retries.
        if ssh_failure_retry_ok:
            # Ignore ssh command timeout, even though it could be a timeout due
            # to the command executing in the remote host.  Note that passing
            # ignore_timeout = True makes utils.run() return None on timeouts
            # (and only on timeouts).
            original_ignore_timeout = ignore_timeout
            ignore_timeout = True
            ssh_failure_retry_count = 2
        else:
            ssh_failure_retry_count = 0

        ssh_call_count = 0

        while True:
            try:
                # Increment call count first, in case utils.run() throws an
                # exception.
                ssh_call_count += 1
                result = utils.run(full_cmd, timeout, True, stdout, stderr,
                                   verbose=False, stdin=stdin,
                                   stderr_is_expected=ignore_status,
                                   ignore_timeout=ignore_timeout)
            except Exception:
                # No retries on exception.
                counters_inc('call', 'exception')
                counters_inc('run', 'exception')
                # Bare raise keeps the original traceback intact.
                raise

            failure_name = None

            if result:
                if result.exit_status == 255:
                    if re.search(r'^ssh: .*: Name or service not known',
                                 result.stderr):
                        failure_name = 'dns_failure'
                    else:
                        failure_name = 'error_255'
                elif result.exit_status > 0:
                    failure_name = 'nonzero_status'
            else:
                # result == None
                failure_name = 'timeout'

            # Record the outcome of the ssh invocation.
            counters_inc('call', failure_name)

            if failure_name:
                # There was a failure: decide whether to retry.
                if failure_name == 'dns_failure':
                    if dns_error_retry_count > 0:
                        logging.debug('retrying ssh because of DNS failure')
                        dns_error_retry_count -= 1
                        continue
                else:
                    if ssh_failure_retry_count == 2:
                        logging.debug('retrying ssh command after %s',
                                       failure_name)
                        ssh_failure_retry_count -= 1
                        continue
                    elif ssh_failure_retry_count == 1:
                        # After two failures, restart the master connection
                        # before the final try.
                        logging.debug('retry 2: restarting master connection')
                        self.restart_master_ssh()
                        # Last retry: reinstate timeout behavior.
                        ignore_timeout = original_ignore_timeout
                        ssh_failure_retry_count -= 1
                        continue

            # No retry conditions occurred.  Exit the loop.
            break

        # The outcomes of ssh invocations have been recorded.  Now record
        # the outcome of this function.

        if ignore_timeout and not result:
            counters_inc('run', 'ignored_timeout')
            return None

        # The error messages will show up in band (indistinguishable
        # from stuff sent through the SSH connection), so we have the
        # remote computer echo the message "Connected." before running
        # any command.  Since the following 2 errors have to do with
        # connecting, it's safe to do these checks.
        if result.exit_status == 255:
            if re.search(r'^ssh: connect to host .* port .*: '
                         r'Connection timed out\r$', result.stderr):
                counters_inc('run', 'final_timeout')
                raise error.AutoservSSHTimeout("ssh timed out", result)
            if "Permission denied." in result.stderr:
                msg = "ssh permission denied"
                counters_inc('run', 'final_eperm')
                raise error.AutoservSshPermissionDeniedError(msg, result)

        if not ignore_status and result.exit_status > 0:
            counters_inc('run', 'final_run_error')
            raise error.AutoservRunError("command execution error", result)

        counters_inc('run', failure_name)
        return result
    256 
    257 
    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, options='', stdin=None, verbose=True, args=(),
            ignore_timeout=False, ssh_failure_retry_ok=False):
        """
        Run a command on the remote host.
        @see common_lib.hosts.host.run()

        @param command: the command line string
        @param timeout: command execution timeout
        @param ignore_status: do not raise an exception when the command
                exits with a positive status
        @param stdout_tee: where to send the command's stdout
        @param stderr_tee: where to send the command's stderr
        @param connect_timeout: ssh connection timeout (in seconds)
        @param options: string with additional ssh command options
        @param stdin: stdin to pass to the executed command
        @param verbose: log the commands
        @param args: sequence of strings appended to the command as
                individually quoted arguments
        @param ignore_timeout: bool True if SSH command timeouts should be
                ignored.  Will return None on command timeout.
        @param ssh_failure_retry_ok: True if the command may be retried on
                probable ssh failure (error 255 or timeout).  When true,
                the command may be executed up to three times, the last
                time after restarting the ssh master connection.  Use only
                for commands that are idempotent, because when a "probable
                ssh failure" occurs, we cannot tell if the command executed
                or not.

        @return the result of the command, or None on an ignored timeout

        @raises AutoservRunError: if the command failed or timed out
        @raises AutoservSSHTimeout: ssh connection has timed out
        """
        if verbose:
            logging.debug("Running (ssh) '%s'", command)
            command = self._verbose_logger_command(command)

        # Start a master SSH connection if necessary.
        self.start_master_ssh()

        # Flatten the host environment into "NAME=value NAME=value ...".
        env = " ".join("=".join(pair) for pair in self.env.items())
        try:
            return self._run(command, timeout, ignore_status,
                             stdout_tee, stderr_tee, connect_timeout, env,
                             options, stdin, args, ignore_timeout,
                             ssh_failure_retry_ok)
        except error.CmdError as cmderr:
            # We get a CmdError here only if there is timeout of that command.
            # Catch that and stuff it into AutoservRunError and raise it.
            timeout_message = str('Timeout encountered: %s' % cmderr.args[0])
            raise error.AutoservRunError(timeout_message, cmderr.args[1])
    301 
    302 
    def run_background(self, command, verbose=True):
        """Launch a command on the host without waiting for it.

        The command is started in the background on the host and this
        call returns right away, handing back the PID of the remote
        process as a string.

        The command is free to redirect its own stdin, stdout, or
        stderr.  Anything it does not redirect is connected to
        /dev/null.

        @param command The command to run in the background
        @param verbose As for `self.run()`

        @return Returns the PID of the remote background process
                as a string.
        """
        # The redirections are a functional requirement, not just
        # tidiness: sshd won't terminate until stdin, stdout, and
        # stderr are all closed.
        #
        # Wrapping the command in a subshell keeps any I/O redirections
        # inside the passed in command working as intended.
        background_cmd = (
                '( %s ) </dev/null >/dev/null 2>&1 & echo -n $!' % command)
        launch_result = self.run(background_cmd, verbose=verbose)
        return launch_result.stdout
    329 
    330 
    def run_short(self, command, **kwargs):
        """
        Run a command through run() with a short, fixed timeout.

        Accepts the same keyword arguments as run(), except for
        timeout, which is always 60 seconds here.

        @param command: the command line string
        @param kwargs: keyword arguments passed through to run()

        @return whatever run() returns.
        """
        short_timeout = 60
        return self.run(command, timeout=short_timeout, **kwargs)
    344 
    345 
    def run_grep(self, command, timeout=30, ignore_status=False,
                 stdout_ok_regexp=None, stdout_err_regexp=None,
                 stderr_ok_regexp=None, stderr_err_regexp=None,
                 connect_timeout=30):
        """
        Run a command on the remote host and look for regexps
        in stdout or stderr to determine if the command was
        successful or not.

        The checks are applied in this order:
          1. an error pattern found in its stream raises immediately
             (stderr is checked before stdout);
          2. otherwise, an ok pattern found in its stream means success,
             whatever the exit status was;
          3. otherwise, the exit status decides, unless ignore_status
             is set.
        A pattern is only looked for when both the pattern and the
        corresponding stream are non-empty.

        @param command: the command line string
        @param timeout: time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: do not raise an exception because of the
                              exit code of the command.
        @param stdout_ok_regexp: regexp that should be in stdout
                                 if the command was successful.
        @param stdout_err_regexp: regexp that should be in stdout
                                  if the command failed.
        @param stderr_ok_regexp: regexp that should be in stderr
                                 if the command was successful.
        @param stderr_err_regexp: regexp that should be in stderr
                                  if the command failed.
        @param connect_timeout: connection timeout (in seconds)

        @return None when the command is considered successful.

        @raises AutoservRunError:
                - if stderr_err_regexp is found in stderr,
                - if stdout_err_regexp is found in stdout,
                - if no ok pattern matched, the exit code of the command
                  was positive and ignore_status is False.
        """

        # We ignore the status, because we will handle it at the end.
        result = self.run(command, timeout, ignore_status=True,
                          connect_timeout=connect_timeout)

        # Look for the patterns, in order
        for (regexp, stream) in ((stderr_err_regexp, result.stderr),
                                 (stdout_err_regexp, result.stdout)):
            if regexp and stream and re.search(regexp, stream):
                raise error.AutoservRunError(
                    '%s failed, found error pattern: "%s"' % (command,
                                                            regexp), result)

        for (regexp, stream) in ((stderr_ok_regexp, result.stderr),
                                 (stdout_ok_regexp, result.stdout)):
            if regexp and stream and re.search(regexp, stream):
                return

        if not ignore_status and result.exit_status > 0:
            raise error.AutoservRunError("command execution error", result)
    410 
    411 
    def setup_ssh_key(self):
        """Setup SSH Key

        Logs in to the host with pxssh using self.password and appends
        the key returned by utils.get_public_key() to
        ~/.ssh/authorized_keys, fixing the permissions of ~/.ssh and of
        the key file along the way.

        This is best effort: failures are logged at debug level and are
        not raised to the caller.
        """
        logging.debug('Performing SSH key setup on %s:%d as %s.',
                      self.hostname, self.port, self.user)

        # Stays None when creating the session itself fails, so the
        # cleanup below knows there is nothing to log out of.
        host = None
        try:
            host = pxssh.pxssh()
            host.login(self.hostname, self.user, self.password,
                        port=self.port)
            public_key = utils.get_public_key()

            host.sendline('mkdir -p ~/.ssh')
            host.prompt()
            host.sendline('chmod 700 ~/.ssh')
            host.prompt()
            host.sendline("echo '%s' >> ~/.ssh/authorized_keys; " %
                            public_key)
            host.prompt()
            host.sendline('chmod 600 ~/.ssh/authorized_keys')
            host.prompt()
            host.logout()

            logging.debug('SSH key setup complete.')

        except Exception:
            # Keep the best-effort contract, but record why it failed and
            # let KeyboardInterrupt/SystemExit propagate.
            logging.debug('SSH key setup has failed.', exc_info=True)
            if host is not None:
                try:
                    host.logout()
                except Exception:
                    # The session may never have been established; there
                    # is nothing more to clean up.
                    pass
    442 
    443 
    def setup_ssh(self):
        """Setup SSH

        Does nothing when no password is configured.  Otherwise pings
        the host over ssh and, if the ping fails with
        AutoservSshPingHostError, runs setup_ssh_key().
        """
        if not self.password:
            return
        try:
            self.ssh_ping()
        except error.AutoservSshPingHostError:
            self.setup_ssh_key()
    450                 self.setup_ssh_key()
    451