Home | History | Annotate | Download | only in hosts
      1 #
      2 # Copyright 2007 Google Inc. Released under the GPL v2
      3 
      4 """
      5 This module defines the SSHHost class.
      6 
      7 Implementation details:
      8 You should import the "hosts" package instead of importing each type of host.
      9 
     10         SSHHost: a remote machine with a ssh access
     11 """
     12 
     13 import inspect
     14 import logging
     15 import re
     16 from autotest_lib.client.common_lib import error
     17 from autotest_lib.client.common_lib import pxssh
     18 from autotest_lib.server import utils
     19 from autotest_lib.server.hosts import abstract_ssh
     20 
     21 # In case cros_host is being ran via SSP on an older Moblab version with an
     22 # older chromite version.
     23 try:
     24     from chromite.lib import metrics
     25 except ImportError:
     26     metrics = utils.metrics_mock
     27 
     28 
     29 class SSHHost(abstract_ssh.AbstractSSHHost):
     30     """
     31     This class represents a remote machine controlled through an ssh
     32     session on which you can run programs.
     33 
     34     It is not the machine autoserv is running on. The machine must be
     35     configured for password-less login, for example through public key
     36     authentication.
     37 
     38     It includes support for controlling the machine through a serial
     39     console on which you can run programs. If such a serial console is
     40     set up on the machine then capabilities such as hard reset and
     41     boot strap monitoring are available. If the machine does not have a
     42     serial console available then ordinary SSH-based commands will
     43     still be available, but attempts to use extensions such as
     44     console logging or hard reset will fail silently.
     45 
     46     Implementation details:
     47     This is a leaf class in an abstract class hierarchy, it must
     48     implement the unimplemented methods in parent classes.
     49     """
     50 
     51     def _initialize(self, hostname, *args, **dargs):
     52         """
     53         Construct a SSHHost object
     54 
     55         Args:
     56                 hostname: network hostname or address of remote machine
     57         """
     58         super(SSHHost, self)._initialize(hostname=hostname, *args, **dargs)
     59         self.setup_ssh()
     60 
     61 
     62     def ssh_command(self, connect_timeout=30, options='', alive_interval=300,
     63                     alive_count_max=3, connection_attempts=1):
     64         """
     65         Construct an ssh command with proper args for this host.
     66 
     67         @param connect_timeout: connection timeout (in seconds)
     68         @param options: SSH options
     69         @param alive_interval: SSH Alive interval.
     70         @param alive_count_max: SSH AliveCountMax.
     71         @param connection_attempts: SSH ConnectionAttempts
     72         """
     73         options = " ".join([options, self._master_ssh.ssh_option])
     74         base_cmd = self.make_ssh_command(user=self.user, port=self.port,
     75                                          opts=options,
     76                                          hosts_file=self.known_hosts_file,
     77                                          connect_timeout=connect_timeout,
     78                                          alive_interval=alive_interval,
     79                                          alive_count_max=alive_count_max,
     80                                          connection_attempts=connection_attempts)
     81         return "%s %s" % (base_cmd, self.hostname)
     82 
     83     def _get_server_stack_state(self, lowest_frames=0, highest_frames=None):
     84         """ Get the server stack frame status.
     85         @param lowest_frames: the lowest frames to start printing.
     86         @param highest_frames: the highest frames to print.
     87                         (None means no restriction)
     88         """
     89         stack_frames = inspect.stack()
     90         stack = ''
     91         for frame in stack_frames[lowest_frames:highest_frames]:
     92             function_name = inspect.getframeinfo(frame[0]).function
     93             stack = '%s|%s' % (function_name, stack)
     94         del stack_frames
     95         return stack[:-1] # Delete the last '|' character
     96 
     97     def _verbose_logger_command(self, command):
     98         """
     99         Prepend the command for the client with information about the ssh
    100         command to be executed and the server stack state.
    101 
    102         @param command: the ssh command to be executed.
    103         """
    104         # The last 3 frames on the stack are boring. Print 6-3=3 stack frames.
    105         stack = self._get_server_stack_state(lowest_frames=3, highest_frames=6)
    106         # If "logger" executable exists on the DUT use it to respew |command|.
    107         # Then regardless of "logger" run |command| as usual.
    108         command = ('if type "logger" > /dev/null 2>&1; then'
    109                    ' logger -tag "autotest" "server[stack::%s] -> ssh_run(%s)";'
    110                    'fi; '
    111                    '%s' % (stack, utils.sh_escape(command), command))
    112         return command
    113 
    114 
    115     def _run(self, command, timeout, ignore_status,
    116              stdout, stderr, connect_timeout, env, options, stdin, args,
    117              ignore_timeout, ssh_failure_retry_ok):
    118         """Helper function for run()."""
    119         ssh_cmd = self.ssh_command(connect_timeout, options)
    120         if not env.strip():
    121             env = ""
    122         else:
    123             env = "export %s;" % env
    124         for arg in args:
    125             command += ' "%s"' % utils.sh_escape(arg)
    126         full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command))
    127 
    128         # TODO(jrbarnette):  crbug.com/484726 - When we're in an SSP
    129         # container, sometimes shortly after reboot we will see DNS
    130         # resolution errors on ssh commands; the problem never
    131         # occurs more than once in a row.  This especially affects
    132         # the autoupdate_Rollback test, but other cases have been
    133         # affected, too.
    134         #
    135         # We work around it by detecting the first DNS resolution error
    136         # and retrying exactly one time.
    137         dns_error_retry_count = 1
    138 
    139         def counters_inc(counter_name, failure_name):
    140             """Helper function to increment metrics counters.
    141             @param counter_name: string indicating which counter to use
    142             @param failure_name: string indentifying an error, or 'success'
    143             """
    144             if counter_name == 'call':
    145                 # ssh_counter records the outcome of each ssh invocation
    146                 # inside _run(), including exceptions.
    147                 ssh_counter = metrics.Counter('chromeos/autotest/ssh/calls')
    148                 fields = {'error' : failure_name or 'success',
    149                           'attempt' : ssh_call_count}
    150                 ssh_counter.increment(fields=fields)
    151 
    152             if counter_name == 'run':
    153                 # run_counter records each call to _run() with its result
    154                 # and how many tries were made.  Calls are recorded when
    155                 # _run() exits (including exiting with an exception)
    156                 run_counter = metrics.Counter('chromeos/autotest/ssh/runs')
    157                 fields = {'error' : failure_name or 'success',
    158                           'attempt' : ssh_call_count}
    159                 run_counter.increment(fields=fields)
    160 
    161         # If ssh_failure_retry_ok is True, retry twice on timeouts and generic
    162         # error 255: if a simple retry doesn't work, kill the ssh master
    163         # connection and try again.  (Note that either error could come from
    164         # the command running in the DUT, in which case the retry may be
    165         # useless but, in theory, also harmless.)
    166         if ssh_failure_retry_ok:
    167             # Ignore ssh command timeout, even though it could be a timeout due
    168             # to the command executing in the remote host.  Note that passing
    169             # ignore_timeout = True makes utils.run() return None on timeouts
    170             # (and only on timeouts).
    171             original_ignore_timeout = ignore_timeout
    172             ignore_timeout = True
    173             ssh_failure_retry_count = 2
    174         else:
    175             ssh_failure_retry_count = 0
    176 
    177         ssh_call_count = 0
    178 
    179         while True:
    180             try:
    181                 # Increment call count first, in case utils.run() throws an
    182                 # exception.
    183                 ssh_call_count += 1
    184                 result = utils.run(full_cmd, timeout, True, stdout, stderr,
    185                                    verbose=False, stdin=stdin,
    186                                    stderr_is_expected=ignore_status,
    187                                    ignore_timeout=ignore_timeout)
    188             except Exception as e:
    189                 # No retries on exception.
    190                 counters_inc('call', 'exception')
    191                 counters_inc('run', 'exception')
    192                 raise e
    193 
    194             failure_name = None
    195 
    196             if result:
    197                 if result.exit_status == 255:
    198                     if re.search(r'^ssh: .*: Name or service not known',
    199                                  result.stderr):
    200                         failure_name = 'dns_failure'
    201                     else:
    202                         failure_name = 'error_255'
    203                 elif result.exit_status > 0:
    204                     failure_name = 'nonzero_status'
    205             else:
    206                 # result == None
    207                 failure_name = 'timeout'
    208 
    209             # Record the outcome of the ssh invocation.
    210             counters_inc('call', failure_name)
    211 
    212             if failure_name:
    213                 # There was a failure: decide whether to retry.
    214                 if failure_name == 'dns_failure':
    215                     if dns_error_retry_count > 0:
    216                         logging.debug('retrying ssh because of DNS failure')
    217                         dns_error_retry_count -= 1
    218                         continue
    219                 else:
    220                     if ssh_failure_retry_count == 2:
    221                         logging.debug('retrying ssh command after %s',
    222                                        failure_name)
    223                         ssh_failure_retry_count -= 1
    224                         continue
    225                     elif ssh_failure_retry_count == 1:
    226                         # After two failures, restart the master connection
    227                         # before the final try.
    228                         logging.debug('retry 2: restarting master connection')
    229                         self.restart_master_ssh()
    230                         # Last retry: reinstate timeout behavior.
    231                         ignore_timeout = original_ignore_timeout
    232                         ssh_failure_retry_count -= 1
    233                         continue
    234 
    235             # No retry conditions occurred.  Exit the loop.
    236             break
    237 
    238         # The outcomes of ssh invocations have been recorded.  Now record
    239         # the outcome of this function.
    240 
    241         if ignore_timeout and not result:
    242             counters_inc('run', 'ignored_timeout')
    243             return None
    244 
    245         # The error messages will show up in band (indistinguishable
    246         # from stuff sent through the SSH connection), so we have the
    247         # remote computer echo the message "Connected." before running
    248         # any command.  Since the following 2 errors have to do with
    249         # connecting, it's safe to do these checks.
    250         if result.exit_status == 255:
    251             if re.search(r'^ssh: connect to host .* port .*: '
    252                          r'Connection timed out\r$', result.stderr):
    253                 counters_inc('run', 'final_timeout')
    254                 raise error.AutoservSSHTimeout("ssh timed out", result)
    255             if "Permission denied." in result.stderr:
    256                 msg = "ssh permission denied"
    257                 counters_inc('run', 'final_eperm')
    258                 raise error.AutoservSshPermissionDeniedError(msg, result)
    259 
    260         if not ignore_status and result.exit_status > 0:
    261             counters_inc('run', 'final_run_error')
    262             raise error.AutoservRunError("command execution error", result)
    263 
    264         counters_inc('run', failure_name)
    265         return result
    266 
    267 
    268     def run_very_slowly(self, command, timeout=3600, ignore_status=False,
    269             stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
    270             connect_timeout=30, options='', stdin=None, verbose=True, args=(),
    271             ignore_timeout=False, ssh_failure_retry_ok=False):
    272         """
    273         Run a command on the remote host.
    274         This RPC call has an overhead of minimum 40ms and up to 400ms on
    275         servers (crbug.com/734887). Each time a run_very_slowly is added for
    276         every job - a server core dies in the lab.
    277         @see common_lib.hosts.host.run()
    278 
    279         @param timeout: command execution timeout
    280         @param connect_timeout: ssh connection timeout (in seconds)
    281         @param options: string with additional ssh command options
    282         @param verbose: log the commands
    283         @param ignore_timeout: bool True if SSH command timeouts should be
    284                 ignored.  Will return None on command timeout.
    285         @param ssh_failure_retry_ok: True if the command may be retried on
    286                 probable ssh failure (error 255 or timeout).  When true,
    287                 the command may be executed up to three times, the second
    288                 time after restarting the ssh master connection.  Use only for
    289                 commands that are idempotent, because when a "probable
    290                 ssh failure" occurs, we cannot tell if the command executed
    291                 or not.
    292 
    293         @raises AutoservRunError: if the command failed
    294         @raises AutoservSSHTimeout: ssh connection has timed out
    295         """
    296         with metrics.SecondsTimer('chromeos/autotest/ssh/master_ssh_time',
    297                                   scale=0.001):
    298             if verbose:
    299                 stack = self._get_server_stack_state(lowest_frames=1,
    300                                                      highest_frames=7)
    301                 logging.debug("Running (ssh) '%s' from '%s'", command, stack)
    302                 command = self._verbose_logger_command(command)
    303 
    304             # Start a master SSH connection if necessary.
    305             self.start_master_ssh()
    306 
    307             env = " ".join("=".join(pair) for pair in self.env.iteritems())
    308             try:
    309                 return self._run(command, timeout, ignore_status,
    310                                  stdout_tee, stderr_tee, connect_timeout, env,
    311                                  options, stdin, args, ignore_timeout,
    312                                  ssh_failure_retry_ok)
    313             except error.CmdError, cmderr:
    314                 # We get a CmdError here only if there is timeout of that
    315                 # command. Catch that and stuff it into AutoservRunError and
    316                 # raise it.
    317                 timeout_message = str('Timeout encountered: %s' %
    318                                       cmderr.args[0])
    319                 raise error.AutoservRunError(timeout_message, cmderr.args[1])
    320 
    321 
    322     def run(self, *args, **kwargs):
    323         return self.run_very_slowly(*args, **kwargs)
    324 
    325 
    326     def run_background(self, command, verbose=True):
    327         """Start a command on the host in the background.
    328 
    329         The command is started on the host in the background, and
    330         this method call returns immediately without waiting for the
    331         command's completion.  The PID of the process on the host is
    332         returned as a string.
    333 
    334         The command may redirect its stdin, stdout, or stderr as
    335         necessary.  Without redirection, all input and output will
    336         use /dev/null.
    337 
    338         @param command The command to run in the background
    339         @param verbose As for `self.run()`
    340 
    341         @return Returns the PID of the remote background process
    342                 as a string.
    343         """
    344         # Redirection here isn't merely hygienic; it's a functional
    345         # requirement.  sshd won't terminate until stdin, stdout,
    346         # and stderr are all closed.
    347         #
    348         # The subshell is needed to do the right thing in case the
    349         # passed in command has its own I/O redirections.
    350         cmd_fmt = '( %s ) </dev/null >/dev/null 2>&1 & echo -n $!'
    351         return self.run(cmd_fmt % command, verbose=verbose).stdout
    352 
    353 
    354     def run_short(self, command, **kwargs):
    355         """
    356         Calls the run() command with a short default timeout.
    357 
    358         Takes the same arguments as does run(),
    359         with the exception of the timeout argument which
    360         here is fixed at 60 seconds.
    361         It returns the result of run.
    362 
    363         @param command: the command line string
    364 
    365         """
    366         return self.run(command, timeout=60, **kwargs)
    367 
    368 
    369     def run_grep(self, command, timeout=30, ignore_status=False,
    370                  stdout_ok_regexp=None, stdout_err_regexp=None,
    371                  stderr_ok_regexp=None, stderr_err_regexp=None,
    372                  connect_timeout=30):
    373         """
    374         Run a command on the remote host and look for regexp
    375         in stdout or stderr to determine if the command was
    376         successul or not.
    377 
    378 
    379         @param command: the command line string
    380         @param timeout: time limit in seconds before attempting to
    381                         kill the running process. The run() function
    382                         will take a few seconds longer than 'timeout'
    383                         to complete if it has to kill the process.
    384         @param ignore_status: do not raise an exception, no matter
    385                               what the exit code of the command is.
    386         @param stdout_ok_regexp: regexp that should be in stdout
    387                                  if the command was successul.
    388         @param stdout_err_regexp: regexp that should be in stdout
    389                                   if the command failed.
    390         @param stderr_ok_regexp: regexp that should be in stderr
    391                                  if the command was successul.
    392         @param stderr_err_regexp: regexp that should be in stderr
    393                                  if the command failed.
    394         @param connect_timeout: connection timeout (in seconds)
    395 
    396         Returns:
    397                 if the command was successul, raises an exception
    398                 otherwise.
    399 
    400         Raises:
    401                 AutoservRunError:
    402                 - the exit code of the command execution was not 0.
    403                 - If stderr_err_regexp is found in stderr,
    404                 - If stdout_err_regexp is found in stdout,
    405                 - If stderr_ok_regexp is not found in stderr.
    406                 - If stdout_ok_regexp is not found in stdout,
    407         """
    408 
    409         # We ignore the status, because we will handle it at the end.
    410         result = self.run(command, timeout, ignore_status=True,
    411                           connect_timeout=connect_timeout)
    412 
    413         # Look for the patterns, in order
    414         for (regexp, stream) in ((stderr_err_regexp, result.stderr),
    415                                  (stdout_err_regexp, result.stdout)):
    416             if regexp and stream:
    417                 err_re = re.compile (regexp)
    418                 if err_re.search(stream):
    419                     raise error.AutoservRunError(
    420                         '%s failed, found error pattern: "%s"' % (command,
    421                                                                 regexp), result)
    422 
    423         for (regexp, stream) in ((stderr_ok_regexp, result.stderr),
    424                                  (stdout_ok_regexp, result.stdout)):
    425             if regexp and stream:
    426                 ok_re = re.compile (regexp)
    427                 if ok_re.search(stream):
    428                     if ok_re.search(stream):
    429                         return
    430 
    431         if not ignore_status and result.exit_status > 0:
    432             raise error.AutoservRunError("command execution error", result)
    433 
    434 
    435     def setup_ssh_key(self):
    436         """Setup SSH Key"""
    437         logging.debug('Performing SSH key setup on %s as %s.',
    438                       self.host_port, self.user)
    439 
    440         try:
    441             host = pxssh.pxssh()
    442             host.login(self.hostname, self.user, self.password,
    443                         port=self.port)
    444             public_key = utils.get_public_key()
    445 
    446             host.sendline('mkdir -p ~/.ssh')
    447             host.prompt()
    448             host.sendline('chmod 700 ~/.ssh')
    449             host.prompt()
    450             host.sendline("echo '%s' >> ~/.ssh/authorized_keys; " %
    451                             public_key)
    452             host.prompt()
    453             host.sendline('chmod 600 ~/.ssh/authorized_keys')
    454             host.prompt()
    455             host.logout()
    456 
    457             logging.debug('SSH key setup complete.')
    458 
    459         except:
    460             logging.debug('SSH key setup has failed.')
    461             try:
    462                 host.logout()
    463             except:
    464                 pass
    465 
    466 
    467     def setup_ssh(self):
    468         """Setup SSH"""
    469         if self.password:
    470             try:
    471                 self.ssh_ping()
    472             except error.AutoservSshPingHostError:
    473                 self.setup_ssh_key()
    474