Home | History | Annotate | Download | only in update_engine
      1 # Copyright 2017 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import json
      6 import logging
      7 import socket
      8 import time
      9 import urllib2
     10 import urlparse
     11 
     12 from autotest_lib.client.bin import utils as client_utils
     13 from autotest_lib.client.common_lib import error
     14 from autotest_lib.client.common_lib.cros import dev_server
     15 from autotest_lib.server import hosts
     16 
     17 
     18 def snippet(text):
     19     """Returns the text with start/end snip markers around it.
     20 
     21     @param text: The snippet text.
     22 
     23     @return The text with start/end snip markers around it.
     24     """
     25     snip = '---8<---' * 10
     26     start = '-- START -'
     27     end = '-- END -'
     28     return ('%s%s\n%s\n%s%s' %
     29             (start, snip[len(start):], text, end, snip[len(end):]))
     30 
     31 
     32 class OmahaDevserverFailedToStart(error.TestError):
     33     """Raised when a omaha devserver fails to start."""
     34 
     35 
     36 class OmahaDevserver(object):
     37     """Spawns a test-private devserver instance."""
     38     # How long to wait for a devserver to start.
     39     _WAIT_FOR_DEVSERVER_STARTED_SECONDS = 30
     40 
     41     # How long to sleep (seconds) between checks to see if a devserver is up.
     42     _WAIT_SLEEP_INTERVAL = 1
     43 
     44     # Max devserver execution time (seconds); used with timeout(1) to ensure we
     45     # don't have defunct instances hogging the system.
     46     _DEVSERVER_TIMELIMIT_SECONDS = 12 * 60 * 60
     47 
     48 
     49     def __init__(self, omaha_host, update_payload_staged_url, max_updates=1,
     50                  critical_update=True):
     51         """Starts a private devserver instance, operating at Omaha capacity.
     52 
     53         @param omaha_host: host address where the devserver is spawned.
     54         @param update_payload_staged_url: URL to provision for update requests.
     55         @param max_updates: int number of updates this devserver will handle.
     56                             This is passed to src/platform/dev/devserver.py.
     57         @param critical_update: Whether to set a deadline in responses.
     58         """
     59         self._devserver_dir = '/home/chromeos-test/chromiumos/src/platform/dev'
     60 
     61         if not update_payload_staged_url:
     62             raise error.TestError('Missing update payload url')
     63 
     64         self._critical_update = critical_update
     65         self._max_updates = max_updates
     66         self._omaha_host = omaha_host
     67         self._devserver_pid = 0
     68         self._devserver_port = 0  # Determined later from devserver portfile.
     69         self._update_payload_staged_url = update_payload_staged_url
     70 
     71         self._devserver_ssh = hosts.SSHHost(self._omaha_host,
     72                                             user='chromeos-test')
     73 
     74         # Temporary files for various devserver outputs.
     75         self._devserver_logfile = None
     76         self._devserver_stdoutfile = None
     77         self._devserver_portfile = None
     78         self._devserver_pidfile = None
     79         self._devserver_static_dir = None
     80 
     81 
     82     def _cleanup_devserver_files(self):
     83         """Cleans up the temporary devserver files."""
     84         for filename in (self._devserver_logfile, self._devserver_stdoutfile,
     85                          self._devserver_portfile, self._devserver_pidfile):
     86             if filename:
     87                 self._devserver_ssh.run('rm -f %s' % filename,
     88                                         ignore_status=True)
     89 
     90         if self._devserver_static_dir:
     91             self._devserver_ssh.run('rm -rf %s' % self._devserver_static_dir,
     92                                     ignore_status=True)
     93 
     94 
     95     def _create_tempfile_on_devserver(self, label, dir=False):
     96         """Creates a temporary file/dir on the devserver and returns its path.
     97 
     98         @param label: Identifier for the file context (string, no whitespaces).
     99         @param dir: If True, create a directory instead of a file.
    100 
    101         @raises test.TestError: If we failed to invoke mktemp on the server.
    102         @raises OmahaDevserverFailedToStart: If tempfile creation failed.
    103         """
    104         remote_cmd = 'mktemp --tmpdir devserver-%s.XXXXXX' % label
    105         if dir:
    106             remote_cmd += ' --directory'
    107 
    108         logging.info(remote_cmd)
    109 
    110         try:
    111             result = self._devserver_ssh.run(remote_cmd, ignore_status=True,
    112                                              ssh_failure_retry_ok=True)
    113         except error.AutoservRunError as e:
    114             self._log_and_raise_remote_ssh_error(e)
    115         if result.exit_status != 0:
    116             logging.info(result)
    117             raise OmahaDevserverFailedToStart(
    118                     'Could not create a temporary %s file on the devserver, '
    119                     'error output: "%s"' % (label, result.stderr))
    120         return result.stdout.strip()
    121 
    122 
    123     @staticmethod
    124     def _log_and_raise_remote_ssh_error(e):
    125         """Logs failure to ssh remote, then raises a TestError."""
    126         logging.debug('Failed to ssh into the devserver: %s', e)
    127         logging.error('If you are running this locally it means you did not '
    128                       'configure ssh correctly.')
    129         raise error.TestError('Failed to ssh into the devserver: %s' % e)
    130 
    131 
    132     def _read_int_from_devserver_file(self, filename):
    133         """Reads and returns an integer value from a file on the devserver."""
    134         return int(self._get_devserver_file_content(filename).strip())
    135 
    136 
    137     def _wait_for_devserver_to_start(self):
    138         """Waits until the devserver starts within the time limit.
    139 
    140         Infers and sets the devserver PID and serving port.
    141 
    142         Raises:
    143             OmahaDevserverFailedToStart: If the time limit is reached and we
    144                                          cannot connect to the devserver.
    145         """
    146         # Compute the overall timeout.
    147         deadline = time.time() + self._WAIT_FOR_DEVSERVER_STARTED_SECONDS
    148 
    149         # First, wait for port file to be filled and determine the server port.
    150         logging.warning('Waiting for devserver to start up.')
    151         while time.time() < deadline:
    152             try:
    153                 self._devserver_pid = self._read_int_from_devserver_file(
    154                         self._devserver_pidfile)
    155                 self._devserver_port = self._read_int_from_devserver_file(
    156                         self._devserver_portfile)
    157                 logging.info('Devserver pid is %d, serving on port %d',
    158                              self._devserver_pid, self._devserver_port)
    159                 break
    160             except Exception:  # Couldn't read file or corrupt content.
    161                 time.sleep(self._WAIT_SLEEP_INTERVAL)
    162         else:
    163             try:
    164                 self._devserver_ssh.run_output('uptime',
    165                                                ssh_failure_retry_ok=True)
    166             except error.AutoservRunError as e:
    167                 logging.debug('Failed to run uptime on the devserver: %s', e)
    168             raise OmahaDevserverFailedToStart(
    169                     'The test failed to find the pid/port of the omaha '
    170                     'devserver after %d seconds. Check the dumped devserver '
    171                     'logs and devserver load for more information.' %
    172                     self._WAIT_FOR_DEVSERVER_STARTED_SECONDS)
    173 
    174         # Check that the server is reponsding to network requests.
    175         logging.warning('Waiting for devserver to accept network requests.')
    176         url = 'http://%s' % self.get_netloc()
    177         while time.time() < deadline:
    178             if dev_server.ImageServer.devserver_healthy(url, timeout_min=0.1):
    179                 break
    180 
    181             # TODO(milleral): Refactor once crbug.com/221626 is resolved.
    182             time.sleep(self._WAIT_SLEEP_INTERVAL)
    183         else:
    184             raise OmahaDevserverFailedToStart(
    185                     'The test failed to establish a connection to the omaha '
    186                     'devserver it set up on port %d. Check the dumped '
    187                     'devserver logs for more information.' %
    188                     self._devserver_port)
    189 
    190 
    191     def start_devserver(self):
    192         """Starts the devserver and confirms it is up.
    193 
    194         Raises:
    195             test.TestError: If we failed to spawn the remote devserver.
    196             OmahaDevserverFailedToStart: If the time limit is reached and we
    197                                          cannot connect to the devserver.
    198         """
    199         update_payload_url_base, update_payload_path = self._split_url(
    200                 self._update_payload_staged_url)
    201 
    202         # Allocate temporary files for various server outputs.
    203         self._devserver_logfile = self._create_tempfile_on_devserver('log')
    204         self._devserver_stdoutfile = self._create_tempfile_on_devserver(
    205                 'stdout')
    206         self._devserver_portfile = self._create_tempfile_on_devserver('port')
    207         self._devserver_pidfile = self._create_tempfile_on_devserver('pid')
    208         self._devserver_static_dir = self._create_tempfile_on_devserver(
    209                 'static', dir=True)
    210 
    211         # Invoke the Omaha/devserver on the remote server. Will attempt to kill
    212         # it with a SIGTERM after a predetermined timeout has elapsed, followed
    213         # by SIGKILL if not dead within 30 seconds from the former signal.
    214         cmdlist = [
    215                 'timeout', '-s', 'TERM', '-k', '30',
    216                 str(self._DEVSERVER_TIMELIMIT_SECONDS),
    217                 '%s/devserver.py' % self._devserver_dir,
    218                 '--payload=%s' % update_payload_path,
    219                 '--port=0',
    220                 '--pidfile=%s' % self._devserver_pidfile,
    221                 '--portfile=%s' % self._devserver_portfile,
    222                 '--logfile=%s' % self._devserver_logfile,
    223                 '--remote_payload',
    224                 '--urlbase=%s' % update_payload_url_base,
    225                 '--max_updates=%s' % self._max_updates,
    226                 '--host_log',
    227                 '--static_dir=%s' % self._devserver_static_dir
    228         ]
    229 
    230         if self._critical_update:
    231             cmdlist.append('--critical_update')
    232 
    233         remote_cmd = '( %s ) </dev/null >%s 2>&1 &' % (
    234                 ' '.join(cmdlist), self._devserver_stdoutfile)
    235 
    236         logging.info('Starting devserver with %r', remote_cmd)
    237         try:
    238             self._devserver_ssh.run_output(remote_cmd,
    239                                            ssh_failure_retry_ok=True)
    240         except error.AutoservRunError as e:
    241             self._log_and_raise_remote_ssh_error(e)
    242 
    243         try:
    244             self._wait_for_devserver_to_start()
    245         except OmahaDevserverFailedToStart:
    246             self._kill_remote_process()
    247             self._dump_devserver_log()
    248             self._cleanup_devserver_files()
    249             raise
    250 
    251 
    252     def _kill_remote_process(self):
    253         """Kills the devserver and verifies it's down; clears the remote pid."""
    254         def devserver_down():
    255             """Ensure that the devserver process is down."""
    256             return not self._remote_process_alive()
    257 
    258         if devserver_down():
    259             return
    260 
    261         for signal in 'SIGTERM', 'SIGKILL':
    262             remote_cmd = 'kill -s %s %s' % (signal, self._devserver_pid)
    263             self._devserver_ssh.run(remote_cmd, ssh_failure_retry_ok=True)
    264             try:
    265                 client_utils.poll_for_condition(
    266                         devserver_down, sleep_interval=1, desc='devserver down')
    267                 break
    268             except client_utils.TimeoutError:
    269                 logging.warning('Could not kill devserver with %s.', signal)
    270         else:
    271             logging.warning('Failed to kill devserver, giving up.')
    272 
    273         self._devserver_pid = None
    274 
    275 
    276     def _remote_process_alive(self):
    277         """Tests whether the remote devserver process is running."""
    278         if not self._devserver_pid:
    279             return False
    280         remote_cmd = 'test -e /proc/%s' % self._devserver_pid
    281         result = self._devserver_ssh.run(remote_cmd, ignore_status=True)
    282         return result.exit_status == 0
    283 
    284 
    285     def get_netloc(self):
    286         """Returns the netloc (host:port) of the devserver."""
    287         if not (self._devserver_pid and self._devserver_port):
    288             raise error.TestError('No running omaha/devserver')
    289 
    290         return '%s:%s' % (self._omaha_host, self._devserver_port)
    291 
    292 
    293     def get_update_url(self):
    294         """Returns the update_url you can use to update via this server."""
    295         return urlparse.urlunsplit(('http', self.get_netloc(), '/update',
    296                                     '', ''))
    297 
    298 
    299     def _get_devserver_file_content(self, filename):
    300         """Returns the content of a file on the devserver."""
    301         return self._devserver_ssh.run_output('cat %s' % filename,
    302                                               stdout_tee=None,
    303                                               ssh_failure_retry_ok=True)
    304 
    305 
    306     def _get_devserver_log(self):
    307         """Obtain the devserver output."""
    308         return self._get_devserver_file_content(self._devserver_logfile)
    309 
    310 
    311     def _get_devserver_stdout(self):
    312         """Obtain the devserver output in stdout and stderr."""
    313         return self._get_devserver_file_content(self._devserver_stdoutfile)
    314 
    315 
    316     def get_hostlog(self, ip, wait_for_reboot_events=False):
    317         """Get the update events json (aka hostlog).
    318 
    319         @param ip: IP of the DUT to get update info for.
    320         @param wait_for_reboot_events: True if we expect the reboot events.
    321 
    322         @return the json dump of the update events for the given IP.
    323         """
    324         omaha_hostlog_url = urlparse.urlunsplit(
    325             ['http', self.get_netloc(), '/api/hostlog',
    326              'ip=' + ip, ''])
    327 
    328         # 4 rootfs and 1 post reboot
    329         expected_events_count = 5
    330         # 10 minute timeout.
    331         timeout = time.time() + 60 * 10
    332         while True:
    333             try:
    334                 conn = urllib2.urlopen(omaha_hostlog_url)
    335             except urllib2.URLError, e:
    336                 logging.warning('Failed to read event log url: %s', e)
    337                 return None
    338             except socket.timeout, e:
    339                 logging.warning('Timed out reading event log url: %s', e)
    340                 return None
    341 
    342             event_log_resp = conn.read()
    343             conn.close()
    344             hostlog = json.loads(event_log_resp)
    345             logging.debug('hostlog returned: %s', hostlog)
    346             if wait_for_reboot_events:
    347                 if 'event_type' in hostlog[-1] and \
    348                         hostlog[-1]['event_type'] == 54:
    349                     return hostlog
    350                 else:
    351                     time.sleep(5)
    352                     if time.time() > timeout:
    353                         raise error.TestError('Timed out getting hostlog.')
    354                     continue
    355             else:
    356                 return hostlog
    357 
    358 
    359     def _dump_devserver_log(self, logging_level=logging.ERROR):
    360         """Dump the devserver log to the autotest log.
    361 
    362         @param logging_level: logging level (from logging) to log the output.
    363         """
    364         logging.log(logging_level, "Devserver stdout and stderr:\n" +
    365                     snippet(self._get_devserver_stdout()))
    366         logging.log(logging_level, "Devserver log file:\n" +
    367                     snippet(self._get_devserver_log()))
    368 
    369 
    370     @staticmethod
    371     def _split_url(url):
    372         """Splits a URL into the URL base and path."""
    373         split_url = urlparse.urlsplit(url)
    374         url_base = urlparse.urlunsplit(
    375                 (split_url.scheme, split_url.netloc, '', '', ''))
    376         url_path = split_url.path
    377         return url_base, url_path.lstrip('/')
    378 
    379 
    380     def stop_devserver(self):
    381         """Kill remote process and wait for it to die, dump its output."""
    382         if not self._devserver_pid:
    383             logging.error('No running omaha/devserver.')
    384             return
    385 
    386         logging.info('Killing omaha/devserver')
    387         self._kill_remote_process()
    388         logging.debug('Final devserver log before killing')
    389         self._dump_devserver_log(logging.DEBUG)
    390         self._cleanup_devserver_files()
    391