Home | History | Annotate | Download | only in autoupdate_EndToEndTest
      1 # Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import collections
      6 import json
      7 import logging
      8 import os
      9 import urlparse
     10 
     11 from autotest_lib.client.common_lib import error
     12 from autotest_lib.client.common_lib.cros import dev_server
     13 from autotest_lib.server.cros.update_engine import chromiumos_test_platform
     14 from autotest_lib.server.cros.update_engine import update_engine_test
     15 
     16 
     17 def snippet(text):
     18     """Returns the text with start/end snip markers around it.
     19 
     20     @param text: The snippet text.
     21 
     22     @return The text with start/end snip markers around it.
     23     """
     24     snip = '---8<---' * 10
     25     start = '-- START -'
     26     end = '-- END -'
     27     return ('%s%s\n%s\n%s%s' %
     28             (start, snip[len(start):], text, end, snip[len(end):]))
     29 
     30 
     31 class autoupdate_EndToEndTest(update_engine_test.UpdateEngineTest):
     32     """Complete update test between two Chrome OS releases.
     33 
     34     Performs an end-to-end test of updating a ChromeOS device from one version
     35     to another. The test performs the following steps:
     36 
     37       1. Stages the source (full) and target update payload on the central
     38          devserver.
     39       2. Installs a source image on the DUT (if provided) and reboots to it.
     40       3. Then starts the target update by calling cros_au RPC on the devserver.
     41       4. This call copies the devserver code and all payloads to the DUT.
     42       5. Starts a devserver on the DUT.
     43       6. Starts an update pointing to this localhost devserver.
     44       7. Watches as the DUT applies the update to rootfs and stateful.
     45       8. Reboots and repeats steps 5-6, ensuring that the next update check
     46          shows the new image version.
     47       9. Returns the hostlogs collected during each update check for
     48          verification against expected update events.
     49 
     50     This class interacts with several others:
     51     UpdateEngineTest: base class for comparing expected update events against
     52                       the events listed in the hostlog.
     53     UpdateEngineEvent: class representing a single expected update engine event.
     54     ChromiumOSTestPlatform: A class representing the Chrome OS device we are
     55                             updating. It has functions for things the DUT can
     56                             do: get logs, reboot, start update etc
     57 
     58     The flow is like this: this class stages the payloads on
     59     the devserver and then controls the flow of the test. It tells
     60     ChromiumOSTestPlatform to start the update. When that is done updating, it
     61     asks UpdateEngineTest to compare the update that just completed with an
     62     expected update.
     63 
     64     Some notes on naming:
     65       devserver: Refers to a machine running the Chrome OS Update Devserver.
     66       autotest_devserver: An autotest wrapper to interact with a devserver.
     67                           Can be used to stage artifacts to a devserver. We
     68                           will also class cros_au RPC on this devserver to
     69                           start the update.
     70       staged_url's: In this case staged refers to the fact that these items
     71                      are available to be downloaded statically from these urls
     72                      e.g. 'localhost:8080/static/my_file.gz'. These are usually
     73                      given after staging an artifact using a autotest_devserver
     74                      though they can be re-created given enough assumptions.
     75     """
     76     version = 1
     77 
     78     # Timeout periods, given in seconds.
     79     _WAIT_FOR_INITIAL_UPDATE_CHECK_SECONDS = 12 * 60
     80     # TODO(sosa): Investigate why this needs to be so long (this used to be
     81     # 120 and regressed).
     82     _WAIT_FOR_DOWNLOAD_STARTED_SECONDS = 4 * 60
     83     # See https://crbug.com/731214 before changing WAIT_FOR_DOWNLOAD
     84     _WAIT_FOR_DOWNLOAD_COMPLETED_SECONDS = 20 * 60
     85     _WAIT_FOR_UPDATE_COMPLETED_SECONDS = 4 * 60
     86     _WAIT_FOR_UPDATE_CHECK_AFTER_REBOOT_SECONDS = 15 * 60
     87 
     88     _DEVSERVER_HOSTLOG_ROOTFS = 'devserver_hostlog_rootfs'
     89     _DEVSERVER_HOSTLOG_REBOOT = 'devserver_hostlog_reboot'
     90 
     91     StagedURLs = collections.namedtuple('StagedURLs', ['source_url',
     92                                                        'source_stateful_url',
     93                                                        'target_url',
     94                                                        'target_stateful_url'])
     95 
     96     def initialize(self):
     97         """Sets up variables that will be used by test."""
     98         super(autoupdate_EndToEndTest, self).initialize()
     99         self._host = None
    100         self._autotest_devserver = None
    101 
    102     def _get_least_loaded_devserver(self, test_conf):
    103         """Find a devserver to use.
    104 
    105         We first try to pick a devserver with the least load. In case all
    106         devservers' load are higher than threshold, fall back to
    107         the old behavior by picking a devserver based on the payload URI,
    108         with which ImageServer.resolve will return a random devserver based on
    109         the hash of the URI. The picked devserver needs to respect the
    110         location of the host if 'prefer_local_devserver' is set to True or
    111         'restricted_subnets' is  set.
    112 
    113         @param test_conf: a dictionary of test settings.
    114         """
    115         hostname = self._host.hostname if self._host else None
    116         least_loaded_devserver = dev_server.get_least_loaded_devserver(
    117             hostname=hostname)
    118         if least_loaded_devserver:
    119             logging.debug('Choosing the least loaded devserver: %s',
    120                           least_loaded_devserver)
    121             autotest_devserver = dev_server.ImageServer(least_loaded_devserver)
    122         else:
    123             logging.warning('No devserver meets the maximum load requirement. '
    124                             'Picking a random devserver to use.')
    125             autotest_devserver = dev_server.ImageServer.resolve(
    126                 test_conf['target_payload_uri'], self._host.hostname)
    127         devserver_hostname = urlparse.urlparse(
    128             autotest_devserver.url()).hostname
    129 
    130         logging.info('Devserver chosen for this run: %s', devserver_hostname)
    131         return autotest_devserver
    132 
    133 
    134     def _stage_payloads_onto_devserver(self, test_conf):
    135         """Stages payloads that will be used by the test onto the devserver.
    136 
    137         @param test_conf: a dictionary containing payload urls to stage.
    138 
    139         @return a StagedURLs tuple containing the staged urls.
    140         """
    141         logging.info('Staging images onto autotest devserver (%s)',
    142                      self._autotest_devserver.url())
    143 
    144         source_uri = test_conf['source_payload_uri']
    145         staged_src_uri, staged_src_stateful = self._stage_payloads(
    146             source_uri, test_conf['source_archive_uri'])
    147 
    148         target_uri = test_conf['target_payload_uri']
    149         staged_target_uri, staged_target_stateful = self._stage_payloads(
    150             target_uri, test_conf['target_archive_uri'],
    151             test_conf['update_type'])
    152 
    153         return self.StagedURLs(staged_src_uri, staged_src_stateful,
    154                                staged_target_uri, staged_target_stateful)
    155 
    156 
    157     def _stage_payloads(self, payload_uri, archive_uri, payload_type='full'):
    158         """Stages a payload and its associated stateful on devserver."""
    159         if payload_uri:
    160             staged_uri = self._stage_payload_by_uri(payload_uri)
    161 
    162             # Figure out where to get the matching stateful payload.
    163             if archive_uri:
    164                 stateful_uri = self._get_stateful_uri(archive_uri)
    165             else:
    166                 stateful_uri = self._payload_to_stateful_uri(payload_uri)
    167             staged_stateful = self._stage_payload_by_uri(stateful_uri)
    168 
    169             logging.info('Staged %s payload from %s at %s.', payload_type,
    170                          payload_uri, staged_uri)
    171             logging.info('Staged stateful from %s at %s.', stateful_uri,
    172                          staged_stateful)
    173             return staged_uri, staged_stateful
    174 
    175 
    176     @staticmethod
    177     def _get_stateful_uri(build_uri):
    178         """Returns a complete GS URI of a stateful update given a build path."""
    179         return '/'.join([build_uri.rstrip('/'), 'stateful.tgz'])
    180 
    181 
    182     def _payload_to_stateful_uri(self, payload_uri):
    183         """Given a payload GS URI, returns the corresponding stateful URI."""
    184         build_uri = payload_uri.rpartition('/')[0]
    185         if build_uri.endswith('payloads'):
    186             build_uri = build_uri.rpartition('/')[0]
    187         return self._get_stateful_uri(build_uri)
    188 
    189 
    190     def _get_hostlog_file(self, filename, pid):
    191         """Return the hostlog file location.
    192 
    193         This hostlog file contains the update engine events that were fired
    194         during the update. The pid is returned from dev_server.auto_update().
    195 
    196         @param filename: The partial filename to look for.
    197         @param pid: The pid of the update.
    198 
    199         """
    200         hosts = [self._host.hostname, self._host.ip]
    201         for host in hosts:
    202             hostlog = '%s_%s_%s' % (filename, host, pid)
    203             file_url = os.path.join(self.job.resultdir,
    204                                     dev_server.AUTO_UPDATE_LOG_DIR,
    205                                     hostlog)
    206             if os.path.exists(file_url):
    207                 logging.info('Hostlog file to be used for checking update '
    208                              'steps: %s', file_url)
    209                 return file_url
    210         raise error.TestFail('Could not find %s for pid %s' % (filename, pid))
    211 
    212 
    213     def _dump_update_engine_log(self, test_platform):
    214         """Dumps relevant AU error log."""
    215         try:
    216             error_log = test_platform.get_update_log(80)
    217             logging.error('Dumping snippet of update_engine log:\n%s',
    218                           snippet(error_log))
    219         except Exception:
    220             # Mute any exceptions we get printing debug logs.
    221             pass
    222 
    223 
    224     def _report_perf_data(self, perf_file):
    225         """Reports performance and resource data.
    226 
    227         Currently, performance attributes are expected to include 'rss_peak'
    228         (peak memory usage in bytes).
    229 
    230         @param perf_file: A file with performance metrics.
    231         """
    232         logging.debug('Reading perf results from %s.', perf_file)
    233         try:
    234             with open(perf_file, 'r') as perf_file_handle:
    235                 perf_data = json.load(perf_file_handle)
    236         except Exception as e:
    237             logging.warning('Error while reading the perf data file: %s', e)
    238             return
    239 
    240         rss_peak = perf_data.get('rss_peak')
    241         if rss_peak:
    242             rss_peak_kib = rss_peak / 1024
    243             logging.info('Peak memory (RSS) usage on DUT: %d KiB', rss_peak_kib)
    244             self.output_perf_value(description='mem_usage_peak',
    245                                    value=int(rss_peak_kib),
    246                                    units='KiB',
    247                                    higher_is_better=False)
    248         else:
    249             logging.warning('No rss_peak key in JSON returned by update '
    250                             'engine perf script.')
    251 
    252         update_time = perf_data.get('update_length')
    253         if update_time:
    254             logging.info('Time it took to update: %s seconds', update_time)
    255             self.output_perf_value(description='update_length',
    256                                    value=int(update_time), units='sec',
    257                                    higher_is_better=False)
    258         else:
    259             logging.warning('No data about how long it took to update was '
    260                             'found.')
    261 
    262 
    263     def _verify_active_slot_changed(self, source_active_slot,
    264                                     target_active_slot, source_release,
    265                                     target_release):
    266         """Make sure we're using a different slot after the update."""
    267         if target_active_slot == source_active_slot:
    268             err_msg = 'The active image slot did not change after the update.'
    269             if source_release is None:
    270                 err_msg += (
    271                     ' The DUT likely rebooted into the old image, which '
    272                     'probably means that the payload we applied was '
    273                     'corrupt.')
    274             elif source_release == target_release:
    275                 err_msg += (' Given that the source and target versions are '
    276                             'identical, either it (1) rebooted into the '
    277                             'old image due to a bad payload or (2) we retried '
    278                             'the update after it failed once and the second '
    279                             'attempt was written to the original slot.')
    280             else:
    281                 err_msg += (' This is strange since the DUT reported the '
    282                             'correct target version. This is probably a system '
    283                             'bug; check the DUT system log.')
    284             raise error.TestFail(err_msg)
    285 
    286         logging.info('Target active slot changed as expected: %s',
    287                      target_active_slot)
    288 
    289 
    290     def _verify_version(self, expected, actual):
    291         """Compares actual and expected versions."""
    292         if expected != actual:
    293             err_msg = 'Failed to verify OS version. Expected %s, was %s' % (
    294                 expected, actual)
    295             logging.error(err_msg)
    296             raise error.TestFail(err_msg)
    297 
    298     def run_update_test(self, cros_device, test_conf):
    299         """Runs the update test, collects perf stats, checks expected version.
    300 
    301         @param cros_device: The device under test.
    302         @param test_conf: A dictionary containing test configuration values.
    303 
    304         """
    305         # Record the active root partition.
    306         source_active_slot = cros_device.get_active_slot()
    307         logging.info('Source active slot: %s', source_active_slot)
    308 
    309         source_release = test_conf['source_release']
    310         target_release = test_conf['target_release']
    311 
    312         cros_device.copy_perf_script_to_device(self.bindir)
    313         try:
    314             # Update the DUT to the target image.
    315             pid = cros_device.install_target_image(test_conf[
    316                 'target_payload_uri'])
    317 
    318             # Verify the hostlog of events was returned from the update.
    319             file_url = self._get_hostlog_file(self._DEVSERVER_HOSTLOG_ROOTFS,
    320                                               pid)
    321 
    322             # Call into base class to compare expected events against hostlog.
    323             self.verify_update_events(source_release, file_url)
    324         except:
    325             logging.fatal('ERROR: Failure occurred during the target update.')
    326             raise
    327 
    328         # Collect perf stats about this update run.
    329         perf_file = cros_device.get_perf_stats_for_update(self.job.resultdir)
    330         if perf_file is not None:
    331             self._report_perf_data(perf_file)
    332 
    333         # Device is updated. Check that we are running the expected version.
    334         if cros_device.oobe_triggers_update():
    335             # If DUT automatically checks for update during OOBE (e.g
    336             # rialto), this update check fires before the test can get the
    337             # post-reboot update check. So we just check the version from
    338             # lsb-release.
    339             logging.info('Skipping post reboot update check.')
    340             self._verify_version(target_release, cros_device.get_cros_version())
    341         else:
    342             # Verify we have a hostlog for the post-reboot update check.
    343             file_url = self._get_hostlog_file(self._DEVSERVER_HOSTLOG_REBOOT,
    344                                               pid)
    345 
    346             # Call into base class to compare expected events against hostlog.
    347             self.verify_update_events(source_release, file_url, target_release)
    348 
    349         self._verify_active_slot_changed(source_active_slot,
    350                                          cros_device.get_active_slot(),
    351                                          source_release, target_release)
    352 
    353         logging.info('Update successful, test completed')
    354 
    355 
    356     def run_once(self, host, test_conf):
    357         """Performs a complete auto update test.
    358 
    359         @param host: a host object representing the DUT.
    360         @param test_conf: a dictionary containing test configuration values.
    361 
    362         @raise error.TestError if anything went wrong with setting up the test;
    363                error.TestFail if any part of the test has failed.
    364         """
    365         self._host = host
    366         logging.debug('The test configuration supplied: %s', test_conf)
    367 
    368         self._autotest_devserver = self._get_least_loaded_devserver(test_conf)
    369         self._stage_payloads_onto_devserver(test_conf)
    370 
    371         # Get an object representing the CrOS DUT.
    372         cros_device = chromiumos_test_platform.ChromiumOSTestPlatform(
    373             self._host, self._autotest_devserver, self.job.resultdir)
    374 
    375         # Install source image
    376         cros_device.install_source_image(test_conf['source_payload_uri'])
    377         cros_device.check_login_after_source_update()
    378 
    379         # Start the update to the target image.
    380         try:
    381             self.run_update_test(cros_device, test_conf)
    382         except update_engine_test.UpdateEngineEventMissing:
    383             self._dump_update_engine_log(cros_device)
    384             raise
    385 
    386         # Check we can login after the update.
    387         cros_device.check_login_after_target_update()
    388