Home | History | Annotate | Download | only in update_engine
      1 # Copyright 2017 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import json
      6 import logging
      7 import os
      8 import re
      9 import update_engine_event as uee
     10 import urlparse
     11 
     12 from autotest_lib.client.common_lib import error
     13 from autotest_lib.client.common_lib import utils
     14 from autotest_lib.client.common_lib.cros import dev_server
     15 from autotest_lib.server import test
     16 from autotest_lib.server.cros.dynamic_suite import tools
     17 from autotest_lib.server.cros.update_engine import omaha_devserver
     18 from datetime import datetime, timedelta
     19 from update_engine_event import UpdateEngineEvent
     20 
     21 class UpdateEngineTest(test.test):
     22     """Class for comparing expected update_engine events against actual ones.
     23 
     24     During a rootfs update, there are several events that are fired (e.g.
     25     download_started, download_finished, update_started etc). Each event has
     26     properties associated with it that need to be verified.
     27 
     28     In this class we build a list of expected events (list of
     29     UpdateEngineEvent objects), and compare that against a "hostlog" returned
     30     from update_engine from the update. This hostlog is a json list of
     31     events fired during the update. It is accessed by the api/hostlog URL on the
     32     devserver during the update.
     33 
     34     We can also verify the hostlog of a one-time update event that is fired
     35     after rebooting after an update.
     36 
     37     During a typical autoupdate we will check both of these hostlogs.
     38     """
     39     version = 1
     40 
     41     # Timeout periods, given in seconds.
     42     _INITIAL_CHECK_TIMEOUT = 12 * 60
     43     _DOWNLOAD_STARTED_TIMEOUT = 4 * 60
     44     # See https://crbug.com/731214 before changing _DOWNLOAD_FINISHED_TIMEOUT
     45     _DOWNLOAD_FINISHED_TIMEOUT = 20 * 60
     46     _UPDATE_COMPLETED_TIMEOUT = 4 * 60
     47     _POST_REBOOT_TIMEOUT = 15 * 60
     48 
     49     # The names of the two hostlog files we will be verifying
     50     _DEVSERVER_HOSTLOG_ROOTFS = 'devserver_hostlog_rootfs'
     51     _DEVSERVER_HOSTLOG_REBOOT = 'devserver_hostlog_reboot'
     52 
     53     _CELLULAR_BUCKET = 'gs://chromeos-throw-away-bucket/CrOSPayloads/Cellular/'
     54 
     55 
     56     def initialize(self):
     57         self._hostlog_filename = None
     58         self._hostlog_events = []
     59         self._num_consumed_events = 0
     60         self._current_timestamp = None
     61         self._expected_events = []
     62         self._omaha_devserver = None
     63         self._host = None
     64         # Some AU tests use multiple DUTs
     65         self._hosts = None
     66 
     67     def cleanup(self):
     68         if self._omaha_devserver is not None:
     69             self._omaha_devserver.stop_devserver()
     70 
     71 
     72     def _get_expected_events_for_rootfs_update(self, source_release):
     73         """Creates a list of expected events fired during a rootfs update.
     74 
     75         There are 4 events fired during a rootfs update. We will create these
     76         in the correct order with the correct data, timeout, and error
     77         condition function.
     78         """
     79         initial_check = UpdateEngineEvent(
     80             version=source_release,
     81             on_error=self._error_initial_check)
     82         download_started = UpdateEngineEvent(
     83             event_type=uee.EVENT_TYPE_DOWNLOAD_STARTED,
     84             event_result=uee.EVENT_RESULT_SUCCESS,
     85             version=source_release,
     86             on_error=self._error_incorrect_event)
     87         download_finished = UpdateEngineEvent(
     88             event_type=uee.EVENT_TYPE_DOWNLOAD_FINISHED,
     89             event_result=uee.EVENT_RESULT_SUCCESS,
     90             version=source_release,
     91             on_error=self._error_incorrect_event)
     92         update_complete = UpdateEngineEvent(
     93             event_type=uee.EVENT_TYPE_UPDATE_COMPLETE,
     94             event_result=uee.EVENT_RESULT_SUCCESS,
     95             version=source_release,
     96             on_error=self._error_incorrect_event)
     97 
     98         # There is an error message if any of them take too long to fire.
     99         initial_error = self._timeout_error_message('an initial update check',
    100                                                     self._INITIAL_CHECK_TIMEOUT)
    101         dls_error = self._timeout_error_message('a download started '
    102                                                 'notification',
    103                                                 self._DOWNLOAD_STARTED_TIMEOUT,
    104                                                 uee.EVENT_TYPE_DOWNLOAD_STARTED)
    105         dlf_error = self._timeout_error_message('a download finished '
    106                                                 'notification',
    107                                                 self._DOWNLOAD_FINISHED_TIMEOUT,
    108                                                 uee.EVENT_TYPE_DOWNLOAD_FINISHED
    109                                                 )
    110         uc_error = self._timeout_error_message('an update complete '
    111                                                'notification',
    112                                                self._UPDATE_COMPLETED_TIMEOUT,
    113                                                uee.EVENT_TYPE_UPDATE_COMPLETE)
    114 
    115         # Build an array of tuples (event, timeout, timeout_error_message)
    116         self._expected_events = [
    117             (initial_check, self._INITIAL_CHECK_TIMEOUT, initial_error),
    118             (download_started, self._DOWNLOAD_STARTED_TIMEOUT, dls_error),
    119             (download_finished, self._DOWNLOAD_FINISHED_TIMEOUT, dlf_error),
    120             (update_complete, self._UPDATE_COMPLETED_TIMEOUT, uc_error)
    121         ]
    122 
    123 
    124     def _get_expected_event_for_post_reboot_check(self, source_release,
    125                                                   target_release):
    126         """Creates the expected event fired during post-reboot update check."""
    127         post_reboot_check = UpdateEngineEvent(
    128             event_type=uee.EVENT_TYPE_REBOOTED_AFTER_UPDATE,
    129             event_result=uee.EVENT_RESULT_SUCCESS,
    130             version=target_release,
    131             previous_version=source_release,
    132             on_error=self._error_reboot_after_update)
    133         err = self._timeout_error_message('a successful reboot '
    134                                           'notification',
    135                                           self._POST_REBOOT_TIMEOUT,
    136                                           uee.EVENT_TYPE_REBOOTED_AFTER_UPDATE)
    137 
    138         self._expected_events = [
    139             (post_reboot_check, self._POST_REBOOT_TIMEOUT, err)
    140         ]
    141 
    142 
    143     def _read_hostlog_events(self):
    144         """Read the list of events from the hostlog json file."""
    145         if len(self._hostlog_events) <= self._num_consumed_events:
    146             try:
    147                 with open(self._hostlog_filename, 'r') as out_log:
    148                     self._hostlog_events = json.loads(out_log.read())
    149             except Exception as e:
    150                 raise error.TestFail('Error while reading the hostlogs '
    151                                      'from devserver: %s' % e)
    152 
    153 
    154     def _get_next_hostlog_event(self):
    155         """Returns the next event from the hostlog json file.
    156 
    157         @return The next new event in the host log
    158                 None if no such event was found or an error occurred.
    159         """
    160         self._read_hostlog_events()
    161         # Return next new event, if one is found.
    162         if len(self._hostlog_events) > self._num_consumed_events:
    163             new_event = {
    164                 key: str(val) for key, val
    165                 in self._hostlog_events[self._num_consumed_events].iteritems()
    166             }
    167             self._num_consumed_events += 1
    168             logging.info('Consumed new event: %s', new_event)
    169             return new_event
    170 
    171 
    172     def _verify_event_with_timeout(self, expected_event, timeout, on_timeout):
    173         """Verify an expected event occurs within a given timeout.
    174 
    175         @param expected_event: an expected event
    176         @param timeout: specified in seconds
    177         @param on_timeout: A string to return if timeout occurs, or None.
    178 
    179         @return None if event complies, an error string otherwise.
    180         """
    181         actual_event = self._get_next_hostlog_event()
    182         if actual_event:
    183             # If this is the first event, set it as the current time
    184             if self._current_timestamp is None:
    185                 self._current_timestamp = datetime.strptime(actual_event[
    186                                                                 'timestamp'],
    187                                                             '%Y-%m-%d %H:%M:%S')
    188 
    189             # Get the time stamp for the current event and convert to datetime
    190             timestamp = actual_event['timestamp']
    191             event_timestamp = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
    192 
    193             # Add the timeout onto the timestamp to get its expiry
    194             event_timeout = self._current_timestamp + timedelta(seconds=timeout)
    195 
    196             # If the event happened before the timeout
    197             if event_timestamp < event_timeout:
    198                 difference = event_timestamp - self._current_timestamp
    199                 logging.info('Event took %s seconds to fire during the '
    200                              'update', difference.seconds)
    201                 result = expected_event.equals(actual_event)
    202                 self._current_timestamp = event_timestamp
    203                 return result
    204 
    205         logging.error('The expected event was not found in the hostlog: %s',
    206                       expected_event)
    207         return on_timeout
    208 
    209 
    210     def _error_initial_check(self, expected, actual, mismatched_attrs):
    211         """Error message for when update fails at initial update check."""
    212         err_msg = ('The update test appears to have completed successfully but '
    213                    'we found a problem while verifying the hostlog of events '
    214                    'returned from the update. Some attributes reported for '
    215                    'the initial update check event are not what we expected: '
    216                    '%s. ' % mismatched_attrs)
    217         if 'version' in mismatched_attrs:
    218             err_msg += ('The expected version is (%s) but reported version was '
    219                         '(%s). ' % (expected['version'], actual['version']))
    220             err_msg += ('If reported version = target version, it is likely '
    221                         'we retried the update because the test thought the '
    222                         'first attempt failed but it actually succeeded '
    223                         '(e.g due to SSH disconnect, DUT not reachable by '
    224                         'hostname, applying stateful failed after rootfs '
    225                         'succeeded). This second update attempt is then started'
    226                         ' from the target version instead of the source '
    227                         'version, so our hostlog verification is invalid.')
    228         err_msg += ('Check the full hostlog for this update in the %s file in '
    229                     'the %s directory.' % (self._DEVSERVER_HOSTLOG_ROOTFS,
    230                                            dev_server.AUTO_UPDATE_LOG_DIR))
    231         return err_msg
    232 
    233 
    234     def _error_incorrect_event(self, expected, actual, mismatched_attrs):
    235         """Error message for when an event is not what we expect."""
    236         return ('The update appears to have completed successfully but '
    237                 'when analysing the update events in the hostlog we have '
    238                 'found that one of the events is incorrect. This should '
    239                 'never happen. The mismatched attributes are: %s. We expected '
    240                 '%s, but got %s.' % (mismatched_attrs, expected, actual))
    241 
    242 
    243     def _error_reboot_after_update(self, expected, actual, mismatched_attrs):
    244         """Error message for problems in the post-reboot update check."""
    245         err_msg = ('The update completed successfully but there was a problem '
    246                    'with the post-reboot update check. After a successful '
    247                    'update, we do a final update check to parse a unique '
    248                    'omaha request. The mistmatched attributes for this update '
    249                    'check were %s. ' % mismatched_attrs)
    250         if 'event_result' in mismatched_attrs:
    251             err_msg += ('The event_result was expected to be (%s:%s) but '
    252                         'reported (%s:%s). ' %
    253                         (expected['event_result'],
    254                          uee.get_event_result(expected['event_result']),
    255                          actual.get('event_result'),
    256                          uee.get_event_result(actual.get('event_result'))))
    257         if 'event_type' in mismatched_attrs:
    258             err_msg += ('The event_type was expeted to be (%s:%s) but '
    259                         'reported (%s:%s). ' %
    260                         (expected['event_type'],
    261                          uee.get_event_type(expected['event_type']),
    262                          actual.get('event_type'),
    263                          uee.get_event_type(actual.get('event_type'))))
    264         if 'version' in mismatched_attrs:
    265             err_msg += ('The version was expected to be (%s) but '
    266                         'reported (%s). This probably means that the payload '
    267                         'we applied was incorrect or corrupt. ' %
    268                         (expected['version'], actual['version']))
    269         if 'previous_version' in mismatched_attrs:
    270             err_msg += ('The previous version is expected to be (%s) but '
    271                         'reported (%s). This can happen if we retried the '
    272                         'update after rootfs update completed on the first '
    273                         'attempt then we failed. Or if stateful got wiped and '
    274                         '/var/lib/update_engine/prefs/previous-version was '
    275                         'deleted. ' % (expected['previous_version'],
    276                                        actual['previous_version']))
    277         err_msg += ('You can see the full hostlog for this update check in '
    278                     'the %s file within the %s directory. ' %
    279                     (self._DEVSERVER_HOSTLOG_REBOOT,
    280                      dev_server.AUTO_UPDATE_LOG_DIR))
    281         return err_msg
    282 
    283 
    284     def _timeout_error_message(self, desc, timeout, event_type=None):
    285         """Error message for when an event takes too long to fire."""
    286         if event_type is not None:
    287             desc += ' (%s)' % uee.get_event_type(event_type)
    288         return ('The update completed successfully but one of the steps of '
    289                 'the update took longer than we would like. We failed to '
    290                 'receive %s within %d seconds.' % (desc, timeout))
    291 
    292 
    293     def _stage_payload_by_uri(self, payload_uri):
    294         """Stage a payload based on its GS URI.
    295 
    296         This infers the build's label, filename and GS archive from the
    297         provided GS URI.
    298 
    299         @param payload_uri: The full GS URI of the payload.
    300 
    301         @return URL of the staged payload on the server.
    302 
    303         @raise error.TestError if there's a problem with staging.
    304 
    305         """
    306         archive_url, _, filename = payload_uri.rpartition('/')
    307         build_name = urlparse.urlsplit(archive_url).path.strip('/')
    308         return self._stage_payload(build_name, filename,
    309                                    archive_url=archive_url)
    310 
    311 
    312     def _stage_payload(self, build_name, filename, archive_url=None):
    313         """Stage the given payload onto the devserver.
    314 
    315         Works for either a stateful or full/delta test payload. Expects the
    316         gs_path or a combo of build_name + filename.
    317 
    318         @param build_name: The build name e.g. x86-mario-release/<version>.
    319                            If set, assumes default gs archive bucket and
    320                            requires filename to be specified.
    321         @param filename: In conjunction with build_name, this is the file you
    322                          are downloading.
    323         @param archive_url: An optional GS archive location, if not using the
    324                             devserver's default.
    325 
    326         @return URL of the staged payload on the server.
    327 
    328         @raise error.TestError if there's a problem with staging.
    329 
    330         """
    331         try:
    332             self._autotest_devserver.stage_artifacts(image=build_name,
    333                                                      files=[filename],
    334                                                      archive_url=archive_url)
    335             return self._autotest_devserver.get_staged_file_url(filename,
    336                                                                 build_name)
    337         except dev_server.DevServerException, e:
    338             raise error.TestError('Failed to stage payload: %s' % e)
    339 
    340 
    341     def _get_payload_url(self, build=None, full_payload=True):
    342         """
    343         Gets the GStorage URL of the full or delta payload for this build.
    344 
    345         @param build: build string e.g samus-release/R65-10225.0.0.
    346         @param full_payload: True for full payload. False for delta.
    347 
    348         @returns the payload URL.
    349 
    350         """
    351         if build is None:
    352             if self._job_repo_url is None:
    353                 self._job_repo_url = self._get_job_repo_url()
    354             _, build = tools.get_devserver_build_from_package_url(
    355                 self._job_repo_url)
    356 
    357         gs = dev_server._get_image_storage_server()
    358         if full_payload:
    359             # Example: chromeos_R65-10225.0.0_samus_full_dev.bin
    360             regex = 'chromeos_%s*_full_*' % build.rpartition('/')[2]
    361         else:
    362             # Example: chromeos_R65-10225.0.0_R65-10225.0.0_samus_delta_dev.bin
    363             regex = 'chromeos_%s*_delta_*' % build.rpartition('/')[2]
    364         payload_url_regex = gs + build + '/' + regex
    365         logging.debug('Trying to find payloads at %s', payload_url_regex)
    366         payloads = utils.gs_ls(payload_url_regex)
    367         if not payloads:
    368             raise error.TestFail('Could not find payload for %s', build)
    369         logging.debug('Payloads found: %s', payloads)
    370         return payloads[0]
    371 
    372 
    373     def _get_staged_file_info(self, staged_url):
    374         """
    375         Gets the staged files info that includes SHA256 and size.
    376 
    377         @param staged_url: the staged file url.
    378 
    379         @returns file info (SHA256 and size).
    380 
    381         """
    382         split_url = staged_url.rpartition('/static/')
    383         file_info_url = os.path.join(split_url[0], 'api/fileinfo', split_url[2])
    384         logging.info('file info url: %s', file_info_url)
    385         devserver_hostname = urlparse.urlparse(file_info_url).hostname
    386         cmd = 'ssh %s \'curl "%s"\'' % (devserver_hostname,
    387                                         utils.sh_escape(file_info_url))
    388         try:
    389             result = utils.run(cmd).stdout
    390             return json.loads(result)
    391         except error.CmdError as e:
    392             logging.error('Failed to read file info: %s', e)
    393             raise error.TestFail('Could not reach fileinfo API on devserver.')
    394 
    395 
    396     def _get_job_repo_url(self):
    397         """Gets the job_repo_url argument supplied to the test by the lab."""
    398         if self._hosts is not None:
    399             self._host = self._hosts[0]
    400         if self._host is None:
    401             raise error.TestFail('No host specified by AU test.')
    402         info = self._host.host_info_store.get()
    403         return info.attributes.get(self._host.job_repo_url_attribute, '')
    404 
    405 
    406     def _check_for_cellular_entries_in_update_log(self, update_engine_log):
    407         """
    408         Check update_engine.log for log entries about cellular.
    409 
    410         @param update_engine_log: The text of an update_engine.log file.
    411 
    412         """
    413         logging.info('Making sure we have cellular entries in update_engine '
    414                      'log.')
    415         line1 = "Allowing updates over cellular as permission preference is " \
    416                 "set to true."
    417         line2 = "We are connected via cellular, Updates allowed: Yes"
    418         for line in [line1, line2]:
    419             ue = re.compile(line)
    420             if ue.search(update_engine_log) is None:
    421                 raise error.TestFail('We did not find cellular string "%s" in '
    422                                      'the update_engine log. Please check the '
    423                                      'update_engine logs in the results '
    424                                      'directory.' % line)
    425 
    426 
    427     def _copy_payload_to_public_bucket(self, payload_url):
    428         """
    429         Copy payload and make link public.
    430 
    431         @param payload_url: Payload URL on Google Storage.
    432 
    433         @returns The payload URL that is now publicly accessible.
    434 
    435         """
    436         payload_filename = payload_url.rpartition('/')[2]
    437         utils.run('gsutil cp %s %s' % (payload_url, self._CELLULAR_BUCKET))
    438         new_gs_url = self._CELLULAR_BUCKET + payload_filename
    439         utils.run('gsutil acl ch -u AllUsers:R %s' % new_gs_url)
    440         return new_gs_url.replace('gs://', 'https://storage.googleapis.com/')
    441 
    442 
    443     def verify_update_events(self, source_release, hostlog_filename,
    444                              target_release=None):
    445         """Compares a hostlog file against a set of expected events.
    446 
    447         This is the main function of this class. It takes in an expected
    448         source and target version along with a hostlog file location. It will
    449         then generate the expected events based on the data and compare it
    450         against the events listed in the hostlog json file.
    451         """
    452         self._hostlog_events = []
    453         self._num_consumed_events = 0
    454         self._current_timestamp = None
    455         if target_release is not None:
    456             self._get_expected_event_for_post_reboot_check(source_release,
    457                                                            target_release)
    458         else:
    459             self._get_expected_events_for_rootfs_update(source_release)
    460 
    461         self._hostlog_filename = hostlog_filename
    462         logging.info('Checking update steps with hostlog file: %s',
    463                      self._hostlog_filename)
    464 
    465         for expected_event, timeout, on_timeout in self._expected_events:
    466             logging.info('Expecting %s within %s seconds', expected_event,
    467                          timeout)
    468             err_msg = self._verify_event_with_timeout(
    469                 expected_event, timeout, on_timeout)
    470             if err_msg is not None:
    471                 logging.error('Failed expected event: %s', err_msg)
    472                 raise UpdateEngineEventMissing(err_msg)
    473 
    474 
    475     def get_update_url_for_test(self, job_repo_url, full_payload=True,
    476                                 critical_update=False, max_updates=1,
    477                                 cellular=False):
    478         """
    479         Get the correct update URL for autoupdate tests to use.
    480 
    481         There are bunch of different update configurations that are required
    482         by AU tests. Some tests need a full payload, some need a delta payload.
    483         Some require the omaha response to be critical or be able to handle
    484         multiple DUTs etc. This function returns the correct update URL to the
    485         test based on the inputs parameters.
    486 
    487         Ideally all updates would use an existing lab devserver to handle the
    488         updates. However the lab devservers default setup does not work for
    489         all test needs. So we also kick off our own omaha_devserver for the
    490         test run some times.
    491 
    492         This tests expects the test to set self._host or self._hosts.
    493 
    494         @param job_repo_url: string url containing the current build.
    495         @param full_payload: bool whether we want a full payload.
    496         @param critical_update: bool whether we need a critical update.
    497         @param max_updates: int number of updates the test will perform. This
    498                             is passed to src/platform/dev/devserver.py if we
    499                             create our own deverver.
    500         @param cellular: update will be done over cellular connection.
    501 
    502         @returns an update url string.
    503 
    504         """
    505         if job_repo_url is None:
    506             self._job_repo_url = self._get_job_repo_url()
    507         else:
    508             self._job_repo_url = job_repo_url
    509         if not self._job_repo_url:
    510             raise error.TestFail('There was no job_repo_url so we cannot get '
    511                                  'a payload to use.')
    512         ds_url, build = tools.get_devserver_build_from_package_url(
    513             self._job_repo_url)
    514 
    515         # We always stage the payloads on the existing lab devservers.
    516         self._autotest_devserver = dev_server.ImageServer(ds_url)
    517 
    518         if cellular:
    519             # Get the google storage url of the payload. We will be copying
    520             # the payload to a public google storage bucket (similar location
    521             # to updates via autest command).
    522             payload_url = self._get_payload_url(build,
    523                                                 full_payload=full_payload)
    524             return self._copy_payload_to_public_bucket(payload_url)
    525 
    526         if full_payload:
    527             self._autotest_devserver.stage_artifacts(build, ['full_payload'])
    528             if not critical_update:
    529                 # We can use the same lab devserver to handle the update.
    530                 return self._autotest_devserver.get_update_url(build)
    531             else:
    532                 staged_url = self._autotest_devserver._get_image_url(build)
    533         else:
    534             # We need to stage delta ourselves due to crbug.com/793434.
    535             delta_payload = self._get_payload_url(build, full_payload=False)
    536             staged_url = self._stage_payload_by_uri(delta_payload)
    537 
    538         # We need to start our own devserver for the rest of the cases.
    539         self._omaha_devserver = omaha_devserver.OmahaDevserver(
    540             self._autotest_devserver.hostname, staged_url,
    541             max_updates=max_updates, critical_update=critical_update)
    542         self._omaha_devserver.start_devserver()
    543         return self._omaha_devserver.get_update_url()
    544 
    545 
    546 class UpdateEngineEventMissing(error.TestFail):
    547     """Raised if the hostlog is missing an expected event."""
    548