Home | History | Annotate | Download | only in cros
      1 # Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import glob
      6 import logging
      7 import os
      8 import re
      9 import urllib2
     10 import urlparse
     11 
     12 from autotest_lib.client.bin import utils
     13 from autotest_lib.client.common_lib import error, global_config
     14 from autotest_lib.client.common_lib.cros import dev_server
     15 from autotest_lib.server import autotest
     16 from autotest_lib.server import utils as server_utils
     17 from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
     18 from autotest_lib.server.cros.dynamic_suite import tools
     19 from chromite.lib import retry_util
     20 
     21 try:
     22     from chromite.lib import metrics
     23 except ImportError:
     24     metrics = utils.metrics_mock
     25 
     26 
     27 def _metric_name(base_name):
     28     return 'chromeos/autotest/provision/' + base_name
     29 
     30 
# Update engine status strings, as they appear in the CURRENT_OP line
# printed by `update_engine_client -status` (see
# `ChromiumOSUpdater.check_update_status()`).
UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
# A list of update engine client states that occur after an update is triggered.
UPDATER_PROCESSING_UPDATE = ['UPDATE_STATUS_CHECKING_FORUPDATE',
                             'UPDATE_STATUS_UPDATE_AVAILABLE',
                             'UPDATE_STATUS_DOWNLOADING',
                             'UPDATE_STATUS_FINALIZING']


# Names of helper scripts.  `stateful_update` is looked up on the DUT
# under /usr/local/bin, or fetched from the devserver's /static/
# directory (see `ChromiumOSUpdater._get_remote_script()`).
# NOTE(review): `quick-provision` is presumably located the same way;
# confirm against its users.
_STATEFUL_UPDATE_SCRIPT = 'stateful_update'
_QUICK_PROVISION_SCRIPT = 'quick-provision'

# Path on the DUT of the update engine command-line client.
_UPDATER_BIN = '/usr/bin/update_engine_client'
# NOTE(review): presumably the DUT log files relevant to update
# failures; confirm against the code that reads this list.
_UPDATER_LOGS = ['/var/log/messages', '/var/log/update_engine']

# Descriptions of the two kernel/root slots.  'kernel' is the partition
# index passed to `cgpt show -i`; 'root' is the partition number
# compared against the trailing digits of `rootdev -s` output.
_KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
_KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}

# Time to wait for new kernel to be marked successful after
# auto update.
# NOTE(review): presumably in seconds; confirm against its users.
_KERNEL_UPDATE_TIMEOUT = 120
     53 
     54 
# PROVISION_FAILED - A flag file to indicate provision failures.  The
# file is created at the start of any AU procedure (see
# `ChromiumOSUpdater._prepare_host()`).  The file's location in
# stateful means that on successful update it will be removed.  Thus, if
# this file exists, it indicates that we've tried and failed in a
# previous attempt to update.
     61 PROVISION_FAILED = '/var/tmp/provision_failed'
     62 
     63 
     64 # A flag file used to enable special handling in lab DUTs.  Some
     65 # parts of the system in Chromium OS test images will behave in ways
     66 # convenient to the test lab when this file is present.  Generally,
     67 # we create this immediately after any update completes.
     68 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
     69 
     70 
     71 # _TARGET_VERSION - A file containing the new version to which we plan
     72 # to update.  This file is used by the CrOS shutdown code to detect and
     73 # handle certain version downgrade cases.  Specifically:  Downgrading
     74 # may trigger an unwanted powerwash in the target build when the
     75 # following conditions are met:
     76 #  * Source build is a v4.4 kernel with R69-10756.0.0 or later.
     77 #  * Target build predates the R69-10756.0.0 cutoff.
     78 # When this file is present and indicates a downgrade, the OS shutdown
     79 # code on the DUT knows how to prevent the powerwash.
     80 _TARGET_VERSION = '/run/update_target_version'
     81 
     82 
     83 # _REBOOT_FAILURE_MESSAGE - This is the standard message text returned
     84 # when the Host.reboot() method fails.  The source of this text comes
     85 # from `wait_for_restart()` in client/common_lib/hosts/base_classes.py.
     86 
     87 _REBOOT_FAILURE_MESSAGE = 'Host did not return from reboot'
     88 
     89 
class RootFSUpdateError(error.TestFail):
    """Raised when the RootFS fails to update.

    Within this module it reports problems with the root filesystem
    and kernel update: an unexpected update engine status, an
    unexpected kernel or root partition state, or a failed update or
    rollback command.
    """
     92 
     93 
class StatefulUpdateError(error.TestFail):
    """Raised when the stateful partition fails to update.

    Within this module it is raised when the `stateful_update` script
    cannot be found or installed on the DUT, or when running that
    script fails.
    """
     96 
     97 
     98 class _AttributedUpdateError(error.TestFail):
     99     """Update failure with an attributed cause."""
    100 
    101     def __init__(self, attribution, msg):
    102         super(_AttributedUpdateError, self).__init__(
    103             '%s: %s' % (attribution, msg))
    104         self._message = msg
    105 
    106     def _classify(self):
    107         for err_pattern, classification in self._CLASSIFIERS:
    108             if re.match(err_pattern, self._message):
    109                 return classification
    110         return None
    111 
    112     @property
    113     def failure_summary(self):
    114         """Summarize this error for metrics reporting."""
    115         classification = self._classify()
    116         if classification:
    117             return '%s: %s' % (self._SUMMARY, classification)
    118         else:
    119             return self._SUMMARY
    120 
    121 
    122 class HostUpdateError(_AttributedUpdateError):
    123     """Failure updating a DUT attributable to the DUT.
    124 
    125     This class of exception should be raised when the most likely cause
    126     of failure was a condition existing on the DUT prior to the update,
    127     such as a hardware problem, or a bug in the software on the DUT.
    128     """
    129 
    130     DUT_DOWN = 'No answer to ssh'
    131 
    132     _SUMMARY = 'DUT failed prior to update'
    133     _CLASSIFIERS = [
    134         (DUT_DOWN, DUT_DOWN),
    135         (_REBOOT_FAILURE_MESSAGE, 'Reboot failed'),
    136     ]
    137 
    138     def __init__(self, hostname, msg):
    139         super(HostUpdateError, self).__init__(
    140             'Error on %s prior to update' % hostname, msg)
    141 
    142 
    143 class DevServerError(_AttributedUpdateError):
    144     """Failure updating a DUT attributable to the devserver.
    145 
    146     This class of exception should be raised when the most likely cause
    147     of failure was the devserver serving the target image for update.
    148     """
    149 
    150     _SUMMARY = 'Devserver failed prior to update'
    151     _CLASSIFIERS = []
    152 
    153     def __init__(self, devserver, msg):
    154         super(DevServerError, self).__init__(
    155             'Devserver error on %s' % devserver, msg)
    156 
    157 
    158 class ImageInstallError(_AttributedUpdateError):
    159     """Failure updating a DUT when installing from the devserver.
    160 
    161     This class of exception should be raised when the target DUT fails
    162     to download and install the target image from the devserver, and
    163     either the devserver or the DUT might be at fault.
    164     """
    165 
    166     _SUMMARY = 'Image failed to download and install'
    167     _CLASSIFIERS = []
    168 
    169     def __init__(self, hostname, devserver, msg):
    170         super(ImageInstallError, self).__init__(
    171             'Download and install failed from %s onto %s'
    172             % (devserver, hostname), msg)
    173 
    174 
    175 class NewBuildUpdateError(_AttributedUpdateError):
    176     """Failure updating a DUT attributable to the target build.
    177 
    178     This class of exception should be raised when updating to a new
    179     build fails, and the most likely cause of the failure is a bug in
    180     the newly installed target build.
    181     """
    182 
    183     CHROME_FAILURE = 'Chrome failed to reach login screen'
    184     UPDATE_ENGINE_FAILURE = ('update-engine failed to call '
    185                              'chromeos-setgoodkernel')
    186     ROLLBACK_FAILURE = 'System rolled back to previous build'
    187 
    188     _SUMMARY = 'New build failed'
    189     _CLASSIFIERS = [
    190         (CHROME_FAILURE, 'Chrome did not start'),
    191         (UPDATE_ENGINE_FAILURE, 'update-engine did not start'),
    192         (ROLLBACK_FAILURE, ROLLBACK_FAILURE),
    193     ]
    194 
    195     def __init__(self, update_version, msg):
    196         super(NewBuildUpdateError, self).__init__(
    197             'Failure in build %s' % update_version, msg)
    198 
    199     @property
    200     def failure_summary(self):
    201         #pylint: disable=missing-docstring
    202         return 'Build failed to work after installing'
    203 
    204 
    205 def _url_to_version(update_url):
    206     """Return the version based on update_url.
    207 
    208     @param update_url: url to the image to update to.
    209 
    210     """
    211     # The Chrome OS version is generally the last element in the URL. The only
    212     # exception is delta update URLs, which are rooted under the version; e.g.,
    213     # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
    214     # strip off the au section of the path before reading the version.
    215     return re.sub('/au/.*', '',
    216                   urlparse.urlparse(update_url).path).split('/')[-1].strip()
    217 
    218 
    219 def url_to_image_name(update_url):
    220     """Return the image name based on update_url.
    221 
    222     From a URL like:
    223         http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    224     return lumpy-release/R27-3837.0.0
    225 
    226     @param update_url: url to the image to update to.
    227     @returns a string representing the image name in the update_url.
    228 
    229     """
    230     return '/'.join(urlparse.urlparse(update_url).path.split('/')[-2:])
    231 
    232 
    233 def get_update_failure_reason(exception):
    234     """Convert an exception into a failure reason for metrics.
    235 
    236     The passed in `exception` should be one raised by failure of
    237     `ChromiumOSUpdater.run_update`.  The returned string will describe
    238     the failure.  If the input exception value is not a truish value
    239     the return value will be `None`.
    240 
    241     The number of possible return strings is restricted to a limited
    242     enumeration of values so that the string may be safely used in
    243     Monarch metrics without worrying about cardinality of the range of
    244     string values.
    245 
    246     @param exception  Exception to be converted to a failure reason.
    247 
    248     @return A string suitable for use in Monarch metrics, or `None`.
    249     """
    250     if exception:
    251         if isinstance(exception, _AttributedUpdateError):
    252             return exception.failure_summary
    253         else:
    254             return 'Unknown Error: %s' % type(exception).__name__
    255     return None
    256 
    257 
    258 def _get_devserver_build_from_update_url(update_url):
    259     """Get the devserver and build from the update url.
    260 
    261     @param update_url: The url for update.
    262         Eg: http://devserver:port/update/build.
    263 
    264     @return: A tuple of (devserver url, build) or None if the update_url
    265         doesn't match the expected pattern.
    266 
    267     @raises ValueError: If the update_url doesn't match the expected pattern.
    268     @raises ValueError: If no global_config was found, or it doesn't contain an
    269         image_url_pattern.
    270     """
    271     pattern = global_config.global_config.get_config_value(
    272             'CROS', 'image_url_pattern', type=str, default='')
    273     if not pattern:
    274         raise ValueError('Cannot parse update_url, the global config needs '
    275                 'an image_url_pattern.')
    276     re_pattern = pattern.replace('%s', '(\S+)')
    277     parts = re.search(re_pattern, update_url)
    278     if not parts or len(parts.groups()) < 2:
    279         raise ValueError('%s is not an update url' % update_url)
    280     return parts.groups()
    281 
    282 
    283 def _list_image_dir_contents(update_url):
    284     """Lists the contents of the devserver for a given build/update_url.
    285 
    286     @param update_url: An update url. Eg: http://devserver:port/update/build.
    287     """
    288     if not update_url:
    289         logging.warning('Need update_url to list contents of the devserver.')
    290         return
    291     error_msg = 'Cannot check contents of devserver, update url %s' % update_url
    292     try:
    293         devserver_url, build = _get_devserver_build_from_update_url(update_url)
    294     except ValueError as e:
    295         logging.warning('%s: %s', error_msg, e)
    296         return
    297     devserver = dev_server.ImageServer(devserver_url)
    298     try:
    299         devserver.list_image_dir(build)
    300     # The devserver will retry on URLError to avoid flaky connections, but will
    301     # eventually raise the URLError if it persists. All HTTPErrors get
    302     # converted to DevServerExceptions.
    303     except (dev_server.DevServerException, urllib2.URLError) as e:
    304         logging.warning('%s: %s', error_msg, e)
    305 
    306 
    307 def _get_metric_fields(update_url):
    308     """Return a dict of metric fields.
    309 
    310     This is used for sending autoupdate metrics for the given update URL.
    311 
    312     @param update_url  Metrics fields will be calculated from this URL.
    313     """
    314     build_name = url_to_image_name(update_url)
    315     try:
    316         board, build_type, milestone, _ = server_utils.ParseBuildName(
    317             build_name)
    318     except server_utils.ParseBuildNameException:
    319         logging.warning('Unable to parse build name %s for metrics. '
    320                         'Continuing anyway.', build_name)
    321         board, build_type, milestone = ('', '', '')
    322     return {
    323         'dev_server': dev_server.get_resolved_hostname(update_url),
    324         'board': board,
    325         'build_type': build_type,
    326         'milestone': milestone,
    327     }
    328 
    329 
    330 # TODO(garnold) This implements shared updater functionality needed for
    331 # supporting the autoupdate_EndToEnd server-side test. We should probably
    332 # migrate more of the existing ChromiumOSUpdater functionality to it as we
    333 # expand non-CrOS support in other tests.
    334 class ChromiumOSUpdater(object):
    335     """Chromium OS specific DUT update functionality."""
    336 
    def __init__(self, update_url, host=None, interactive=True,
                 use_quick_provision=False):
        """Set up an updater for one update URL and host.

        The target version (`self.update_version`) is derived from
        `update_url`.

        @param update_url: The URL we want the update to use.
        @param host: A client.common_lib.hosts.Host implementation.
        @param interactive: Bool whether we are doing an interactive update.
        @param use_quick_provision: Whether we should attempt to perform
            the update using the quick-provision script.
        """
        self.host = host
        self.interactive = interactive
        self._use_quick_provision = use_quick_provision
        self.update_url = update_url
        self.update_version = _url_to_version(update_url)
    352 
    353 
    354     def _run(self, cmd, *args, **kwargs):
    355         """Abbreviated form of self.host.run(...)"""
    356         return self.host.run(cmd, *args, **kwargs)
    357 
    358 
    def check_update_status(self):
        """Query the host for the current update engine state.

        Runs `update_engine_client -status', keeps only the line that
        contains CURRENT_OP (e.g. "CURRENT_OP=UPDATE_STATUS_IDLE"), and
        returns the text after the last '=' on it.
        """
        status_cmd = '%s -status | grep CURRENT_OP' % _UPDATER_BIN
        result = self.host.run(command=status_cmd)
        current_op_line = result.stdout.strip()
        return current_op_line.rpartition('=')[2]
    368 
    369 
    370     def _rootdev(self, options=''):
    371         """Returns the stripped output of rootdev <options>.
    372 
    373         @param options: options to run rootdev.
    374 
    375         """
    376         return self._run('rootdev %s' % options).stdout.strip()
    377 
    378 
    def get_kernel_state(self):
        """Returns the (<active>, <inactive>) kernel state as a pair.

        The active root partition number is taken from the trailing
        digits of the `rootdev -s` output and compared against the
        'root' entries of `_KERNEL_A` and `_KERNEL_B`.

        @return A pair of kernel description dicts, active one first.

        @raise RootFSUpdateError if the DUT reports a root partition
                number that isn't one of the known valid values, or if
                no partition number can be found in the rootdev output.
        """
        rootdev_output = self._rootdev('-s')
        partition_match = re.search(r'\d+\Z', rootdev_output)
        if partition_match is None:
            # Report this like any other bad partition, rather than
            # letting an IndexError escape.
            raise RootFSUpdateError(
                    'Encountered unknown root partition: %s' % rootdev_output)
        active_root = int(partition_match.group(0))
        if active_root == _KERNEL_A['root']:
            return _KERNEL_A, _KERNEL_B
        elif active_root == _KERNEL_B['root']:
            return _KERNEL_B, _KERNEL_A
        else:
            raise RootFSUpdateError(
                    'Encountered unknown root partition: %s' % active_root)
    393 
    394 
    395     def _cgpt(self, flag, kernel):
    396         """Return numeric cgpt value for the specified flag, kernel, device."""
    397         return int(self._run('cgpt show -n -i %d %s $(rootdev -s -d)' % (
    398             kernel['kernel'], flag)).stdout.strip())
    399 
    400 
    def _get_next_kernel(self):
        """Return the kernel that has priority for the next boot.

        Compares the `cgpt` priority of both kernels; `_KERNEL_A` is
        returned only when its priority is strictly greater, otherwise
        `_KERNEL_B` is returned.
        """
        priority_a = self._cgpt('-P', _KERNEL_A)
        priority_b = self._cgpt('-P', _KERNEL_B)
        return _KERNEL_A if priority_a > priority_b else _KERNEL_B
    409 
    410 
    411     def _get_kernel_success(self, kernel):
    412         """Return boolean success flag for the specified kernel.
    413 
    414         @param kernel: information of the given kernel, either _KERNEL_A
    415             or _KERNEL_B.
    416         """
    417         return self._cgpt('-S', kernel) != 0
    418 
    419 
    420     def _get_kernel_tries(self, kernel):
    421         """Return tries count for the specified kernel.
    422 
    423         @param kernel: information of the given kernel, either _KERNEL_A
    424             or _KERNEL_B.
    425         """
    426         return self._cgpt('-T', kernel)
    427 
    428 
    def _get_last_update_error(self):
        """Get the last autoupdate error code.

        @return The stripped output of
                `update_engine_client --last_attempt_error`, with its
                lines joined by ', '.
        """
        error_cmd = '%s --last_attempt_error' % _UPDATER_BIN
        output = self._run(error_cmd).stdout.strip()
        return ', '.join(output.split('\n'))
    434 
    435 
    436     def _base_update_handler_no_retry(self, run_args):
    437         """Base function to handle a remote update ssh call.
    438 
    439         @param run_args: Dictionary of args passed to ssh_host.run function.
    440 
    441         @throws: intercepts and re-throws all exceptions
    442         """
    443         try:
    444             self.host.run(**run_args)
    445         except Exception as e:
    446             logging.debug('exception in update handler: %s', e)
    447             raise e
    448 
    449 
    def _base_update_handler(self, run_args, err_msg_prefix=None):
        """Handle a remote update ssh call, possibly with retries.

        @param run_args: Dictionary of args passed to ssh_host.run function.
        @param err_msg_prefix: Prefix of the exception error message.  If
            None, the error message is used without a prefix.

        @raise RootFSUpdateError if the call still fails after any
                retry; the message is the prefix (if given) followed by
                the text of the underlying exception.
        """
        def exception_handler(e):
            """Examines exceptions and returns True if the update handler
            should be retried.

            @param e: the exception intercepted by the retry util.
            """
            return (isinstance(e, error.AutoservSSHTimeout) or
                    (isinstance(e, error.GenericHostRunError) and
                     hasattr(e, 'description') and
                     (re.search('ERROR_CODE=37', e.description) or
                      re.search('generic error .255.', e.description))))

        try:
            # Try the update twice (arg 2 is max_retry, not including the first
            # call).  Some exceptions may be caught by the retry handler.
            retry_util.GenericRetry(exception_handler, 1,
                                    self._base_update_handler_no_retry,
                                    run_args)
        except Exception as e:
            # The prefix is optional; concatenating None would raise a
            # TypeError here and hide the real failure.
            if err_msg_prefix is None:
                message = str(e)
            else:
                message = err_msg_prefix + ': ' + str(e)
            raise RootFSUpdateError(message)
    477 
    478 
    def _wait_for_update_service(self):
        """Check that the update engine daemon is up and idle.

        The status check is retried a few times, since the DUT may have
        just rebooted and the service may not have started yet.

        @raise RootFSUpdateError if the update engine reports any
                status other than idle.
        """
        def should_retry(exc):
            """Decide whether the status check should be retried.

            An `AutoservRunError` is assumed to mean that the update
            service has not started yet, so it triggers a retry.

            @param exc: the exception intercepted by the retry util.
            """
            if not isinstance(exc, error.AutoservRunError):
                return False
            logging.debug('update service check exception: %s\n'
                          'retrying...', exc)
            return True

        # Retry at most three times, every 5s.
        current_status = retry_util.GenericRetry(should_retry, 3,
                                                 self.check_update_status,
                                                 sleep=5)

        # Anything other than idle is an error.
        if current_status == UPDATER_IDLE:
            return
        raise RootFSUpdateError(
                'Update engine status is %s (%s was expected).'
                % (current_status, UPDATER_IDLE))
    509 
    510 
    511     def _reset_update_engine(self):
    512         """Resets the host to prepare for a clean update regardless of state."""
    513         self._run('stop ui || true')
    514         self._run('stop update-engine || true')
    515         self._run('start update-engine')
    516         self._wait_for_update_service()
    517 
    518 
    def _reset_stateful_partition(self):
        """Clear any pending stateful update request.

        Runs the stateful update script with `--stateful_change=reset`
        and removes the target version file.
        """
        stateful_script = self._get_stateful_update_script()
        self._run('%s --stateful_change=reset 2>&1' % stateful_script)
        self._run('rm -f %s' % _TARGET_VERSION)
    524 
    525 
    def _set_target_version(self):
        """Record the "target version" for the update on the host.

        Writes the second '-'-separated field of `self.update_version`
        to the target version file.
        """
        version_fields = self.update_version.split('-')
        target_number = version_fields[1]
        self._run('echo %s > %s' % (target_number, _TARGET_VERSION))
    530 
    531 
    532     def _revert_boot_partition(self):
    533         """Revert the boot partition."""
    534         part = self._rootdev('-s')
    535         logging.warning('Reverting update; Boot partition will be %s', part)
    536         return self._run('/postinst %s 2>&1' % part)
    537 
    538 
    def _verify_kernel_state(self):
        """Check that the next boot will use the currently inactive kernel.

        After an update has been downloaded and installed successfully,
        the kernel with priority for the next boot must be the one that
        is currently inactive.

        @return The inactive kernel.

        @raise RootFSUpdateError if the DUT next kernel isn't the
                expected next kernel.
        """
        expected_kernel = self.get_kernel_state()[1]
        actual_kernel = self._get_next_kernel()
        if actual_kernel == expected_kernel:
            return expected_kernel
        raise RootFSUpdateError(
                'Update failed.  The kernel for next boot is %s, '
                'but %s was expected.'
                % (actual_kernel['name'], expected_kernel['name']))
    557 
    558 
    def _verify_update_completed(self):
        """Check that the update engine is waiting for a reboot.

        When the engine is idle instead, the last update error is
        included in the failure message.

        @return The inactive kernel, as returned by
                `_verify_kernel_state()`.

        @raise RootFSUpdateError if the DUT doesn't indicate that
                download is complete and the DUT is ready for reboot.
        """
        status = self.check_update_status()
        if status == UPDATER_NEED_REBOOT:
            return self._verify_kernel_state()

        detail = ''
        if status == UPDATER_IDLE:
            detail = 'Update error: %s' % self._get_last_update_error()
        raise RootFSUpdateError(
                'Update engine status is %s (%s was expected).  %s'
                % (status, UPDATER_NEED_REBOOT, detail))
    574 
    575 
    def trigger_update(self):
        """Ask the update engine to check for an update in the background.

        Whether or not the request succeeds, the
        'chromeos/autotest/autoupdater/trigger' counter is incremented
        with a 'success' field plus the fields for the update URL.
        """
        # This may be called right after a reboot, when the update
        # engine is not guaranteed to be running yet, so wait for it.
        self._wait_for_update_service()

        autoupdate_cmd = '%s --check_for_update --omaha_url=%s' % (
            _UPDATER_BIN, self.update_url)
        err_prefix = 'Failed to trigger an update on %s. ' % self.host.hostname
        logging.info('Triggering update via: %s', autoupdate_cmd)
        succeeded = False
        try:
            self._base_update_handler({'command': autoupdate_cmd}, err_prefix)
            succeeded = True
        finally:
            counter = metrics.Counter('chromeos/autotest/autoupdater/trigger')
            fields = {'success': succeeded}
            fields.update(_get_metric_fields(self.update_url))
            counter.increment(fields=fields)
    596 
    597 
    def update_image(self):
        """Update the device root FS and kernel, then verify the result.

        Whether or not the update command succeeds, the
        'chromeos/autotest/autoupdater/update' counter is incremented
        with a 'success' field plus the fields for the update URL.

        @return The inactive kernel, as returned by
                `_verify_update_completed()`.
        """
        autoupdate_cmd = '%s --update --omaha_url=%s' % (
            _UPDATER_BIN, self.update_url)
        if not self.interactive:
            autoupdate_cmd += ' --interactive=false'
        run_args = dict(command=autoupdate_cmd, timeout=3600)
        err_prefix = ('Failed to install device image using payload at %s '
                      'on %s. ' % (self.update_url, self.host.hostname))
        logging.info('Updating image via: %s', autoupdate_cmd)
        succeeded = False
        try:
            self._base_update_handler(run_args, err_prefix)
            succeeded = True
        finally:
            counter = metrics.Counter('chromeos/autotest/autoupdater/update')
            fields = {'success': succeeded}
            fields.update(_get_metric_fields(self.update_url))
            counter.increment(fields=fields)
        return self._verify_update_completed()
    617 
    618 
    619     def _get_remote_script(self, script_name):
    620         """Ensure that `script_name` is present on the DUT.
    621 
    622         The given script (e.g. `stateful_update`) may be present in the
    623         stateful partition under /usr/local/bin, or we may have to
    624         download it from the devserver.
    625 
    626         Determine whether the script is present or must be downloaded
    627         and download if necessary.  Then, return a command fragment
    628         sufficient to run the script from whereever it now lives on the
    629         DUT.
    630 
    631         @param script_name  The name of the script as expected in
    632                             /usr/local/bin and on the devserver.
    633         @return A string with the command (minus arguments) that will
    634                 run the target script.
    635         """
    636         remote_script = '/usr/local/bin/%s' % script_name
    637         if self.host.path_exists(remote_script):
    638             return remote_script
    639         remote_tmp_script = '/tmp/%s' % script_name
    640         server_name = urlparse.urlparse(self.update_url)[1]
    641         script_url = 'http://%s/static/%s' % (server_name, script_name)
    642         fetch_script = (
    643             'curl -o %s %s && head -1 %s | grep "^#!" | sed "s/#!//"') % (
    644                    remote_tmp_script, script_url, remote_tmp_script)
    645         script_interpreter = self._run(fetch_script,
    646                                        ignore_status=True).stdout.strip()
    647         if not script_interpreter:
    648             return None
    649         return '%s %s' % (script_interpreter, remote_tmp_script)
    650 
    651 
    def _get_stateful_update_script(self):
        """Return a command that runs the stateful update script.

        Locates `stateful_update` on the target, installing it first if
        necessary.

        @raise StatefulUpdateError if the script can't be found or
            installed.
        @return A string that can be joined with arguments to run the
            `stateful_update` command on the DUT.
        """
        script_command = self._get_remote_script(_STATEFUL_UPDATE_SCRIPT)
        if script_command:
            return script_command
        raise StatefulUpdateError('Could not install %s on DUT'
                                  % _STATEFUL_UPDATE_SCRIPT)
    668 
    669 
    def rollback_rootfs(self, powerwash):
        """Trigger a rollback and wait for it to finish.

        On builds numbered 5772 or later, `--can_rollback` is checked
        first.  The rollback itself is run with `--follow`, after which
        the update is verified as completed.

        @param powerwash: If true, powerwash as part of rollback.

        @raise RootFSUpdateError if anything went wrong.
        """
        release_version = self.host.get_release_version()
        # can_rollback was introduced in M36 (build 5772).
        # etc/lsb-release matches X.Y.Z; only the first part is needed.
        try:
            build_number = int(release_version.split('.')[0])
        except ValueError:
            logging.error('Could not parse build number.')
            build_number = 0

        if build_number >= 5772:
            logging.info('Checking for rollback.')
            try:
                self._run('%s --can_rollback' % _UPDATER_BIN)
            except error.AutoservRunError as e:
                raise RootFSUpdateError("Rollback isn't possible on %s: %s" %
                                        (self.host.hostname, str(e)))

        rollback_args = [_UPDATER_BIN, '--rollback', '--follow']
        if not powerwash:
            rollback_args.append('--nopowerwash')
        rollback_cmd = ' '.join(rollback_args)

        logging.info('Performing rollback.')
        try:
            self._run(rollback_cmd)
        except error.AutoservRunError as e:
            raise RootFSUpdateError('Rollback failed on %s: %s' %
                                    (self.host.hostname, str(e)))

        self._verify_update_completed()
    707 
    708 
    def update_stateful(self, clobber=True):
        """Run the stateful update script on the DUT.

        @param clobber: If True, request a clean stateful installation.

        @raise StatefulUpdateError if the update script fails to
                complete successfully.
        """
        logging.info('Updating stateful partition...')
        # The stateful payload URL is derived textually from the update
        # URL.
        # NOTE(review): str.replace() rewrites every occurrence of
        # 'update' in the URL, not only the path component -- confirm
        # that host and image names can never contain it.
        payload_url = self.update_url.replace('update', 'static')

        # This update must succeed so that the newly installed host is
        # testable afterwards.  stderr is folded into stdout.
        pieces = [self._get_stateful_update_script(), payload_url]
        pieces.extend(['--stateful_change=clean'] if clobber else [])
        pieces.append('2>&1')
        command = ' '.join(pieces)
        try:
            self._run(command, timeout=1200)
        except error.AutoservRunError:
            message = ('Failed to perform stateful update on %s' %
                       self.host.hostname)
            raise StatefulUpdateError(message)
    733 
    734 
    def verify_boot_expectations(self, expected_kernel, rollback_message):
        """Verifies that we fully booted given expected kernel state.

        This method both verifies that we booted using the correct kernel
        state and that the OS has marked the kernel as good.

        @param expected_kernel: kernel that we are verifying with,
            i.e. I expect to be booted onto partition 4 etc. See output of
            get_kernel_state.
        @param rollback_message: string to include in the exception
            message text if we booted with the wrong partition.

        @raise NewBuildUpdateError if any of the various checks fail.
        """
        # Figure out the newly active kernel.
        active_kernel = self.get_kernel_state()[0]

        # Check for rollback due to a bad build.
        if active_kernel != expected_kernel:

            # Kernel crash reports should be wiped between test runs, but
            # may persist from earlier parts of the test, or from problems
            # with provisioning.
            #
            # Kernel crash reports will NOT be present if the crash happened
            # before encrypted stateful is mounted.
            #
            # The reports live on the DUT, so they have to be listed
            # there; a local glob would inspect the machine running this
            # code instead.  `ls` exits non-zero with empty stdout when
            # nothing matches, hence `ignore_status`.
            #
            # TODO(dgarrett): Integrate with server/crashcollect.py at some
            # point.
            listing = self._run(
                    'ls /var/spool/crash/kernel.*.kcrash 2>/dev/null',
                    ignore_status=True)
            kernel_crashes = listing.stdout.split()
            if kernel_crashes:
                rollback_message += ': kernel_crash'
                logging.debug('Found %d kernel crash reports:',
                              len(kernel_crashes))
                # The crash names contain timestamps that may be useful:
                #   kernel.20131207.005945.0.kcrash
                for crash in kernel_crashes:
                    logging.debug('  %s', os.path.basename(crash))

            # Print out some information to make it easier to debug
            # the rollback.
            logging.debug('Dumping partition table.')
            self._run('cgpt show $(rootdev -s -d)')
            logging.debug('Dumping crossystem for firmware debugging.')
            self._run('crossystem --all')
            raise NewBuildUpdateError(self.update_version, rollback_message)

        # Make sure chromeos-setgoodkernel runs: wait until the active
        # kernel has no tries left and is marked successful.
        try:
            utils.poll_for_condition(
                lambda: (self._get_kernel_tries(active_kernel) == 0
                         and self._get_kernel_success(active_kernel)),
                exception=RootFSUpdateError(),
                timeout=_KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
        except RootFSUpdateError:
            # The kernel was never marked good.  Attribute the failure
            # based on whether the `system-services` job is running.
            services_status = self._run('status system-services').stdout
            if services_status != 'system-services start/running\n':
                event = NewBuildUpdateError.CHROME_FAILURE
            else:
                event = NewBuildUpdateError.UPDATE_ENGINE_FAILURE
            raise NewBuildUpdateError(self.update_version, event)
    796 
    797 
    def _prepare_host(self):
        """Bring the target DUT into a known state ahead of the update.

        The DUT's initial state is unknown.  It is expected to be
        online, but we strive to be forgiving if Chrome and/or the
        update engine aren't fully functional.

        @raise HostUpdateError if the DUT is not up.
        """
        dut = self.host

        # Nothing below can work unless the DUT is reachable.
        if not dut.is_up():
            raise HostUpdateError(dut.hostname, HostUpdateError.DUT_DOWN)

        # Reset the stateful partition before rebooting.
        self._reset_stateful_partition()

        # Reboot, because it's a good way to clear out problems.
        dut.reboot(timeout=dut.REBOOT_TIMEOUT)

        # Touch the PROVISION_FAILED file, to allow repair to detect
        # failure later.
        self._run('touch %s' % PROVISION_FAILED)

        # Run the hook for host class specific preparation.
        dut.prepare_for_update()

        # NOTE(review): presumably this is where Chrome is stopped (the
        # system is designed to eventually reboot if Chrome is stuck in
        # a crash loop) and `update-engine` is forced to start (its job
        # status is uncertain if Chrome failed to start properly) --
        # confirm against `_reset_update_engine`.
        self._reset_update_engine()

        logging.info('Updating from version %s to %s.',
                     dut.get_release_version(),
                     self.update_version)
    826 
    827 
    def _verify_devserver(self):
        """Check that our chosen devserver is still working.

        The devserver address is taken from the network location of
        `self.update_url`.

        @raise DevServerError if the health check itself fails
            ('Devserver is not up and available'), or if it completes
            and reports the devserver as unhealthy ('Devserver is not
            healthy').
        """
        server = 'http://%s' % urlparse.urlparse(self.update_url)[1]
        try:
            healthy = dev_server.ImageServer.devserver_healthy(server)
        except Exception:
            # Keep the underlying cause in the logs; only the summary
            # travels with the DevServerError.
            logging.exception('Devserver health check failed for %s',
                              server)
            raise DevServerError(
                    server, 'Devserver is not up and available')
        # The verdict is evaluated outside the `try` so that the more
        # specific "not healthy" error isn't swallowed and replaced by
        # the blanket handler above.
        if not healthy:
            raise DevServerError(
                    server, 'Devserver is not healthy')
    841 
    842 
    843     def _install_via_update_engine(self):
    844         """Install an updating using the production AU flow.
    845 
    846         This uses the standard AU flow and the `stateful_update` script
    847         to download and install a root FS, kernel and stateful
    848         filesystem content.
    849 
    850         @return The kernel expected to be booted next.
    851         """
    852         logging.info('Installing image using update_engine.')
    853         expected_kernel = self.update_image()
    854         self.update_stateful()
    855         self._set_target_version()
    856         return expected_kernel
    857 
    858 
    859     def _install_via_quick_provision(self):
    860         """Install an updating using the `quick-provision` script.
    861 
    862         This uses the `quick-provision` script to download and install
    863         a root FS, kernel and stateful filesystem content.
    864 
    865         @return The kernel expected to be booted next.
    866         """
    867         if not self._use_quick_provision:
    868             return None
    869         build_re = global_config.global_config.get_config_value(
    870                 'CROS', 'quick_provision_build_regex', type=str, default='')
    871         image_name = url_to_image_name(self.update_url)
    872         if not build_re or re.match(build_re, image_name) is None:
    873             logging.info('Not eligible for quick-provision.')
    874             return None
    875         logging.info('Installing image using quick-provision.')
    876         provision_command = self._get_remote_script(_QUICK_PROVISION_SCRIPT)
    877         server_name = urlparse.urlparse(self.update_url)[1]
    878         static_url = 'http://%s/static' % server_name
    879         command = '%s --noreboot %s %s' % (
    880                       provision_command, image_name, static_url)
    881         try:
    882             self._run(command)
    883             self._set_target_version()
    884             return self._verify_kernel_state()
    885         except Exception:
    886             # N.B.  We handle only `Exception` here.  Non-Exception
    887             # classes (such as KeyboardInterrupt) are handled by our
    888             # caller.
    889             logging.exception('quick-provision script failed; '
    890                               'will fall back to update_engine.')
    891             self._revert_boot_partition()
    892             self._reset_stateful_partition()
    893             self._reset_update_engine()
    894             return None
    895 
    896 
    897     def _install_update(self):
    898         """Install the requested image on the DUT, but don't start it.
    899 
    900         This downloads and installs a root FS, kernel and stateful
    901         filesystem content.  This does not reboot the DUT, so the update
    902         is merely pending when the method returns.
    903 
    904         @return The kernel expected to be booted next.
    905         """
    906         logging.info('Installing image at %s onto %s',
    907                      self.update_url, self.host.hostname)
    908         try:
    909             return (self._install_via_quick_provision()
    910                     or self._install_via_update_engine())
    911         except:
    912             # N.B. This handling code includes non-Exception classes such
    913             # as KeyboardInterrupt.  We need to clean up, but we also must
    914             # re-raise.
    915             self._revert_boot_partition()
    916             self._reset_stateful_partition()
    917             self._reset_update_engine()
    918             # Collect update engine logs in the event of failure.
    919             if self.host.job:
    920                 logging.info('Collecting update engine logs due to failure...')
    921                 self.host.get_file(
    922                         _UPDATER_LOGS, self.host.job.sysinfo.sysinfodir,
    923                         preserve_perm=False)
    924             _list_image_dir_contents(self.update_url)
    925             raise
    926 
    927 
    def _complete_update(self, expected_kernel):
        """Reboot into the new build and confirm that it is healthy.

        Initial condition is that the target build has been downloaded
        and installed on the DUT, but has not yet been booted.  This
        function is responsible for rebooting the DUT, and checking that
        the new build is running successfully.

        @param expected_kernel: kernel expected to be active after reboot.
        """
        # In some cases the update flow puts the TPM into a state such
        # that it fails verification.  We don't know why.  Requesting
        # that the TPM owner be cleared during the reboot papers over
        # the problem.
        #
        # Failures from 'crossystem' are ignored.  Although failure
        # here is unexpected, and could signal a bug, the point of the
        # exercise is to paper over problems; allowing this to fail
        # would defeat the purpose.
        self._run('crossystem clear_tpm_owner_request=1',
                  ignore_status=True)
        self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)

        # Leave the lab machine file behind as a marker that
        # distinguishes this image from other test images.  If the file
        # had to be created, the autoreboot job is started, because it
        # depends on the _LAB_MACHINE_FILE.
        marker_cmd_template = ('FILE="%s" ; [ -f "$FILE" ] || '
                               '( touch "$FILE" ; start autoreboot )')
        marker_cmd = marker_cmd_template % _LAB_MACHINE_FILE
        self._run(marker_cmd)

        failure_reason = NewBuildUpdateError.ROLLBACK_FAILURE
        self.verify_boot_expectations(expected_kernel, failure_reason)

        logging.debug('Cleaning up old autotest directories.')
        try:
            stale_autodir = autotest.Autotest.get_installed_autodir(
                    self.host)
            self._run('rm -rf ' + stale_autodir)
        except autotest.AutodirNotFoundError:
            logging.debug('No autotest installed directory found.')
    969 
    970 
    def run_update(self):
        """Perform a full update of a DUT in the test lab.

        This downloads and installs the root FS and stateful partition
        content needed for the update specified in `self.host` and
        `self.update_url`.  The update is performed according to the
        requirements for provisioning a DUT for testing the requested
        build.

        The work is done in three phases: preparing the host,
        installing the image, and rebooting into and verifying the new
        build.  In each phase an `_AttributedUpdateError` propagates
        unchanged, while any other `Exception` is logged and converted
        to the error type that attributes the failure to that phase.

        An 'install' counter is incremented at the start of the
        procedure, tagged with the devserver's resolved hostname.

        @raise HostUpdateError if preparing the host fails.
        @raise ImageInstallError if download and install fails.
        @raise NewBuildUpdateError if the new build fails after the
            update.

        @returns A tuple of the form `(image_name, attributes)`, where
            `image_name` is the name of the image installed, and
            `attributes` is new attributes to be applied to the DUT.
        """
        # Resolved once here; reused below when attributing an install
        # failure to the devserver.
        server_name = dev_server.get_resolved_hostname(self.update_url)
        metrics.Counter(_metric_name('install')).increment(
                fields={'devserver': server_name})

        self._verify_devserver()

        try:
            self._prepare_host()
        except _AttributedUpdateError:
            raise
        except Exception as e:
            logging.exception('Failure preparing host prior to update.')
            raise HostUpdateError(self.host.hostname, str(e))

        try:
            expected_kernel = self._install_update()
        except _AttributedUpdateError:
            raise
        except Exception as e:
            logging.exception('Failure during download and install.')
            # Deliberately no second hostname resolution here: it would
            # be redundant, and a failure in it would mask the real
            # install error.
            raise ImageInstallError(self.host.hostname, server_name, str(e))

        try:
            self._complete_update(expected_kernel)
        except _AttributedUpdateError:
            raise
        except Exception as e:
            logging.exception('Failure from build after update.')
            raise NewBuildUpdateError(self.update_version, str(e))

        image_name = url_to_image_name(self.update_url)
        # update_url is different from devserver url needed to stage autotest
        # packages, therefore, resolve a new devserver url here.
        devserver_url = dev_server.ImageServer.resolve(
                image_name, self.host.hostname).url()
        repo_url = tools.get_package_url(devserver_url, image_name)
        return image_name, {ds_constants.JOB_REPO_URL: repo_url}
   1025