Home | History | Annotate | Download | only in provision_AutoUpdate
      1 # Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import logging
      6 import re
      7 import sys
      8 import time
      9 import urllib2
     10 
     11 from autotest_lib.client.common_lib import error
     12 from autotest_lib.client.common_lib import global_config
     13 from autotest_lib.client.common_lib.cros import dev_server
     14 from autotest_lib.server import afe_utils
     15 from autotest_lib.server import test
     16 from autotest_lib.server import utils
     17 from autotest_lib.server.cros import autoupdater
     18 from autotest_lib.server.cros import provision
     19 
     20 
     21 try:
     22     from chromite.lib import metrics
     23 except ImportError:
     24     metrics = utils.metrics_mock
     25 
     26 
_CONFIG = global_config.global_config
# Format string read from the 'image_url_pattern' option in the 'CROS'
# section of the global config.  run_once() fills it in with
# (devserver URL, image name) to build the update URL.
# pylint: disable-msg=E1120
_IMAGE_URL_PATTERN = _CONFIG.get_config_value(
        'CROS', 'image_url_pattern', type=str)
     31 
     32 
     33 def _metric_name(base_name):
     34     return 'chromeos/autotest/provision/' + base_name
     35 
     36 
     37 def _get_build_metrics_fields(build_name):
     38     try:
     39         return utils.ParseBuildName(build_name)[0 : 2]
     40     except utils.ParseBuildNameException:
     41         logging.warning('Unable to parse build name %s for metrics. '
     42                         'Continuing anyway.', build_name)
     43         return ('', '')
     44 
     45 
     46 def _emit_updater_metrics(name_prefix, build_name, failure_reason,
     47                           duration, fields):
     48     # reset_after=True is required for String gauges events to ensure that
     49     # the metrics are not repeatedly emitted until the server restarts.
     50     metrics.String(_metric_name(name_prefix + '_build_by_devserver_dut'),
     51                    reset_after=True).set(build_name, fields=fields)
     52     if failure_reason:
     53         metrics.String(
     54                 _metric_name(name_prefix + '_failure_reason_by_devserver_dut'),
     55                 reset_after=True).set(failure_reason, fields=fields)
     56     metrics.SecondsDistribution(
     57             _metric_name(name_prefix + '_duration_by_devserver_dut')).add(
     58                     duration, fields=fields)
     59 
     60 
     61 def _emit_provision_metrics(update_url, dut_host_name,
     62                           exception, duration):
     63     # The following is high cardinality, but sparse.
     64     # Each DUT is of a single board type, and likely build type.
     65     #
     66     # TODO(jrbarnette) The devserver-triggered provisioning code
     67     # includes retries in certain cases.  For that reason, the metrics
     68     # distinguish 'provision' metrics which summarizes across all
     69     # retries, and 'auto_update' which summarizes an individual update
     70     # attempt.  ChromiumOSUpdater doesn't do retries, so we just report
     71     # the same information twice.  We should replace the metrics with
     72     # something better tailored to the current implementation.
     73     build_name = autoupdater.url_to_image_name(update_url)
     74     board, build_type = _get_build_metrics_fields(build_name)
     75     fields = {
     76         'board': board,
     77         'build_type': build_type,
     78         'dut_host_name': dut_host_name,
     79         'dev_server': dev_server.get_resolved_hostname(update_url),
     80         'success': not exception,
     81     }
     82     failure_reason = autoupdater.get_update_failure_reason(exception)
     83     _emit_updater_metrics('provision', build_name, failure_reason,
     84                           duration, fields)
     85     fields['attempt'] = 1
     86     _emit_updater_metrics('auto_update', build_name, failure_reason,
     87                           duration, fields)
     88 
     89 
     90 class provision_AutoUpdate(test.test):
     91     """A test that can provision a machine to the correct ChromeOS version."""
     92     version = 1
     93 
     94     def initialize(self, host, value, is_test_na=False):
     95         """Initialize.
     96 
     97         @param host: The host object to update to |value|.
     98         @param value: The build type and version to install on the host.
     99         @param is_test_na: boolean, if True, will simply skip the test
    100                            and emit TestNAError. The control file
    101                            determines whether the test should be skipped
    102                            and passes the decision via this argument. Note
    103                            we can't raise TestNAError in control file as it won't
    104                            be caught and handled properly.
    105         """
    106         if is_test_na:
    107             raise error.TestNAError(
    108                 'Test not available for test_that. chroot detected, '
    109                 'you are probably using test_that.')
    110         # We check value in initialize so that it fails faster.
    111         if not value:
    112             raise error.TestFail('No build version specified.')
    113 
    114 
    115     def run_once(self, host, value, force_update_engine=False):
    116         """The method called by the control file to start the test.
    117 
    118         @param host: The host object to update to |value|.
    119         @param value: The host object to provision with a build corresponding
    120                       to |value|.
    121         @param force_update_engine: When true, the update flow must
    122                       perform the update unconditionally, using
    123                       update_engine.  Optimizations that could suppress
    124                       invoking update_engine, including quick-provision,
    125                       mustn't be used.
    126         """
    127         with_cheets = False
    128         logging.debug('Start provisioning %s to %s.', host, value)
    129         if value.endswith(provision.CHEETS_SUFFIX):
    130             image = re.sub(provision.CHEETS_SUFFIX + '$', '', value)
    131             with_cheets = True
    132         else:
    133             image = value
    134 
    135         # If the host is already on the correct build, we have nothing to do.
    136         # Note that this means we're not doing any sort of stateful-only
    137         # update, and that we're relying more on cleanup to do cleanup.
    138         if not force_update_engine:
    139             info = host.host_info_store.get()
    140             if info.build == value:
    141                 # We can't raise a TestNA, as would make sense, as that makes
    142                 # job.run_test return False as if the job failed.  However, it'd
    143                 # still be nice to get this into the status.log, so we manually
    144                 # emit an INFO line instead.
    145                 self.job.record('INFO', None, None,
    146                                 'Host already running %s' % value)
    147                 return
    148 
    149         # We're about to reimage a machine, so we need full_payload and
    150         # stateful.  If something happened where the devserver doesn't have one
    151         # of these, then it's also likely that it'll be missing autotest.
    152         # Therefore, we require the devserver to also have autotest staged, so
    153         # that the test that runs after this provision finishes doesn't error
    154         # out because the devserver that its job_repo_url is set to is missing
    155         # autotest test code.
    156         # TODO(milleral): http://crbug.com/249426
    157         # Add an asynchronous staging call so that we can ask the devserver to
    158         # fetch autotest in the background here, and then wait on it after
    159         # reimaging finishes or at some other point in the provisioning.
    160         ds = None
    161         use_quick_provision = False
    162         try:
    163             ds = dev_server.ImageServer.resolve(image, host.hostname)
    164             ds.stage_artifacts(image, ['full_payload', 'stateful',
    165                                        'autotest_packages'])
    166             if not force_update_engine:
    167                 try:
    168                     ds.stage_artifacts(image, ['quick_provision'])
    169                     use_quick_provision = True
    170                 except dev_server.DevServerException as e:
    171                     logging.warning('Unable to stage quick provision '
    172                                     'payload: %s', e)
    173         except dev_server.DevServerException as e:
    174             raise error.TestFail, str(e), sys.exc_info()[2]
    175         finally:
    176             # If a devserver is resolved, Log what has been downloaded so far.
    177             if ds:
    178                 try:
    179                     ds.list_image_dir(image)
    180                 except (dev_server.DevServerException, urllib2.URLError) as e2:
    181                     logging.warning('Failed to list_image_dir for build %s. '
    182                                     'Error: %s', image, e2)
    183 
    184         url = _IMAGE_URL_PATTERN % (ds.url(), image)
    185 
    186         logging.debug('Installing image')
    187         start_time = time.time()
    188         failure = None
    189         try:
    190             afe_utils.machine_install_and_update_labels(
    191                     host, url, use_quick_provision, with_cheets)
    192         except BaseException as e:
    193             failure = e
    194             raise
    195         finally:
    196             _emit_provision_metrics(
    197                 url, host.hostname, failure, time.time() - start_time)
    198         logging.debug('Finished provisioning %s to %s', host, value)
    199