# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import glob
import httplib
import logging
import multiprocessing
import os
import re
import urlparse
import urllib2

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error, global_config
from autotest_lib.client.common_lib.cros import dev_server
from autotest_lib.client.common_lib.cros.graphite import autotest_stats


# Local stateful update path is relative to the CrOS source directory.
LOCAL_STATEFUL_UPDATE_PATH = 'src/platform/dev/stateful_update'
LOCAL_CHROOT_STATEFUL_UPDATE_PATH = '/usr/bin/stateful_update'
UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
# A list of update engine client states that occur after an update is triggered.
# NOTE(review): the first entry is spelled CHECKING_FORUPDATE (no underscore
# between FOR and UPDATE); confirm against the strings update_engine_client
# actually reports before relying on it for comparisons.
UPDATER_PROCESSING_UPDATE = ['UPDATE_STATUS_CHECKING_FORUPDATE',
                             'UPDATE_STATUS_UPDATE_AVAILABLE',
                             'UPDATE_STATUS_DOWNLOADING',
                             'UPDATE_STATUS_FINALIZING']


class ChromiumOSError(error.InstallError):
    """Generic error for ChromiumOS-specific exceptions."""


class BrilloError(error.InstallError):
    """Generic error for Brillo-specific exceptions."""


class RootFSUpdateError(ChromiumOSError):
    """Raised when the RootFS fails to update."""


class StatefulUpdateError(ChromiumOSError):
    """Raised when the stateful partition fails to update."""


def url_to_version(update_url):
    """Return the version based on update_url.

    @param update_url: url to the image to update to.

    @returns the last path component of the URL (after dropping any
             trailing '/au/...' delta section), stripped of whitespace.

    """
    # The Chrome OS version is generally the last element in the URL. The only
    # exception is delta update URLs, which are rooted under the version; e.g.,
    # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
    # strip off the au section of the path before reading the version.
    return re.sub('/au/.*', '',
                  urlparse.urlparse(update_url).path).split('/')[-1].strip()


def url_to_image_name(update_url):
    """Return the image name based on update_url.

    From a URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    return lumpy-release/R27-3837.0.0

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url.

    """
    return '/'.join(urlparse.urlparse(update_url).path.split('/')[-2:])


def _get_devserver_build_from_update_url(update_url):
    """Get the devserver and build from the update url.

    The global config value CROS/image_url_pattern is turned into a regular
    expression by replacing each '%s' placeholder with a capture group.

    @param update_url: The url for update.
        Eg: http://devserver:port/update/build.

    @return: A tuple of the captured groups, i.e. (devserver url, build).

    @raises ValueError: If the update_url doesn't match the expected pattern.
    @raises ValueError: If no global_config was found, or it doesn't contain an
                image_url_pattern.
    """
    pattern = global_config.global_config.get_config_value(
            'CROS', 'image_url_pattern', type=str, default='')
    if not pattern:
        raise ValueError('Cannot parse update_url, the global config needs '
                         'an image_url_pattern.')
    # Raw string so that '\S' reaches the regex engine verbatim instead of
    # relying on Python passing through an unrecognized string escape.
    re_pattern = pattern.replace('%s', r'(\S+)')
    parts = re.search(re_pattern, update_url)
    if not parts or len(parts.groups()) < 2:
        raise ValueError('%s is not an update url' % update_url)
    return parts.groups()


def list_image_dir_contents(update_url):
    """Lists the contents of the devserver for a given build/update_url.

    This is a best-effort debugging aid: every failure is logged as a
    warning and swallowed, and nothing is returned.

    @param update_url: An update url. Eg: http://devserver:port/update/build.
    """
    if not update_url:
        logging.warning('Need update_url to list contents of the devserver.')
        return
    error_msg = 'Cannot check contents of devserver, update url %s' % update_url
    try:
        devserver_url, build = _get_devserver_build_from_update_url(update_url)
    except ValueError as e:
        logging.warning('%s: %s', error_msg, e)
        return
    devserver = dev_server.ImageServer(devserver_url)
    try:
        devserver.list_image_dir(build)
    # The devserver will retry on URLError to avoid flaky connections, but will
    # eventually raise the URLError if it persists. All HTTPErrors get
    # converted to DevServerExceptions.
    except (dev_server.DevServerException, urllib2.URLError) as e:
        logging.warning('%s: %s', error_msg, e)


# TODO(garnold) This implements shared updater functionality needed for
# supporting the autoupdate_EndToEnd server-side test. We should probably
# migrate more of the existing ChromiumOSUpdater functionality to it as we
# expand non-CrOS support in other tests.
class BaseUpdater(object):
    """Platform-agnostic DUT update functionality."""

    def __init__(self, updater_ctrl_bin, update_url, host):
        """Initializes the object.

        @param updater_ctrl_bin: Path to update_engine_client.
        @param update_url: The URL we want the update to use.
        @param host: A client.common_lib.hosts.Host implementation.
        """
        self.updater_ctrl_bin = updater_ctrl_bin
        self.update_url = update_url
        self.host = host
        # Errors raised by the update methods are also pushed onto this
        # queue so that a parent process can retrieve them when the methods
        # run in child processes (see ChromiumOSUpdater.run_update).
        self._update_error_queue = multiprocessing.Queue(2)


    def check_update_status(self):
        """Returns the current update engine state.

        We use the `update_engine_client -status' command and parse the line
        indicating the update state, e.g. "CURRENT_OP=UPDATE_STATUS_IDLE".

        @returns the text following the last '=' on the CURRENT_OP line.
        """
        update_status = self.host.run(
                '%s -status 2>&1 | grep CURRENT_OP' % self.updater_ctrl_bin)
        return update_status.stdout.strip().split('=')[-1]


    def trigger_update(self):
        """Triggers a background update.

        @raise RootFSUpdateError if anything went wrong.
        """
        autoupdate_cmd = ('%s --check_for_update --omaha_url=%s' %
                          (self.updater_ctrl_bin, self.update_url))
        err_msg = 'Failed to trigger an update on %s.' % self.host.hostname
        logging.info('Triggering update via: %s', autoupdate_cmd)
        try:
            self.host.run(autoupdate_cmd)
        except (error.AutoservSshPermissionDeniedError,
                error.AutoservSSHTimeout) as e:
            err_msg += ' SSH reports an error: %s' % type(e).__name__
            raise RootFSUpdateError(err_msg)
        except error.AutoservRunError as e:
            # Check if the exit code is 255, if so it's probably a generic
            # SSH error.
            result = e.args[1]
            if result.exit_status == 255:
                err_msg += (' SSH reports a generic error (255), which could '
                            'indicate a problem with underlying connectivity '
                            'layers.')
                raise RootFSUpdateError(err_msg)

            # We have ruled out all SSH cases, the error code is from
            # update_engine_client, though we still don't know why.
            list_image_dir_contents(self.update_url)
            err_msg += (' It could be that the devserver is unreachable, the '
                        'payload unavailable, or there is a bug in the update '
                        'engine (unlikely). Reported error: %s' %
                        type(e).__name__)
            raise RootFSUpdateError(err_msg)


    def _verify_update_completed(self):
        """Verifies that an update has completed.

        @raise RootFSUpdateError: if verification fails.
        """
        status = self.check_update_status()
        if status != UPDATER_NEED_REBOOT:
            raise RootFSUpdateError('Update did not complete with correct '
                                    'status. Expecting %s, actual %s' %
                                    (UPDATER_NEED_REBOOT, status))


    def update_image(self):
        """Updates the device image and verifies success.

        Any exception is pushed onto the update error queue before being
        raised.

        @raise RootFSUpdateError: if the update command fails or the update
                engine does not end up in the need-reboot state.
        """
        try:
            autoupdate_cmd = ('%s --update --omaha_url=%s 2>&1' %
                              (self.updater_ctrl_bin, self.update_url))
            self.host.run(autoupdate_cmd, timeout=3600)
        except error.AutoservRunError as e:
            list_image_dir_contents(self.update_url)
            update_error = RootFSUpdateError(
                    'Failed to install device image using payload at %s '
                    'on %s: %s' %
                    (self.update_url, self.host.hostname, e))
            self._update_error_queue.put(update_error)
            raise update_error
        except Exception as e:
            # Don't allow other exceptions to not be caught.
            self._update_error_queue.put(e)
            # Bare raise keeps the original traceback; 'raise e' would
            # restart it from this line.
            raise

        try:
            self._verify_update_completed()
        except RootFSUpdateError as e:
            self._update_error_queue.put(e)
            raise


class ChromiumOSUpdater(BaseUpdater):
    """Helper class used to update DUT with image of desired version."""
    REMOTE_STATEUL_UPDATE_PATH = '/usr/local/bin/stateful_update'
    UPDATER_BIN = '/usr/bin/update_engine_client'
    STATEFUL_UPDATE = '/tmp/stateful_update'
    UPDATED_MARKER = '/var/run/update_engine_autoupdate_completed'
    UPDATER_LOGS = ['/var/log/messages', '/var/log/update_engine']

    KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
    KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}
    # Time to wait for new kernel to be marked successful after
    # auto update.
    KERNEL_UPDATE_TIMEOUT = 120

    _timer = autotest_stats.Timer('cros_autoupdater')

    def __init__(self, update_url, host=None, local_devserver=False):
        """Initializes the object.

        @param update_url: The URL we want the update to use.
        @param host: A client.common_lib.hosts.Host implementation.
        @param local_devserver: If True, the expected version is not derived
                from update_url and update_version is left as None.
        """
        super(ChromiumOSUpdater, self).__init__(self.UPDATER_BIN, update_url,
                                                host)
        self.local_devserver = local_devserver
        if not local_devserver:
            self.update_version = url_to_version(update_url)
        else:
            self.update_version = None


    def reset_update_engine(self):
        """Resets the host to prepare for a clean update regardless of state.

        @raise ChromiumOSError: if the update engine is not idle after being
                restarted.
        """
        self._run('rm -f %s' % self.UPDATED_MARKER)
        self._run('stop ui || true')
        self._run('stop update-engine || true')
        self._run('start update-engine')

        if self.check_update_status() != UPDATER_IDLE:
            raise ChromiumOSError('%s is not in an installable state' %
                                  self.host.hostname)


    def _run(self, cmd, *args, **kwargs):
        """Abbreviated form of self.host.run(...)"""
        return self.host.run(cmd, *args, **kwargs)


    def rootdev(self, options=''):
        """Returns the stripped output of rootdev <options>.

        @param options: options to run rootdev.

        """
        return self._run('rootdev %s' % options).stdout.strip()


    def get_kernel_state(self):
        """Returns the (<active>, <inactive>) kernel state as a pair.

        The active root partition number is read from the trailing digits of
        the `rootdev -s` output.

        @raise ChromiumOSError: if the partition number cannot be parsed or
                matches neither KERNEL_A nor KERNEL_B.
        """
        rootdev_output = self.rootdev('-s')
        partition_match = re.search(r'\d+\Z', rootdev_output)
        if not partition_match:
            # Surface the unexpected output instead of an opaque IndexError.
            raise ChromiumOSError('Could not determine root partition from '
                                  'rootdev output: %r' % rootdev_output)
        active_root = int(partition_match.group(0))
        if active_root == self.KERNEL_A['root']:
            return self.KERNEL_A, self.KERNEL_B
        elif active_root == self.KERNEL_B['root']:
            return self.KERNEL_B, self.KERNEL_A
        else:
            raise ChromiumOSError('Encountered unknown root partition: %s' %
                                  active_root)


    def _cgpt(self, flag, kernel, dev='$(rootdev -s -d)'):
        """Return numeric cgpt value for the specified flag, kernel, device.

        @param flag: cgpt show option selecting the value, e.g. '-P'.
        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.
        @param dev: device expression passed to cgpt on the host.

        """
        return int(self._run('cgpt show -n -i %d %s %s' % (
            kernel['kernel'], flag, dev)).stdout.strip())


    def get_kernel_priority(self, kernel):
        """Return numeric priority for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-P', kernel)


    def get_kernel_success(self, kernel):
        """Return boolean success flag for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-S', kernel) != 0


    def get_kernel_tries(self, kernel):
        """Return tries count for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-T', kernel)


    def get_stateful_update_script(self):
        """Returns the path to the stateful update script on the target."""
        # We attempt to load the local stateful update path in 3 different
        # ways. First we use the location specified in the autotest global
        # config. If this doesn't exist, we attempt to use the Chromium OS
        # Chroot path to the installed script. If all else fails, we use the
        # stateful update script on the host.
        stateful_update_path = os.path.join(
                global_config.global_config.get_config_value(
                        'CROS', 'source_tree', default=''),
                LOCAL_STATEFUL_UPDATE_PATH)

        if not os.path.exists(stateful_update_path):
            logging.warning('Could not find Chrome OS source location for '
                            'stateful_update script at %s, falling back to '
                            'chroot copy.', stateful_update_path)
            stateful_update_path = LOCAL_CHROOT_STATEFUL_UPDATE_PATH

        if not os.path.exists(stateful_update_path):
            logging.warning('Could not chroot stateful_update script, falling '
                            'back on client copy.')
            statefuldev_script = self.REMOTE_STATEUL_UPDATE_PATH
        else:
            # A local copy exists: push it to the host and use that copy.
            self.host.send_file(
                    stateful_update_path, self.STATEFUL_UPDATE,
                    delete_dest=True)
            statefuldev_script = self.STATEFUL_UPDATE

        return statefuldev_script


    def reset_stateful_partition(self):
        """Clear any pending stateful update request."""
        statefuldev_cmd = [self.get_stateful_update_script()]
        statefuldev_cmd += ['--stateful_change=reset', '2>&1']
        self._run(' '.join(statefuldev_cmd))


    def revert_boot_partition(self):
        """Revert the boot partition.

        @returns the result of running /postinst on the current root device.
        """
        part = self.rootdev('-s')
        logging.warning('Reverting update; Boot partition will be %s', part)
        return self._run('/postinst %s 2>&1' % part)


    def rollback_rootfs(self, powerwash):
        """Triggers rollback and waits for it to complete.

        @param powerwash: If true, powerwash as part of rollback.

        @raise RootFSUpdateError if anything went wrong.

        """
        version = self.host.get_release_version()
        # Introduced can_rollback in M36 (build 5772). etc/lsb-release matches
        # X.Y.Z. This version split just pulls the first part out.
        try:
            build_number = int(version.split('.')[0])
        except ValueError:
            logging.error('Could not parse build number.')
            build_number = 0

        if build_number >= 5772:
            can_rollback_cmd = '%s --can_rollback' % self.UPDATER_BIN
            logging.info('Checking for rollback.')
            try:
                self._run(can_rollback_cmd)
            except error.AutoservRunError as e:
                raise RootFSUpdateError("Rollback isn't possible on %s: %s" %
                                        (self.host.hostname, str(e)))

        rollback_cmd = '%s --rollback --follow' % self.UPDATER_BIN
        if not powerwash:
            rollback_cmd += ' --nopowerwash'

        logging.info('Performing rollback.')
        try:
            self._run(rollback_cmd)
        except error.AutoservRunError as e:
            raise RootFSUpdateError('Rollback failed on %s: %s' %
                                    (self.host.hostname, str(e)))

        self._verify_update_completed()


    # TODO(garnold) This is here for backward compatibility and should be
    # deprecated once we shift to using update_image() everywhere.
    @_timer.decorate
    def update_rootfs(self):
        """Run the standard command to force an update."""
        return self.update_image()


    @_timer.decorate
    def update_stateful(self, clobber=True):
        """Updates the stateful partition.

        Any exception is pushed onto the update error queue before being
        raised.

        @param clobber: If True, a clean stateful installation.

        @raise StatefulUpdateError: if the stateful update command fails.
        """
        logging.info('Updating stateful partition...')
        # NOTE(review): str.replace rewrites every occurrence of 'update' in
        # the URL, not only the path component; confirm that host and build
        # names can never contain that substring.
        statefuldev_url = self.update_url.replace('update',
                                                  'static')

        # Attempt stateful partition update; this must succeed so that the newly
        # installed host is testable after update.
        statefuldev_cmd = [self.get_stateful_update_script(), statefuldev_url]
        if clobber:
            statefuldev_cmd.append('--stateful_change=clean')

        statefuldev_cmd.append('2>&1')
        try:
            self._run(' '.join(statefuldev_cmd), timeout=1200)
        except error.AutoservRunError:
            update_error = StatefulUpdateError(
                    'Failed to perform stateful update on %s' %
                    self.host.hostname)
            self._update_error_queue.put(update_error)
            raise update_error
        except Exception as e:
            # Don't allow other exceptions to not be caught.
            self._update_error_queue.put(e)
            # Bare raise keeps the original traceback; 'raise e' would
            # restart it from this line.
            raise


    @_timer.decorate
    def run_update(self, update_root=True):
        """Update the DUT with image of specific version.

        @param update_root: True to force a rootfs update.

        @raise ChromiumOSError: if the update server cannot be reached.
        """
        booted_version = self.host.get_release_version()
        if self.update_version:
            logging.info('Updating from version %s to %s.',
                         booted_version, self.update_version)

        # Check that Dev Server is accepting connections (from autoserv's host).
        # If we can't talk to it, the machine host probably can't either.
        auserver_host = urlparse.urlparse(self.update_url)[1]
        connection = httplib.HTTPConnection(auserver_host)
        try:
            connection.connect()
        except IOError:
            raise ChromiumOSError(
                    'Update server at %s not available' % auserver_host)
        finally:
            # The connection is only a reachability probe; release the
            # socket rather than leaving it open.
            connection.close()

        logging.info('Installing from %s to %s', self.update_url,
                     self.host.hostname)

        # Reset update state.
        self.reset_update_engine()
        self.reset_stateful_partition()

        try:
            updaters = [
                multiprocessing.process.Process(target=self.update_rootfs),
                multiprocessing.process.Process(target=self.update_stateful)
            ]
            if not update_root:
                logging.info('Root update is skipped.')
                updaters = updaters[1:]

            # Run the updaters in parallel.
            for updater in updaters:
                updater.start()
            for updater in updaters:
                updater.join()

            # Re-raise the first error that occurred.
            if not self._update_error_queue.empty():
                update_error = self._update_error_queue.get()
                self.revert_boot_partition()
                self.reset_stateful_partition()
                raise update_error

            logging.info('Update complete.')
        except:
            # Deliberately catches everything: the handler only gathers
            # debugging information and always re-raises.
            # Collect update engine logs in the event of failure.
            if self.host.job:
                logging.info('Collecting update engine logs...')
                self.host.get_file(
                        self.UPDATER_LOGS, self.host.job.sysinfo.sysinfodir,
                        preserve_perm=False)
            list_image_dir_contents(self.update_url)
            raise
        finally:
            self.host.show_update_engine_log()


    def check_version(self):
        """Check the image running in DUT has the desired version.

        @returns: True if the DUT's image version matches the version that
            the autoupdater tries to update to, False otherwise (including
            when no update version is known).

        """
        booted_version = self.host.get_release_version()
        # bool() so callers get False rather than None or '' when
        # update_version is unset.
        return bool(self.update_version and
                    self.update_version.endswith(booted_version))


    def check_version_to_confirm_install(self):
        """Check image running in DUT has the desired version to be installed.

        The method should not be used to check if DUT needs to have a full
        reimage. Only use it to confirm a image is installed.

        The method is designed to verify version for following 6 scenarios with
        samples of version to update to and expected booted version:
        1. trybot paladin build.
        update version: trybot-lumpy-paladin/R27-3837.0.0-b123
        booted version: 3837.0.2013_03_21_1340

        2. trybot release build.
        update version: trybot-lumpy-release/R27-3837.0.0-b456
        booted version: 3837.0.0

        3. buildbot official release build.
        update version: lumpy-release/R27-3837.0.0
        booted version: 3837.0.0

        4. non-official paladin rc build.
        update version: lumpy-paladin/R27-3878.0.0-rc7
        booted version: 3837.0.0-rc7

        5. chrome-perf build.
        update version: lumpy-chrome-perf/R28-3837.0.0-b2996
        booted version: 3837.0.0

        6. pgo-generate build.
        update version: lumpy-release-pgo-generate/R28-3837.0.0-b2996
        booted version: 3837.0.0-pgo-generate

        When we are checking if a DUT needs to do a full install, we should NOT
        use this method to check if the DUT is running the same version, since
        it may return false positive for a DUT running trybot paladin build to
        be updated to another trybot paladin build.

        TODO: This logic has a bug if a trybot paladin build failed to be
        installed in a DUT running an older trybot paladin build with same
        platform number, but different build number (-b###). So to conclusively
        determine if a tryjob paladin build is imaged successfully, we may need
        to find out the date string from update url.

        @returns: True if the DUT's image version (without the date string if
            the image is a trybot build), matches the version that the
            autoupdater is trying to update to.

        """
        # In the local_devserver case, we can't know the expected
        # build, so just pass.
        if not self.update_version:
            return True

        # Always try the default check_version method first, this prevents
        # any backward compatibility issue.
        if self.check_version():
            return True

        return utils.version_match(self.update_version,
                                   self.host.get_release_version(),
                                   self.update_url)


    def verify_boot_expectations(self, expected_kernel_state, rollback_message):
        """Verifies that we fully booted given expected kernel state.

        This method both verifies that we booted using the correct kernel
        state and that the OS has marked the kernel as good.

        @param expected_kernel_state: kernel state that we are verifying with
            i.e. I expect to be booted onto partition 4 etc. See output of
            get_kernel_state.
        @param rollback_message: string to raise as a ChromiumOSError
            if we booted with the wrong partition.

        @raises ChromiumOSError: If we didn't.
        """
        # Figure out the newly active kernel.
        active_kernel_state = self.get_kernel_state()[0]

        # Check for rollback due to a bad build.
        if (expected_kernel_state and
                active_kernel_state != expected_kernel_state):

            # Kernel crash reports should be wiped between test runs, but
            # may persist from earlier parts of the test, or from problems
            # with provisioning.
            #
            # Kernel crash reports will NOT be present if the crash happened
            # before encrypted stateful is mounted.
            #
            # TODO(dgarrett): Integrate with server/crashcollect.py at some
            # point.
            # NOTE(review): glob.glob inspects the filesystem of the machine
            # running this code rather than going through self._run; confirm
            # that is the intended place to look for crash reports.
            kernel_crashes = glob.glob('/var/spool/crash/kernel.*.kcrash')
            if kernel_crashes:
                rollback_message += ': kernel_crash'
                logging.debug('Found %d kernel crash reports:',
                              len(kernel_crashes))
                # The crash names contain timestamps that may be useful:
                # kernel.20131207.005945.0.kcrash
                for crash in kernel_crashes:
                    logging.debug(' %s', os.path.basename(crash))

            # Print out some information to make it easier to debug
            # the rollback.
            logging.debug('Dumping partition table.')
            self._run('cgpt show $(rootdev -s -d)')
            logging.debug('Dumping crossystem for firmware debugging.')
            self._run('crossystem --all')
            raise ChromiumOSError(rollback_message)

        # Make sure chromeos-setgoodkernel runs.
        try:
            utils.poll_for_condition(
                    lambda: (self.get_kernel_tries(active_kernel_state) == 0
                             and self.get_kernel_success(active_kernel_state)),
                    exception=ChromiumOSError(),
                    timeout=self.KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
        except ChromiumOSError:
            # Use the state of system-services to pick the failure message.
            services_status = self._run('status system-services').stdout
            if services_status != 'system-services start/running\n':
                event = 'Chrome failed to reach login screen'
            else:
                event = ('update-engine failed to call '
                         'chromeos-setgoodkernel')
            raise ChromiumOSError(
                    'After update and reboot, %s '
                    'within %d seconds' % (event,
                                           self.KERNEL_UPDATE_TIMEOUT))


class BrilloUpdater(BaseUpdater):
    """Helper class for updating a Brillo DUT."""

    def __init__(self, update_url, host=None):
        """Initialize the object.

        @param update_url: The URL we want the update to use.
        @param host: A client.common_lib.hosts.Host implementation.
        """
        super(BrilloUpdater, self).__init__(
                '/system/bin/update_engine_client', update_url, host)