      1 # Copyright 2016 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import json
      6 import logging
      7 import os
      8 import time
      9 
     10 import common
     11 from autotest_lib.client.common_lib import error
     12 from autotest_lib.client.common_lib import global_config
     13 from autotest_lib.client.common_lib import hosts
     14 from autotest_lib.client.common_lib.cros import dev_server
     15 from autotest_lib.client.common_lib.cros import retry
     16 from autotest_lib.server import afe_utils
     17 from autotest_lib.server import crashcollect
     18 from autotest_lib.server.cros import autoupdater
     19 from autotest_lib.server.cros.dynamic_suite import tools
     20 from autotest_lib.server.hosts import cros_firmware
     21 from autotest_lib.server.hosts import repair_utils
     22 
      23 # _DEV_MODE_ALLOWED_POOLS - The set of pools that are allowed to be
     24 # in dev mode (usually, those should be unmanaged devices)
     25 #
     26 _DEV_MODE_ALLOWED_POOLS = set(
     27     global_config.global_config.get_config_value(
     28             'CROS',
     29             'pools_dev_mode_allowed',
     30             type=str,
     31             default='',
     32             allow_blank=True).split(','))
     33 
     34 # Setting to suppress dev mode check; primarily used for moblab where all
      35 # DUTs are in dev mode.
     36 _DEV_MODE_ALWAYS_ALLOWED = global_config.global_config.get_config_value(
     37             'CROS',
     38             'dev_mode_allowed',
     39             type=bool,
     40             default=False)
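
# For illustration only (not part of this module):  the two settings above
# are read from the autotest global/shadow config.  A hypothetical
# shadow_config.ini fragment enabling dev mode for a couple of pools might
# look like this (the pool names here are made up):
#
#   [CROS]
#   pools_dev_mode_allowed: faft-test,unmanaged
#   dev_mode_allowed: True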
     41 
     42 # Triggers for the 'au', 'powerwash', and 'usb' repair actions.
     43 # These are also used as dependencies in the `CrosHost` repair
     44 # sequence, as follows:
     45 #
     46 # au:
     47 #   - triggers: _CROS_AU_TRIGGERS
     48 #   - depends on: _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS
     49 #
     50 # powerwash:
     51 #   - triggers: _CROS_POWERWASH_TRIGGERS + _CROS_AU_TRIGGERS
     52 #   - depends on: _CROS_USB_TRIGGERS
     53 #
     54 # usb:
     55 #   - triggers: _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS +
     56 #               _CROS_AU_TRIGGERS
     57 #   - no dependencies
     58 #
     59 # N.B. AC power detection depends on software on the DUT, and there
     60 # have been bugs where detection failed even though the DUT really
     61 # did have power.  So, we make the 'power' verifier a trigger for
     62 # reinstall repair actions, too.
     63 #
     64 # TODO(jrbarnette):  AU repair can't fix all problems reported by
     65 # the 'cros' verifier; it's listed as an AU trigger as a
     66 # simplification.  The ultimate fix is to split the 'cros' verifier
     67 # into smaller individual verifiers.
     68 _CROS_AU_TRIGGERS = ('power', 'rwfw', 'python', 'cros',)
     69 _CROS_POWERWASH_TRIGGERS = ('tpm', 'good_au', 'ext4',)
     70 _CROS_USB_TRIGGERS = ('ssh', 'writable',)
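
# To make the stacking described above concrete:  the 'usb' entry produced
# by _cros_extended_repair_actions() below expands (with the default tuples)
# to
#
#   (ServoInstallRepair, 'usb', (),
#    ('ssh', 'writable', 'tpm', 'good_au', 'ext4', 'power', 'rwfw',
#     'python', 'cros'))
#
# i.e. it has no dependencies and is triggered by every verifier named in
# the trigger tuples, while 'au' is triggered only by _CROS_AU_TRIGGERS.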
     71 
     72 
     73 class ACPowerVerifier(hosts.Verifier):
     74     """Check for AC power and a reasonable battery charge."""
     75 
     76     def verify(self, host):
     77         # pylint: disable=missing-docstring
     78         try:
     79             info = host.get_power_supply_info()
     80         except error.AutoservRunError:
     81             raise hosts.AutoservVerifyError(
     82                     'Failed to get power supply info')
     83 
     84         try:
     85             if info['Line Power']['online'] != 'yes':
     86                 raise hosts.AutoservVerifyError(
     87                         'AC power is not plugged in')
     88         except KeyError:
     89             raise hosts.AutoservVerifyError(
     90                     'Cannot determine AC power status')
     91 
     92         try:
     93             if float(info['Battery']['percentage']) < 50.0:
     94                 raise hosts.AutoservVerifyError(
     95                         'Battery is less than 50%')
     96         except KeyError:
     97             logging.info('Cannot determine battery status - '
     98                          'skipping check.')
     99 
    100     @property
    101     def description(self):
    102         # pylint: disable=missing-docstring
    103         return 'The DUT is plugged in to AC power'
    104 
    105 
    106 class WritableVerifier(hosts.Verifier):
    107     """
    108     Confirm the stateful file systems are writable.
    109 
     110     The standard Linux response to certain unexpected file system errors
    111     (including hardware errors in block devices) is to change the file
    112     system status to read-only.  This checks that that hasn't happened.
    113 
    114     The test covers the two file systems that need to be writable for
    115     critical operations like AU:
    116       * The (unencrypted) stateful system which includes
    117         /mnt/stateful_partition.
    118       * The encrypted stateful partition, which includes /var.
    119 
    120     The test doesn't check various bind mounts; those are expected to
    121     fail the same way as their underlying main mounts.  Whether the
    122     Linux kernel can guarantee that is untested...
    123     """
    124 
    125     # N.B. Order matters here:  Encrypted stateful is loop-mounted from
    126     # a file in unencrypted stateful, so we don't test for errors in
    127     # encrypted stateful if unencrypted fails.
    128     _TEST_DIRECTORIES = ['/mnt/stateful_partition', '/var/tmp']
    129 
    130     def verify(self, host):
    131         # pylint: disable=missing-docstring
    132         # This deliberately stops looking after the first error.
    133         # See above for the details.
    134         for testdir in self._TEST_DIRECTORIES:
    135             filename = os.path.join(testdir, 'writable_test')
    136             command = 'touch %s && rm %s' % (filename, filename)
    137             rv = host.run(command=command, ignore_status=True)
    138             if rv.exit_status != 0:
    139                 msg = 'Can\'t create a file in %s' % testdir
    140                 raise hosts.AutoservVerifyError(msg)
    141 
    142     @property
    143     def description(self):
    144         # pylint: disable=missing-docstring
    145         return 'The stateful filesystems are writable'
    146 
    147 
    148 class EXT4fsErrorVerifier(hosts.Verifier):
    149     """
    150     Confirm we have not seen critical file system kernel errors.
    151     """
    152     def verify(self, host):
    153         # pylint: disable=missing-docstring
    154         # grep for stateful FS errors of the type "EXT4-fs error (device sda1):"
    155         command = ("dmesg | grep -E \"EXT4-fs error \(device "
    156                    "$(cut -d ' ' -f 5,9 /proc/$$/mountinfo | "
    157                    "grep -e '^/mnt/stateful_partition ' | "
    158                    "cut -d ' ' -f 2 | cut -d '/' -f 3)\):\"")
    159         output = host.run(command=command, ignore_status=True).stdout
    160         if output:
    161             sample = output.splitlines()[0]
    162             message = 'Saw file system error: %s' % sample
    163             raise hosts.AutoservVerifyError(message)
    164         # Check for other critical FS errors.
    165         command = 'dmesg | grep "This should not happen!!  Data will be lost"'
    166         output = host.run(command=command, ignore_status=True).stdout
    167         if output:
    168             message = 'Saw file system error: Data will be lost'
    169             raise hosts.AutoservVerifyError(message)
    172 
    173     @property
    174     def description(self):
    175         # pylint: disable=missing-docstring
    176         return 'Did not find critical file system errors'
    177 
    178 
    179 class UpdateSuccessVerifier(hosts.Verifier):
    180     """
    181     Checks that the DUT successfully finished its last provision job.
    182 
    183     At the start of any update (e.g. for a Provision job), the code
    184     creates a marker file named `PROVISION_FAILED`.  The file is located
    185     in a part of the stateful partition that will be removed if an
    186     update finishes successfully.  Thus, the presence of the file
    187     indicates that a prior update failed.
    188 
    189     The verifier tests for the existence of the marker file and fails if
    190     it still exists.
    191     """
    192     def verify(self, host):
    193         # pylint: disable=missing-docstring
    194         result = host.run('test -f %s' % autoupdater.PROVISION_FAILED,
    195                           ignore_status=True)
    196         if result.exit_status == 0:
    197             raise hosts.AutoservVerifyError(
    198                     'Last AU on this DUT failed')
    199 
    200     @property
    201     def description(self):
    202         # pylint: disable=missing-docstring
    203         return 'The most recent AU attempt on this DUT succeeded'
    204 
    205 
    206 class TPMStatusVerifier(hosts.Verifier):
    207     """Verify that the host's TPM is in a good state."""
    208 
    209     def verify(self, host):
    210         # pylint: disable=missing-docstring
    211         if _is_virtual_machine(host):
    212             # We do not forward host TPM / emulated TPM to qemu VMs, so skip
    213             # this verification step.
    214             logging.debug('Skipped verification %s on VM', self)
    215             return
    216 
    217         try:
    218             status = CryptohomeStatus(host)
    219         except hosts.AutoservVerifyError:
     220             logging.info('Cannot determine Cryptohome status - '
    221                          'skipping check.')
    222             return
    223         try:
    224             tpm = status['tpm']
    225             if not tpm['enabled']:
    226                 raise hosts.AutoservVerifyError(
    227                         'TPM is not enabled -- Hardware is not working.')
    228             if not tpm['can_connect']:
    229                 raise hosts.AutoservVerifyError(
    230                         ('TPM connect failed -- '
    231                          'last_error=%d.' % tpm['last_error']))
    232             if tpm['owned'] and not tpm['can_load_srk']:
    233                 raise hosts.AutoservVerifyError(
    234                         'Cannot load the TPM SRK')
    235             if tpm['can_load_srk'] and not tpm['can_load_srk_pubkey']:
    236                 raise hosts.AutoservVerifyError(
    237                         'Cannot load the TPM SRK public key')
    238         except KeyError:
     239             logging.info('Cannot determine Cryptohome status - '
    240                          'skipping check.')
    241 
    242     @property
    243     def description(self):
    244         # pylint: disable=missing-docstring
    245         return 'The host\'s TPM is available and working'
    246 
    247 
    248 class PythonVerifier(hosts.Verifier):
    249     """Confirm the presence of a working Python interpreter."""
    250 
    251     def verify(self, host):
    252         # pylint: disable=missing-docstring
    253         result = host.run('python -c "import cPickle"',
    254                           ignore_status=True)
    255         if result.exit_status != 0:
    256             message = 'The python interpreter is broken'
    257             if result.exit_status == 127:
    258                 search = host.run('which python', ignore_status=True)
    259                 if search.exit_status != 0 or not search.stdout:
    260                     message = ('Python is missing; may be caused by '
    261                                'powerwash')
    262             raise hosts.AutoservVerifyError(message)
    263 
    264     @property
    265     def description(self):
    266         # pylint: disable=missing-docstring
    267         return 'Python on the host is installed and working'
    268 
    269 
    270 class DevModeVerifier(hosts.Verifier):
    271     """Verify that the host is not in dev mode."""
    272 
    273     def verify(self, host):
    274         # pylint: disable=missing-docstring
    275         # Some pools are allowed to be in dev mode
    276         info = host.host_info_store.get()
    277         if (_DEV_MODE_ALWAYS_ALLOWED or
    278                 bool(info.pools & _DEV_MODE_ALLOWED_POOLS)):
    279             return
    280 
    281         result = host.run('crossystem devsw_boot', ignore_status=True).stdout
    282         if result != '0':
    283             raise hosts.AutoservVerifyError('The host is in dev mode')
    284 
    285     @property
    286     def description(self):
    287         # pylint: disable=missing-docstring
    288         return 'The host should not be in dev mode'
    289 
    290 
    291 class HWIDVerifier(hosts.Verifier):
    292     """Verify that the host has HWID & serial number."""
    293 
    294     def verify(self, host):
    295         # pylint: disable=missing-docstring
    296         try:
    297             info = host.host_info_store.get()
    298 
    299             hwid = host.run('crossystem hwid', ignore_status=True).stdout
    300             if hwid:
    301                 info.attributes['HWID'] = hwid
    302 
    303             serial_number = host.run('vpd -g serial_number',
    304                                      ignore_status=True).stdout
    305             if serial_number:
    306                 info.attributes['serial_number'] = serial_number
    307 
    308             if info != host.host_info_store.get():
    309                 host.host_info_store.commit(info)
    310         except Exception as e:
     311             logging.exception('Failed to get HWID & Serial Number for host '
    312                               '%s: %s', host.hostname, str(e))
    313 
    314     @property
    315     def description(self):
    316         # pylint: disable=missing-docstring
    317         return 'The host should have valid HWID and Serial Number'
    318 
    319 
    320 class JetstreamTpmVerifier(hosts.Verifier):
    321     """Verify that Jetstream TPM is in a good state."""
    322 
    323     @retry.retry(error.AutoservError, timeout_min=2, delay_sec=10)
    324     def verify(self, host):
    325         # pylint: disable=missing-docstring
    326         try:
    327             status = CryptohomeStatus(host)
    328             if not status.tpm_enabled:
    329                 raise hosts.AutoservVerifyError('TPM is not enabled')
    330             if not status.tpm_owned:
    331                 raise hosts.AutoservVerifyError('TPM is not owned')
    332             if not status.tpm_can_load_srk:
    333                 raise hosts.AutoservVerifyError('TPM cannot load SRK')
    334             if not status.tpm_can_load_srk_pubkey:
    335                 raise hosts.AutoservVerifyError('TPM cannot load SRK pubkey')
    336 
    337             # Check that the TPM is fully initialized. The output of this
    338             # command is line-oriented property/value pairs.
    339             result = host.run('cryptohome --action=tpm_status')
    340             if 'TPM Ready: true' not in result.stdout:
    341                 raise hosts.AutoservVerifyError('TPM is not ready')
    342         except error.AutoservRunError:
    343             raise hosts.AutoservVerifyError(
    344                     'Could not determine TPM status')
    345 
    346     @property
    347     def description(self):
    348         # pylint: disable=missing-docstring
    349         return 'Jetstream TPM state check'
    350 
    351 
    352 class JetstreamAttestationVerifier(hosts.Verifier):
    353     """Verify that Jetstream attestation client has a certificate."""
    354 
    355     @retry.retry(error.AutoservError, timeout_min=2, delay_sec=10)
    356     def verify(self, host):
    357         # pylint: disable=missing-docstring
    358         try:
    359             # This output is in text protobuf format.
    360             result = host.run('cryptohome --action=tpm_more_status')
    361             if 'attestation_prepared: true' not in result.stdout:
    362                 raise hosts.AutoservVerifyError(
    363                         'Attestation has not been prepared')
    364 
    365             result = host.run('cryptohome --action=tpm_attestation_get_ek')
    366             if 'EK Certificate' not in result.stdout:
    367                 raise hosts.AutoservVerifyError(
    368                         'Endorsement certificate not found')
    369         except error.AutoservRunError:
    370             raise hosts.AutoservVerifyError(
    371                     'Unable to fetch endorsement certificate')
    372 
    373     @property
    374     def description(self):
    375         # pylint: disable=missing-docstring
    376         return 'Jetstream attestation endorsement check'
    377 
    378 
    379 class JetstreamServicesVerifier(hosts.Verifier):
    380     """Verify that Jetstream services are running."""
    381 
    382     # Retry for b/62576902
    383     @retry.retry(error.AutoservError, timeout_min=1, delay_sec=10)
    384     def verify(self, host):
    385         # pylint: disable=missing-docstring
    386         try:
    387             if not host.upstart_status('ap-controller'):
    388                 raise hosts.AutoservVerifyError(
    389                     'ap-controller service is not running')
    390         except error.AutoservRunError:
    391             raise hosts.AutoservVerifyError(
    392                 'ap-controller service not found')
    393 
    394         try:
    395             host.run('pgrep ap-controller')
    396         except error.AutoservRunError:
    397             raise hosts.AutoservVerifyError(
    398                 'ap-controller process is not running')
    399 
    400     @property
    401     def description(self):
    402         # pylint: disable=missing-docstring
    403         return 'Jetstream services must be running'
    404 
    405 
    406 class KvmExistsVerifier(hosts.Verifier):
    407     """Verify that /dev/kvm exists if it should be there"""
    408 
    409     def verify(self, host):
    410         # pylint: disable=missing-docstring
    411         result = host.run('[ ! -e /dev/kvm -a -f /usr/bin/vm_concierge ]',
    412                           ignore_status=True)
    413         if result.exit_status == 0:
    414             raise hosts.AutoservVerifyError('/dev/kvm is missing')
    415 
    416     @property
    417     def description(self):
    418         # pylint: disable=missing-docstring
    419         return '/dev/kvm should exist if device supports Linux VMs'
    420 
    421 
    422 class _ResetRepairAction(hosts.RepairAction):
    423     """Common handling for repair actions that reset a DUT."""
    424 
    425     def _collect_logs(self, host):
    426         """Collect logs from a successfully repaired DUT."""
    427         dirname = 'after_%s' % self.tag
    428         local_log_dir = crashcollect.get_crashinfo_dir(host, dirname)
    429         host.collect_logs('/var/log', local_log_dir, ignore_errors=True)
    430         # Collect crash info.
    431         crashcollect.get_crashinfo(host, None)
    432 
    433     def _check_reset_success(self, host):
    434         """Check whether reset succeeded, and gather logs if possible."""
    435         if host.wait_up(host.BOOT_TIMEOUT):
    436             try:
    437                 # Collect logs once we regain ssh access before
    438                 # clobbering them.
    439                 self._collect_logs(host)
    440             except Exception:
    441                 # If the DUT is up, we want to declare success, even if
    442                 # log gathering fails for some reason.  So, if there's
    443                 # a failure, just log it and move on.
    444                 logging.exception('Non-critical failure in log '
    445                                   'collection during %s.',
    446                                   self.tag)
    447             return
    448         raise hosts.AutoservRepairError(
    449                 'Host %s is still offline after %s.' %
    450                 (host.hostname, self.tag), 'failed_to_boot_after_' + self.tag)
    451 
    452 
    453 class ServoSysRqRepair(_ResetRepairAction):
    454     """
    455     Repair a Chrome device by sending a system request to the kernel.
    456 
     457     Sending the Alt+VolUp+x key combination (aka sysrq-x) three times
     458     asks the kernel to panic and reboot while preserving the kernel
     459     logs in console ramoops.
    460     """
    461 
    462     def repair(self, host):
    463         # pylint: disable=missing-docstring
    464         repair_utils.require_servo(host)
     465         # Press Alt+VolUp+X three times.  We don't check DUT health
     466         # between presses, because killing Chrome is unlikely to
     467         # restore SSH access on its own.
    468         for _ in range(3):
    469             try:
    470                 host.servo.sysrq_x()
     471             except error.TestFail as ex:
    472                 raise hosts.AutoservRepairError(
    473                       'cannot press sysrq-x: %s.' % str(ex),
    474                       'cannot_press_sysrq_x')
    475             # less than 5 seconds between presses.
    476             time.sleep(2.0)
    477         self._check_reset_success(host)
    478 
    479     @property
    480     def description(self):
    481         # pylint: disable=missing-docstring
    482         return 'Reset the DUT via keyboard sysrq-x'
    483 
    484 
    485 class ServoResetRepair(_ResetRepairAction):
    486     """Repair a Chrome device by resetting it with servo."""
    487 
    488     def repair(self, host):
    489         # pylint: disable=missing-docstring
    490         repair_utils.require_servo(host)
    491         host.servo.get_power_state_controller().reset()
    492         self._check_reset_success(host)
    493 
    494     @property
    495     def description(self):
    496         # pylint: disable=missing-docstring
    497         return 'Reset the DUT via servo'
    498 
    499 
    500 class CrosRebootRepair(repair_utils.RebootRepair):
    501     """Repair a CrOS target by clearing dev mode and rebooting it."""
    502 
    503     def repair(self, host):
    504         # pylint: disable=missing-docstring
    505         # N.B. We need to reboot regardless of whether clearing
    506         # dev_mode succeeds or fails.
    507         host.run('/usr/share/vboot/bin/set_gbb_flags.sh 0',
    508                  ignore_status=True)
    509         host.run('crossystem disable_dev_request=1',
    510                  ignore_status=True)
    511         super(CrosRebootRepair, self).repair(host)
    512 
    513     @property
    514     def description(self):
    515         # pylint: disable=missing-docstring
     516         return 'Reset GBB flags and reboot the host'
    517 
    518 
    519 class AutoUpdateRepair(hosts.RepairAction):
    520     """
    521     Repair by re-installing a test image using autoupdate.
    522 
    523     Try to install the DUT's designated "stable test image" using the
    524     standard procedure for installing a new test image via autoupdate.
    525     """
    526 
    527     def repair(self, host):
    528         # pylint: disable=missing-docstring
    529         image_name = host.get_cros_repair_image_name()
    530         logging.info('Staging build for AU: %s', image_name)
    531         devserver = dev_server.ImageServer.resolve(image_name, host.hostname)
    532         devserver.trigger_download(image_name, synchronous=False)
    533         update_url = tools.image_url_pattern() % (
    534                 devserver.url(), image_name)
    535         afe_utils.machine_install_and_update_labels(host, update_url)
    536 
    537     @property
    538     def description(self):
    539         # pylint: disable=missing-docstring
    540         return 'Re-install the stable build via AU'
    541 
    542 
    543 class PowerWashRepair(AutoUpdateRepair):
    544     """
    545     Powerwash the DUT, then re-install using autoupdate.
    546 
    547     Powerwash the DUT, then attempt to re-install a stable test image as
    548     for `AutoUpdateRepair`.
    549     """
    550 
    551     def repair(self, host):
    552         # pylint: disable=missing-docstring
    553         host.run('echo "fast safe" > '
    554                  '/mnt/stateful_partition/factory_install_reset')
    555         host.reboot(timeout=host.POWERWASH_BOOT_TIMEOUT, wait=True)
    556         super(PowerWashRepair, self).repair(host)
    557 
    558     @property
    559     def description(self):
    560         # pylint: disable=missing-docstring
    561         return 'Powerwash and then re-install the stable build via AU'
    562 
    563 
    564 class ServoInstallRepair(hosts.RepairAction):
    565     """
    566     Reinstall a test image from USB using servo.
    567 
    568     Use servo to re-install the DUT's designated "stable test image"
    569     from servo-attached USB storage.
    570     """
    571 
    572     def repair(self, host):
    573         # pylint: disable=missing-docstring
    574         repair_utils.require_servo(host)
    575         host.servo_install(host.stage_image_for_servo())
    576 
    577     @property
    578     def description(self):
    579         # pylint: disable=missing-docstring
    580         return 'Reinstall from USB using servo'
    581 
    582 
    583 class ColdRebootRepair(_ResetRepairAction):
    584     """
    585     Repair a Chrome device by performing a cold reboot that resets the EC.
    586 
    587     Use ectool to perform a cold reboot which will reset the EC.
    588     """
    589 
    590     def repair(self, host):
    591         # pylint: disable=missing-docstring
    592         host.reboot(reboot_cmd='ectool reboot_ec cold')
    593         self._check_reset_success(host)
    594 
    595     @property
    596     def description(self):
    597         # pylint: disable=missing-docstring
    598         return 'Reset the DUT via cold reboot with ectool'
    599 
    600 
    601 class JetstreamTpmRepair(hosts.RepairAction):
    602     """Repair by resetting TPM and rebooting."""
    603 
    604     def repair(self, host):
    605         # pylint: disable=missing-docstring
    606         host.run('rm -f /var/cache/ap/setup-network', ignore_status=True)
    607         host.run('rm -f /home/chronos/.oobe_completed', ignore_status=True)
    608         host.run('rm -f /home/.shadow/.can_attempt_ownership',
    609                  ignore_status=True)
    610         host.run('crossystem clear_tpm_owner_request=1', ignore_status=True)
    611         host.reboot()
    612 
    613     @property
    614     def description(self):
    615         # pylint: disable=missing-docstring
    616         return 'Reset TPM and reboot'
    617 
    618 
    619 class JetstreamServiceRepair(hosts.RepairAction):
    620     """Repair by restarting Jetstream services."""
    621 
    622     def repair(self, host):
    623         # pylint: disable=missing-docstring
    624         host.cleanup_services()
    625 
    626     @property
    627     def description(self):
    628         # pylint: disable=missing-docstring
    629         return 'Restart Jetstream services'
    630 
    631 
    632 def _cros_verify_dag():
    633     """Return the verification DAG for a `CrosHost`."""
    634     FirmwareStatusVerifier = cros_firmware.FirmwareStatusVerifier
    635     FirmwareVersionVerifier = cros_firmware.FirmwareVersionVerifier
    636     verify_dag = (
    637         (repair_utils.SshVerifier,        'ssh',      ()),
    638         (DevModeVerifier,                 'devmode',  ('ssh',)),
    639         (HWIDVerifier,                    'hwid',     ('ssh',)),
    640         (ACPowerVerifier,                 'power',    ('ssh',)),
    641         (EXT4fsErrorVerifier,             'ext4',     ('ssh',)),
    642         (WritableVerifier,                'writable', ('ssh',)),
    643         (TPMStatusVerifier,               'tpm',      ('ssh',)),
    644         (UpdateSuccessVerifier,           'good_au',  ('ssh',)),
    645         (FirmwareStatusVerifier,          'fwstatus', ('ssh',)),
    646         (FirmwareVersionVerifier,         'rwfw',     ('ssh',)),
    647         (PythonVerifier,                  'python',   ('ssh',)),
    648         (repair_utils.LegacyHostVerifier, 'cros',     ('ssh',)),
    649         (KvmExistsVerifier,               'ec_reset', ('ssh',)),
    650     )
    651     return verify_dag
    652 
    653 
    654 def _cros_basic_repair_actions():
    655     """Return the basic repair actions for a `CrosHost`"""
    656     FirmwareRepair = cros_firmware.FirmwareRepair
    657     repair_actions = (
    658         # RPM cycling must precede Servo reset:  if the DUT has a dead
    659         # battery, we need to reattach AC power before we reset via servo.
    660         (repair_utils.RPMCycleRepair, 'rpm', (), ('ssh', 'power',)),
    661         (ServoSysRqRepair, 'sysrq', (), ('ssh',)),
    662         (ServoResetRepair, 'servoreset', (), ('ssh',)),
    663 
    664         # N.B. FirmwareRepair can't fix a 'good_au' failure directly,
    665         # because it doesn't remove the flag file that triggers the
    666         # failure.  We include it as a repair trigger because it's
     667         # possible that the last update failed because of the firmware,
    668         # and we want the repair steps below to be able to trust the
    669         # firmware.
    670         (FirmwareRepair, 'firmware', (), ('ssh', 'fwstatus', 'good_au',)),
    671 
    672         (CrosRebootRepair, 'reboot', ('ssh',), ('devmode', 'writable',)),
    673 
    674         (ColdRebootRepair, 'coldboot', ('ssh',), ('ec_reset',)),
    675     )
    676     return repair_actions
    677 
    678 
    679 def _cros_extended_repair_actions(au_triggers=_CROS_AU_TRIGGERS,
    680                                   powerwash_triggers=_CROS_POWERWASH_TRIGGERS,
    681                                   usb_triggers=_CROS_USB_TRIGGERS):
    682     """Return the extended repair actions for a `CrosHost`"""
    683 
    684     # The dependencies and triggers for the 'au', 'powerwash', and 'usb'
    685     # repair actions stack up:  Each one is able to repair progressively
    686     # more verifiers than the one before.  The 'triggers' lists specify
    687     # the progression.
    688 
    689     repair_actions = (
    690         (AutoUpdateRepair, 'au',
    691                 usb_triggers + powerwash_triggers, au_triggers),
    692         (PowerWashRepair, 'powerwash',
    693                 usb_triggers, powerwash_triggers + au_triggers),
    694         (ServoInstallRepair, 'usb',
    695                 (), usb_triggers + powerwash_triggers + au_triggers),
    696     )
    697     return repair_actions
    698 
    699 
    700 def _cros_repair_actions():
    701     """Return the repair actions for a `CrosHost`."""
    702     repair_actions = (_cros_basic_repair_actions() +
    703                       _cros_extended_repair_actions())
    704     return repair_actions
    705 
    706 
    707 def create_cros_repair_strategy():
    708     """Return a `RepairStrategy` for a `CrosHost`."""
    709     verify_dag = _cros_verify_dag()
    710     repair_actions = _cros_repair_actions()
    711     return hosts.RepairStrategy(verify_dag, repair_actions, 'cros')
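
# A minimal usage sketch (illustrative only; it assumes the verify()/repair()
# interface of hosts.RepairStrategy and an already-constructed CrosHost
# object named `host`):
#
#   strategy = create_cros_repair_strategy()
#   strategy.verify(host)   # raises if verification fails
#   strategy.repair(host)   # runs repair actions for any failed verifiers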
    712 
    713 
    714 def _moblab_verify_dag():
    715     """Return the verification DAG for a `MoblabHost`."""
    716     FirmwareVersionVerifier = cros_firmware.FirmwareVersionVerifier
    717     verify_dag = (
    718         (repair_utils.SshVerifier,        'ssh',     ()),
    719         (ACPowerVerifier,                 'power',   ('ssh',)),
    720         (FirmwareVersionVerifier,         'rwfw',    ('ssh',)),
    721         (PythonVerifier,                  'python',  ('ssh',)),
    722         (repair_utils.LegacyHostVerifier, 'cros',    ('ssh',)),
    723     )
    724     return verify_dag
    725 
    726 
    727 def _moblab_repair_actions():
    728     """Return the repair actions for a `MoblabHost`."""
    729     repair_actions = (
    730         (repair_utils.RPMCycleRepair, 'rpm', (), ('ssh', 'power',)),
    731         (AutoUpdateRepair, 'au', ('ssh',), _CROS_AU_TRIGGERS),
    732     )
    733     return repair_actions
    734 
    735 
    736 def create_moblab_repair_strategy():
    737     """
    738     Return a `RepairStrategy` for a `MoblabHost`.
    739 
    740     Moblab is a subset of the CrOS verify and repair.  Several pieces
    741     are removed because they're not expected to be meaningful.  Some
    742     others are removed for more specific reasons:
    743 
    744     'tpm':  Moblab DUTs don't run the tests that matter to this
    745         verifier.  TODO(jrbarnette)  This assertion is unproven.
    746 
    747     'good_au':  This verifier can't pass, because the Moblab AU
    748         procedure doesn't properly delete the PROVISION_FAILED file.
    749         TODO(jrbarnette) We should refactor ChromiumOSUpdater so
    750         that it can be different for Moblab.
    751 
    752     'firmware':  Moblab DUTs shouldn't be in FAFT pools, so we don't try
    753         this.
    754 
    755     'powerwash':  Powerwash on Moblab causes trouble with deleting the
    756         DHCP leases file, so we skip it.
    757     """
    758     verify_dag = _moblab_verify_dag()
    759     repair_actions = _moblab_repair_actions()
    760     return hosts.RepairStrategy(verify_dag, repair_actions, 'moblab')
    761 
    762 
    763 def _jetstream_repair_actions():
    764     """Return the repair actions for a `JetstreamHost`."""
    765     au_triggers = _CROS_AU_TRIGGERS
    766     jetstream_tpm_triggers = ('jetstream_tpm', 'jetstream_attestation')
    767     jetstream_service_triggers = (jetstream_tpm_triggers +
    768                                   ('jetstream_services',))
    769     repair_actions = (
    770         _cros_basic_repair_actions() +
    771         (
    772             (JetstreamTpmRepair, 'jetstream_tpm_repair',
    773              _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS,
    774              au_triggers + jetstream_tpm_triggers),
    775 
    776             (JetstreamServiceRepair, 'jetstream_service_repair',
    777              _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS + (
    778                  'jetstream_tpm', 'jetstream_attestation'),
    779              au_triggers + jetstream_service_triggers),
    780         ) +
    781         _cros_extended_repair_actions(
    782             au_triggers=au_triggers + jetstream_service_triggers))
    783     return repair_actions
    784 
    785 
    786 def _jetstream_verify_dag():
    787     """Return the verification DAG for a `JetstreamHost`."""
    788     verify_dag = _cros_verify_dag() + (
    789         (JetstreamTpmVerifier, 'jetstream_tpm', ('ssh',)),
    790         (JetstreamAttestationVerifier, 'jetstream_attestation', ('ssh',)),
    791         (JetstreamServicesVerifier, 'jetstream_services', ('ssh',)),
    792     )
    793     return verify_dag
    794 
    795 
    796 def create_jetstream_repair_strategy():
    797     """
    798     Return a `RepairStrategy` for a `JetstreamHost`.
    799 
    800     The Jetstream repair strategy is based on the CrOS verify and repair,
    801     but adds the JetstreamServicesVerifier.
    802     """
    803     verify_dag = _jetstream_verify_dag()
    804     repair_actions = _jetstream_repair_actions()
    805     return hosts.RepairStrategy(verify_dag, repair_actions, 'jetstream')
    806 
    807 
    808 # TODO(pprabhu) Move this to a better place. I have no idea what that place
    809 # would be.
    810 def _is_virtual_machine(host):
    811     """Determine whether the given |host| is a virtual machine.
    812 
    813     @param host: a hosts.Host object.
    814     @returns True if the host is a virtual machine, False otherwise.
    815     """
    816     output = host.run('cat /proc/cpuinfo | grep "model name"',
    817                       ignore_status=True)
    818     return (output.exit_status == 0 and output.stdout and
    819             'qemu' in output.stdout.lower())
    820 
    821 
    822 class CryptohomeStatus(dict):
    823     """Wrapper for getting cryptohome status from a host."""
    824 
    825     def __init__(self, host):
    826         super(CryptohomeStatus, self).__init__()
    827         self.update(_get_cryptohome_status(host))
    828         self.tpm = self['tpm']
    829 
    830     @property
    831     def tpm_enabled(self):
    832         # pylint: disable=missing-docstring
    833         return self.tpm.get('enabled') == True
    834 
    835     @property
    836     def tpm_owned(self):
    837         # pylint: disable=missing-docstring
    838         return self.tpm.get('owned') == True
    839 
    840     @property
    841     def tpm_can_load_srk(self):
    842         # pylint: disable=missing-docstring
    843         return self.tpm.get('can_load_srk') == True
    844 
    845     @property
    846     def tpm_can_load_srk_pubkey(self):
    847         # pylint: disable=missing-docstring
    848         return self.tpm.get('can_load_srk_pubkey') == True
    849 
    850 
    851 def _get_cryptohome_status(host):
    852     """Returns a dictionary containing the cryptohome status.
    853 
    854     @param host: a hosts.Host object.
    855     @returns A dictionary containing the cryptohome status.
    856     @raises AutoservVerifyError: if the output could not be parsed or the TPM
    857        status is missing.
    858     @raises hosts.AutoservRunError: if the cryptohome command failed.
    859     """
    860     # This cryptohome command emits status information in JSON format. It
    861     # looks something like this:
    862     # {
    863     #    "installattrs": {
    864     #       ...
    865     #    },
    866     #    "mounts": [ {
    867     #       ...
    868     #    } ],
    869     #    "tpm": {
    870     #       "being_owned": false,
    871     #       "can_connect": true,
    872     #       "can_decrypt": false,
    873     #       "can_encrypt": false,
    874     #       "can_load_srk": true,
    875     #       "can_load_srk_pubkey": true,
    876     #       "enabled": true,
    877     #       "has_context": true,
    878     #       "has_cryptohome_key": false,
    879     #       "has_key_handle": false,
    880     #       "last_error": 0,
    881     #       "owned": true
    882     #    }
    883     # }
    884     try:
    885         output = host.run('cryptohome --action=status').stdout.strip()
    886         status = json.loads(output)
    887         if 'tpm' not in status:
    888             raise hosts.AutoservVerifyError('TPM status is missing')
    889         return status
    890     except ValueError:
    891         raise hosts.AutoservVerifyError('Unable to parse cryptohome status')
    892