Home | History | Annotate | Download | only in deployment
      1 #!/usr/bin/env python
      2 # Copyright 2015 The Chromium OS Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Install an initial test image on a set of DUTs.
      7 
      8 The methods in this module are meant for two nominally distinct use
      9 cases that share a great deal of code internally.  The first use
     10 case is for deployment of DUTs that have just been placed in the lab
     11 for the first time.  The second use case is for use after repairing
     12 a servo.
     13 
     14 Newly deployed DUTs may be in a somewhat anomalous state:
     15   * The DUTs are running a production base image, not a test image.
     16     By extension, the DUTs aren't reachable over SSH.
     17   * The DUTs are not necessarily in the AFE database.  DUTs that
     18     _are_ in the database should be locked.  Either way, the DUTs
     19     cannot be scheduled to run tests.
     20   * The servos for the DUTs need not be configured with the proper
     21     overlay.
     22 
     23 More broadly, it's not expected that the DUT will be working at the
     24 start of this operation.  If the DUT isn't working at the end of the
     25 operation, an error will be reported.
     26 
     27 The script performs the following functions:
     28   * Configure the servo for the target overlay, and test that the
     29     servo is generally in good order.
     30   * For the full deployment case, install dev-signed RO firmware
     31     from the designated stable test image for the DUTs.
     32   * For both cases, use servo to install the stable test image from
     33     USB.
     34   * If the DUT isn't in the AFE database, add it.
     35 
     36 The script imposes these preconditions:
     37   * Every DUT has a properly connected servo.
     38   * Every DUT and servo have proper DHCP and DNS configurations.
     39   * Every servo host is up and running, and accessible via SSH.
     40   * There is a known, working test image that can be staged and
     41     installed on the target DUTs via servo.
     42   * Every DUT has the same board and model.
     43   * For the full deployment case, every DUT must be in dev mode,
     44     and configured to allow boot from USB with ctrl+U.
     45 
     46 The implementation uses the `multiprocessing` module to run all
     47 installations in parallel, separate processes.
     48 
     49 """
     50 
     51 import atexit
     52 from collections import namedtuple
     53 import functools
     54 import json
     55 import logging
     56 import multiprocessing
     57 import os
     58 import shutil
     59 import sys
     60 import tempfile
     61 import time
     62 import traceback
     63 
     64 from chromite.lib import gs
     65 
     66 import common
     67 from autotest_lib.client.common_lib import error
     68 from autotest_lib.client.common_lib import host_states
     69 from autotest_lib.client.common_lib import time_utils
     70 from autotest_lib.client.common_lib import utils
     71 from autotest_lib.client.common_lib.cros import retry
     72 from autotest_lib.server import afe_utils
     73 from autotest_lib.server import constants
     74 from autotest_lib.server import frontend
     75 from autotest_lib.server import hosts
     76 from autotest_lib.server.cros.dynamic_suite.constants import VERSION_PREFIX
     77 from autotest_lib.server.hosts import afe_store
     78 from autotest_lib.server.hosts import servo_host
     79 from autotest_lib.site_utils.deployment import cmdvalidate
     80 from autotest_lib.site_utils.deployment.prepare import dut as preparedut
     81 from autotest_lib.site_utils.stable_images import build_data
     82 from autotest_lib.utils import labellib
     83 
     84 
     85 _LOG_FORMAT = '%(asctime)s | %(levelname)-10s | %(message)s'
     86 
     87 _DEFAULT_POOL = constants.Labels.POOL_PREFIX + 'suites'
     88 
     89 _DIVIDER = '\n============\n'
     90 
     91 _LOG_BUCKET_NAME = 'chromeos-install-logs'
     92 
     93 _OMAHA_STATUS = 'gs://chromeos-build-release-console/omaha_status.json'
     94 
     95 # Lock reasons we'll pass when locking DUTs, depending on the
     96 # host's prior state.
     97 _LOCK_REASON_EXISTING = 'Repairing or deploying an existing host'
     98 _LOCK_REASON_NEW_HOST = 'Repairing or deploying a new host'
     99 
    100 _ReportResult = namedtuple('_ReportResult', ['hostname', 'message'])
    101 
    102 
    103 class InstallFailedError(Exception):
    104     """Generic error raised explicitly in this module."""
    105 
    106 
    107 class _NoAFEServoPortError(InstallFailedError):
    108     """Exception when there is no servo port stored in the AFE."""
    109 
    110 
    111 class _MultiFileWriter(object):
    112 
    113     """Group file objects for writing at once."""
    114 
    115     def __init__(self, files):
    116         """Initialize _MultiFileWriter.
    117 
    118         @param files  Iterable of file objects for writing.
    119         """
    120         self._files = files
    121 
    122     def write(self, s):
    123         """Write a string to the files.
    124 
    125         @param s  Write this string.
    126         """
    127         for file in self._files:
    128             file.write(s)
    129 
    130 
    131 def _get_upload_log_path(arguments):
    132     return 'gs://{bucket}/{name}'.format(
    133         bucket=_LOG_BUCKET_NAME,
    134         name=arguments.upload_basename)
    135 
    136 
    137 def _upload_logs(dirpath, gspath):
    138     """Upload report logs to Google Storage.
    139 
    140     @param dirpath  Path to directory containing the logs.
    141     @param gspath   Path to GS bucket.
    142     """
    143     ctx = gs.GSContext()
    144     ctx.Copy(dirpath, gspath, recursive=True)
    145 
    146 
    147 def _get_omaha_build(board):
    148     """Get the currently preferred Beta channel build for `board`.
    149 
    150     Open and read through the JSON file provided by GoldenEye that
    151     describes what version Omaha is currently serving for all boards
    152     on all channels.  Find the entry for `board` on the Beta channel,
    153     and return that version string.
    154 
    155     @param board  The board to look up from GoldenEye.
    156 
    157     @return Returns a Chrome OS version string in standard form
    158             R##-####.#.#.  Will return `None` if no Beta channel
    159             entry is found.
    160     """
    161     ctx = gs.GSContext()
    162     omaha_status = json.loads(ctx.Cat(_OMAHA_STATUS))
    163     omaha_board = board.replace('_', '-')
    164     for e in omaha_status['omaha_data']:
    165         if (e['channel'] == 'beta' and
    166                 e['board']['public_codename'] == omaha_board):
    167             milestone = e['chrome_version'].split('.')[0]
    168             build = e['chrome_os_version']
    169             return 'R%s-%s' % (milestone, build)
    170     return None
    171 
    172 
    173 def _update_build(afe, report_log, arguments):
    174     """Update the stable_test_versions table.
    175 
    176     This calls the `set_stable_version` RPC call to set the stable
    177     repair version selected by this run of the command.  Additionally,
    178     this updates the stable firmware for the board.  The repair version
    179     is selected from three possible versions:
    180       * The stable test version currently in the AFE database.
    181       * The version Omaha is currently serving as the Beta channel
    182         build.
    183       * The version supplied by the user.
    184     The actual version selected will be whichever of these three is
    185     the most up-to-date version.
    186 
    187     The stable firmware version will be set to whatever firmware is
    188     bundled in the selected repair image. If the selected repair image bundles
    189     firmware for more than one model, then the firmware for every model in the
    190     build will be updated.
    191 
    192     This function will log information about the available versions
    193     prior to selection.  After selection the repair and firmware
    194     versions slected will be logged.
    195 
    196     @param afe          AFE object for RPC calls.
    197     @param report_log   File-like object for logging report output.
    198     @param arguments    Command line arguments with options.
    199 
    200     @return Returns the version selected.
    201     """
    202     # Gather the current AFE and Omaha version settings, and report them
    203     # to the user.
    204     cros_version_map = afe.get_stable_version_map(afe.CROS_IMAGE_TYPE)
    205     fw_version_map = afe.get_stable_version_map(afe.FIRMWARE_IMAGE_TYPE)
    206     afe_cros = cros_version_map.get_version(arguments.board)
    207     afe_fw = fw_version_map.get_version(arguments.board)
    208     omaha_cros = _get_omaha_build(arguments.board)
    209     report_log.write('AFE    version is %s.\n' % afe_cros)
    210     report_log.write('Omaha  version is %s.\n' % omaha_cros)
    211     report_log.write('AFE   firmware is %s.\n' % afe_fw)
    212     cros_version = afe_cros
    213 
    214     # Check whether we should upgrade the repair build to either
    215     # the Omaha or the user's requested build.  If we do, we must
    216     # also update the firmware version.
    217     if (omaha_cros is not None
    218             and (cros_version is None or
    219                  utils.compare_versions(cros_version, omaha_cros) < 0)):
    220         cros_version = omaha_cros
    221     if arguments.build and arguments.build != cros_version:
    222         if (cros_version is None
    223                 or utils.compare_versions(cros_version, arguments.build) < 0):
    224             cros_version = arguments.build
    225         else:
    226             report_log.write('Selected version %s is too old; '
    227                              'using version %s'
    228                              % (arguments.build, cros_version))
    229 
    230     afe_fw_versions = {arguments.board: afe_fw}
    231     fw_versions = build_data.get_firmware_versions(
    232         arguments.board, cros_version)
    233     # At this point `cros_version` is our new repair build, and
    234     # `fw_version` is our new target firmware.  Call the AFE back with
    235     # updates as necessary.
    236     if not arguments.dry_run:
    237         if cros_version != afe_cros:
    238             cros_version_map.set_version(arguments.board, cros_version)
    239 
    240             if fw_versions != afe_fw_versions:
    241                 for model, fw_version in fw_versions.iteritems():
    242                     if fw_version is not None:
    243                         fw_version_map.set_version(model, fw_version)
    244                     else:
    245                         fw_version_map.delete_version(model)
    246 
    247     # Report the new state of the world.
    248     report_log.write(_DIVIDER)
    249     report_log.write('Repair CrOS version for board %s is now %s.\n' %
    250                      (arguments.board, cros_version))
    251     for model, fw_version in fw_versions.iteritems():
    252         report_log.write('Firmware version for model %s is now %s.\n' %
    253                          (model, fw_version))
    254     return cros_version
    255 
    256 
    257 def _create_host(hostname, afe, afe_host):
    258     """Create a CrosHost object for the DUT.
    259 
    260     This host object is used to update AFE label information for the DUT, but
    261     can not be used for installation image on the DUT. In particular, this host
    262     object does not have the servo attribute populated.
    263 
    264     @param hostname  Hostname of the target DUT.
    265     @param afe       A frontend.AFE object.
    266     @param afe_host  AFE Host object for the DUT.
    267     """
    268     machine_dict = {
    269             'hostname': hostname,
    270             'afe_host': afe_host,
    271             'host_info_store': afe_store.AfeStore(hostname, afe),
    272     }
    273     return hosts.create_host(machine_dict)
    274 
    275 
    276 def _try_lock_host(afe_host):
    277     """Lock a host in the AFE, and report whether it succeeded.
    278 
    279     The lock action is logged regardless of success; failures are
    280     logged if they occur.
    281 
    282     @param afe_host AFE Host instance to be locked.
    283 
    284     @return `True` on success, or `False` on failure.
    285     """
    286     try:
    287         logging.warning('Locking host now.')
    288         afe_host.modify(locked=True,
    289                         lock_reason=_LOCK_REASON_EXISTING)
    290     except Exception as e:
    291         logging.exception('Failed to lock: %s', e)
    292         return False
    293     return True
    294 
    295 
    296 def _try_unlock_host(afe_host):
    297     """Unlock a host in the AFE, and report whether it succeeded.
    298 
    299     The unlock action is logged regardless of success; failures are
    300     logged if they occur.
    301 
    302     @param afe_host AFE Host instance to be unlocked.
    303 
    304     @return `True` on success, or `False` on failure.
    305     """
    306     try:
    307         logging.warning('Unlocking host.')
    308         afe_host.modify(locked=False, lock_reason='')
    309     except Exception as e:
    310         logging.exception('Failed to unlock: %s', e)
    311         return False
    312     return True
    313 
    314 
    315 def _update_host_attributes(afe, hostname, host_attrs):
    316     """Update the attributes for a given host.
    317 
    318     @param afe          AFE object for RPC calls.
    319     @param hostname     Host name of the DUT.
    320     @param host_attrs   Dictionary with attributes to be applied to the
    321                         host.
    322     """
    323     s_hostname, s_port, s_serial = _extract_servo_attributes(hostname,
    324                                                              host_attrs)
    325     afe.set_host_attribute(servo_host.SERVO_HOST_ATTR,
    326                            s_hostname,
    327                            hostname=hostname)
    328     afe.set_host_attribute(servo_host.SERVO_PORT_ATTR,
    329                            s_port,
    330                            hostname=hostname)
    331     if s_serial:
    332         afe.set_host_attribute(servo_host.SERVO_SERIAL_ATTR,
    333                                s_serial,
    334                                hostname=hostname)
    335 
    336 
    337 def _extract_servo_attributes(hostname, host_attrs):
    338     """Extract servo attributes from the host attribute dict, setting defaults.
    339 
    340     @return (servo_hostname, servo_port, servo_serial)
    341     """
    342     # Grab the servo hostname/port/serial from `host_attrs` if supplied.
    343     # For new servo V4 deployments, we require the user to supply the
    344     # attributes (because there are no appropriate defaults).  So, if
    345     # none are supplied, we assume it can't be V4, and apply the
    346     # defaults for servo V3.
    347     s_hostname = (host_attrs.get(servo_host.SERVO_HOST_ATTR) or
    348                   servo_host.make_servo_hostname(hostname))
    349     s_port = (host_attrs.get(servo_host.SERVO_PORT_ATTR) or
    350               str(servo_host.ServoHost.DEFAULT_PORT))
    351     s_serial = host_attrs.get(servo_host.SERVO_SERIAL_ATTR)
    352     return s_hostname, s_port, s_serial
    353 
    354 
    355 def _wait_for_idle(afe, host_id):
    356     """Helper function for `_ensure_host_idle`.
    357 
    358     Poll the host with the given `host_id` via `afe`, waiting for it
    359     to become idle.  Run forever; the caller takes care of timing out.
    360 
    361     @param afe        AFE object for RPC calls.
    362     @param host_id    Id of the host that's expected to become idle.
    363     """
    364     while True:
    365         afe_host = afe.get_hosts(id=host_id)[0]
    366         if afe_host.status in host_states.IDLE_STATES:
    367             return
    368         # Let's not spam our server.
    369         time.sleep(0.2)
    370 
    371 
    372 def _ensure_host_idle(afe, afe_host):
    373     """Abort any special task running on `afe_host`.
    374 
    375     The given `afe_host` is currently locked.  If there's a special task
    376     running on the given `afe_host`, abort it, then wait for the host to
    377     show up as idle, return whether the operation succeeded.
    378 
    379     @param afe        AFE object for RPC calls.
    380     @param afe_host   Host to be aborted.
    381 
    382     @return A true value if the host is idle at return, or a false value
    383         if the host wasn't idle after some reasonable time.
    384     """
    385     # We need to talk to the shard, not the master, for at least two
    386     # reasons:
    387     #   * The `abort_special_tasks` RPC doesn't forward from the master
    388     #     to the shard, and only the shard has access to the special
    389     #     tasks.
    390     #   * Host status on the master can lag actual status on the shard
    391     #     by several minutes.  Only the shard can provide status
    392     #     guaranteed to post-date the call to lock the DUT.
    393     if afe_host.shard:
    394         afe = frontend.AFE(server=afe_host.shard)
    395     afe_host = afe.get_hosts(id=afe_host.id)[0]
    396     if afe_host.status in host_states.IDLE_STATES:
    397         return True
    398     afe.run('abort_special_tasks', host_id=afe_host.id, is_active=1)
    399     return not retry.timeout(_wait_for_idle, (afe, afe_host.id),
    400                              timeout_sec=5.0)[0]
    401 
    402 
    403 def _get_afe_host(afe, hostname, host_attrs, arguments):
    404     """Get an AFE Host object for the given host.
    405 
    406     If the host is found in the database, return the object
    407     from the RPC call with the updated attributes in host_attr_dict.
    408 
    409     If no host is found, create one with appropriate servo
    410     attributes and the given board label.
    411 
    412     @param afe          AFE object for RPC calls.
    413     @param hostname     Host name of the DUT.
    414     @param host_attrs   Dictionary with attributes to be applied to the
    415                         host.
    416     @param arguments    Command line arguments with options.
    417 
    418     @return A tuple of the afe_host, plus a flag. The flag indicates
    419             whether the Host should be unlocked if subsequent operations
    420             fail.  (Hosts are always unlocked after success).
    421     """
    422     hostlist = afe.get_hosts([hostname])
    423     unlock_on_failure = False
    424     if hostlist:
    425         afe_host = hostlist[0]
    426         if not afe_host.locked:
    427             if _try_lock_host(afe_host):
    428                 unlock_on_failure = True
    429             else:
    430                 raise Exception('Failed to lock host')
    431         if not _ensure_host_idle(afe, afe_host):
    432             if unlock_on_failure and not _try_unlock_host(afe_host):
    433                 raise Exception('Failed to abort host, and failed to unlock it')
    434             raise Exception('Failed to abort task on host')
    435         # This host was pre-existing; if the user didn't supply
    436         # attributes, don't update them, because the defaults may
    437         # not be correct.
    438         if host_attrs:
    439             _update_host_attributes(afe, hostname, host_attrs)
    440     else:
    441         afe_host = afe.create_host(hostname,
    442                                    locked=True,
    443                                    lock_reason=_LOCK_REASON_NEW_HOST)
    444         _update_host_attributes(afe, hostname, host_attrs)
    445 
    446     # Correct board/model label is critical to installation. Always ensure user
    447     # supplied board/model matches the AFE information.
    448     _ensure_label_in_afe(afe_host, 'board', arguments.board)
    449     _ensure_label_in_afe(afe_host, 'model', arguments.model)
    450 
    451     afe_host = afe.get_hosts([hostname])[0]
    452     return afe_host, unlock_on_failure
    453 
    454 
    455 def _ensure_label_in_afe(afe_host, label_name, label_value):
    456     """Add the given board label, only if one doesn't already exist.
    457 
    458     @params label_name  name of the label, e.g. 'board', 'model', etc.
    459     @params label_value value of the label.
    460 
    461     @raises InstallFailedError if supplied board  is different from existing
    462             board in AFE.
    463     """
    464     if not label_value:
    465         return
    466 
    467     labels = labellib.LabelsMapping(afe_host.labels)
    468     if label_name not in labels:
    469         afe_host.add_labels(['%s:%s' % (label_name, label_value)])
    470         return
    471 
    472     existing_value = labels[label_name]
    473     if label_value != existing_value:
    474         raise InstallFailedError(
    475                 'provided %s %s does not match the %s %s for host %s' %
    476                 (label_name, label_value, label_name, existing_value,
    477                  afe_host.hostname))
    478 
    479 
    480 def _create_host_for_installation(host, arguments):
    481     """Creates a context manager of hosts.CrosHost object for installation.
    482 
    483     The host object yielded by the returned context manager is agnostic of the
    484     infrastructure environment. In particular, it does not have any references
    485     to the AFE.
    486 
    487     @param host: A server.hosts.CrosHost object.
    488     @param arguments: Parsed commandline arguments for this script.
    489 
    490     @return a context manager which yields hosts.CrosHost object.
    491     """
    492     info = host.host_info_store.get()
    493     s_host, s_port, s_serial = _extract_servo_attributes(host.hostname,
    494                                                          info.attributes)
    495     return preparedut.create_host(host.hostname, arguments.board,
    496                                   arguments.model, s_host, s_port, s_serial,
    497                                   arguments.logdir)
    498 
    499 
    500 def _install_test_image(host, arguments):
    501     """Install a test image to the DUT.
    502 
    503     Install a stable test image on the DUT using the full servo
    504     repair flow.
    505 
    506     @param host       Host instance for the DUT being installed.
    507     @param arguments  Command line arguments with options.
    508     """
    509     repair_image = _get_cros_repair_image_name(host)
    510     logging.info('Using repair image %s', repair_image)
    511     if arguments.dry_run:
    512         return
    513     if arguments.stageusb:
    514         try:
    515             preparedut.download_image_to_servo_usb(host, repair_image)
    516         except Exception as e:
    517             logging.exception('Failed to stage image on USB: %s', e)
    518             raise Exception('USB staging failed')
    519     if arguments.install_firmware:
    520         try:
    521             if arguments.using_servo:
    522                 logging.debug('Install FW using servo.')
    523                 preparedut.flash_firmware_using_servo(host, repair_image)
    524             else:
    525                 logging.debug('Install FW by chromeos-firmwareupdate.')
    526                 preparedut.install_firmware(host, arguments.force_firmware)
    527         except error.AutoservRunError as e:
    528             logging.exception('Firmware update failed: %s', e)
    529             msg = '%s failed' % (
    530                     'Flashing firmware using servo' if arguments.using_servo
    531                     else 'chromeos-firmwareupdate')
    532             raise Exception(msg)
    533     if arguments.install_test_image:
    534         try:
    535             preparedut.install_test_image(host)
    536         except error.AutoservRunError as e:
    537             logging.exception('Failed to install: %s', e)
    538             raise Exception('chromeos-install failed')
    539 
    540 
    541 def _install_and_update_afe(afe, hostname, host_attrs, arguments):
    542     """Perform all installation and AFE updates.
    543 
    544     First, lock the host if it exists and is unlocked.  Then,
    545     install the test image on the DUT.  At the end, unlock the
    546     DUT, unless the installation failed and the DUT was locked
    547     before we started.
    548 
    549     If installation succeeds, make sure the DUT is in the AFE,
    550     and make sure that it has basic labels.
    551 
    552     @param afe          AFE object for RPC calls.
    553     @param hostname     Host name of the DUT.
    554     @param host_attrs   Dictionary with attributes to be applied to the
    555                         host.
    556     @param arguments    Command line arguments with options.
    557     """
    558     afe_host, unlock_on_failure = _get_afe_host(afe, hostname, host_attrs,
    559                                                 arguments)
    560     host = None
    561     try:
    562         host = _create_host(hostname, afe, afe_host)
    563         with _create_host_for_installation(host, arguments) as host_to_install:
    564             _install_test_image(host_to_install, arguments)
    565 
    566         if arguments.install_test_image and not arguments.dry_run:
    567             host.labels.update_labels(host)
    568             platform_labels = afe.get_labels(
    569                     host__hostname=hostname, platform=True)
    570             if not platform_labels:
    571                 platform = host.get_platform()
    572                 new_labels = afe.get_labels(name=platform)
    573                 if not new_labels:
    574                     afe.create_label(platform, platform=True)
    575                 afe_host.add_labels([platform])
    576         version = [label for label in afe_host.labels
    577                        if label.startswith(VERSION_PREFIX)]
    578         if version and not arguments.dry_run:
    579             afe_host.remove_labels(version)
    580     except Exception as e:
    581         if unlock_on_failure and not _try_unlock_host(afe_host):
    582             logging.error('Failed to unlock host!')
    583         raise
    584     finally:
    585         if host is not None:
    586             host.close()
    587 
    588     if not _try_unlock_host(afe_host):
    589         raise Exception('Install succeeded, but failed to unlock the DUT.')
    590 
    591 
    592 def _install_dut(arguments, host_attr_dict, hostname):
    593     """Deploy or repair a single DUT.
    594 
    595     @param arguments       Command line arguments with options.
    596     @param host_attr_dict  Dict mapping hostnames to attributes to be
    597                            stored in the AFE.
    598     @param hostname        Host name of the DUT to install on.
    599 
    600     @return On success, return `None`.  On failure, return a string
    601             with an error message.
    602     """
    603     # In some cases, autotest code that we call during install may
    604     # put stuff onto stdout with 'print' statements.  Most notably,
    605     # the AFE frontend may print 'FAILED RPC CALL' (boo, hiss).  We
    606     # want nothing from this subprocess going to the output we
    607     # inherited from our parent, so redirect stdout and stderr, before
    608     # we make any AFE calls.  Note that this is reasonable because we're
    609     # in a subprocess.
    610 
    611     logpath = os.path.join(arguments.logdir, hostname + '.log')
    612     logfile = open(logpath, 'w')
    613     sys.stderr = sys.stdout = logfile
    614     _configure_logging_to_file(logfile)
    615 
    616     afe = frontend.AFE(server=arguments.web)
    617     try:
    618         _install_and_update_afe(afe, hostname,
    619                                 host_attr_dict.get(hostname, {}),
    620                                 arguments)
    621     except Exception as e:
    622         logging.exception('Original exception: %s', e)
    623         return str(e)
    624     return None
    625 
    626 
    627 def _report_hosts(report_log, heading, host_results_list):
    628     """Report results for a list of hosts.
    629 
    630     To improve visibility, results are preceded by a header line,
    631     followed by a divider line.  Then results are printed, one host
    632     per line.
    633 
    634     @param report_log         File-like object for logging report
    635                               output.
    636     @param heading            The header string to be printed before
    637                               results.
    638     @param host_results_list  A list of _ReportResult tuples
    639                               to be printed one per line.
    640     """
    641     if not host_results_list:
    642         return
    643     report_log.write(heading)
    644     report_log.write(_DIVIDER)
    645     for result in host_results_list:
    646         report_log.write('{result.hostname:30} {result.message}\n'
    647                          .format(result=result))
    648     report_log.write('\n')
    649 
    650 
    651 def _report_results(afe, report_log, hostnames, results):
    652     """Gather and report a summary of results from installation.
    653 
    654     Segregate results into successes and failures, reporting
    655     each separately.  At the end, report the total of successes
    656     and failures.
    657 
    658     @param afe          AFE object for RPC calls.
    659     @param report_log   File-like object for logging report output.
    660     @param hostnames    List of the hostnames that were tested.
    661     @param results      List of error messages, in the same order
    662                         as the hostnames.  `None` means the
    663                         corresponding host succeeded.
    664     """
    665     successful_hosts = []
    666     success_reports = []
    667     failure_reports = []
    668     for result, hostname in zip(results, hostnames):
    669         if result is None:
    670             successful_hosts.append(hostname)
    671         else:
    672             failure_reports.append(_ReportResult(hostname, result))
    673     if successful_hosts:
    674         afe.repair_hosts(hostnames=successful_hosts)
    675         for h in afe.get_hosts(hostnames=successful_hosts):
    676             for label in h.labels:
    677                 if label.startswith(constants.Labels.POOL_PREFIX):
    678                     result = _ReportResult(h.hostname,
    679                                            'Host already in %s' % label)
    680                     success_reports.append(result)
    681                     break
    682             else:
    683                 h.add_labels([_DEFAULT_POOL])
    684                 result = _ReportResult(h.hostname,
    685                                        'Host added to %s' % _DEFAULT_POOL)
    686                 success_reports.append(result)
    687     report_log.write(_DIVIDER)
    688     _report_hosts(report_log, 'Successes', success_reports)
    689     _report_hosts(report_log, 'Failures', failure_reports)
    690     report_log.write(
    691         'Installation complete:  %d successes, %d failures.\n' %
    692         (len(success_reports), len(failure_reports)))
    693 
    694 
    695 def _clear_root_logger_handlers():
    696     """Remove all handlers from root logger."""
    697     root_logger = logging.getLogger()
    698     for h in root_logger.handlers:
    699         root_logger.removeHandler(h)
    700 
    701 
    702 def _configure_logging_to_file(logfile):
    703     """Configure the logging module for `install_duts()`.
    704 
    705     @param log_file  Log file object.
    706     """
    707     _clear_root_logger_handlers()
    708     handler = logging.StreamHandler(logfile)
    709     formatter = logging.Formatter(_LOG_FORMAT, time_utils.TIME_FMT)
    710     handler.setFormatter(formatter)
    711     root_logger = logging.getLogger()
    712     root_logger.addHandler(handler)
    713 
    714 
    715 def _get_used_servo_ports(servo_hostname, afe):
    716     """
    717     Return a list of used servo ports for the given servo host.
    718 
    719     @param servo_hostname:  Hostname of the servo host to check for.
    720     @param afe:             AFE instance.
    721 
    722     @returns a list of used ports for the given servo host.
    723     """
    724     used_ports = []
    725     host_list = afe.get_hosts_by_attribute(
    726             attribute=servo_host.SERVO_HOST_ATTR, value=servo_hostname)
    727     for host in host_list:
    728         afe_host = afe.get_hosts(hostname=host)
    729         if afe_host:
    730             servo_port = afe_host[0].attributes.get(servo_host.SERVO_PORT_ATTR)
    731             if servo_port:
    732                 used_ports.append(int(servo_port))
    733     return used_ports
    734 
    735 
    736 def _get_free_servo_port(servo_hostname, used_servo_ports, afe):
    737     """
    738     Get a free servo port for the servo_host.
    739 
    740     @param servo_hostname:    Hostname of the servo host.
    741     @param used_servo_ports:  Dict of dicts that contain the list of used ports
    742                               for the given servo host.
    743     @param afe:               AFE instance.
    744 
    745     @returns a free servo port if servo_hostname is non-empty, otherwise an
    746         empty string.
    747     """
    748     used_ports = []
    749     servo_port = servo_host.ServoHost.DEFAULT_PORT
    750     # If no servo hostname was specified we can assume we're dealing with a
    751     # servo v3 or older deployment since the servo hostname can be
    752     # inferred from the dut hostname (by appending '-servo' to it).  We only
    753     # need to find a free port if we're using a servo v4 since we can use the
    754     # default port for v3 and older.
    755     if not servo_hostname:
    756         return ''
    757     # If we haven't checked this servo host yet, check the AFE if other duts
    758     # used this servo host and grab the ports specified for them.
    759     elif servo_hostname not in used_servo_ports:
    760         used_ports = _get_used_servo_ports(servo_hostname, afe)
    761     else:
    762         used_ports = used_servo_ports[servo_hostname]
    763     used_ports.sort()
    764     if used_ports:
    765         # Range is taken from servod.py in hdctools.
    766         start_port = servo_host.ServoHost.DEFAULT_PORT
    767         end_port = start_port - 99
    768         # We'll choose first port available in descending order.
    769         for port in xrange(start_port, end_port - 1, -1):
    770             if port not in used_ports:
    771               servo_port = port
    772               break
    773     used_ports.append(servo_port)
    774     used_servo_ports[servo_hostname] = used_ports
    775     return servo_port
    776 
    777 
    778 def _get_afe_servo_port(host_info, afe):
    779     """
    780     Get the servo port from the afe if it matches the same servo host hostname.
    781 
    782     @param host_info   HostInfo tuple (hostname, host_attr_dict).
    783 
    784     @returns Servo port (int) if servo host hostname matches the one specified
    785     host_info.host_attr_dict, otherwise None.
    786 
    787     @raises _NoAFEServoPortError: When there is no stored host info or servo
    788         port host attribute in the AFE for the given host.
    789     """
    790     afe_hosts = afe.get_hosts(hostname=host_info.hostname)
    791     if not afe_hosts:
    792         raise _NoAFEServoPortError
    793 
    794     servo_port = afe_hosts[0].attributes.get(servo_host.SERVO_PORT_ATTR)
    795     afe_servo_host = afe_hosts[0].attributes.get(servo_host.SERVO_HOST_ATTR)
    796     host_info_servo_host = host_info.host_attr_dict.get(
    797         servo_host.SERVO_HOST_ATTR)
    798 
    799     if afe_servo_host == host_info_servo_host and servo_port:
    800         return int(servo_port)
    801     else:
    802         raise _NoAFEServoPortError
    803 
    804 
    805 def _get_host_attributes(host_info_list, afe):
    806     """
    807     Get host attributes if a hostname_file was supplied.
    808 
    809     @param host_info_list   List of HostInfo tuples (hostname, host_attr_dict).
    810 
    811     @returns Dict of attributes from host_info_list.
    812     """
    813     host_attributes = {}
    814     # We need to choose servo ports for these hosts but we need to make sure
    815     # we don't choose ports already used. We'll store all used ports in a
    816     # dict of lists where the key is the servo_host and the val is a list of
    817     # ports used.
    818     used_servo_ports = {}
    819     for host_info in host_info_list:
    820         host_attr_dict = host_info.host_attr_dict
    821         # If the host already has an entry in the AFE that matches the same
    822         # servo host hostname and the servo port is set, use that port.
    823         try:
    824             host_attr_dict[servo_host.SERVO_PORT_ATTR] = _get_afe_servo_port(
    825                 host_info, afe)
    826         except _NoAFEServoPortError:
    827             host_attr_dict[servo_host.SERVO_PORT_ATTR] = _get_free_servo_port(
    828                 host_attr_dict[servo_host.SERVO_HOST_ATTR], used_servo_ports,
    829                 afe)
    830         host_attributes[host_info.hostname] = host_attr_dict
    831     return host_attributes
    832 
    833 
    834 def _get_cros_repair_image_name(host):
    835     """Get the CrOS repair image name for given host.
    836 
    837     @param host: hosts.CrosHost object. This object need not have an AFE
    838                  reference.
    839     """
    840     info = host.host_info_store.get()
    841     if not info.board:
    842         raise InstallFailedError('Unknown board for given host')
    843     return afe_utils.get_stable_cros_image_name(info.board)
    844 
    845 
    846 def install_duts(arguments):
    847     """Install a test image on DUTs, and deploy them.
    848 
    849     This handles command line parsing for both the repair and
    850     deployment commands.  The two operations are largely identical;
    851     the main difference is that full deployment includes flashing
    852     dev-signed firmware on the DUT prior to installing the test
    853     image.
    854 
    855     @param arguments    Command line arguments with options, as
    856                         returned by `argparse.Argparser`.
    857     """
    858     arguments = cmdvalidate.validate_arguments(arguments)
    859     if arguments is None:
    860         sys.exit(1)
    861     sys.stderr.write('Installation output logs in %s\n' % arguments.logdir)
    862 
    863     # Override tempfile.tempdir.  Some of the autotest code we call
    864     # will create temporary files that don't get cleaned up.  So, we
    865     # put the temp files in our results directory, so that we can
    866     # clean up everything at one fell swoop.
    867     tempfile.tempdir = tempfile.mkdtemp()
    868     atexit.register(shutil.rmtree, tempfile.tempdir)
    869 
    870     # We don't want to distract the user with logging output, so we catch
    871     # logging output in a file.
    872     logging_file_path = os.path.join(arguments.logdir, 'debug.log')
    873     logfile = open(logging_file_path, 'w')
    874     _configure_logging_to_file(logfile)
    875 
    876     report_log_path = os.path.join(arguments.logdir, 'report.log')
    877     with open(report_log_path, 'w') as report_log_file:
    878         report_log = _MultiFileWriter([report_log_file, sys.stdout])
    879         afe = frontend.AFE(server=arguments.web)
    880         if arguments.dry_run:
    881             report_log.write('Dry run - installation and most testing '
    882                              'will be skipped.\n')
    883         current_build = _update_build(afe, report_log, arguments)
    884         host_attr_dict = _get_host_attributes(arguments.host_info_list, afe)
    885         install_pool = multiprocessing.Pool(len(arguments.hostnames))
    886         install_function = functools.partial(_install_dut, arguments,
    887                                              host_attr_dict)
    888         results_list = install_pool.map(install_function, arguments.hostnames)
    889         _report_results(afe, report_log, arguments.hostnames, results_list)
    890 
    891     if arguments.upload:
    892         try:
    893             gspath = _get_upload_log_path(arguments)
    894             sys.stderr.write('Logs will be uploaded to %s\n' % (gspath,))
    895             _upload_logs(arguments.logdir, gspath)
    896         except Exception as e:
    897             upload_failure_log_path = os.path.join(arguments.logdir,
    898                                                    'gs_upload_failure.log')
    899             with open(upload_failure_log_path, 'w') as file:
    900                 traceback.print_exc(limit=None, file=file)
    901             sys.stderr.write('Failed to upload logs;'
    902                              ' failure details are stored in {}.\n'
    903                              .format(upload_failure_log_path))
    904