Home | History | Annotate | Download | only in site_utils
      1 #!/usr/bin/python
      2 # Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Runs on autotest servers from a cron job to self update them.
      7 
      8 This script is designed to run on all autotest servers to allow them to
      9 automatically self-update based on the manifests used to create their (existing)
     10 repos.
     11 """
     12 
     13 from __future__ import print_function
     14 
     15 import ConfigParser
     16 import argparse
     17 import os
     18 import re
     19 import subprocess
     20 import socket
     21 import sys
     22 import time
     23 
     24 import common
     25 
     26 from autotest_lib.client.common_lib import global_config
     27 from autotest_lib.server import utils as server_utils
     28 from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
     29 
     30 
# Seconds to sleep after restarting services before sampling their status,
# and again before re-sampling it to see whether the status changed.
SERVICE_STABILITY_TIMER = 60

# Maps each update command name defined in the config file to the path
# (relative to the autotest directory) whose latest commit decides whether
# that command needs to run again. E.g. if nothing changed under the frontend
# client path, there is no need to update afe.
COMMANDS_TO_REPOS_DICT = {'afe': 'frontend/client/',
                          'tko': 'frontend/client/'}
# Name of the update command that receives an extra flag when chromite should
# be updated to master (see update_command()).
BUILD_EXTERNALS_COMMAND = 'build_externals'

# File listing the services to restart, one name per line.
# NOTE: os.environ['HOME'] raises KeyError at import time if HOME is unset.
_RESTART_SERVICES_FILE = os.path.join(os.environ['HOME'],
                                      'push_restart_services')

# AFE client built at import time. NOTE(review): nothing in the visible part
# of this file references AFE - confirm whether it is still needed.
AFE = frontend_wrappers.RetryingAFE(
        server=server_utils.get_global_afe_hostname(), timeout_min=5,
        delay_sec=10)
# Name of the local host; included in error messages raised by this script.
HOSTNAME = socket.gethostname()
     48 
     49 class DirtyTreeException(Exception):
     50     """Raised when the tree has been modified in an unexpected way."""
     51 
     52 
     53 class UnknownCommandException(Exception):
     54     """Raised when we try to run a command name with no associated command."""
     55 
     56 
     57 class UnstableServices(Exception):
     58     """Raised if a service appears unstable after restart."""
     59 
     60 
     61 def strip_terminal_codes(text):
     62     """This function removes all terminal formatting codes from a string.
     63 
     64     @param text: String of text to cleanup.
     65     @returns String with format codes removed.
     66     """
     67     ESC = '\x1b'
     68     return re.sub(ESC+r'\[[^m]*m', '', text)
     69 
     70 
     71 def _clean_pyc_files():
     72     print('Removing .pyc files')
     73     try:
     74         subprocess.check_output([
     75                 'find', '.',
     76                 '(',
     77                 # These are ignored to reduce IO load (crbug.com/759780).
     78                 '-path', './site-packages',
     79                 '-o', '-path', './containers',
     80                 '-o', '-path', './logs',
     81                 '-o', '-path', './results',
     82                 ')',
     83                 '-prune',
     84                 '-o', '-name', '*.pyc',
     85                 '-exec', 'rm', '-f', '{}', '+'])
     86     except Exception as e:
     87         print('Warning: fail to remove .pyc! %s' % e)
     88 
     89 
     90 def verify_repo_clean():
     91     """This function cleans the current repo then verifies that it is valid.
     92 
     93     @raises DirtyTreeException if the repo is still not clean.
     94     @raises subprocess.CalledProcessError on a repo command failure.
     95     """
     96     subprocess.check_output(['git', 'stash', '-u'])
     97     subprocess.check_output(['git', 'stash', 'clear'])
     98     out = subprocess.check_output(['repo', 'status'], stderr=subprocess.STDOUT)
     99     out = strip_terminal_codes(out).strip()
    100 
    101     if not 'working directory clean' in out and not 'working tree clean' in out:
    102         raise DirtyTreeException('%s repo not clean: %s' % (HOSTNAME, out))
    103 
    104 
    105 def _clean_externals():
    106     """Clean untracked files within ExternalSource and site-packages/
    107 
    108     @raises subprocess.CalledProcessError on a git command failure.
    109     """
    110     dirs_to_clean = ['site-packages/', 'ExternalSource/']
    111     cmd = ['git', 'clean', '-fxd'] + dirs_to_clean
    112     subprocess.check_output(cmd)
    113 
    114 
    115 def repo_versions():
    116     """This function collects the versions of all git repos in the general repo.
    117 
    118     @returns A dictionary mapping project names to git hashes for HEAD.
    119     @raises subprocess.CalledProcessError on a repo command failure.
    120     """
    121     cmd = ['repo', 'forall', '-p', '-c', 'pwd && git log -1 --format=%h']
    122     output = strip_terminal_codes(subprocess.check_output(cmd))
    123 
    124     # The expected output format is:
    125 
    126     # project chrome_build/
    127     # /dir/holding/chrome_build
    128     # 73dee9d
    129     #
    130     # project chrome_release/
    131     # /dir/holding/chrome_release
    132     # 9f3a5d8
    133 
    134     lines = output.splitlines()
    135 
    136     PROJECT_PREFIX = 'project '
    137 
    138     project_heads = {}
    139     for n in range(0, len(lines), 4):
    140         project_line = lines[n]
    141         project_dir = lines[n+1]
    142         project_hash = lines[n+2]
    143         # lines[n+3] is a blank line, but doesn't exist for the final block.
    144 
    145         # Convert 'project chrome_build/' -> 'chrome_build'
    146         assert project_line.startswith(PROJECT_PREFIX)
    147         name = project_line[len(PROJECT_PREFIX):].rstrip('/')
    148 
    149         project_heads[name] = (project_dir, project_hash)
    150 
    151     return project_heads
    152 
    153 
    154 def repo_versions_to_decide_whether_run_cmd_update():
    155     """Collect versions of repos/files defined in COMMANDS_TO_REPOS_DICT.
    156 
    157     For the update_commands defined in config files, no need to run the command
    158     every time. Only run it when the repos/files related to the commands have
    159     been changed.
    160 
    161     @returns A set of tuples: {(cmd, repo_version), ()...}
    162     """
    163     results = set()
    164     for cmd, repo in COMMANDS_TO_REPOS_DICT.iteritems():
    165         version = subprocess.check_output(
    166                 ['git', 'log', '-1', '--pretty=tformat:%h',
    167                  '%s/%s' % (common.autotest_dir, repo)])
    168         results.add((cmd, version.strip()))
    169     return results
    170 
    171 
    172 def repo_sync(update_push_servers=False):
    173     """Perform a repo sync.
    174 
    175     @param update_push_servers: If True, then update test_push servers to ToT.
    176                                 Otherwise, update server to prod branch.
    177     @raises subprocess.CalledProcessError on a repo command failure.
    178     """
    179     subprocess.check_output(['repo', 'sync', '--force-sync'])
    180     if update_push_servers:
    181         print('Updating push servers, checkout cros/master')
    182         subprocess.check_output(['git', 'checkout', 'cros/master'],
    183                                 stderr=subprocess.STDOUT)
    184     else:
    185         print('Updating server to prod branch')
    186         subprocess.check_output(['git', 'checkout', 'cros/prod'],
    187                                 stderr=subprocess.STDOUT)
    188     _clean_pyc_files()
    189 
    190 
    191 def discover_update_commands():
    192     """Lookup the commands to run on this server.
    193 
    194     These commonly come from shadow_config.ini, since they vary by server type.
    195 
    196     @returns List of command names in string format.
    197     """
    198     try:
    199         return global_config.global_config.get_config_value(
    200                 'UPDATE', 'commands', type=list)
    201 
    202     except (ConfigParser.NoSectionError, global_config.ConfigError):
    203         return []
    204 
    205 
    206 def get_restart_services():
    207     """Find the services that need restarting on the current server.
    208 
    209     These commonly come from shadow_config.ini, since they vary by server type.
    210 
    211     @returns Iterable of service names in string format.
    212     """
    213     with open(_RESTART_SERVICES_FILE) as f:
    214         for line in f:
    215             yield line.rstrip()
    216 
    217 
    218 def update_command(cmd_tag, dryrun=False, use_chromite_master=False):
    219     """Restart a command.
    220 
    221     The command name is looked up in global_config.ini to find the full command
    222     to run, then it's executed.
    223 
    224     @param cmd_tag: Which command to restart.
    225     @param dryrun: If true print the command that would have been run.
    226     @param use_chromite_master: True if updating chromite to master, rather
    227                                 than prod.
    228 
    229     @raises UnknownCommandException If cmd_tag can't be looked up.
    230     @raises subprocess.CalledProcessError on a command failure.
    231     """
    232     # Lookup the list of commands to consider. They are intended to be
    233     # in global_config.ini so that they can be shared everywhere.
    234     cmds = dict(global_config.global_config.config.items(
    235         'UPDATE_COMMANDS'))
    236 
    237     if cmd_tag not in cmds:
    238         raise UnknownCommandException(cmd_tag, cmds)
    239 
    240     command = cmds[cmd_tag]
    241     # When updating push servers, pass an arg to build_externals to update
    242     # chromite to master branch for testing
    243     if use_chromite_master and cmd_tag == BUILD_EXTERNALS_COMMAND:
    244         command += ' --use_chromite_master'
    245 
    246     print('Running: %s: %s' % (cmd_tag, command))
    247     if dryrun:
    248         print('Skip: %s' % command)
    249     else:
    250         try:
    251             subprocess.check_output(command, shell=True,
    252                                     cwd=common.autotest_dir,
    253                                     stderr=subprocess.STDOUT)
    254         except subprocess.CalledProcessError as e:
    255             print('FAILED %s :' % HOSTNAME)
    256             print(e.output)
    257             raise
    258 
    259 
    260 def restart_service(service_name, dryrun=False):
    261     """Restart a service.
    262 
    263     Restarts the standard service with "service <name> restart".
    264 
    265     @param service_name: The name of the service to restart.
    266     @param dryrun: Don't really run anything, just print out the command.
    267 
    268     @raises subprocess.CalledProcessError on a command failure.
    269     """
    270     cmd = ['sudo', 'service', service_name, 'restart']
    271     print('Restarting: %s' % service_name)
    272     if dryrun:
    273         print('Skip: %s' % ' '.join(cmd))
    274     else:
    275         subprocess.check_call(cmd, stderr=subprocess.STDOUT)
    276 
    277 
    278 def service_status(service_name):
    279     """Return the results "status <name>" for a given service.
    280 
    281     This string is expected to contain the pid, and so to change is the service
    282     is shutdown or restarted for any reason.
    283 
    284     @param service_name: The name of the service to check on.
    285 
    286     @returns The output of the external command.
    287              Ex: autofs start/running, process 1931
    288 
    289     @raises subprocess.CalledProcessError on a command failure.
    290     """
    291     return subprocess.check_output(['sudo', 'service', service_name, 'status'])
    292 
    293 
    294 def restart_services(service_names, dryrun=False, skip_service_status=False):
    295     """Restart services as needed for the current server type.
    296 
    297     Restart the listed set of services, and watch to see if they are stable for
    298     at least SERVICE_STABILITY_TIMER. It restarts all services quickly,
    299     waits for that delay, then verifies the status of all of them.
    300 
    301     @param service_names: The list of service to restart and monitor.
    302     @param dryrun: Don't really restart the service, just print out the command.
    303     @param skip_service_status: Set to True to skip service status check.
    304                                 Default is False.
    305 
    306     @raises subprocess.CalledProcessError on a command failure.
    307     @raises UnstableServices if any services are unstable after restart.
    308     """
    309     service_statuses = {}
    310 
    311     if dryrun:
    312         for name in service_names:
    313             restart_service(name, dryrun=True)
    314         return
    315 
    316     # Restart each, and record the status (including pid).
    317     for name in service_names:
    318         restart_service(name)
    319 
    320     # Skip service status check if --skip-service-status is specified. Used for
    321     # servers in backup status.
    322     if skip_service_status:
    323         print('--skip-service-status is specified, skip checking services.')
    324         return
    325 
    326     # Wait for a while to let the services settle.
    327     time.sleep(SERVICE_STABILITY_TIMER)
    328     service_statuses = {name: service_status(name) for name in service_names}
    329     time.sleep(SERVICE_STABILITY_TIMER)
    330     # Look for any services that changed status.
    331     unstable_services = [n for n in service_names
    332                          if service_status(n) != service_statuses[n]]
    333 
    334     # Report any services having issues.
    335     if unstable_services:
    336       raise UnstableServices('%s service restart failed: %s' %
    337                              (HOSTNAME, unstable_services))
    338 
    339 
    340 def run_deploy_actions(cmds_skip=set(), dryrun=False,
    341                        skip_service_status=False, use_chromite_master=False):
    342     """Run arbitrary update commands specified in global.ini.
    343 
    344     @param cmds_skip: cmds no need to run since the corresponding repo/file
    345                       does not change.
    346     @param dryrun: Don't really restart the service, just print out the command.
    347     @param skip_service_status: Set to True to skip service status check.
    348                                 Default is False.
    349     @param use_chromite_master: True if updating chromite to master, rather
    350                                 than prod.
    351 
    352     @raises subprocess.CalledProcessError on a command failure.
    353     @raises UnstableServices if any services are unstable after restart.
    354     """
    355     defined_cmds = set(discover_update_commands())
    356     cmds = defined_cmds - cmds_skip
    357     if cmds:
    358         print('Running update commands:', ', '.join(cmds))
    359         for cmd in cmds:
    360             update_command(cmd, dryrun=dryrun,
    361                            use_chromite_master=use_chromite_master)
    362 
    363     services = list(get_restart_services())
    364     if services:
    365         print('Restarting Services:', ', '.join(services))
    366         restart_services(services, dryrun=dryrun,
    367                          skip_service_status=skip_service_status)
    368 
    369 
    370 def report_changes(versions_before, versions_after):
    371     """Produce a report describing what changed in all repos.
    372 
    373     @param versions_before: Results of repo_versions() from before the update.
    374     @param versions_after: Results of repo_versions() from after the update.
    375 
    376     @returns string containing a human friendly changes report.
    377     """
    378     result = []
    379 
    380     if versions_after:
    381         for project in sorted(set(versions_before.keys() + versions_after.keys())):
    382             result.append('%s:' % project)
    383 
    384             _, before_hash = versions_before.get(project, (None, None))
    385             after_dir, after_hash = versions_after.get(project, (None, None))
    386 
    387             if project not in versions_before:
    388                 result.append('Added.')
    389 
    390             elif project not in versions_after:
    391                 result.append('Removed.')
    392 
    393             elif before_hash == after_hash:
    394                 result.append('No Change.')
    395 
    396             else:
    397                 hashes = '%s..%s' % (before_hash, after_hash)
    398                 cmd = ['git', 'log', hashes, '--oneline']
    399                 out = subprocess.check_output(cmd, cwd=after_dir,
    400                                               stderr=subprocess.STDOUT)
    401                 result.append(out.strip())
    402 
    403             result.append('')
    404     else:
    405         for project in sorted(versions_before.keys()):
    406             _, before_hash = versions_before[project]
    407             result.append('%s: %s' % (project, before_hash))
    408         result.append('')
    409 
    410     return '\n'.join(result)
    411 
    412 
    413 def parse_arguments(args):
    414     """Parse command line arguments.
    415 
    416     @param args: The command line arguments to parse. (ususally sys.argsv[1:])
    417 
    418     @returns An argparse.Namespace populated with argument values.
    419     """
    420     parser = argparse.ArgumentParser(
    421             description='Command to update an autotest server.')
    422     parser.add_argument('--skip-verify', action='store_false',
    423                         dest='verify', default=True,
    424                         help='Disable verification of a clean repository.')
    425     parser.add_argument('--skip-update', action='store_false',
    426                         dest='update', default=True,
    427                         help='Skip the repository source code update.')
    428     parser.add_argument('--skip-actions', action='store_false',
    429                         dest='actions', default=True,
    430                         help='Skip the post update actions.')
    431     parser.add_argument('--skip-report', action='store_false',
    432                         dest='report', default=True,
    433                         help='Skip the git version report.')
    434     parser.add_argument('--actions-only', action='store_true',
    435                         help='Run the post update actions (restart services).')
    436     parser.add_argument('--dryrun', action='store_true',
    437                         help='Don\'t actually run any commands, just log.')
    438     parser.add_argument('--skip-service-status', action='store_true',
    439                         help='Skip checking the service status.')
    440     parser.add_argument('--update_push_servers', action='store_true',
    441                         help='Indicate to update test_push server. If not '
    442                              'specify, then update server to production.')
    443     parser.add_argument('--force-clean-externals', action='store_true',
    444                         default=False,
    445                         help='Force a cleanup of all untracked files within '
    446                              'site-packages/ and ExternalSource/, so that '
    447                              'build_externals will build from scratch.')
    448     parser.add_argument('--force_update', action='store_true',
    449                         help='Force to run the update commands for afe, tko '
    450                              'and build_externals')
    451 
    452     results = parser.parse_args(args)
    453 
    454     if results.actions_only:
    455         results.verify = False
    456         results.update = False
    457         results.report = False
    458 
    459     # TODO(dgarrett): Make these behaviors support dryrun.
    460     if results.dryrun:
    461         results.verify = False
    462         results.update = False
    463         results.force_clean_externals = False
    464 
    465     if not results.update_push_servers:
    466       print('Will skip service check for pushing servers in prod.')
    467       results.skip_service_status = True
    468     return results
    469 
    470 
    471 class ChangeDir(object):
    472 
    473     """Context manager for changing to a directory temporarily."""
    474 
    475     def __init__(self, dir):
    476         self.new_dir = dir
    477         self.old_dir = None
    478 
    479     def __enter__(self):
    480         self.old_dir = os.getcwd()
    481         os.chdir(self.new_dir)
    482 
    483     def __exit__(self, exc_type, exc_val, exc_tb):
    484         os.chdir(self.old_dir)
    485 
    486 
    487 def _sync_chromiumos_repo():
    488     """Update ~chromeos-test/chromiumos repo."""
    489     print('Updating ~chromeos-test/chromiumos')
    490     with ChangeDir(os.path.expanduser('~chromeos-test/chromiumos')):
    491         ret = subprocess.call(['repo', 'sync', '--force-sync'],
    492                               stderr=subprocess.STDOUT)
    493         _clean_pyc_files()
    494     if ret != 0:
    495         print('Update failed, exited with status: %d' % ret)
    496 
    497 
    498 def main(args):
    499     """Main method."""
    500     # Be careful before you change this call to `os.chdir()`:
    501     # We make several calls to `subprocess.check_output()` and
    502     # friends that depend on this directory, most notably calls to
    503     # the 'repo' command from `verify_repo_clean()`.
    504     os.chdir(common.autotest_dir)
    505     global_config.global_config.parse_config_file()
    506 
    507     behaviors = parse_arguments(args)
    508     print('Updating server: %s' % HOSTNAME)
    509     if behaviors.verify:
    510         print('Checking tree status:')
    511         verify_repo_clean()
    512         print('Tree status: clean')
    513 
    514     if behaviors.force_clean_externals:
    515        print('Cleaning all external packages and their cache...')
    516        _clean_externals()
    517        print('...done.')
    518 
    519     versions_before = repo_versions()
    520     versions_after = set()
    521     cmd_versions_before = repo_versions_to_decide_whether_run_cmd_update()
    522     cmd_versions_after = set()
    523 
    524     if behaviors.update:
    525         print('Updating Repo.')
    526         repo_sync(behaviors.update_push_servers)
    527         versions_after = repo_versions()
    528         cmd_versions_after = repo_versions_to_decide_whether_run_cmd_update()
    529         _sync_chromiumos_repo()
    530 
    531     if behaviors.actions:
    532         # If the corresponding repo/file not change, no need to run the cmd.
    533         cmds_skip = (set() if behaviors.force_update else
    534                      {t[0] for t in cmd_versions_before & cmd_versions_after})
    535         run_deploy_actions(
    536                 cmds_skip, behaviors.dryrun, behaviors.skip_service_status,
    537                 use_chromite_master=behaviors.update_push_servers)
    538 
    539     if behaviors.report:
    540         print('Changes:')
    541         print(report_changes(versions_before, versions_after))
    542 
    543 
# Script entry point. main() has no return statement, so sys.exit() receives
# None and the exit status is 0 unless an exception propagates.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))
    546