Home | History | Annotate | Download | only in site_utils
      1 #!/usr/bin/python
      2 
      3 from __future__ import print_function
      4 
      5 import argparse
      6 import logging
      7 import multiprocessing
      8 import subprocess
      9 import sys
     10 
     11 import common
     12 from autotest_lib.server import frontend
     13 from autotest_lib.site_utils.lib import infra
     14 
     15 DEPLOY_SERVER_LOCAL = ('/usr/local/autotest/site_utils/deploy_server_local.py')
     16 POOL_SIZE = 124
     17 
     18 
     19 def _filter_servers(servers):
     20     """Filter a set of servers to those that should be deployed to."""
     21     non_push_roles = {'devserver', 'crash_server', 'reserve'}
     22     for s in servers:
     23         if s['status'] == 'repair_required':
     24             continue
     25         if s['status'] == 'backup':
     26             continue
     27         if set(s['roles']) & non_push_roles:
     28             continue
     29         yield s
     30 
     31 
     32 def discover_servers(afe):
     33     """Discover the in-production servers to update.
     34 
     35     Returns the set of servers from serverdb that are in production and should
     36     be updated. This filters out servers in need of repair, or servers of roles
     37     that are not yet supported by deploy_server / deploy_server_local.
     38 
     39     @param afe: Server to contact with RPC requests.
     40 
     41     @returns: A set of server hostnames.
     42     """
     43     # Example server details....
     44     # {
     45     #     'hostname': 'server1',
     46     #     'status': 'backup',
     47     #     'roles': ['drone', 'scheduler'],
     48     #     'attributes': {'max_processes': 300}
     49     # }
     50     rpc = frontend.AFE(server=afe)
     51     servers = rpc.run('get_servers')
     52 
     53     return {s['hostname'] for s in _filter_servers(servers)}
     54 
     55 
     56 def _parse_arguments(args):
     57     """Parse command line arguments.
     58 
     59     @param args: The command line arguments to parse. (usually sys.argv[1:])
     60 
     61     @returns A tuple of (argparse.Namespace populated with argument values,
     62                          list of extra args to pass to deploy_server_local).
     63     """
     64     parser = argparse.ArgumentParser(
     65             formatter_class=argparse.RawDescriptionHelpFormatter,
     66             description='Run deploy_server_local on a bunch of servers. Extra '
     67                         'arguments will be passed through.',
     68             epilog=('Update all servers:\n'
     69                     '  deploy_server.py -x --afe cautotest\n'
     70                     '\n'
     71                     'Update one server:\n'
     72                     '  deploy_server.py <server> -x\n'
     73                     ))
     74 
     75     parser.add_argument('-x', action='store_true',
     76                         help='Actually perform actions. If not supplied, '
     77                              'script does nothing.')
     78     parser.add_argument('--afe',
     79             help='The AFE server used to get servers from server_db,'
     80                  'e.g, cautotest. Used only if no SERVER specified.')
     81     parser.add_argument('servers', action='store', nargs='*', metavar='SERVER')
     82 
     83     return parser.parse_known_args()
     84 
     85 
     86 def _update_server(server, extra_args=[]):
     87     """Run deploy_server_local for given server.
     88 
     89     @param server: hostname to update.
     90     @param extra_args: args to be passed in to deploy_server_local.
     91 
     92     @return: A tuple of (server, success, output), where:
     93              server: Name of the server.
     94              sucess: True if update succeeds, False otherwise.
     95              output: A string of the deploy_server_local script output
     96                      including any errors.
     97     """
     98     cmd = ('%s %s' %
     99            (DEPLOY_SERVER_LOCAL, ' '.join(extra_args)))
    100     success = False
    101     try:
    102         output = infra.execute_command(server, cmd)
    103         success = True
    104     except subprocess.CalledProcessError as e:
    105         output = e.output
    106 
    107     return server, success, output
    108 
    109 def _update_in_parallel(servers, extra_args=[]):
    110     """Update a group of servers in parallel.
    111 
    112     @param servers: A list of servers to update.
    113     @param options: Options for the push.
    114 
    115     @returns A dictionary from server names that failed to the output
    116              of the update script.
    117     """
    118     # Create a list to record all the finished servers.
    119     manager = multiprocessing.Manager()
    120     finished_servers = manager.list()
    121 
    122     do_server = lambda s: _update_server(s, extra_args)
    123 
    124     # The update actions run in parallel. If any update failed, we should wait
    125     # for other running updates being finished. Abort in the middle of an update
    126     # may leave the server in a bad state.
    127     pool = multiprocessing.pool.ThreadPool(POOL_SIZE)
    128     try:
    129         results = pool.map_async(do_server, servers)
    130         pool.close()
    131 
    132         # Track the updating progress for current group of servers.
    133         incomplete_servers = set()
    134         server_names = set([s[0] for s in servers])
    135         while not results.ready():
    136             incomplete_servers = sorted(set(servers) - set(finished_servers))
    137             print('Not finished yet. %d servers in this group. '
    138                 '%d servers are still running:\n%s\n' %
    139                 (len(servers), len(incomplete_servers), incomplete_servers))
    140             # Check the progress every 20s
    141             results.wait(20)
    142 
    143         # After update finished, parse the result.
    144         failures = {}
    145         for server, success, output in results.get():
    146             if not success:
    147                 failures[server] = output
    148 
    149         return failures
    150 
    151     finally:
    152         pool.terminate()
    153         pool.join()
    154 
    155 
    156 def main(args):
    157     """Entry point to deploy_server.py
    158 
    159     @param args: The command line arguments to parse. (usually sys.argv)
    160 
    161     @returns The system exit code.
    162     """
    163     options, extra_args = _parse_arguments(args[1:])
    164     # Remove all the handlers from the root logger to get rid of the handlers
    165     # introduced by the import packages.
    166     logging.getLogger().handlers = []
    167     logging.basicConfig(level=logging.DEBUG)
    168 
    169     servers = options.servers
    170     if not servers:
    171         if not options.afe:
    172             print('No servers or afe specified. Aborting')
    173             return 1
    174         print('Retrieving servers from %s..' % options.afe)
    175         servers = discover_servers(options.afe)
    176         print('Retrieved servers were: %s' % servers)
    177 
    178     if not options.x:
    179         print('Doing nothing because -x was not supplied.')
    180         print('servers: %s' % options.servers)
    181         print('extra args for deploy_server_local: %s' % extra_args)
    182         return 0
    183 
    184     failures = _update_in_parallel(servers, extra_args)
    185 
    186     if not failures:
    187         print('Completed all updates successfully.')
    188         return 0
    189 
    190     print('The following servers failed, with the following output:')
    191     for s, o in failures.iteritems():
    192         print('======== %s ========' % s)
    193         print(o)
    194 
    195     print('The servers that failed were:')
    196     print('\n'.join(failures.keys()))
    197     print('\n\nTo retry on failed servers, run the following command:')
    198     retry_cmd = [args[0], '-x'] + failures.keys() + extra_args
    199     print(' '.join(retry_cmd))
    200     return 1
    201 
    202 
    203 
    204 if __name__ == '__main__':
    205     sys.exit(main(sys.argv))
    206