Home | History | Annotate | Download | only in site_utils
      1 #!/usr/bin/env python
      2 # Copyright 2015 The Chromium OS Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Adjust pool balances to cover DUT shortfalls.
      7 
      8 This command takes all broken DUTs in a specific pool for specific
      9 models and swaps them with working DUTs taken from a selected pool
     10 of spares.  The command is meant primarily for replacing broken DUTs
     11 in critical pools like BVT or CQ, but it can also be used to adjust
     12 pool sizes, or to create or remove pools.
     13 
     14 usage:  balance_pool.py [ options ] POOL MODEL [ MODEL ... ]
     15 
     16 positional arguments:
     17   POOL                  Name of the pool to balance
     18   MODEL                 Names of models to balance
     19 
     20 optional arguments:
     21   -h, --help            show this help message and exit
     22   -t COUNT, --total COUNT
     23                         Set the number of DUTs in the pool to the specified
     24                         count for every MODEL
     25   -a COUNT, --grow COUNT
     26                         Add the specified number of DUTs to the pool for every
     27                         MODEL
     28   -d COUNT, --shrink COUNT
     29                         Remove the specified number of DUTs from the pool for
     30                         every MODEL
     31   -s POOL, --spare POOL
     32                         Pool from which to draw replacement spares (default:
     33                         pool:suites)
     34   --sku SKU             The specific SKU we intend to swap with
     35   -n, --dry-run         Report actions to take in the form of shell commands
     36 
     37 
     38 The command attempts to remove all broken DUTs from the target POOL
     39 for every MODEL, and replace them with enough working DUTs taken
     40 from the spare pool to bring the strength of POOL to the requested
     41 total COUNT.
     42 
     43 If no COUNT options are supplied (i.e. there are no --total, --grow,
     44 or --shrink options), the command will maintain the current totals of
     45 DUTs for every MODEL in the target POOL.
     46 
     47 If not enough working spares are available, broken DUTs may be left
     48 in the pool to keep the pool at the target COUNT.
     49 
     50 When reducing pool size, working DUTs will be returned after broken
     51 DUTs, if it's necessary to achieve the target COUNT.
     52 
     53 """
     54 
     55 
     56 import argparse
     57 import sys
     58 import time
     59 
     60 import common
     61 from autotest_lib.server import constants
     62 from autotest_lib.server import frontend
     63 from autotest_lib.server import site_utils
     64 from autotest_lib.server.lib import status_history
     65 from autotest_lib.site_utils import lab_inventory
     66 from autotest_lib.utils import labellib
     67 from chromite.lib import metrics
     68 from chromite.lib import parallel
     69 
     70 #This must be imported after chromite.lib.metrics
     71 from infra_libs import ts_mon
     72 
# Prefix prepended to a pool name to form the AFE label for that pool;
# also used to recognize pool labels on a host.
_POOL_PREFIX = constants.Labels.POOL_PREFIX
# Fraction of the models in a pool that may have broken DUTs before an
# --all-models rebalance is refused.  It is only used to compute the
# default threshold when --max-broken-models is not given.  The value
# seemed like the best choice that was neither too strict nor too lax.
_MAX_BROKEN_DEFAULT_RATIO = 3.0 / 8.0

# Special value for the POOL argument that selects every pool in
# lab_inventory.CRITICAL_POOLS instead of one named pool.
_ALL_CRITICAL_POOLS = 'all_critical_pools'
# Pool that supplies replacement DUTs when --spare is not given.
_SPARE_DEFAULT = lab_inventory.SPARE_POOL
     81 
     82 
     83 def _log_message(message, *args):
     84     """Log a message with optional format arguments to stdout.
     85 
     86     This function logs a single line to stdout, with formatting
     87     if necessary, and without adornments.
     88 
     89     If `*args` are supplied, the message will be formatted using
     90     the arguments.
     91 
     92     @param message  Message to be logged, possibly after formatting.
     93     @param args     Format arguments.  If empty, the message is logged
     94                     without formatting.
     95 
     96     """
     97     if args:
     98         message = message % args
     99     sys.stdout.write('%s\n' % message)
    100 
    101 
    102 def _log_info(dry_run, message, *args):
    103     """Log information in a dry-run dependent fashion.
    104 
    105     This function logs a single line to stdout, with formatting
    106     if necessary.  When logging for a dry run, the message is
    107     printed as a shell comment, rather than as unadorned text.
    108 
    109     If `*args` are supplied, the message will be formatted using
    110     the arguments.
    111 
    112     @param message  Message to be logged, possibly after formatting.
    113     @param args     Format arguments.  If empty, the message is logged
    114                     without formatting.
    115 
    116     """
    117     if dry_run:
    118         message = '# ' + message
    119     _log_message(message, *args)
    120 
    121 
    122 def _log_error(message, *args):
    123     """Log an error to stderr, with optional format arguments.
    124 
    125     This function logs a single line to stderr, prefixed to indicate
    126     that it is an error message.
    127 
    128     If `*args` are supplied, the message will be formatted using
    129     the arguments.
    130 
    131     @param message  Message to be logged, possibly after formatting.
    132     @param args     Format arguments.  If empty, the message is logged
    133                     without formatting.
    134 
    135     """
    136     if args:
    137         message = message % args
    138     sys.stderr.write('ERROR: %s\n' % message)
    139 
    140 
    141 class _DUTPool(object):
    142     """Information about a pool of DUTs matching given labels.
    143 
    144     This class collects information about all DUTs for a given pool and matching
    145     the given labels, and divides them into three categories:
    146       + Working - the DUT is working for testing, and not locked.
    147       + Broken - the DUT is unable to run tests, or it is locked.
    148       + Ineligible - the DUT is not available to be removed from this pool.  The
    149             DUT may be either working or broken.
    150 
    151     DUTs with more than one pool: label are ineligible for exchange
    152     during balancing.  This is done for the sake of chameleon hosts,
    153     which must always be assigned to pool:suites.  These DUTs are
    154     always marked with pool:chameleon to prevent their reassignment.
    155 
    156     TODO(jrbarnette):  The use of `pool:chamelon` (instead of just
    157     the `chameleon` label is a hack that should be eliminated.
    158 
    159     _DUTPool instances are used to track both main pools that need
    160     to be resupplied with working DUTs and spare pools that supply
    161     those DUTs.
    162 
    163     @property pool                Name of the pool associated with
    164                                   this pool of DUTs.
    165     @property labels              Labels that constrain the DUTs to consider.
    166     @property working_hosts       The list of this pool's working DUTs.
    167     @property broken_hosts        The list of this pool's broken DUTs.
    168     @property ineligible_hosts    The list of this pool's ineligible DUTs.
    169     @property pool_labels         A list of labels that identify a DUT as part
    170                                   of this pool.
    171     @property total_hosts         The total number of hosts in pool.
    172 
    173     """
    174 
    175     def __init__(self, afe, pool, labels, start_time, end_time):
    176         self.pool = pool
    177         self.labels = labellib.LabelsMapping(labels)
    178         self.labels['pool'] = pool
    179         self._pool_labels = [_POOL_PREFIX + self.pool]
    180 
    181         self.working_hosts = []
    182         self.broken_hosts = []
    183         self.ineligible_hosts = []
    184         self.total_hosts = self._get_hosts(afe, start_time, end_time)
    185 
    186 
    187     def _get_hosts(self, afe, start_time, end_time):
    188         all_histories = status_history.HostJobHistory.get_multiple_histories(
    189                 afe, start_time, end_time, self.labels.getlabels())
    190         for h in all_histories:
    191             host = h.host
    192             host_pools = [l for l in host.labels
    193                           if l.startswith(_POOL_PREFIX)]
    194             if len(host_pools) != 1:
    195                 self.ineligible_hosts.append(host)
    196             else:
    197                 diag = h.last_diagnosis()[0]
    198                 if (diag == status_history.WORKING and
    199                         not host.locked):
    200                     self.working_hosts.append(host)
    201                 else:
    202                     self.broken_hosts.append(host)
    203         return len(all_histories)
    204 
    205 
    206     @property
    207     def pool_labels(self):
    208         """Return the AFE labels that identify this pool.
    209 
    210         The returned labels are the labels that must be removed
    211         to remove a DUT from the pool, or added to add a DUT.
    212 
    213         @return A list of AFE labels suitable for AFE.add_labels()
    214                 or AFE.remove_labels().
    215 
    216         """
    217         return self._pool_labels
    218 
    219     def calculate_spares_needed(self, target_total):
    220         """Calculate and log the spares needed to achieve a target.
    221 
    222         Return how many working spares are needed to achieve the
    223         given `target_total` with all DUTs working.
    224 
    225         The spares count may be positive or negative.  Positive
    226         values indicate spares are needed to replace broken DUTs in
    227         order to reach the target; negative numbers indicate that
    228         no spares are needed, and that a corresponding number of
    229         working devices can be returned.
    230 
    231         If the new target total would require returning ineligible
    232         DUTs, an error is logged, and the target total is adjusted
    233         so that those DUTs are not exchanged.
    234 
    235         @param target_total  The new target pool size.
    236 
    237         @return The number of spares needed.
    238 
    239         """
    240         num_ineligible = len(self.ineligible_hosts)
    241         spares_needed = target_total >= num_ineligible
    242         metrics.Boolean(
    243             'chromeos/autotest/balance_pools/exhausted_pools',
    244             'True for each pool/model which requests more DUTs than supplied',
    245             # TODO(jrbarnette) The 'board' field is a legacy.  We need
    246             # to leave it here until we do the extra work Monarch
    247             # requires to delete a field.
    248             field_spec=[
    249                     ts_mon.StringField('pool'),
    250                     ts_mon.StringField('board'),
    251                     ts_mon.StringField('model'),
    252             ]).set(
    253                     not spares_needed,
    254                     fields={
    255                             'pool': self.pool,
    256                             'board': self.labels.get('model', ''),
    257                             'model': self.labels.get('model', ''),
    258                     },
    259         )
    260         if not spares_needed:
    261             _log_error(
    262                     '%s pool (%s): Target of %d is below minimum of %d DUTs.',
    263                     self.pool, self.labels, target_total, num_ineligible,
    264             )
    265             _log_error('Adjusting target to %d DUTs.', num_ineligible)
    266             target_total = num_ineligible
    267         else:
    268             _log_message('%s %s pool: Target of %d is above minimum.',
    269                          self.labels.get('model', ''), self.pool, target_total)
    270         adjustment = target_total - self.total_hosts
    271         return len(self.broken_hosts) + adjustment
    272 
    273     def allocate_surplus(self, num_broken):
    274         """Allocate a list DUTs that can returned as surplus.
    275 
    276         Return a list of devices that can be returned in order to
    277         reduce this pool's supply.  Broken DUTs will be preferred
    278         over working ones.
    279 
    280         The `num_broken` parameter indicates the number of broken
    281         DUTs to be left in the pool.  If this number exceeds the
    282         number of broken DUTs actually in the pool, the returned
    283         list will be empty.  If this number is negative, it
    284         indicates a number of working DUTs to be returned in
    285         addition to all broken ones.
    286 
    287         @param num_broken    Total number of broken DUTs to be left in
    288                              this pool.
    289 
    290         @return A list of DUTs to be returned as surplus.
    291 
    292         """
    293         if num_broken >= 0:
    294             surplus = self.broken_hosts[num_broken:]
    295             return surplus
    296         else:
    297             return (self.broken_hosts +
    298                     self.working_hosts[:-num_broken])
    299 
    300 
    301 def _exchange_labels(dry_run, hosts, target_pool, spare_pool):
    302     """Reassign a list of DUTs from one pool to another.
    303 
    304     For all the given hosts, remove all labels associated with
    305     `spare_pool`, and add the labels for `target_pool`.
    306 
    307     If `dry_run` is true, perform no changes, but log the `atest`
    308     commands needed to accomplish the necessary label changes.
    309 
    310     @param dry_run       Whether the logging is for a dry run or
    311                          for actual execution.
    312     @param hosts         List of DUTs (AFE hosts) to be reassigned.
    313     @param target_pool   The `_DUTPool` object from which the hosts
    314                          are drawn.
    315     @param spare_pool    The `_DUTPool` object to which the hosts
    316                          will be added.
    317 
    318     """
    319     _log_info(dry_run, 'Transferring %d DUTs from %s to %s.',
    320               len(hosts), spare_pool.pool, target_pool.pool)
    321     metrics.Counter(
    322         'chromeos/autotest/balance_pools/duts_moved',
    323         'DUTs transferred between pools',
    324         # TODO(jrbarnette) The 'board' field is a legacy.  We need to
    325         # leave it here until we do the extra work Monarch requires to
    326         # delete a field.
    327         field_spec=[
    328                 ts_mon.StringField('board'),
    329                 ts_mon.StringField('model'),
    330                 ts_mon.StringField('source_pool'),
    331                 ts_mon.StringField('target_pool'),
    332         ]
    333     ).increment_by(
    334             len(hosts),
    335             fields={
    336                     'board': target_pool.labels.get('model', ''),
    337                     'model': target_pool.labels.get('model', ''),
    338                     'source_pool': spare_pool.pool,
    339                     'target_pool': target_pool.pool,
    340             },
    341     )
    342     if not hosts:
    343         return
    344 
    345     additions = target_pool.pool_labels
    346     removals = spare_pool.pool_labels
    347     for host in hosts:
    348         if not dry_run:
    349             _log_message('Updating host: %s.', host.hostname)
    350             host.remove_labels(removals)
    351             host.add_labels(additions)
    352         else:
    353             _log_message('atest label remove -m %s %s',
    354                          host.hostname, ' '.join(removals))
    355             _log_message('atest label add -m %s %s',
    356                          host.hostname, ' '.join(additions))
    357 
    358 
    359 def _balance_model(arguments, afe, pool, labels, start_time, end_time):
    360     """Balance one model as requested by command line arguments.
    361 
    362     @param arguments     Parsed command line arguments.
    363     @param afe           AFE object to be used for the changes.
    364     @param pool          Pool of the model to be balanced.
    365     @param labels        Restrict the balancing operation within DUTs
    366                          that have these labels.
    367     @param start_time    Start time for HostJobHistory objects in
    368                          the DUT pools.
    369     @param end_time      End time for HostJobHistory objects in the
    370                          DUT pools.
    371 
    372     """
    373     spare_pool = _DUTPool(afe, arguments.spare, labels, start_time, end_time)
    374     main_pool = _DUTPool(afe, pool, labels, start_time, end_time)
    375 
    376     target_total = main_pool.total_hosts
    377     if arguments.total is not None:
    378         target_total = arguments.total
    379     elif arguments.grow:
    380         target_total += arguments.grow
    381     elif arguments.shrink:
    382         target_total -= arguments.shrink
    383 
    384     spares_needed = main_pool.calculate_spares_needed(target_total)
    385     if spares_needed > 0:
    386         spare_duts = spare_pool.working_hosts[:spares_needed]
    387         shortfall = spares_needed - len(spare_duts)
    388     else:
    389         spare_duts = []
    390         shortfall = spares_needed
    391 
    392     surplus_duts = main_pool.allocate_surplus(shortfall)
    393 
    394     if spares_needed or surplus_duts or arguments.verbose:
    395         dry_run = arguments.dry_run
    396         _log_message('')
    397 
    398         _log_info(dry_run, 'Balancing %s %s pool:', labels, main_pool.pool)
    399         _log_info(dry_run,
    400                   'Total %d DUTs, %d working, %d broken, %d reserved.',
    401                   main_pool.total_hosts, len(main_pool.working_hosts),
    402                   len(main_pool.broken_hosts), len(main_pool.ineligible_hosts))
    403 
    404         if spares_needed > 0:
    405             add_msg = 'grow pool by %d DUTs' % spares_needed
    406         elif spares_needed < 0:
    407             add_msg = 'shrink pool by %d DUTs' % -spares_needed
    408         else:
    409             add_msg = 'no change to pool size'
    410         _log_info(dry_run, 'Target is %d working DUTs; %s.',
    411                   target_total, add_msg)
    412 
    413         _log_info(dry_run,
    414                   '%s %s pool has %d spares available for balancing pool %s',
    415                   labels, spare_pool.pool, len(spare_pool.working_hosts),
    416                   main_pool.pool)
    417 
    418         if spares_needed > len(spare_duts):
    419             _log_error('Not enough spares: need %d, only have %d.',
    420                        spares_needed, len(spare_duts))
    421         elif shortfall >= 0:
    422             _log_info(dry_run,
    423                       '%s %s pool will return %d broken DUTs, '
    424                       'leaving %d still in the pool.',
    425                       labels, main_pool.pool,
    426                       len(surplus_duts),
    427                       len(main_pool.broken_hosts) - len(surplus_duts))
    428         else:
    429             _log_info(dry_run,
    430                       '%s %s pool will return %d surplus DUTs, '
    431                       'including %d working DUTs.',
    432                       labels, main_pool.pool,
    433                       len(main_pool.broken_hosts) - shortfall,
    434                       -shortfall)
    435 
    436     if (len(main_pool.broken_hosts) > arguments.max_broken and
    437         not arguments.force_rebalance):
    438         _log_error('%s %s pool: Refusing to act on pool with %d broken DUTs.',
    439                    labels, main_pool.pool, len(main_pool.broken_hosts))
    440         _log_error('Please investigate this model to for a bug ')
    441         _log_error('that is bricking devices. Once you have finished your ')
    442         _log_error('investigation, you can force a rebalance with ')
    443         _log_error('--force-rebalance')
    444         spare_duts = []
    445         surplus_duts = []
    446 
    447     if not spare_duts and not surplus_duts:
    448         if arguments.verbose:
    449             _log_info(arguments.dry_run, 'No exchange required.')
    450 
    451     _exchange_labels(arguments.dry_run, surplus_duts,
    452                      spare_pool, main_pool)
    453     _exchange_labels(arguments.dry_run, spare_duts,
    454                      main_pool, spare_pool)
    455 
    456 
    457 def _too_many_broken(inventory, pool, args):
    458     """
    459     Get the inventory of models and check if too many are broken.
    460 
    461     @param inventory: _LabInventory object.
    462     @param pool: The pool to check.
    463     @param args: Parsed command line arguments.
    464 
    465     @return True if the number of models with 1 or more broken duts
    466             exceed max_broken_models, False otherwise.
    467     """
    468     # Were we asked to skip this check?
    469     if (args.force_rebalance or
    470             (args.all_models and args.max_broken_models == 0)):
    471         return False
    472 
    473     max_broken = args.max_broken_models
    474     if max_broken is None:
    475         total_num = len(inventory.get_pool_models(pool))
    476         max_broken = int(_MAX_BROKEN_DEFAULT_RATIO * total_num)
    477     _log_info(args.dry_run,
    478               'Max broken models for pool %s: %d',
    479               pool, max_broken)
    480 
    481     broken = [model for model, counts in inventory.iteritems()
    482                   if counts.get_broken(pool) != 0]
    483     _log_message('There are %d models in the %s pool with at least 1 '
    484                  'broken DUT (max threshold %d)',
    485                  len(broken), pool, max_broken)
    486     for b in sorted(broken):
    487         _log_message(b)
    488     return len(broken) > max_broken
    489 
    490 
    491 def _parse_command(argv):
    492     """Parse the command line arguments.
    493 
    494     Create an argument parser for this command's syntax, parse the
    495     command line, and return the result of the `ArgumentParser`
    496     `parse_args()` method.
    497 
    498     @param argv Standard command line argument vector; `argv[0]` is
    499                 assumed to be the command name.
    500 
    501     @return Result returned by `ArgumentParser.parse_args()`.
    502 
    503     """
    504     parser = argparse.ArgumentParser(
    505             prog=argv[0],
    506             description='Balance pool shortages from spares on reserve')
    507 
    508     parser.add_argument(
    509         '-w', '--web', type=str, default=None,
    510         help='AFE host to use. Default comes from shadow_config.',
    511     )
    512     count_group = parser.add_mutually_exclusive_group()
    513     count_group.add_argument('-t', '--total', type=int,
    514                              metavar='COUNT', default=None,
    515                              help='Set the number of DUTs in the '
    516                                   'pool to the specified count for '
    517                                   'every MODEL')
    518     count_group.add_argument('-a', '--grow', type=int,
    519                              metavar='COUNT', default=None,
    520                              help='Add the specified number of DUTs '
    521                                   'to the pool for every MODEL')
    522     count_group.add_argument('-d', '--shrink', type=int,
    523                              metavar='COUNT', default=None,
    524                              help='Remove the specified number of DUTs '
    525                                   'from the pool for every MODEL')
    526 
    527     parser.add_argument('-s', '--spare', default=_SPARE_DEFAULT,
    528                         metavar='POOL',
    529                         help='Pool from which to draw replacement '
    530                              'spares (default: pool:%s)' % _SPARE_DEFAULT)
    531     parser.add_argument('-n', '--dry-run', action='store_true',
    532                         help='Report actions to take in the form of '
    533                              'shell commands')
    534     parser.add_argument('-v', '--verbose', action='store_true',
    535                         help='Print more detail about calculations for debug '
    536                              'purposes.')
    537 
    538     parser.add_argument('-m', '--max-broken', default=2, type=int,
    539                         metavar='COUNT',
    540                         help='Only rebalance a pool if it has at most '
    541                              'COUNT broken DUTs.')
    542     parser.add_argument('-f', '--force-rebalance', action='store_true',
    543                         help='Forcefully rebalance all DUTs in a pool, even '
    544                              'if it has a large number of broken DUTs. '
    545                              'Before doing this, please investigate whether '
    546                              'there is a bug that is bricking devices in the '
    547                              'lab.')
    548     parser.add_argument('--production', action='store_true',
    549                         help='Treat this as a production run. This will '
    550                              'collect metrics.')
    551 
    552     parser.add_argument(
    553             '--all-models',
    554             action='store_true',
    555             help='Rebalance all managed models.  This will do a very expensive '
    556                  'check to see how many models have at least one broken DUT. '
    557                  'To bypass that check, set --max-broken-models to 0.',
    558     )
    559     parser.add_argument(
    560             '--max-broken-models', default=None, type=int, metavar='COUNT',
    561             help='Only rebalance all models if number of models with broken '
    562                  'DUTs in the specified pool is less than COUNT.',
    563     )
    564 
    565     parser.add_argument('pool',
    566                         metavar='POOL',
    567                         help='Name of the pool to balance.  Use %s to balance '
    568                              'all critical pools' % _ALL_CRITICAL_POOLS)
    569     parser.add_argument('models', nargs='*', metavar='MODEL',
    570                         help='Names of models to balance.')
    571 
    572     parser.add_argument('--sku', type=str,
    573                         help='Optional name of sku to restrict to.')
    574 
    575     arguments = parser.parse_args(argv[1:])
    576 
    577     # Error-check arguments.
    578     if arguments.models and arguments.all_models:
    579         parser.error('Cannot specify individual models on the command line '
    580                      'when using --all-models.')
    581     if (arguments.pool == _ALL_CRITICAL_POOLS and
    582         arguments.spare != _SPARE_DEFAULT):
    583         parser.error('Cannot specify --spare pool to be %s when balancing all '
    584                      'critical pools.' % _SPARE_DEFAULT)
    585     return arguments
    586 
    587 
    588 def infer_balancer_targets(afe, arguments, pools):
    589     """Take some arguments and translate them to a list of models to balance
    590 
    591     Args:
    592     @param afe           AFE object to be used for taking inventory.
    593     @param arguments     Parsed command line arguments.
    594     @param pools         The list of pools to balance.
    595 
    596     @returns    a list of (model, labels) tuples to be balanced
    597 
    598     """
    599     balancer_targets = []
    600 
    601     for pool in pools:
    602         if arguments.all_models:
    603             inventory = lab_inventory.get_inventory(afe)
    604             quarantine = _too_many_broken(inventory, pool, arguments)
    605             if quarantine:
    606                 _log_error('Refusing to balance all models for %s pool, '
    607                            'too many models with at least 1 broken DUT '
    608                            'detected.', pool)
    609             else:
    610                 for model in inventory.get_models(pool):
    611                     labels = labellib.LabelsMapping()
    612                     labels['model'] = model
    613                     balancer_targets.append((pool, labels.getlabels()))
    614             metrics.Boolean(
    615                 'chromeos/autotest/balance_pools/unchanged_pools').set(
    616                     quarantine, fields={'pool': pool})
    617             _log_message('Pool %s quarantine status: %s', pool, quarantine)
    618         else:
    619             for model in arguments.models:
    620                 labels = labellib.LabelsMapping()
    621                 labels['model'] = model
    622                 if arguments.sku:
    623                     labels['sku'] = arguments.sku
    624                 balancer_targets.append((pool, labels.getlabels()))
    625     return balancer_targets
    626 
    627 
    628 def main(argv):
    629     """Standard main routine.
    630 
    631     @param argv  Command line arguments including `sys.argv[0]`.
    632 
    633     """
    634     arguments = _parse_command(argv)
    635     if arguments.production:
    636         metrics_manager = site_utils.SetupTsMonGlobalState(
    637                 'balance_pools',
    638                 indirect=False,
    639                 auto_flush=False,
    640         )
    641     else:
    642         metrics_manager = site_utils.TrivialContextManager()
    643 
    644     with metrics_manager:
    645         end_time = time.time()
    646         start_time = end_time - 24 * 60 * 60
    647         afe = frontend.AFE(server=arguments.web)
    648 
    649         def balancer(pool, labels):
    650             """Balance the specified model.
    651 
    652             @param pool: The pool to rebalance for the model.
    653             @param labels: labels to restrict to balancing operations
    654                     within.
    655             """
    656             _balance_model(arguments, afe, pool, labels,
    657                            start_time, end_time)
    658             _log_message('')
    659 
    660         pools = (lab_inventory.CRITICAL_POOLS
    661                 if arguments.pool == _ALL_CRITICAL_POOLS
    662                 else [arguments.pool])
    663         balancer_targets = infer_balancer_targets(afe, arguments, pools)
    664         try:
    665             parallel.RunTasksInProcessPool(
    666                     balancer,
    667                     balancer_targets,
    668                     processes=8,
    669             )
    670         except KeyboardInterrupt:
    671             pass
    672         finally:
    673             metrics.Flush()
    674 
    675 
# Script entry point: run the balancer with the full argument vector
# (including the command name) when executed directly.
if __name__ == '__main__':
    main(sys.argv)
    678