Home | History | Annotate | Download | only in site_utils
      1 #!/usr/bin/env python
      2 # Copyright 2015 The Chromium OS Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Adjust pool balances to cover DUT shortfalls.
      7 
      8 This command takes all broken DUTs in a specific pool for specific
      9 models and swaps them with working DUTs taken from a selected pool
     10 of spares.  The command is meant primarily for replacing broken DUTs
     11 in critical pools like BVT or CQ, but it can also be used to adjust
     12 pool sizes, or to create or remove pools.
     13 
     14 usage:  balance_pool.py [ options ] POOL MODEL [ MODEL ... ]
     15 
     16 positional arguments:
     17   POOL                  Name of the pool to balance
     18   MODEL                 Names of models to balance
     19 
     20 optional arguments:
     21   -h, --help            show this help message and exit
     22   -t COUNT, --total COUNT
     23                         Set the number of DUTs in the pool to the specified
     24                         count for every MODEL
     25   -a COUNT, --grow COUNT
     26                         Add the specified number of DUTs to the pool for every
     27                         MODEL
     28   -d COUNT, --shrink COUNT
     29                         Remove the specified number of DUTs from the pool for
     30                         every MODEL
     31   -s POOL, --spare POOL
     32                         Pool from which to draw replacement spares (default:
     33                         pool:suites)
     34   -p PHASE, --phase PHASE
     35                         Phase to restrict the balance pool operation to
     36   --sku SKU             The specific SKU we intend to swap with
     37   -n, --dry-run         Report actions to take in the form of shell commands
     38 
     39 
     40 The command attempts to remove all broken DUTs from the target POOL
     41 for every MODEL, and replace them with enough working DUTs taken
     42 from the spare pool to bring the strength of POOL to the requested
     43 total COUNT.
     44 
     45 If no COUNT options are supplied (i.e. there are no --total, --grow,
     46 or --shrink options), the command will maintain the current totals of
     47 DUTs for every MODEL in the target POOL.
     48 
     49 If not enough working spares are available, broken DUTs may be left
     50 in the pool to keep the pool at the target COUNT.
     51 
     52 When reducing pool size, working DUTs will be returned after broken
     53 DUTs, if it's necessary to achieve the target COUNT.
     54 
     55 """
     56 
     57 
     58 import argparse
     59 import os
     60 import re
     61 import sys
     62 import time
     63 
     64 import common
     65 from autotest_lib.server import constants
     66 from autotest_lib.server import site_utils
     67 from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
     68 from autotest_lib.server.lib import status_history
     69 from autotest_lib.site_utils import lab_inventory
     70 from autotest_lib.utils import labellib
     71 from chromite.lib import metrics
     72 from chromite.lib import parallel
     73 
# This must be imported after chromite.lib.metrics
     75 from infra_libs import ts_mon
     76 
# Prefix that marks an AFE label as a pool label; the pool name follows it.
_POOL_PREFIX = constants.Labels.POOL_PREFIX
# Fraction of the models in a pool that may have at least one broken DUT
# before an --all-models rebalance is refused.  It is only used to derive
# the threshold when --max-broken-models is not given.  It seemed like
# the best choice that was neither too strict nor lax.
_MAX_BROKEN_DEFAULT_RATIO = 3.0 / 8.0

# Pseudo pool name accepted as the POOL argument; it selects every pool
# in lab_inventory.CRITICAL_POOLS.
_ALL_CRITICAL_POOLS = 'all_critical_pools'
# Default pool from which replacement spares are drawn (see --spare).
_SPARE_DEFAULT = lab_inventory.SPARE_POOL
     85 
     86 
     87 # _VALID_POOL_PATTERN - Regular expression matching pool names that will
     88 # be accepted on the command line.
     89 #
     90 # Note: This pattern was selected merely to recognize all existing pool
     91 # names; there's no underlying technical restriction motivating this
     92 # pattern.  No reasonable request to add more special characters to the
     93 # allowed set should be refused.
     94 
     95 _VALID_POOL_PATTERN = re.compile('^[a-zA-z0-9_\-]+$')
     96 
     97 
     98 def _log_message(message, *args):
     99     """Log a message with optional format arguments to stdout.
    100 
    101     This function logs a single line to stdout, with formatting
    102     if necessary, and without adornments.
    103 
    104     If `*args` are supplied, the message will be formatted using
    105     the arguments.
    106 
    107     @param message  Message to be logged, possibly after formatting.
    108     @param args     Format arguments.  If empty, the message is logged
    109                     without formatting.
    110 
    111     """
    112     if args:
    113         message = message % args
    114     sys.stdout.write('%s\n' % message)
    115 
    116 
    117 def _log_info(dry_run, message, *args):
    118     """Log information in a dry-run dependent fashion.
    119 
    120     This function logs a single line to stdout, with formatting
    121     if necessary.  When logging for a dry run, the message is
    122     printed as a shell comment, rather than as unadorned text.
    123 
    124     If `*args` are supplied, the message will be formatted using
    125     the arguments.
    126 
    127     @param message  Message to be logged, possibly after formatting.
    128     @param args     Format arguments.  If empty, the message is logged
    129                     without formatting.
    130 
    131     """
    132     if dry_run:
    133         message = '# ' + message
    134     _log_message(message, *args)
    135 
    136 
    137 def _log_error(message, *args):
    138     """Log an error to stderr, with optional format arguments.
    139 
    140     This function logs a single line to stderr, prefixed to indicate
    141     that it is an error message.
    142 
    143     If `*args` are supplied, the message will be formatted using
    144     the arguments.
    145 
    146     @param message  Message to be logged, possibly after formatting.
    147     @param args     Format arguments.  If empty, the message is logged
    148                     without formatting.
    149 
    150     """
    151     if args:
    152         message = message % args
    153     sys.stderr.write('ERROR: %s\n' % message)
    154 
    155 
    156 class _DUTPool(object):
    157     """Information about a pool of DUTs matching given labels.
    158 
    159     This class collects information about all DUTs for a given pool and matching
    160     the given labels, and divides them into three categories:
    161       + Working - the DUT is working for testing, and not locked.
    162       + Broken - the DUT is unable to run tests, or it is locked.
    163       + Ineligible - the DUT is not available to be removed from this pool.  The
    164             DUT may be either working or broken.
    165 
    166     DUTs with more than one pool: label are ineligible for exchange
    167     during balancing.  This is done for the sake of chameleon hosts,
    168     which must always be assigned to pool:suites.  These DUTs are
    169     always marked with pool:chameleon to prevent their reassignment.
    170 
    171     TODO(jrbarnette):  The use of `pool:chamelon` (instead of just
    172     the `chameleon` label is a hack that should be eliminated.
    173 
    174     _DUTPool instances are used to track both main pools that need
    175     to be resupplied with working DUTs and spare pools that supply
    176     those DUTs.
    177 
    178     @property pool                Name of the pool associated with
    179                                   this pool of DUTs.
    180     @property labels              Labels that constrain the DUTs to consider.
    181     @property working_hosts       The list of this pool's working DUTs.
    182     @property broken_hosts        The list of this pool's broken DUTs.
    183     @property ineligible_hosts    The list of this pool's ineligible DUTs.
    184     @property pool_labels         A list of labels that identify a DUT as part
    185                                   of this pool.
    186     @property total_hosts         The total number of hosts in pool.
    187 
    188     """
    189 
    190     def __init__(self, afe, pool, labels, start_time, end_time):
    191         self.pool = pool
    192         self.labels = labellib.LabelsMapping(labels)
    193         self.labels['pool'] = pool
    194         self._pool_labels = [_POOL_PREFIX + self.pool]
    195 
    196         self.working_hosts = []
    197         self.broken_hosts = []
    198         self.ineligible_hosts = []
    199         self.total_hosts = self._get_hosts(afe, start_time, end_time)
    200 
    201 
    202     def _get_hosts(self, afe, start_time, end_time):
    203         all_histories = status_history.HostJobHistory.get_multiple_histories(
    204                 afe, start_time, end_time, self.labels.getlabels())
    205         for h in all_histories:
    206             host = h.host
    207             host_pools = [l for l in host.labels
    208                           if l.startswith(_POOL_PREFIX)]
    209             if len(host_pools) != 1:
    210                 self.ineligible_hosts.append(host)
    211             else:
    212                 diag = h.last_diagnosis()[0]
    213                 if (diag == status_history.WORKING and
    214                         not host.locked):
    215                     self.working_hosts.append(host)
    216                 else:
    217                     self.broken_hosts.append(host)
    218         return len(all_histories)
    219 
    220 
    221     @property
    222     def pool_labels(self):
    223         """Return the AFE labels that identify this pool.
    224 
    225         The returned labels are the labels that must be removed
    226         to remove a DUT from the pool, or added to add a DUT.
    227 
    228         @return A list of AFE labels suitable for AFE.add_labels()
    229                 or AFE.remove_labels().
    230 
    231         """
    232         return self._pool_labels
    233 
    234     def calculate_spares_needed(self, target_total):
    235         """Calculate and log the spares needed to achieve a target.
    236 
    237         Return how many working spares are needed to achieve the
    238         given `target_total` with all DUTs working.
    239 
    240         The spares count may be positive or negative.  Positive
    241         values indicate spares are needed to replace broken DUTs in
    242         order to reach the target; negative numbers indicate that
    243         no spares are needed, and that a corresponding number of
    244         working devices can be returned.
    245 
    246         If the new target total would require returning ineligible
    247         DUTs, an error is logged, and the target total is adjusted
    248         so that those DUTs are not exchanged.
    249 
    250         @param target_total  The new target pool size.
    251 
    252         @return The number of spares needed.
    253 
    254         """
    255         num_ineligible = len(self.ineligible_hosts)
    256         spares_needed = target_total >= num_ineligible
    257         metrics.Boolean(
    258             'chromeos/autotest/balance_pools/exhausted_pools',
    259             'True for each pool/model which requests more DUTs than supplied',
    260             # TODO(jrbarnette) The 'board' field is a legacy.  We need
    261             # to leave it here until we do the extra work Monarch
    262             # requires to delete a field.
    263             field_spec=[
    264                     ts_mon.StringField('pool'),
    265                     ts_mon.StringField('board'),
    266                     ts_mon.StringField('model'),
    267             ]).set(
    268                     not spares_needed,
    269                     fields={
    270                             'pool': self.pool,
    271                             'board': self.labels.get('model', ''),
    272                             'model': self.labels.get('model', ''),
    273                     },
    274         )
    275         if not spares_needed:
    276             _log_error(
    277                     '%s pool (%s): Target of %d is below minimum of %d DUTs.',
    278                     self.pool, self.labels, target_total, num_ineligible,
    279             )
    280             _log_error('Adjusting target to %d DUTs.', num_ineligible)
    281             target_total = num_ineligible
    282         else:
    283             _log_message('%s %s pool: Target of %d is above minimum.',
    284                          self.labels.get('model', ''), self.pool, target_total)
    285         adjustment = target_total - self.total_hosts
    286         return len(self.broken_hosts) + adjustment
    287 
    288     def allocate_surplus(self, num_broken):
    289         """Allocate a list DUTs that can returned as surplus.
    290 
    291         Return a list of devices that can be returned in order to
    292         reduce this pool's supply.  Broken DUTs will be preferred
    293         over working ones.
    294 
    295         The `num_broken` parameter indicates the number of broken
    296         DUTs to be left in the pool.  If this number exceeds the
    297         number of broken DUTs actually in the pool, the returned
    298         list will be empty.  If this number is negative, it
    299         indicates a number of working DUTs to be returned in
    300         addition to all broken ones.
    301 
    302         @param num_broken    Total number of broken DUTs to be left in
    303                              this pool.
    304 
    305         @return A list of DUTs to be returned as surplus.
    306 
    307         """
    308         if num_broken >= 0:
    309             surplus = self.broken_hosts[num_broken:]
    310             return surplus
    311         else:
    312             return (self.broken_hosts +
    313                     self.working_hosts[:-num_broken])
    314 
    315 
    316 def _exchange_labels(dry_run, hosts, target_pool, spare_pool):
    317     """Reassign a list of DUTs from one pool to another.
    318 
    319     For all the given hosts, remove all labels associated with
    320     `spare_pool`, and add the labels for `target_pool`.
    321 
    322     If `dry_run` is true, perform no changes, but log the `atest`
    323     commands needed to accomplish the necessary label changes.
    324 
    325     @param dry_run       Whether the logging is for a dry run or
    326                          for actual execution.
    327     @param hosts         List of DUTs (AFE hosts) to be reassigned.
    328     @param target_pool   The `_DUTPool` object from which the hosts
    329                          are drawn.
    330     @param spare_pool    The `_DUTPool` object to which the hosts
    331                          will be added.
    332 
    333     """
    334     _log_info(dry_run, 'Transferring %d DUTs from %s to %s.',
    335               len(hosts), spare_pool.pool, target_pool.pool)
    336     metrics.Counter(
    337         'chromeos/autotest/balance_pools/duts_moved',
    338         'DUTs transferred between pools',
    339         # TODO(jrbarnette) The 'board' field is a legacy.  We need to
    340         # leave it here until we do the extra work Monarch requires to
    341         # delete a field.
    342         field_spec=[
    343                 ts_mon.StringField('board'),
    344                 ts_mon.StringField('model'),
    345                 ts_mon.StringField('source_pool'),
    346                 ts_mon.StringField('target_pool'),
    347         ]
    348     ).increment_by(
    349             len(hosts),
    350             fields={
    351                     'board': target_pool.labels.get('model', ''),
    352                     'model': target_pool.labels.get('model', ''),
    353                     'source_pool': spare_pool.pool,
    354                     'target_pool': target_pool.pool,
    355             },
    356     )
    357     if not hosts:
    358         return
    359 
    360     additions = target_pool.pool_labels
    361     removals = spare_pool.pool_labels
    362     for host in hosts:
    363         if not dry_run:
    364             _log_message('Updating host: %s.', host.hostname)
    365             host.remove_labels(removals)
    366             host.add_labels(additions)
    367         else:
    368             _log_message('atest label remove -m %s %s',
    369                          host.hostname, ' '.join(removals))
    370             _log_message('atest label add -m %s %s',
    371                          host.hostname, ' '.join(additions))
    372 
    373 
    374 def _balance_model(arguments, afe, pool, labels, start_time, end_time):
    375     """Balance one model as requested by command line arguments.
    376 
    377     @param arguments     Parsed command line arguments.
    378     @param afe           AFE object to be used for the changes.
    379     @param pool          Pool of the model to be balanced.
    380     @param labels        Restrict the balancing operation within DUTs
    381                          that have these labels.
    382     @param start_time    Start time for HostJobHistory objects in
    383                          the DUT pools.
    384     @param end_time      End time for HostJobHistory objects in the
    385                          DUT pools.
    386 
    387     """
    388     spare_pool = _DUTPool(afe, arguments.spare, labels, start_time, end_time)
    389     main_pool = _DUTPool(afe, pool, labels, start_time, end_time)
    390 
    391     target_total = main_pool.total_hosts
    392     if arguments.total is not None:
    393         target_total = arguments.total
    394     elif arguments.grow:
    395         target_total += arguments.grow
    396     elif arguments.shrink:
    397         target_total -= arguments.shrink
    398 
    399     spares_needed = main_pool.calculate_spares_needed(target_total)
    400     if spares_needed > 0:
    401         spare_duts = spare_pool.working_hosts[:spares_needed]
    402         shortfall = spares_needed - len(spare_duts)
    403     else:
    404         spare_duts = []
    405         shortfall = spares_needed
    406 
    407     surplus_duts = main_pool.allocate_surplus(shortfall)
    408 
    409     if spares_needed or surplus_duts or arguments.verbose:
    410         dry_run = arguments.dry_run
    411         _log_message('')
    412 
    413         _log_info(dry_run, 'Balancing %s %s pool:', labels, main_pool.pool)
    414         _log_info(dry_run,
    415                   'Total %d DUTs, %d working, %d broken, %d reserved.',
    416                   main_pool.total_hosts, len(main_pool.working_hosts),
    417                   len(main_pool.broken_hosts), len(main_pool.ineligible_hosts))
    418 
    419         if spares_needed > 0:
    420             add_msg = 'grow pool by %d DUTs' % spares_needed
    421         elif spares_needed < 0:
    422             add_msg = 'shrink pool by %d DUTs' % -spares_needed
    423         else:
    424             add_msg = 'no change to pool size'
    425         _log_info(dry_run, 'Target is %d working DUTs; %s.',
    426                   target_total, add_msg)
    427 
    428         _log_info(dry_run,
    429                   '%s %s pool has %d spares available for balancing pool %s',
    430                   labels, spare_pool.pool, len(spare_pool.working_hosts),
    431                   main_pool.pool)
    432 
    433         if spares_needed > len(spare_duts):
    434             _log_error('Not enough spares: need %d, only have %d.',
    435                        spares_needed, len(spare_duts))
    436         elif shortfall >= 0:
    437             _log_info(dry_run,
    438                       '%s %s pool will return %d broken DUTs, '
    439                       'leaving %d still in the pool.',
    440                       labels, main_pool.pool,
    441                       len(surplus_duts),
    442                       len(main_pool.broken_hosts) - len(surplus_duts))
    443         else:
    444             _log_info(dry_run,
    445                       '%s %s pool will return %d surplus DUTs, '
    446                       'including %d working DUTs.',
    447                       labels, main_pool.pool,
    448                       len(main_pool.broken_hosts) - shortfall,
    449                       -shortfall)
    450 
    451     if (len(main_pool.broken_hosts) > arguments.max_broken and
    452         not arguments.force_rebalance):
    453         _log_error('%s %s pool: Refusing to act on pool with %d broken DUTs.',
    454                    labels, main_pool.pool, len(main_pool.broken_hosts))
    455         _log_error('Please investigate this model to for a bug ')
    456         _log_error('that is bricking devices. Once you have finished your ')
    457         _log_error('investigation, you can force a rebalance with ')
    458         _log_error('--force-rebalance')
    459         spare_duts = []
    460         surplus_duts = []
    461 
    462     if not spare_duts and not surplus_duts:
    463         if arguments.verbose:
    464             _log_info(arguments.dry_run, 'No exchange required.')
    465 
    466     _exchange_labels(arguments.dry_run, surplus_duts,
    467                      spare_pool, main_pool)
    468     _exchange_labels(arguments.dry_run, spare_duts,
    469                      main_pool, spare_pool)
    470 
    471 
    472 def _too_many_broken(inventory, pool, args):
    473     """
    474     Get the inventory of models and check if too many are broken.
    475 
    476     @param inventory: _LabInventory object.
    477     @param pool: The pool to check.
    478     @param args: Parsed command line arguments.
    479 
    480     @return True if the number of models with 1 or more broken duts
    481             exceed max_broken_models, False otherwise.
    482     """
    483     # Were we asked to skip this check?
    484     if (args.force_rebalance or
    485             (args.all_models and args.max_broken_models == 0)):
    486         return False
    487 
    488     max_broken = args.max_broken_models
    489     if max_broken is None:
    490         total_num = len(inventory.get_pool_models(pool))
    491         max_broken = int(_MAX_BROKEN_DEFAULT_RATIO * total_num)
    492     _log_info(args.dry_run,
    493               'Max broken models for pool %s: %d',
    494               pool, max_broken)
    495 
    496     broken = [model for model, counts in inventory.iteritems()
    497                   if counts.get_broken(pool) != 0]
    498     _log_message('There are %d models in the %s pool with at least 1 '
    499                  'broken DUT (max threshold %d)',
    500                  len(broken), pool, max_broken)
    501     for b in sorted(broken):
    502         _log_message(b)
    503     return len(broken) > max_broken
    504 
    505 
    506 def _parse_command(argv):
    507     """Parse the command line arguments.
    508 
    509     Create an argument parser for this command's syntax, parse the
    510     command line, and return the result of the `ArgumentParser`
    511     `parse_args()` method.
    512 
    513     @param argv Standard command line argument vector; `argv[0]` is
    514                 assumed to be the command name.
    515 
    516     @return Result returned by `ArgumentParser.parse_args()`.
    517 
    518     """
    519     parser = argparse.ArgumentParser(
    520             prog=os.path.basename(argv[0]),
    521             description='Balance pool shortages from spares on reserve')
    522 
    523     parser.add_argument(
    524         '-w', '--web', type=str, default=None,
    525         help='AFE host to use. Default comes from shadow_config.',
    526     )
    527     count_group = parser.add_mutually_exclusive_group()
    528     count_group.add_argument('-t', '--total', type=int,
    529                              metavar='COUNT', default=None,
    530                              help='Set the number of DUTs in the '
    531                                   'pool to the specified count for '
    532                                   'every MODEL')
    533     count_group.add_argument('-a', '--grow', type=int,
    534                              metavar='COUNT', default=None,
    535                              help='Add the specified number of DUTs '
    536                                   'to the pool for every MODEL')
    537     count_group.add_argument('-d', '--shrink', type=int,
    538                              metavar='COUNT', default=None,
    539                              help='Remove the specified number of DUTs '
    540                                   'from the pool for every MODEL')
    541 
    542     parser.add_argument('-s', '--spare', default=_SPARE_DEFAULT,
    543                         metavar='POOL',
    544                         help='Pool from which to draw replacement '
    545                              'spares (default: pool:%s)' % _SPARE_DEFAULT)
    546     parser.add_argument('-n', '--dry-run', action='store_true',
    547                         help='Report actions to take in the form of '
    548                              'shell commands')
    549     parser.add_argument('-v', '--verbose', action='store_true',
    550                         help='Print more detail about calculations for debug '
    551                              'purposes.')
    552 
    553     parser.add_argument('-m', '--max-broken', default=2, type=int,
    554                         metavar='COUNT',
    555                         help='Only rebalance a pool if it has at most '
    556                              'COUNT broken DUTs.')
    557     parser.add_argument('-f', '--force-rebalance', action='store_true',
    558                         help='Forcefully rebalance all DUTs in a pool, even '
    559                              'if it has a large number of broken DUTs. '
    560                              'Before doing this, please investigate whether '
    561                              'there is a bug that is bricking devices in the '
    562                              'lab.')
    563     parser.add_argument('--production', action='store_true',
    564                         help='Treat this as a production run. This will '
    565                              'collect metrics.')
    566 
    567     parser.add_argument(
    568             '--all-models',
    569             action='store_true',
    570             help='Rebalance all managed models.  This will do a very expensive '
    571                  'check to see how many models have at least one broken DUT. '
    572                  'To bypass that check, set --max-broken-models to 0.',
    573     )
    574     parser.add_argument(
    575             '--max-broken-models', default=None, type=int, metavar='COUNT',
    576             help='Only rebalance all models if number of models with broken '
    577                  'DUTs in the specified pool is less than COUNT.',
    578     )
    579 
    580     parser.add_argument('pool',
    581                         metavar='POOL',
    582                         help='Name of the pool to balance.  Use %s to balance '
    583                              'all critical pools' % _ALL_CRITICAL_POOLS)
    584     parser.add_argument('models', nargs='*', metavar='MODEL',
    585                         help='Names of models to balance.')
    586 
    587     parser.add_argument('-p', '--phase', metavar='PHASE',
    588                         help='Optional phase label to restrict balance '
    589                         'operation to.')
    590 
    591     parser.add_argument('--sku', type=str,
    592                         help='Optional name of sku to restrict to.')
    593 
    594     arguments = parser.parse_args(argv[1:])
    595 
    596     # Error-check arguments.
    597     if arguments.models and arguments.all_models:
    598         parser.error('Cannot specify individual models on the command line '
    599                      'when using --all-models.')
    600     if (arguments.pool == _ALL_CRITICAL_POOLS and
    601         arguments.spare != _SPARE_DEFAULT):
    602         parser.error('Cannot specify --spare pool to be %s when balancing all '
    603                      'critical pools.' % _SPARE_DEFAULT)
    604     for p in (arguments.spare, arguments.pool):
    605         if not _VALID_POOL_PATTERN.match(p):
    606             parser.error('Invalid pool name: %s' % p)
    607     return arguments
    608 
    609 
    610 def infer_balancer_targets(afe, arguments, pools):
    611     """Take some arguments and translate them to a list of models to balance
    612 
    613     Args:
    614     @param afe           AFE object to be used for taking inventory.
    615     @param arguments     Parsed command line arguments.
    616     @param pools         The list of pools to balance.
    617 
    618     @returns    a list of (model, labels) tuples to be balanced
    619 
    620     """
    621     balancer_targets = []
    622 
    623     for pool in pools:
    624         if arguments.all_models:
    625             inventory = lab_inventory.get_inventory(afe)
    626             quarantine = _too_many_broken(inventory, pool, arguments)
    627             if quarantine:
    628                 _log_error('Refusing to balance all models for %s pool, '
    629                            'too many models with at least 1 broken DUT '
    630                            'detected.', pool)
    631             else:
    632                 for model in inventory.get_pool_models(pool):
    633                     labels = labellib.LabelsMapping()
    634                     labels['model'] = model
    635                     if arguments.phase:
    636                         labels['phase'] = arguments.phase
    637                     balancer_targets.append((pool, labels.getlabels()))
    638             metrics.Boolean(
    639                 'chromeos/autotest/balance_pools/unchanged_pools').set(
    640                     quarantine, fields={'pool': pool})
    641             _log_message('Pool %s quarantine status: %s', pool, quarantine)
    642         else:
    643             for model in arguments.models:
    644                 labels = labellib.LabelsMapping()
    645                 labels['model'] = model
    646                 if arguments.sku:
    647                     labels['sku'] = arguments.sku
    648                 if arguments.phase:
    649                     labels['phase'] = arguments.phase
    650                 balancer_targets.append((pool, labels.getlabels()))
    651     return balancer_targets
    652 
    653 
    654 def main(argv):
    655     """Standard main routine.
    656 
    657     @param argv  Command line arguments including `sys.argv[0]`.
    658 
    659     """
    660     arguments = _parse_command(argv)
    661     if arguments.production:
    662         metrics_manager = site_utils.SetupTsMonGlobalState('balance_pools',
    663                                                            indirect=True)
    664     else:
    665         metrics_manager = site_utils.TrivialContextManager()
    666 
    667     with metrics_manager:
    668         with metrics.SuccessCounter('chromeos/autotest/balance_pools/runs'):
    669             end_time = time.time()
    670             start_time = end_time - 24 * 60 * 60
    671             afe = frontend_wrappers.RetryingAFE(server=arguments.web)
    672 
    673             def balancer(pool, labels):
    674                 """Balance the specified model.
    675 
    676                 @param pool: The pool to rebalance for the model.
    677                 @param labels: labels to restrict to balancing operations
    678                         within.
    679                 """
    680                 _balance_model(arguments, afe, pool, labels,
    681                                start_time, end_time)
    682                 _log_message('')
    683 
    684             pools = (lab_inventory.CRITICAL_POOLS
    685                     if arguments.pool == _ALL_CRITICAL_POOLS
    686                     else [arguments.pool])
    687             balancer_targets = infer_balancer_targets(afe, arguments, pools)
    688             try:
    689                 parallel.RunTasksInProcessPool(
    690                         balancer,
    691                         balancer_targets,
    692                         processes=8,
    693                 )
    694             except KeyboardInterrupt:
    695                 pass
    696 
    697 
# Run the command only when executed as a script, not when imported.
if __name__ == '__main__':
    main(sys.argv)
    700