Home | History | Annotate | Download | only in site_utils
      1 #!/usr/bin/env python
      2 # Copyright 2015 The Chromium OS Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Create e-mail reports of the Lab's DUT inventory.
      7 
      8 Gathers a list of all DUTs of interest in the Lab, segregated by
      9 model and pool, and determines whether each DUT is working or
     10 broken.  Then, send one or more e-mail reports summarizing the
     11 status to e-mail addresses provided on the command line.
     12 
     13 usage:  lab_inventory.py [ options ] [ model ... ]
     14 
     15 Options:
     16 --duration / -d <hours>
     17     How far back in time to search job history to determine DUT
     18     status.
     19 
     20 --model-notify <address>[,<address>]
     21     Send the "model status" e-mail to all the specified e-mail
     22     addresses.
     23 
     24 --pool-notify <address>[,<address>]
     25     Send the "pool status" e-mail to all the specified e-mail
     26     addresses.
     27 
     28 --recommend <number>
     29     When generating the "model status" e-mail, include a list of
     30     <number> specific DUTs to be recommended for repair.
     31 
     32 --repair-loops
     33     Scan the inventory for DUTs stuck in repair loops, and report them
     34     via a Monarch presence metric.
     35 
     36 --logdir <directory>
     37     Log progress and actions in a file under this directory.  Text
     38     of any e-mail sent will also be logged in a timestamped file in
     39     this directory.
     40 
     41 --debug
     42     Suppress all logging, metrics reporting, and sending e-mail.
     43     Instead, write the output that would be generated onto stdout.
     44 
     45 <model> arguments:
     46     With no arguments, gathers the status for all models in the lab.
     47     With one or more named models on the command line, restricts
     48     reporting to just those models.
     49 
     50 """
     51 
     52 
     53 import argparse
     54 import collections
     55 import logging
     56 import logging.handlers
     57 import os
     58 import re
     59 import sys
     60 import time
     61 
     62 import common
     63 from autotest_lib.client.bin import utils
     64 from autotest_lib.client.common_lib import time_utils
     65 from autotest_lib.server import constants
     66 from autotest_lib.server import site_utils
     67 from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
     68 from autotest_lib.server.hosts import servo_host
     69 from autotest_lib.server.lib import status_history
     70 from autotest_lib.site_utils import gmail_lib
     71 from chromite.lib import metrics
     72 
     73 
# Pool names used by this script, re-exported from the shared
# `constants.Pools` definitions:
#   + CRITICAL_POOLS - the pools treated as critical.
#   + SPARE_POOL - the default spare pool used when calculating the
#     spares buffer and when selecting reportable models.
#   + MANAGED_POOLS - the pools whose DUTs are gathered by
#     `_LabInventory.create_inventory()`.
CRITICAL_POOLS = constants.Pools.CRITICAL_POOLS
SPARE_POOL = constants.Pools.SPARE_POOL
MANAGED_POOLS = constants.Pools.MANAGED_POOLS

# _EXCLUDED_LABELS - A set of labels that disqualify a DUT from
#     monitoring by this script.  Currently, we're excluding these:
#   + 'adb' - We're not ready to monitor Android or Brillo hosts.
#   + 'board:guado_moblab' - These are maintained by a separate
#     process that doesn't use this script.

_EXCLUDED_LABELS = {'adb', 'board:guado_moblab'}

# _DEFAULT_DURATION:
#     Default value, in hours, used for the --duration command line
#     option.  Specifies how far back in time to search in order to
#     determine DUT status.

_DEFAULT_DURATION = 24

# _LOGDIR:
#     Relative path used in the calculation of the default setting for
#     the --logdir option.  The full path is relative to the root of the
#     autotest directory, as determined from sys.argv[0].
# _LOGFILE:
#     Basename of a file to which general log information will be
#     written.
# _LOG_FORMAT:
#     Format string for log messages.

_LOGDIR = os.path.join('logs', 'dut-data')
_LOGFILE = 'lab-inventory.log'
_LOG_FORMAT = '%(asctime)s | %(levelname)-10s | %(message)s'

# Pattern describing location-based host names in the Chrome OS test
# labs.  Each DUT hostname designates the DUT's location:
#   * A lab (room) that's physically separated from other labs
#     (i.e. there's a door).
#   * A row (or aisle) of DUTs within the lab.
#   * A vertical rack of shelves on the row.
#   * A specific host on one shelf of the rack.
#
# Match groups, in order:  1 = lab name, 2 = row number,
# 3 = rack number, 4 = host number.

_HOSTNAME_PATTERN = re.compile(
        r'(chromeos\d+)-row(\d+)-rack(\d+)-host(\d+)')

# _REPAIR_LOOP_THRESHOLD:
#    The number of repeated Repair tasks that must be seen to declare
#    that a DUT is stuck in a repair loop.

_REPAIR_LOOP_THRESHOLD = 4
    123 
    124 
    125 class _HostSetInventory(object):
    126     """Maintains a set of related `HostJobHistory` objects.
    127 
    128     The collection is segregated into disjoint categories of "working",
    129     "broken", and "idle" DUTs.  Accessor methods allow finding both the
    130     list of DUTs in each category, as well as counts of each category.
    131 
    132     Performance note:  Certain methods in this class are potentially
    133     expensive:
    134       * `get_working()`
    135       * `get_working_list()`
    136       * `get_broken()`
    137       * `get_broken_list()`
    138       * `get_idle()`
    139       * `get_idle_list()`
    140     The first time any one of these methods is called, it causes
    141     multiple RPC calls with a relatively expensive set of database
    142     queries.  However, the results of the queries are cached in the
    143     individual `HostJobHistory` objects, so only the first call
    144     actually pays the full cost.
    145 
    146     Additionally, `get_working_list()`, `get_broken_list()` and
    147     `get_idle_list()` cache their return values to avoid recalculating
    148     lists at every call; this caching is separate from the caching of
    149     RPC results described above.
    150 
    151     This class is deliberately constructed to delay the RPC cost until
    152     the accessor methods are called (rather than to query in
    153     `record_host()`) so that it's possible to construct a complete
    154     `_LabInventory` without making the expensive queries at creation
    155     time.  `_populate_model_counts()`, below, assumes this behavior.
    156 
    157     Current usage of this class is that all DUTs are part of a single
    158     scheduling pool of DUTs; however, this class make no assumptions
    159     about the actual relationship among the DUTs.
    160     """
    161 
    162     def __init__(self):
    163         self._histories = []
    164         self._working_list = None
    165         self._broken_list = None
    166         self._idle_list = None
    167 
    168 
    169     def record_host(self, host_history):
    170         """Add one `HostJobHistory` object to the collection.
    171 
    172         @param host_history The `HostJobHistory` object to be
    173                             remembered.
    174 
    175         """
    176         self._working_list = None
    177         self._broken_list = None
    178         self._idle_list = None
    179         self._histories.append(host_history)
    180 
    181 
    182     def get_working_list(self):
    183         """Return a list of all working DUTs in the pool.
    184 
    185         Filter `self._histories` for histories where the last
    186         diagnosis is `WORKING`.
    187 
    188         Cache the result so that we only cacluate it once.
    189 
    190         @return A list of HostJobHistory objects.
    191 
    192         """
    193         if self._working_list is None:
    194             self._working_list = [h for h in self._histories
    195                     if h.last_diagnosis()[0] == status_history.WORKING]
    196         return self._working_list
    197 
    198 
    199     def get_working(self):
    200         """Return the number of working DUTs in the pool."""
    201         return len(self.get_working_list())
    202 
    203 
    204     def get_broken_list(self):
    205         """Return a list of all broken DUTs in the pool.
    206 
    207         Filter `self._histories` for histories where the last
    208         diagnosis is `BROKEN`.
    209 
    210         Cache the result so that we only cacluate it once.
    211 
    212         @return A list of HostJobHistory objects.
    213 
    214         """
    215         if self._broken_list is None:
    216             self._broken_list = [h for h in self._histories
    217                     if h.last_diagnosis()[0] == status_history.BROKEN]
    218         return self._broken_list
    219 
    220 
    221     def get_broken(self):
    222         """Return the number of broken DUTs in the pool."""
    223         return len(self.get_broken_list())
    224 
    225 
    226     def get_idle_list(self):
    227         """Return a list of all idle DUTs in the pool.
    228 
    229         Filter `self._histories` for histories where the last
    230         diagnosis is `UNUSED` or `UNKNOWN`.
    231 
    232         Cache the result so that we only cacluate it once.
    233 
    234         @return A list of HostJobHistory objects.
    235 
    236         """
    237         idle_statuses = {status_history.UNUSED, status_history.UNKNOWN}
    238         if self._idle_list is None:
    239             self._idle_list = [h for h in self._histories
    240                     if h.last_diagnosis()[0] in idle_statuses]
    241         return self._idle_list
    242 
    243 
    244     def get_idle(self):
    245         """Return the number of idle DUTs in the pool."""
    246         return len(self.get_idle_list())
    247 
    248 
    249     def get_total(self):
    250         """Return the total number of DUTs in the pool."""
    251         return len(self._histories)
    252 
    253 
    254 class _PoolSetInventory(object):
    255     """Maintains a set of `HostJobHistory`s for a set of pools.
    256 
    257     The collection is segregated into disjoint categories of "working",
    258     "broken", and "idle" DUTs.  Accessor methods allow finding both the
    259     list of DUTs in each category, as well as counts of each category.
    260     Accessor queries can be for an individual pool, or against all
    261     pools.
    262 
    263     Performance note:  This class relies on `_HostSetInventory`.  Public
    264     methods in this class generally rely on methods of the same name in
    265     the underlying class, and so will have the same underlying
    266     performance characteristics.
    267     """
    268 
    269     def __init__(self, pools):
    270         self._histories_by_pool = {
    271             pool: _HostSetInventory() for pool in pools
    272         }
    273 
    274     def record_host(self, host_history):
    275         """Add one `HostJobHistory` object to the collection.
    276 
    277         @param host_history The `HostJobHistory` object to be
    278                             remembered.
    279 
    280         """
    281         pool = host_history.host_pool
    282         self._histories_by_pool[pool].record_host(host_history)
    283 
    284 
    285     def _count_pool(self, get_pool_count, pool=None):
    286         """Internal helper to count hosts in a given pool.
    287 
    288         The `get_pool_count` parameter is a function to calculate
    289         the exact count of interest for the pool.
    290 
    291         @param get_pool_count  Function to return a count from a
    292                                _PoolCount object.
    293         @param pool            The pool to be counted.  If `None`,
    294                                return the total across all pools.
    295 
    296         """
    297         if pool is None:
    298             return sum([get_pool_count(cached_history) for cached_history in
    299                         self._histories_by_pool.values()])
    300         else:
    301             return get_pool_count(self._histories_by_pool[pool])
    302 
    303 
    304     def get_working_list(self):
    305         """Return a list of all working DUTs (across all pools).
    306 
    307         Go through all HostJobHistory objects across all pools, selecting the
    308         ones where the last diagnosis is `WORKING`.
    309 
    310         @return A list of HostJobHistory objects.
    311 
    312         """
    313         l = []
    314         for p in self._histories_by_pool.values():
    315             l.extend(p.get_working_list())
    316         return l
    317 
    318 
    319     def get_working(self, pool=None):
    320         """Return the number of working DUTs in a pool.
    321 
    322         @param pool  The pool to be counted.  If `None`, return the
    323                      total across all pools.
    324 
    325         @return The total number of working DUTs in the selected
    326                 pool(s).
    327         """
    328         return self._count_pool(_HostSetInventory.get_working, pool)
    329 
    330 
    331     def get_broken_list(self):
    332         """Return a list of all broken DUTs (across all pools).
    333 
    334         Go through all HostJobHistory objects in the across all pools,
    335         selecting the ones where the last diagnosis is `BROKEN`.
    336 
    337         @return A list of HostJobHistory objects.
    338 
    339         """
    340         l = []
    341         for p in self._histories_by_pool.values():
    342             l.extend(p.get_broken_list())
    343         return l
    344 
    345 
    346     def get_broken(self, pool=None):
    347         """Return the number of broken DUTs in a pool.
    348 
    349         @param pool  The pool to be counted.  If `None`, return the
    350                      total across all pools.
    351 
    352         @return The total number of broken DUTs in the selected pool(s).
    353         """
    354         return self._count_pool(_HostSetInventory.get_broken, pool)
    355 
    356 
    357     def get_idle_list(self, pool=None):
    358         """Return a list of all idle DUTs in the given pool.
    359 
    360         Go through all HostJobHistory objects in the given pool, selecting the
    361         ones where the last diagnosis is `UNUSED` or `UNKNOWN`.
    362 
    363         @param pool: The pool to be counted. If `None`, return the total list
    364                      across all pools.
    365 
    366         @return A list of HostJobHistory objects.
    367 
    368         """
    369         if pool is None:
    370             l = []
    371             for p in self._histories_by_pool.itervalues():
    372                 l.extend(p.get_idle_list())
    373             return l
    374         else:
    375             return self._histories_by_pool[pool].get_idle_list()
    376 
    377 
    378     def get_idle(self, pool=None):
    379         """Return the number of idle DUTs in a pool.
    380 
    381         @param pool: The pool to be counted. If `None`, return the total
    382                      across all pools.
    383 
    384         @return The total number of idle DUTs in the selected pool(s).
    385         """
    386         return self._count_pool(_HostSetInventory.get_idle, pool)
    387 
    388 
    389     def get_spares_buffer(self, spare_pool=SPARE_POOL):
    390         """Return the the nominal number of working spares.
    391 
    392         Calculates and returns how many working spares there would
    393         be in the spares pool if all broken DUTs were in the spares
    394         pool.  This number may be negative, indicating a shortfall
    395         in the critical pools.
    396 
    397         @return The total number DUTs in the spares pool, less the total
    398                 number of broken DUTs in all pools.
    399         """
    400         return self.get_total(spare_pool) - self.get_broken()
    401 
    402 
    403     def get_total(self, pool=None):
    404         """Return the total number of DUTs in a pool.
    405 
    406         @param pool  The pool to be counted.  If `None`, return the
    407                      total across all pools.
    408 
    409         @return The total number of DUTs in the selected pool(s).
    410         """
    411         return self._count_pool(_HostSetInventory.get_total, pool)
    412 
    413 
    414 def _eligible_host(afehost):
    415     """Return whether this host is eligible for monitoring.
    416 
    417     A host is eligible if it has a (unique) 'model' label, it's in
    418     exactly one pool, and it has no labels from the
    419     `_EXCLUDED_LABELS` set.
    420 
    421     @param afehost  The host to be tested for eligibility.
    422     """
    423     # DUTs without an existing, unique 'model' or 'pool' label
    424     # aren't meant to exist in the managed inventory; their presence
    425     # generally indicates an error in the database.  Unfortunately
    426     # such errors have been seen to occur from time to time.
    427     #
    428     # The _LabInventory constructor requires hosts to conform to the
    429     # label restrictions, and may fail if they don't.  Failing an
    430     # inventory run for a single bad entry is the wrong thing, so we
    431     # ignore the problem children here, to keep them out of the
    432     # inventory.
    433     models = [l for l in afehost.labels
    434                  if l.startswith(constants.Labels.MODEL_PREFIX)]
    435     pools = [l for l in afehost.labels
    436                  if l.startswith(constants.Labels.POOL_PREFIX)]
    437     excluded = _EXCLUDED_LABELS.intersection(afehost.labels)
    438     return len(models) == 1 and len(pools) == 1 and not excluded
    439 
    440 
    441 class _LabInventory(collections.Mapping):
    442     """Collection of `HostJobHistory` objects for the Lab's inventory.
    443 
    444     This is a dict-like collection indexed by model.  Indexing returns
    445     the _PoolSetInventory object associated with the model.
    446     """
    447 
    448     @classmethod
    449     def create_inventory(cls, afe, start_time, end_time, modellist=[]):
    450         """Return a Lab inventory with specified parameters.
    451 
    452         By default, gathers inventory from `HostJobHistory` objects for
    453         all DUTs in the `MANAGED_POOLS` list.  If `modellist` is
    454         supplied, the inventory will be restricted to only the given
    455         models.
    456 
    457         @param afe          AFE object for constructing the
    458                             `HostJobHistory` objects.
    459         @param start_time   Start time for the `HostJobHistory` objects.
    460         @param end_time     End time for the `HostJobHistory` objects.
    461         @param modellist    List of models to include.  If empty,
    462                             include all available models.
    463         @return A `_LabInventory` object for the specified models.
    464 
    465         """
    466         target_pools = MANAGED_POOLS
    467         label_list = [constants.Labels.POOL_PREFIX + l for l in target_pools]
    468         afehosts = afe.get_hosts(labels__name__in=label_list)
    469         if modellist:
    470             # We're deliberately not checking host eligibility in this
    471             # code path.  This is a debug path, not used in production;
    472             # it may be useful to include ineligible hosts here.
    473             modelhosts = []
    474             for model in modellist:
    475                 model_label = constants.Labels.MODEL_PREFIX + model
    476                 host_list = [h for h in afehosts
    477                                   if model_label in h.labels]
    478                 modelhosts.extend(host_list)
    479             afehosts = modelhosts
    480         else:
    481             afehosts = [h for h in afehosts if _eligible_host(h)]
    482         create = lambda host: (
    483                 status_history.HostJobHistory(afe, host,
    484                                               start_time, end_time))
    485         return cls([create(host) for host in afehosts], target_pools)
    486 
    487 
    488     def __init__(self, histories, pools):
    489         models = {h.host_model for h in histories}
    490         self._modeldata = {model: _PoolSetInventory(pools) for model in models}
    491         self._dut_count = len(histories)
    492         for h in histories:
    493             self[h.host_model].record_host(h)
    494         self._boards = {h.host_board for h in histories}
    495 
    496 
    497     def __getitem__(self, key):
    498         return self._modeldata.__getitem__(key)
    499 
    500 
    501     def __len__(self):
    502         return self._modeldata.__len__()
    503 
    504 
    505     def __iter__(self):
    506         return self._modeldata.__iter__()
    507 
    508 
    509     def reportable_items(self, spare_pool=SPARE_POOL):
    510         """Iterate over  all items subject to reporting.
    511 
    512         Yields the contents of `self.iteritems()` filtered to include
    513         only reportable models.  A model is reportable if it has DUTs in
    514         both `spare_pool` and at least one other pool.
    515 
    516         @param spare_pool  The spare pool to be tested for reporting.
    517         """
    518         for model, histories in self.iteritems():
    519             spares = histories.get_total(spare_pool)
    520             total = histories.get_total()
    521             if spares != 0 and spares != total:
    522                 yield model, histories
    523 
    524 
    525     def get_num_duts(self):
    526         """Return the total number of DUTs in the inventory."""
    527         return self._dut_count
    528 
    529 
    530     def get_num_models(self):
    531         """Return the total number of models in the inventory."""
    532         return len(self)
    533 
    534 
    535     def get_pool_models(self, pool):
    536         """Return all models in `pool`.
    537 
    538         @param pool The pool to be inventoried for models.
    539         """
    540         return {m for m, h in self.iteritems() if h.get_total(pool)}
    541 
    542 
    543     def get_boards(self):
    544         return self._boards
    545 
    546 
    547 def _sort_by_location(inventory_list):
    548     """Return a list of DUTs, organized by location.
    549 
    550     Take the given list of `HostJobHistory` objects, separate it
    551     into a list per lab, and sort each lab's list by location.  The
    552     order of sorting within a lab is
    553       * By row number within the lab,
    554       * then by rack number within the row,
    555       * then by host shelf number within the rack.
    556 
    557     Return a list of the sorted lists.
    558 
    559     Implementation note: host locations are sorted by converting
    560     each location into a base 100 number.  If row, rack or
    561     host numbers exceed the range [0..99], then sorting will
    562     break down.
    563 
    564     @return A list of sorted lists of DUTs.
    565 
    566     """
    567     BASE = 100
    568     lab_lists = {}
    569     for history in inventory_list:
    570         location = _HOSTNAME_PATTERN.match(history.host.hostname)
    571         if location:
    572             lab = location.group(1)
    573             key = 0
    574             for idx in location.group(2, 3, 4):
    575                 key = BASE * key + int(idx)
    576             lab_lists.setdefault(lab, []).append((key, history))
    577     return_list = []
    578     for dut_list in lab_lists.values():
    579         dut_list.sort(key=lambda t: t[0])
    580         return_list.append([t[1] for t in dut_list])
    581     return return_list
    582 
    583 
    584 def _score_repair_set(buffer_counts, repair_list):
    585     """Return a numeric score rating a set of DUTs to be repaired.
    586 
    587     `buffer_counts` is a dictionary mapping model names to the size of
    588     the model's spares buffer.
    589 
    590     `repair_list` is a list of `HostJobHistory` objects for the DUTs to
    591     be repaired.
    592 
    593     This function calculates the new set of buffer counts that would
    594     result from the proposed repairs, and scores the new set using two
    595     numbers:
    596       * Worst case buffer count for any model (higher is better).  This
    597         is the more significant number for comparison.
    598       * Number of models at the worst case (lower is better).  This is
    599         the less significant number.
    600 
    601     Implementation note:  The score could fail to reflect the intended
    602     criteria if there are more than 1000 models in the inventory.
    603 
    604     @param spare_counts   A dictionary mapping models to buffer counts.
    605     @param repair_list    A list of `HostJobHistory` objects for the
    606                           DUTs to be repaired.
    607     @return A numeric score.
    608     """
    609     # Go through `buffer_counts`, and create a list of new counts
    610     # that records the buffer count for each model after repair.
    611     # The new list of counts discards the model names, as they don't
    612     # contribute to the final score.
    613     _NMODELS = 1000
    614     pools = {h.host_pool for h in repair_list}
    615     repair_inventory = _LabInventory(repair_list, pools)
    616     new_counts = []
    617     for m, c in buffer_counts.iteritems():
    618         if m in repair_inventory:
    619             newcount = repair_inventory[m].get_total()
    620         else:
    621             newcount = 0
    622         new_counts.append(c + newcount)
    623     # Go through the new list of counts.  Find the worst available
    624     # spares count, and count how many times that worst case occurs.
    625     worst_count = new_counts[0]
    626     num_worst = 1
    627     for c in new_counts[1:]:
    628         if c == worst_count:
    629             num_worst += 1
    630         elif c < worst_count:
    631             worst_count = c
    632             num_worst = 1
    633     # Return the calculated score
    634     return _NMODELS * worst_count - num_worst
    635 
    636 
    637 def _generate_repair_recommendation(inventory, num_recommend):
    638     """Return a summary of selected DUTs needing repair.
    639 
    640     Returns a message recommending a list of broken DUTs to be repaired.
    641     The list of DUTs is selected based on these criteria:
    642       * No more than `num_recommend` DUTs will be listed.
    643       * All DUTs must be in the same lab.
    644       * DUTs should be selected for some degree of physical proximity.
    645       * DUTs for models with a low spares buffer are more important than
    646         DUTs with larger buffers.
    647 
    648     The algorithm used will guarantee that at least one DUT from a model
    649     with the lowest spares buffer will be recommended.  If the worst
    650     spares buffer number is shared by more than one model, the algorithm
    651     will tend to prefer repair sets that include more of those models
    652     over sets that cover fewer models.
    653 
    654     @param inventory      `_LabInventory` object from which to generate
    655                           recommendations.
    656     @param num_recommend  Number of DUTs to recommend for repair.
    657 
    658     """
    659     logging.debug('Creating DUT repair recommendations')
    660     model_buffer_counts = {}
    661     broken_list = []
    662     for model, counts in inventory.reportable_items():
    663         logging.debug('Listing failed DUTs for %s', model)
    664         if counts.get_broken() != 0:
    665             model_buffer_counts[model] = counts.get_spares_buffer()
    666             broken_list.extend(counts.get_broken_list())
    667     # N.B. The logic inside this loop may seem complicated, but
    668     # simplification is hard:
    669     #   * Calculating an initial recommendation outside of
    670     #     the loop likely would make things more complicated,
    671     #     not less.
    672     #   * It's necessary to calculate an initial lab slice once per
    673     #     lab _before_ the while loop, in case the number of broken
    674     #     DUTs in a lab is less than `num_recommend`.
    675     recommendation = None
    676     best_score = None
    677     for lab_duts in _sort_by_location(broken_list):
    678         start = 0
    679         end = num_recommend
    680         lab_slice = lab_duts[start : end]
    681         lab_score = _score_repair_set(model_buffer_counts, lab_slice)
    682         while end < len(lab_duts):
    683             start += 1
    684             end += 1
    685             new_slice = lab_duts[start : end]
    686             new_score = _score_repair_set(model_buffer_counts, new_slice)
    687             if new_score > lab_score:
    688                 lab_slice = new_slice
    689                 lab_score = new_score
    690         if recommendation is None or lab_score > best_score:
    691             recommendation = lab_slice
    692             best_score = lab_score
    693     # N.B. The trailing space in `line_fmt` is manadatory:  Without it,
    694     # Gmail will parse the URL wrong.  Don't ask.  If you simply _must_
    695     # know more, go try it yourself...
    696     line_fmt = '%-30s %-16s %-6s\n    %s '
    697     message = ['Repair recommendations:\n',
    698                line_fmt % ( 'Hostname', 'Model', 'Servo?', 'Logs URL')]
    699     for h in recommendation:
    700         servo_name = servo_host.make_servo_hostname(h.host.hostname)
    701         servo_present = utils.host_is_in_lab_zone(servo_name)
    702         _, event = h.last_diagnosis()
    703         line = line_fmt % (
    704                 h.host.hostname, h.host_model,
    705                 'Yes' if servo_present else 'No', event.job_url)
    706         message.append(line)
    707     return '\n'.join(message)
    708 
    709 
    710 def _generate_model_inventory_message(inventory):
    711     """Generate the "model inventory" e-mail message.
    712 
    713     The model inventory is a list by model summarizing the number of
    714     working, broken, and idle DUTs, and the total shortfall or surplus
    715     of working devices relative to the minimum critical pool
    716     requirement.
    717 
    718     The report omits models with no DUTs in the spare pool or with no
    719     DUTs in a critical pool.
    720 
    721     N.B. For sample output text formattted as users can expect to
    722     see it in e-mail and log files, refer to the unit tests.
    723 
    724     @param inventory  `_LabInventory` object to be reported on.
    725     @return String with the inventory message to be sent.
    726     """
    727     logging.debug('Creating model inventory')
    728     nworking = 0
    729     nbroken = 0
    730     nidle = 0
    731     nbroken_models = 0
    732     ntotal_models = 0
    733     summaries = []
    734     column_names = (
    735         'Model', 'Avail', 'Bad', 'Idle', 'Good', 'Spare', 'Total')
    736     for model, counts in inventory.reportable_items():
    737         logging.debug('Counting %2d DUTS for model %s',
    738                       counts.get_total(), model)
    739         # Summary elements laid out in the same order as the column
    740         # headers:
    741         #     Model Avail   Bad  Idle  Good  Spare Total
    742         #      e[0]  e[1]  e[2]  e[3]  e[4]  e[5]  e[6]
    743         element = (model,
    744                    counts.get_spares_buffer(),
    745                    counts.get_broken(),
    746                    counts.get_idle(),
    747                    counts.get_working(),
    748                    counts.get_total(SPARE_POOL),
    749                    counts.get_total())
    750         if element[2]:
    751             summaries.append(element)
    752             nbroken_models += 1
    753         ntotal_models += 1
    754         nbroken += element[2]
    755         nidle += element[3]
    756         nworking += element[4]
    757     ntotal = nworking + nbroken + nidle
    758     summaries = sorted(summaries, key=lambda e: (e[1], -e[2]))
    759     broken_percent = int(round(100.0 * nbroken / ntotal))
    760     idle_percent = int(round(100.0 * nidle / ntotal))
    761     working_percent = 100 - broken_percent - idle_percent
    762     message = ['Summary of DUTs in inventory:',
    763                '%10s %10s %10s %6s' % ('Bad', 'Idle', 'Good', 'Total'),
    764                '%5d %3d%% %5d %3d%% %5d %3d%% %6d' % (
    765                    nbroken, broken_percent,
    766                    nidle, idle_percent,
    767                    nworking, working_percent,
    768                    ntotal),
    769                '',
    770                'Models with failures: %d' % nbroken_models,
    771                'Models in inventory:  %d' % ntotal_models,
    772                '', '',
    773                'Full model inventory:\n',
    774                '%-22s %5s %5s %5s %5s %5s %5s' % column_names]
    775     message.extend(
    776             ['%-22s %5d %5d %5d %5d %5d %5d' % e for e in summaries])
    777     return '\n'.join(message)
    778 
    779 
    780 _POOL_INVENTORY_HEADER = '''\
    781 Notice to Infrastructure deputies:  All models shown below are at
    782 less than full strength, please take action to resolve the issues.
    783 Once you're satisified that failures won't recur, failed DUTs can
    784 be replaced with spares by running `balance_pool`.  Detailed
    785 instructions can be found here:
    786     http://go/cros-manage-duts
    787 '''
    788 
    789 
    790 def _generate_pool_inventory_message(inventory):
    791     """Generate the "pool inventory" e-mail message.
    792 
    793     The pool inventory is a list by pool and model summarizing the
    794     number of working and broken DUTs in the pool.  Only models with
    795     at least one broken DUT are included in the list.
    796 
    797     N.B. For sample output text formattted as users can expect to see it
    798     in e-mail and log files, refer to the unit tests.
    799 
    800     @param inventory  `_LabInventory` object to be reported on.
    801     @return String with the inventory message to be sent.
    802     """
    803     logging.debug('Creating pool inventory')
    804     message = [_POOL_INVENTORY_HEADER]
    805     newline = ''
    806     for pool in CRITICAL_POOLS:
    807         message.append(
    808             '%sStatus for pool:%s, by model:' % (newline, pool))
    809         message.append(
    810             '%-20s   %5s %5s %5s %5s' % (
    811                 'Model', 'Bad', 'Idle', 'Good', 'Total'))
    812         data_list = []
    813         for model, counts in inventory.iteritems():
    814             logging.debug('Counting %2d DUTs for %s, %s',
    815                           counts.get_total(pool), model, pool)
    816             broken = counts.get_broken(pool)
    817             idle = counts.get_idle(pool)
    818             # models at full strength are not reported
    819             if not broken and not idle:
    820                 continue
    821             working = counts.get_working(pool)
    822             total = counts.get_total(pool)
    823             data_list.append((model, broken, idle, working, total))
    824         if data_list:
    825             data_list = sorted(data_list, key=lambda d: -d[1])
    826             message.extend(
    827                 ['%-20s   %5d %5d %5d %5d' % t for t in data_list])
    828         else:
    829             message.append('(All models at full strength)')
    830         newline = '\n'
    831     return '\n'.join(message)
    832 
    833 
    834 _IDLE_INVENTORY_HEADER = '''\
    835 Notice to Infrastructure deputies:  The hosts shown below haven't
    836 run any jobs for at least 24 hours. Please check each host; locked
    837 hosts should normally be unlocked; stuck jobs should normally be
    838 aborted.
    839 '''
    840 
    841 
    842 def _generate_idle_inventory_message(inventory):
    843     """Generate the "idle inventory" e-mail message.
    844 
    845     The idle inventory is a host list with corresponding pool and model,
    846     where the hosts are idle (`UNKWOWN` or `UNUSED`).
    847 
    848     N.B. For sample output text format as users can expect to
    849     see it in e-mail and log files, refer to the unit tests.
    850 
    851     @param inventory  `_LabInventory` object to be reported on.
    852     @return String with the inventory message to be sent.
    853 
    854     """
    855     logging.debug('Creating idle inventory')
    856     message = [_IDLE_INVENTORY_HEADER]
    857     message.append('Idle Host List:')
    858     message.append('%-30s %-20s %s' % ('Hostname', 'Model', 'Pool'))
    859     data_list = []
    860     for pool in MANAGED_POOLS:
    861         for model, counts in inventory.iteritems():
    862             logging.debug('Counting %2d DUTs for %s, %s',
    863                           counts.get_total(pool), model, pool)
    864             data_list.extend([(dut.host.hostname, model, pool)
    865                                   for dut in counts.get_idle_list(pool)])
    866     if data_list:
    867         message.extend(['%-30s %-20s %s' % t for t in data_list])
    868     else:
    869         message.append('(No idle DUTs)')
    870     return '\n'.join(message)
    871 
    872 
    873 def _send_email(arguments, tag, subject, recipients, body):
    874     """Send an inventory e-mail message.
    875 
    876     The message is logged in the selected log directory using `tag` for
    877     the file name.
    878 
    879     If the --debug option was requested, the message is neither logged
    880     nor sent, but merely printed on stdout.
    881 
    882     @param arguments   Parsed command-line options.
    883     @param tag         Tag identifying the inventory for logging
    884                        purposes.
    885     @param subject     E-mail Subject: header line.
    886     @param recipients  E-mail addresses for the To: header line.
    887     @param body        E-mail message body.
    888     """
    889     logging.debug('Generating email: "%s"', subject)
    890     all_recipients = ', '.join(recipients)
    891     report_body = '\n'.join([
    892             'To: %s' % all_recipients,
    893             'Subject: %s' % subject,
    894             '', body, ''])
    895     if arguments.debug:
    896         print report_body
    897     else:
    898         filename = os.path.join(arguments.logdir, tag)
    899         try:
    900             report_file = open(filename, 'w')
    901             report_file.write(report_body)
    902             report_file.close()
    903         except EnvironmentError as e:
    904             logging.error('Failed to write %s:  %s', filename, e)
    905         try:
    906             gmail_lib.send_email(all_recipients, subject, body)
    907         except Exception as e:
    908             logging.error('Failed to send e-mail to %s:  %s',
    909                           all_recipients, e)
    910 
    911 
    912 def _populate_model_counts(inventory):
    913     """Gather model counts while providing interactive feedback.
    914 
    915     Gathering the status of all individual DUTs in the lab can take
    916     considerable time (~30 minutes at the time of this writing).
    917     Normally, we pay that cost by querying as we go.  However, with
    918     the `--debug` option, we expect a human being to be watching the
    919     progress in real time.  So, we force the first (expensive) queries
    920     to happen up front, and provide simple ASCII output on sys.stdout
    921     to show a progress bar and results.
    922 
    923     @param inventory  `_LabInventory` object from which to gather
    924                       counts.
    925     """
    926     n = 0
    927     total_broken = 0
    928     for counts in inventory.itervalues():
    929         n += 1
    930         if n % 10 == 5:
    931             c = '+'
    932         elif n % 10 == 0:
    933             c = '%d' % ((n / 10) % 10)
    934         else:
    935             c = '.'
    936         sys.stdout.write(c)
    937         sys.stdout.flush()
    938         # This next call is where all the time goes - it forces all of a
    939         # model's `HostJobHistory` objects to query the database and
    940         # cache their results.
    941         total_broken += counts.get_broken()
    942     sys.stdout.write('\n')
    943     sys.stdout.write('Found %d broken DUTs\n' % total_broken)
    944 
    945 
    946 def _perform_model_inventory(arguments, inventory, timestamp):
    947     """Perform the model inventory report.
    948 
    949     The model inventory report consists of the following:
    950       * A list of DUTs that are recommended to be repaired.  This list
    951         is optional, and only appears if the `--recommend` option is
    952         present.
    953       * A list of all models that have failed DUTs, with counts
    954         of working, broken, and spare DUTs, among others.
    955 
    956     @param arguments  Command-line arguments as returned by
    957                       `ArgumentParser`
    958     @param inventory  `_LabInventory` object to be reported on.
    959     @param timestamp  A string used to identify this run's timestamp
    960                       in logs and email output.
    961     """
    962     if arguments.recommend:
    963         recommend_message = _generate_repair_recommendation(
    964                 inventory, arguments.recommend) + '\n\n\n'
    965     else:
    966         recommend_message = ''
    967     model_message = _generate_model_inventory_message(inventory)
    968     _send_email(arguments,
    969                 'models-%s.txt' % timestamp,
    970                 'DUT model inventory %s' % timestamp,
    971                 arguments.model_notify,
    972                 recommend_message + model_message)
    973 
    974 
    975 def _perform_pool_inventory(arguments, inventory, timestamp):
    976     """Perform the pool inventory report.
    977 
    978     The pool inventory report consists of the following:
    979       * A list of all critical pools that have failed DUTs, with counts
    980         of working, broken, and idle DUTs.
    981       * A list of all idle DUTs by hostname including the model and
    982         pool.
    983 
    984     @param arguments  Command-line arguments as returned by
    985                       `ArgumentParser`
    986     @param inventory  `_LabInventory` object to be reported on.
    987     @param timestamp  A string used to identify this run's timestamp in
    988                       logs and email output.
    989     """
    990     pool_message = _generate_pool_inventory_message(inventory)
    991     idle_message = _generate_idle_inventory_message(inventory)
    992     _send_email(arguments,
    993                 'pools-%s.txt' % timestamp,
    994                 'DUT pool inventory %s' % timestamp,
    995                 arguments.pool_notify,
    996                 pool_message + '\n\n\n' + idle_message)
    997 
    998 
    999 def _dut_in_repair_loop(history):
   1000     """Return whether a DUT's history indicates a repair loop.
   1001 
   1002     A DUT is considered looping if it runs no tests, and no tasks pass
   1003     other than repair tasks.
   1004 
   1005     @param history  An instance of `status_history.HostJobHistory` to be
   1006                     scanned for a repair loop.  The caller guarantees
   1007                     that this history corresponds to a working DUT.
   1008     @returns  Return a true value if the DUT's most recent history
   1009               indicates a repair loop.
   1010     """
   1011     # Our caller passes only histories for working DUTs; that means
   1012     # we've already paid the cost of fetching the diagnosis task, and
   1013     # we know that the task was successful.  The diagnosis task will be
   1014     # one of the tasks we must scan to find a loop, so if the task isn't
   1015     # a repair task, then our history includes a successful non-repair
   1016     # task, and we're not looping.
   1017     #
   1018     # The for loop below  is very expensive, because it must fetch the
   1019     # full history, regardless of how many tasks we examine.  At the
   1020     # time of this writing, this check against the diagnosis task
   1021     # reduces the cost of finding loops in the full inventory from hours
   1022     # to minutes.
   1023     if history.last_diagnosis()[1].name != 'Repair':
   1024         return False
   1025     repair_ok_count = 0
   1026     for task in history:
   1027         if not task.is_special:
   1028             # This is a test, so we're not looping.
   1029             return False
   1030         if task.diagnosis == status_history.BROKEN:
   1031             # Failed a repair, so we're not looping.
   1032             return False
   1033         if (task.diagnosis == status_history.WORKING
   1034                 and task.name != 'Repair'):
   1035             # Non-repair task succeeded, so we're not looping.
   1036             return False
   1037         # At this point, we have either a failed non-repair task, or
   1038         # a successful repair.
   1039         if task.name == 'Repair':
   1040             repair_ok_count += 1
   1041             if repair_ok_count >= _REPAIR_LOOP_THRESHOLD:
   1042                 return True
   1043 
   1044 
   1045 def _perform_repair_loop_report(arguments, inventory):
   1046     """Scan the inventory for DUTs stuck in a repair loop.
   1047 
   1048     This routine walks through the given inventory looking for DUTs
   1049     where the most recent history shows that the DUT is regularly
   1050     passing repair tasks, but has not run any tests.
   1051 
   1052     @param arguments  Command-line arguments as returned by
   1053                       `ArgumentParser`
   1054     @param inventory  `_LabInventory` object to be reported on.
   1055     """
   1056     loop_presence = metrics.BooleanMetric(
   1057         'chromeos/autotest/inventory/repair_loops',
   1058         'DUTs stuck in repair loops')
   1059     logging.info('Scanning for DUTs in repair loops.')
   1060     for counts in inventory.itervalues():
   1061         for history in counts.get_working_list():
   1062             # Managed DUTs with names that don't match
   1063             # _HOSTNAME_PATTERN shouldn't be possible.  However, we
   1064             # don't want arbitrary strings being attached to the
   1065             # 'dut_hostname' field, so for safety, we exclude all
   1066             # anomalies.
   1067             if not _HOSTNAME_PATTERN.match(history.hostname):
   1068                 continue
   1069             if _dut_in_repair_loop(history):
   1070                 fields = {'dut_hostname': history.hostname,
   1071                           'model': history.host_model,
   1072                           'pool': history.host_pool}
   1073                 logging.info('Looping DUT: %(dut_hostname)s, '
   1074                              'model: %(model)s, pool: %(pool)s',
   1075                              fields)
   1076                 loop_presence.set(True, fields=fields)
   1077 
   1078 
   1079 def _log_startup(arguments, startup_time):
   1080     """Log the start of this inventory run.
   1081 
   1082     Print various log messages indicating the start of the run.  Return
   1083     a string based on `startup_time` that will be used to identify this
   1084     run in log files and e-mail messages.
   1085 
   1086     @param startup_time   A UNIX timestamp marking the moment when
   1087                           this inventory run began.
   1088     @returns  A timestamp string that will be used to identify this run
   1089               in logs and email output.
   1090     """
   1091     timestamp = time.strftime('%Y-%m-%d.%H',
   1092                               time.localtime(startup_time))
   1093     logging.debug('Starting lab inventory for %s', timestamp)
   1094     if arguments.model_notify:
   1095         if arguments.recommend:
   1096             logging.debug('Will include repair recommendations')
   1097         logging.debug('Will include model inventory')
   1098     if arguments.pool_notify:
   1099         logging.debug('Will include pool inventory')
   1100     return timestamp
   1101 
   1102 
   1103 def _create_inventory(arguments, end_time):
   1104     """Create the `_LabInventory` instance to use for reporting.
   1105 
   1106     @param end_time   A UNIX timestamp for the end of the time range
   1107                       to be searched in this inventory run.
   1108     """
   1109     start_time = end_time - arguments.duration * 60 * 60
   1110     afe = frontend_wrappers.RetryingAFE(server=None)
   1111     inventory = _LabInventory.create_inventory(
   1112             afe, start_time, end_time, arguments.modelnames)
   1113     logging.info('Found %d hosts across %d models',
   1114                      inventory.get_num_duts(),
   1115                      inventory.get_num_models())
   1116     return inventory
   1117 
   1118 
   1119 def _perform_inventory_reports(arguments):
   1120     """Perform all inventory checks requested on the command line.
   1121 
   1122     Create the initial inventory and run through the inventory reports
   1123     as called for by the parsed command-line arguments.
   1124 
   1125     @param arguments  Command-line arguments as returned by
   1126                       `ArgumentParser`.
   1127     """
   1128     startup_time = time.time()
   1129     timestamp = _log_startup(arguments, startup_time)
   1130     inventory = _create_inventory(arguments, startup_time)
   1131     if arguments.debug:
   1132         _populate_model_counts(inventory)
   1133     if arguments.model_notify:
   1134         _perform_model_inventory(arguments, inventory, timestamp)
   1135     if arguments.pool_notify:
   1136         _perform_pool_inventory(arguments, inventory, timestamp)
   1137     if arguments.repair_loops:
   1138         _perform_repair_loop_report(arguments, inventory)
   1139 
   1140 
   1141 def _separate_email_addresses(address_list):
   1142     """Parse a list of comma-separated lists of e-mail addresses.
   1143 
   1144     @param address_list  A list of strings containing comma
   1145                          separate e-mail addresses.
   1146     @return A list of the individual e-mail addresses.
   1147 
   1148     """
   1149     newlist = []
   1150     for arg in address_list:
   1151         newlist.extend([email.strip() for email in arg.split(',')])
   1152     return newlist
   1153 
   1154 
   1155 def _verify_arguments(arguments):
   1156     """Validate command-line arguments.
   1157 
   1158     Join comma separated e-mail addresses for `--model-notify` and
   1159     `--pool-notify` in separate option arguments into a single list.
   1160 
   1161     For non-debug uses, require that at least one inventory report be
   1162     requested.  For debug, if a report isn't specified, treat it as "run
   1163     all the reports."
   1164 
   1165     The return value indicates success or failure; in the case of
   1166     failure, we also write an error message to stderr.
   1167 
   1168     @param arguments  Command-line arguments as returned by
   1169                       `ArgumentParser`
   1170     @return True if the arguments are semantically good, or False
   1171             if the arguments don't meet requirements.
   1172 
   1173     """
   1174     arguments.model_notify = _separate_email_addresses(
   1175             arguments.model_notify)
   1176     arguments.pool_notify = _separate_email_addresses(
   1177             arguments.pool_notify)
   1178     if not any([arguments.model_notify, arguments.pool_notify,
   1179                 arguments.repair_loops]):
   1180         if not arguments.debug:
   1181             sys.stderr.write('Must request at least one report via '
   1182                              '--model-notify, --pool-notify, or '
   1183                              '--repair-loops\n')
   1184             return False
   1185         else:
   1186             # We want to run all the e-mail reports.  An empty notify
   1187             # list will cause a report to be skipped, so make sure the
   1188             # lists are non-empty.
   1189             arguments.model_notify = ['']
   1190             arguments.pool_notify = ['']
   1191     return True
   1192 
   1193 
   1194 def _get_default_logdir(script):
   1195     """Get the default directory for the `--logdir` option.
   1196 
   1197     The default log directory is based on the parent directory
   1198     containing this script.
   1199 
   1200     @param script  Path to this script file.
   1201     @return A path to a directory.
   1202 
   1203     """
   1204     basedir = os.path.dirname(os.path.abspath(script))
   1205     basedir = os.path.dirname(basedir)
   1206     return os.path.join(basedir, _LOGDIR)
   1207 
   1208 
   1209 def _parse_command(argv):
   1210     """Parse the command line arguments.
   1211 
   1212     Create an argument parser for this command's syntax, parse the
   1213     command line, and return the result of the ArgumentParser
   1214     parse_args() method.
   1215 
   1216     @param argv Standard command line argument vector; argv[0] is
   1217                 assumed to be the command name.
   1218     @return Result returned by ArgumentParser.parse_args().
   1219 
   1220     """
   1221     parser = argparse.ArgumentParser(
   1222             prog=argv[0],
   1223             description='Gather and report lab inventory statistics')
   1224     parser.add_argument('-d', '--duration', type=int,
   1225                         default=_DEFAULT_DURATION, metavar='HOURS',
   1226                         help='number of hours back to search for status'
   1227                              ' (default: %d)' % _DEFAULT_DURATION)
   1228     parser.add_argument('--model-notify', action='append',
   1229                         default=[], metavar='ADDRESS',
   1230                         help='Generate model inventory message, '
   1231                         'and send it to the given e-mail address(es)')
   1232     parser.add_argument('--pool-notify', action='append',
   1233                         default=[], metavar='ADDRESS',
   1234                         help='Generate pool inventory message, '
   1235                              'and send it to the given address(es)')
   1236     parser.add_argument('-r', '--recommend', type=int, default=None,
   1237                         help=('Specify how many DUTs should be '
   1238                               'recommended for repair (default: no '
   1239                               'recommendation)'))
   1240     parser.add_argument('--repair-loops', action='store_true',
   1241                         help='Check for devices stuck in repair loops.')
   1242     parser.add_argument('--debug-metrics', action='store_true',
   1243                         help='Include debug information about the metrics '
   1244                              'that would be reported ')
   1245     parser.add_argument('--debug', action='store_true',
   1246                         help='Print e-mail messages on stdout '
   1247                              'without sending them.')
   1248     parser.add_argument('--logdir', default=_get_default_logdir(argv[0]),
   1249                         help='Directory where logs will be written.')
   1250     parser.add_argument('modelnames', nargs='*',
   1251                         metavar='MODEL',
   1252                         help='names of models to report on '
   1253                              '(default: all models)')
   1254     arguments = parser.parse_args(argv[1:])
   1255     if not _verify_arguments(arguments):
   1256         return None
   1257     return arguments
   1258 
   1259 
   1260 def _configure_logging(arguments):
   1261     """Configure the `logging` module for our needs.
   1262 
   1263     How we log depends on whether the `--debug` option was provided on
   1264     the command line.
   1265       * Without the option, we configure the logging to capture all
   1266         potentially relevant events in a log file.  The log file is
   1267         configured to rotate once a week on Friday evening, preserving
   1268         ~3 months worth of history.
   1269       * With the option, we expect stdout to contain other
   1270         human-readable output (including the contents of the e-mail
   1271         messages), so we restrict the output to INFO level.
   1272 
   1273     For convenience, when `--debug` is on, the logging format has
   1274     no adornments, so that a call like `logging.info(msg)` simply writes
   1275     `msg` to stdout, plus a trailing newline.
   1276 
   1277     @param arguments  Command-line arguments as returned by
   1278                       `ArgumentParser`
   1279     """
   1280     root_logger = logging.getLogger()
   1281     if arguments.debug:
   1282         root_logger.setLevel(logging.INFO)
   1283         handler = logging.StreamHandler(sys.stdout)
   1284         handler.setFormatter(logging.Formatter())
   1285     else:
   1286         if not os.path.exists(arguments.logdir):
   1287             os.mkdir(arguments.logdir)
   1288         root_logger.setLevel(logging.DEBUG)
   1289         logfile = os.path.join(arguments.logdir, _LOGFILE)
   1290         handler = logging.handlers.TimedRotatingFileHandler(
   1291                 logfile, when='W4', backupCount=13)
   1292         formatter = logging.Formatter(_LOG_FORMAT,
   1293                                       time_utils.TIME_FMT)
   1294         handler.setFormatter(formatter)
   1295     # TODO(jrbarnette) This is gross.  Importing client.bin.utils
   1296     # implicitly imported logging_config, which calls
   1297     # logging.basicConfig() *at module level*.  That gives us an
   1298     # extra logging handler that we don't want.  So, clear out all
   1299     # the handlers here.
   1300     for h in root_logger.handlers:
   1301         root_logger.removeHandler(h)
   1302     root_logger.addHandler(handler)
   1303 
   1304 
   1305 def main(argv):
   1306     """Standard main routine.
   1307 
   1308     @param argv  Command line arguments, including `sys.argv[0]`.
   1309     """
   1310     arguments = _parse_command(argv)
   1311     if not arguments:
   1312         sys.exit(1)
   1313     _configure_logging(arguments)
   1314     try:
   1315         if arguments.debug_metrics or not arguments.debug:
   1316             metrics_file = None if not arguments.debug_metrics else '/dev/null'
   1317             with site_utils.SetupTsMonGlobalState(
   1318                     'repair_loops', debug_file=metrics_file,
   1319                     auto_flush=False):
   1320                 _perform_inventory_reports(arguments)
   1321             metrics.Flush()
   1322         else:
   1323             _perform_inventory_reports(arguments)
   1324     except KeyboardInterrupt:
   1325         pass
   1326     except EnvironmentError as e:
   1327         logging.exception('Unexpected OS error: %s', e)
   1328     except Exception as e:
   1329         logging.exception('Unexpected exception: %s', e)
   1330 
   1331 
   1332 def get_inventory(afe):
   1333     end_time = int(time.time())
   1334     start_time = end_time - 24 * 60 * 60
   1335     return _LabInventory.create_inventory(afe, start_time, end_time)
   1336 
   1337 
   1338 def get_managed_boards(afe):
   1339     return get_inventory(afe).get_boards()
   1340 
   1341 
# Script entry point: when run directly (not imported), hand the full
# command-line vector, including argv[0], to main().
if __name__ == '__main__':
    main(sys.argv)
   1344