Home | History | Annotate | Download | only in site_utils
      1 #!/usr/bin/env python
      2 # Copyright 2018 The Chromium OS Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """A simple service to monitor DUT statuses from master db/afe."""
      7 import collections
      8 import logging
      9 import sys
     10 import time
     11 
     12 import common
     13 from autotest_lib.server import constants
     14 from autotest_lib.server import frontend
     15 from chromite.lib import metrics
     16 from chromite.lib import ts_mon_config
     17 
     18 from infra_libs import ts_mon
     19 
     20 
     21 DutCountBucket = collections.namedtuple('DutCountBucket',
     22                                         ['board',
     23                                          'model',
     24                                          'pool',
     25                                          'is_locked',
     26                                          'status']
     27                                         )
     28 
     29 
     30 def _get_bucket_for_host(host):
     31     """Determine the counter bucket for |host|.
     32 
     33     Args:
     34         host: A Host object as returned by afe.
     35 
     36     Returns:
     37         A DutCountBucket instance describing the bucket for this host.
     38     """
     39     board = _get_unique_label(host.labels, constants.Labels.BOARD_PREFIX)
     40     model = _get_unique_label(host.labels, constants.Labels.MODEL_PREFIX)
     41     pool = _get_unique_label(host.labels, constants.Labels.POOL_PREFIX)
     42     if pool in constants.Pools.MANAGED_POOLS:
     43         pool = 'managed:' + pool
     44     status = host.status or '[None]'
     45     is_locked = host.locked
     46     return DutCountBucket(board, model, pool, is_locked, status)
     47 
     48 
     49 def _get_unique_label(labels, prefix):
     50     """Return the labels for a given prefix, with prefix stripped.
     51 
     52     If prefixed label does not occur, return '[None]'
     53     If prefixed label occurs multiply, return '[Multiple]'
     54 
     55     _get_unique_label(['foo:1', 'foo:2', 'bar1'], 'foo:') -> '[Multiple]'
     56 
     57     _get_unique_label(['foo:1', 'bar2', 'baz3'], 'foo:') -> '1'
     58 
     59     _get_prefixed_labels(['bar1', 'baz1'], 'foo:') -> '[None]'
     60     """
     61     ls = [l[len(prefix):] for l in labels if l.startswith(prefix)]
     62     if not ls:
     63         return '[None]'
     64     elif len(ls) == 1:
     65         return ls[0]
     66     else:
     67         return '[Multiple]'
     68 
     69 
     70 def main(argv):
     71     """Entry point for dut_mon."""
     72     logging.getLogger().setLevel(logging.INFO)
     73 
     74     with ts_mon_config.SetupTsMonGlobalState('dut_mon', indirect=True):
     75         afe = frontend.AFE()
     76         counters = collections.defaultdict(lambda: 0)
     77 
     78         field_spec = [ts_mon.StringField('board'),
     79                       ts_mon.StringField('model'),
     80                       ts_mon.StringField('pool'),
     81                       ts_mon.BooleanField('is_locked'),
     82                       ts_mon.StringField('status'),
     83                       ]
     84         dut_count = metrics.Gauge('chromeos/autotest/dut_mon/dut_count',
     85                                   description='The number of duts in a given '
     86                                               'state and bucket.',
     87                                   field_spec=field_spec)
     88         tick_count = metrics.Counter('chromeos/autotest/dut_mon/tick',
     89                                      description='Tick counter of dut_mon.')
     90 
     91         while True:
     92             # Note: We reset all counters to zero in each loop rather than
     93             # creating a new defaultdict, because we want to ensure that any
     94             # gauges that were previously set to a nonzero value by this process
     95             # get set back to zero if necessary.
     96             for k in counters:
     97                 counters[k] = 0
     98 
     99             logging.info('Fetching all hosts.')
    100             hosts = afe.get_hosts()
    101             logging.info('Fetched %s hosts.', len(hosts))
    102             for host in hosts:
    103                 fields = _get_bucket_for_host(host)
    104                 counters[fields] += 1
    105 
    106             for field, value in counters.iteritems():
    107                 logging.info('%s %s', field, value)
    108                 dut_count.set(value, fields=field.__dict__)
    109 
    110             tick_count.increment()
    111             logging.info('Sleeping for 2 minutes.')
    112             time.sleep(120)
    113 
    114 
# Start the monitor only when this file is executed as a script.
if __name__ == '__main__':
    main(sys.argv)
    117