Home | History | Annotate | Download | only in site_utils
      1 #!/usr/bin/python
      2 #
      3 # Copyright (c) 2015 The Chromium OS Authors. All rights reserved.
      4 # Use of this source code is governed by a BSD-style license that can be
      5 # found in the LICENSE file.
      6 
      7 
      8 """Script to check the number of long-running processes.
      9 
This script counts the "gsutil" and "autoserv" processes that have been
running for more than 24 hours, and reports that count to the stats
dashboard.
     13 
     14 This script depends on the "etimes" user-defined format of "ps".
     15 Goobuntu 14.04 has the version of ps that supports etimes, but not
     16 Goobuntu 12.04.
     17 """
     18 
     19 
     20 import subprocess
     21 
     22 from autotest_lib.server import site_utils
     23 
     24 try:
     25     from chromite.lib import metrics
     26 except ImportError:
     27     metrics = site_utils.metrics_mock
     28 
     29 
     30 PROGRAM_TO_CHECK_SET = set(['gsutil', 'autoserv'])
     31 
     32 def check_proc(prog, max_elapsed_sec):
     33     """Check the number of long-running processes for a given program.
     34 
     35     Finds out the number of processes for a given program that have run
     36     more than a given elapsed time.
     37     Sends out the number to stats dashboard.
     38 
     39     @param prog: Program name.
     40     @param max_elapsed_sec: Max elapsed time in seconds. Processes that
     41                             have run more than this value will be caught.
     42     """
     43     cmd = ('ps -eo etimes,args | grep "%s" | awk \'{if($1 > %d) print $0}\' | '
     44            'wc -l' % (prog, max_elapsed_sec))
     45     count = int(subprocess.check_output(cmd, shell = True))
     46 
     47     if prog not in PROGRAM_TO_CHECK_SET:
     48         prog = 'unknown'
     49 
     50     metrics.Gauge('chromeos/autotest/hung_processes').set(
     51             count, fields={'program': prog}
     52     )
     53 
     54 
     55 def main():
     56     """Main script. """
     57     with site_utils.SetupTsMonGlobalState('check_hung_proc', short_lived=True):
     58         for p in PROGRAM_TO_CHECK_SET:
     59             check_proc(p, 86400)
     60 
     61 
     62 if __name__ == '__main__':
     63     main()
     64