#!/usr/bin/python
# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import errno
import json
import mmap
import optparse
import os
import signal
import sys
import syslog
import time

# The prefix of FIFO files used when using background processes.
RESULT_FIFO_PREFIX = '/tmp/update_engine_performance_monitor_fifo'

class UpdateEnginePerformanceMonitor(object):
    """Performance and resource usage monitor script.

    This script is intended to run on the DUT and will dump
    performance data as a JSON document when done. It can be run in
    the background using the --start-bg and --stop-bg options.
    """

    def __init__(self, verbose, timeout_seconds):
        """Instance initializer.

        @param verbose: if True, prints debug info stderr.

        @param timeout_seconds: maximum amount of time to monitor for.
        """
        self.verbose = verbose
        self.timeout_seconds = timeout_seconds
        # Creation time; used by run() to report 'update_length'.
        self.timer = time.time()
        # Sampling state. run() resets these, but they are defined here
        # so that do_sample() and signal_handler() are usable on a
        # freshly constructed instance.
        self.rss_peak = 0
        self.request_exit = False


    @staticmethod
    def get_update_engine_pids():
        """Gets all processes (tasks) in the update-engine cgroup.

        @return a list of process identifiers.
        """
        with open('/sys/fs/cgroup/cpu/update-engine/tasks') as f:
            return [int(i) for i in f.read().split()]


    @staticmethod
    def get_info_for_pid(pid, pids_processed):
        """Get information about a process.

        The returned information is a tuple where the first element is
        the RSS size in bytes and the second element is the process
        name (in parentheses, exactly as it appears in the stat
        file). The task and its siblings (e.g. tasks belonging to the
        same process) will be set in the |pids_processed| set.

        If the task has vanished, (0, '') is returned and
        |pids_processed| is left untouched.

        @param pid: the task to get information about.

        @param pids_processed: set of process identifiers.

        @return a tuple with information.
        """
        try:
            with open('/proc/%d/stat' % pid) as f:
                stat = f.read()
            # The command name is wrapped in parentheses and may itself
            # contain spaces or parentheses, so split at the *last*
            # ')' instead of blindly splitting on whitespace.
            comm_end = stat.rindex(')')
            comm = stat[stat.index('('):comm_end + 1]
            fields = stat[comm_end + 1:].split()
            # According to the proc(5) man page, field 24 (counting
            # from 1) is the number of pages in the resident set. The
            # first entry of |fields| is field 3, hence index 21.
            rss = int(fields[21]) * mmap.PAGESIZE
            tasks = os.listdir('/proc/%d/task' % pid)
            # Mark all tasks belonging to the process to avoid
            # double-counting their RSS.
            for t in tasks:
                pids_processed.add(int(t))
            return rss, comm
        except (IOError, OSError) as e:
            # It's possible that the task vanished in the window
            # between reading the 'tasks' file and when attempting to
            # read from it (ditto for iterating over the 'task'
            # directory). Handle this gracefully.
            if e.errno in (errno.ENOENT, errno.ESRCH):
                return 0, ''
            raise


    def do_sample(self):
        """Sampling method.

        This collects information about all the processes in the
        update-engine cgroup. The information is used to e.g. maintain
        historical peaks etc.
        """
        if self.verbose:
            sys.stderr.write('========================================\n')
        rss_total = 0
        pids = self.get_update_engine_pids()
        pids_processed = set()
        # Loop over all PIDs (tasks) in the update-engine cgroup and
        # be careful not to double-count PIDs (tasks) belonging to the
        # same process.
        for pid in pids:
            if pid not in pids_processed:
                rss, comm = self.get_info_for_pid(pid, pids_processed)
                rss_total += rss
                if self.verbose:
                    sys.stderr.write('pid %d %s -> %d KiB\n' %
                                     (pid, comm, rss // 1024))
            else:
                if self.verbose:
                    sys.stderr.write('pid %d already counted\n' % pid)
        self.rss_peak = max(rss_total, self.rss_peak)
        if self.verbose:
            sys.stderr.write('Total = %d KiB\n' % (rss_total // 1024))
            sys.stderr.write('Peak  = %d KiB\n' % (self.rss_peak // 1024))


    def signal_handler(self, signal, frame):
        """Signal handler used to terminate monitoring.

        @param signal: the signal delivered.

        @param frame: the interrupted stack frame.
        """
        self.request_exit = True


    def run(self, signum):
        """Main sampling loop.

        Periodically sample and process performance data until the
        signal specified by |signum| is sent to the
        process. Returns recorded data as a string.

        @param signum: the signal to wait (e.g. signal.SIGTERM) or None.

        @return a string with JSON data or None if the timeout
        deadline has been exceeded.
        """
        if signum:
            signal.signal(signum, self.signal_handler)
        self.rss_peak = 0
        self.request_exit = False
        timeout_deadline = time.time() + self.timeout_seconds
        while not self.request_exit:
            # Sample through |self| so the loop works for any instance,
            # not just one bound to a module-level name.
            self.do_sample()
            time.sleep(0.1)
            if time.time() > timeout_deadline:
                return None
        return json.dumps({'rss_peak': self.rss_peak,
                           'update_length': int(time.time() - self.timer)})


class WriteToSyslog(object):
    """File-like object to log messages to syslog.

    Instances of this object can be assigned to e.g. sys.stderr to log
    errors/backtraces to syslog.
    """

    def __init__(self, ident):
        """Instance initializer.

        @param ident: string to identify program by.
        """
        syslog.openlog(ident, syslog.LOG_PID, syslog.LOG_DAEMON)


    def write(self, data):
        """Overridden write() method.

        @param data: the data to write.
        """
        syslog.syslog(syslog.LOG_ERR, data)


    def flush(self):
        """No-op flush() so callers treating this as a stream don't fail.

        Messages are handed to syslog immediately in write(), so there
        is nothing buffered here.
        """
        pass


def daemonize_and_print_pid_on_stdout():
    """Daemonizes and prints the daemon process pid on stdout and
    exits.

    When this function returns, the process is a properly detached daemon
    process parented by pid 1. This is basically the standard double-fork
    daemonization dance as described in W. Richard Stevens, "Advanced
    Programming in the Unix Environment", 1992, Addison-Wesley, ISBN
    0-201-56317-7
    """
    first_child = os.fork()
    if first_child != 0:
        # Original process: exit and let the first child carry on.
        sys.exit(0)
    os.chdir('/')
    os.setsid()
    os.umask(0)
    second_child = os.fork()
    if second_child != 0:
        # First child (parent of the daemon): write the daemon's pid
        # to stdout and exit.
        print(second_child)
        sys.exit(0)
    # Redirect native stdin, stdout, stderr file descriptors to /dev/null.
    target_fds = [stream.fileno()
                  for stream in (sys.stdin, sys.stdout, sys.stderr)]
    devnull_fd = os.open(os.devnull, os.O_RDWR)
    for fd in target_fds:
        os.dup2(devnull_fd, fd)
    # The duplicates keep /dev/null open; drop the extra descriptor
    # unless it happens to be one of the standard ones.
    if devnull_fd not in target_fds:
        os.close(devnull_fd)
    # Send stderr to syslog. Note that this will only work for Python
    # code in this process - it will not work for native code or child
    # processes. If this is ever needed, use subprocess.Popen() to
    # spawn logger(1) and connect its stdin fd with the stderr fd in
    # this process.
    sys.stderr = WriteToSyslog('update_engine_performance_monitor.py')


def main():
    """Parses the command line and runs the requested mode.

    Modes: --start-bg (daemonize and monitor until SIGTERM), --stop-bg
    <PID> (stop a background instance and print its JSON result), or,
    with neither, monitor in the foreground until Ctrl+C.
    """
    parser = optparse.OptionParser()
    parser.add_option('-v', '--verbose', action='store_true',
                      dest='verbose', help='print debug info to stderr')
    parser.add_option('--timeout', action='store', type='int', default=3600,
                      dest='timeout_seconds', metavar='<SECONDS>',
                      help='maximum amount of time to monitor for')
    parser.add_option('--start-bg', action='store_true',
                      dest='start_bg', help='start background instance '
                      'and print its PID on stdout')
    parser.add_option('--stop-bg', action='store', type='int', default=0,
                      dest='stop_bg', metavar='<PID>',
                      help='stop running background instance and dump '
                      'its recorded data')
    options, _ = parser.parse_args()

    monitor = UpdateEnginePerformanceMonitor(options.verbose,
                                             options.timeout_seconds)
    if options.start_bg:
        # If starting a background instance, fork a child and write
        # its PID on stdout in the parent process. In the child
        # process, setup a FIFO and monitor until SIGTERM is
        # called. When that happens, write the JSON result to the FIFO.
        #
        # Since this is expected to be called via ssh we need to
        # completely detach from the session - otherwise the remote
        # ssh(1) invocation will hang until our background instance is
        # gone.
        daemonize_and_print_pid_on_stdout()
        # Prepare the FIFO ahead of time since it'll serve as an extra
        # sanity check in --stop-bg before sending SIGTERM to the
        # given pid.
        instance_pid = os.getpid()
        fifo_path = RESULT_FIFO_PREFIX + ('-pid-%d' % instance_pid)
        if os.path.exists(fifo_path):
            os.unlink(fifo_path)
        os.mkfifo(fifo_path)
        try:
            # Now monitor.
            sys.stderr.write('Starting background collection.\n')
            json_str = monitor.run(signal.SIGTERM)
            sys.stderr.write('Stopping background collection.\n')
            if json_str:
                with open(fifo_path, 'w') as fifo:
                    fifo.write(json_str)
        finally:
            # Always remove the FIFO, even if monitoring failed, so a
            # later --stop-bg does not mistake it for a live instance.
            os.unlink(fifo_path)
    elif options.stop_bg:
        # If stopping a background instance, check that the FIFO is
        # really there and if so, signal the monitoring process and
        # wait for it to write the JSON result on the FIFO.
        instance_pid = options.stop_bg
        fifo_path = RESULT_FIFO_PREFIX + ('-pid-%d' % instance_pid)
        if not os.path.exists(fifo_path):
            sys.stderr.write('No instance with PID %d. Check syslog for '
                             'messages.\n' % instance_pid)
            sys.exit(1)
        try:
            os.kill(instance_pid, signal.SIGTERM)
        except OSError as e:
            if e.errno != errno.ESRCH:
                raise
            # The FIFO exists but the process does not; opening the
            # FIFO for reading would block forever, so bail out.
            sys.stderr.write('Process with PID %d is not running. Check '
                             'syslog for messages.\n' % instance_pid)
            sys.exit(1)
        with open(fifo_path, 'r') as fifo:
            json_str = fifo.read()
        print(json_str)
    else:
        # Monitor in foreground until Ctrl+C is pressed, then dump
        # JSON on stdout. This is useful for hacking on this script,
        # especially in conjunction with --verbose.
        json_str = monitor.run(signal.SIGINT)
        if json_str:
            print(json_str)


if __name__ == '__main__':
    main()