#!/usr/bin/env python
#
# stackcount    Count events and their stack traces.
#               For Linux, uses BCC, eBPF.
#
# USAGE: stackcount.py [-h] [-p PID] [-i INTERVAL] [-D DURATION] [-T] [-r] [-s]
#                      [-P] [-K] [-U] [-v] [-d] [-f] [--debug] pattern
#
# The pattern is a string with optional '*' wildcards, similar to file
# globbing. If you'd prefer to use regular expressions, use the -r option.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 12-Jan-2016   Brendan Gregg       Created this.
# 09-Jul-2016   Sasha Goldshtein    Generalized for uprobes and tracepoints.

from __future__ import print_function
from bcc import BPF, USDT
from time import sleep, strftime
import argparse
import re
import signal
import sys
import traceback

# Set from --debug in Tool.__init__; read by Probe.load and the entry point.
debug = False


def _to_text(value):
    """Return `value` as text, decoding it when it is a Python 3 bytes object.

    Process names read from the BPF map are bytes (the folded output path has
    always decoded them). Symbol names are passed through here as well so
    that they print cleanly whether the installed bcc hands back str or bytes
    -- NOTE(review): which one it is depends on the bcc version; confirm.
    On Python 2, where bytes is str, the value is returned untouched.
    """
    if isinstance(value, bytes) and not isinstance(value, str):
        return value.decode('utf-8', 'replace')
    return value


class Probe(object):
    """A single user-supplied probe pattern and the BPF program counting it."""

    def __init__(self, pattern, kernel_stack, user_stack, use_regex=False,
                 pid=None, per_pid=False):
        """Init a new probe.

        Init the probe from the pattern provided by the user. The supported
        patterns mimic the 'trace' and 'argdist' tools, but are simpler because
        we don't have to distinguish between probes and retprobes.

            func            -- probe a kernel function
            lib:func        -- probe a user-space function in the library 'lib'
            p::func         -- same thing as 'func'
            p:lib:func      -- same thing as 'lib:func'
            t:cat:event     -- probe a kernel tracepoint
            u:lib:probe     -- probe a USDT tracepoint

        Raises Exception when the pattern has an unknown type prefix, too many
        ':'-separated components, or names a library/executable that cannot
        be located.
        """
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        parts = pattern.split(':')
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        elif len(parts) == 3:
            if parts[0] == "t":
                # Tracepoints are matched as a single "category:event" string.
                parts = ["t", "", "%s:%s" % tuple(parts[1:])]
            if parts[0] not in ["p", "t", "u"]:
                raise Exception("Type must be 'p', 't', or 'u', but got %s" %
                                parts[0])
        else:
            raise Exception("Too many ':'-separated components in pattern %s" %
                            pattern)

        (self.type, self.library, self.pattern) = parts
        if not use_regex:
            # Glob mode: '*' becomes '.*' and the whole pattern is anchored.
            self.pattern = self.pattern.replace('*', '.*')
            self.pattern = '^' + self.pattern + '$'

        if (self.type == "p" and self.library) or self.type == "u":
            libpath = BPF.find_library(self.library)
            if libpath is None:
                # This might be an executable (e.g. 'bash')
                libpath = BPF.find_exe(self.library)
            if not libpath:
                raise Exception("unable to find library %s" % self.library)
            self.library = libpath

        self.pid = pid
        self.per_pid = per_pid
        # Number of attached probes; filled in by load() (USDT) or attach().
        self.matched = 0

    def is_kernel_probe(self):
        """Return True for tracepoints and for kprobes (no library given)."""
        return self.type == "t" or (self.type == "p" and self.library == "")

    def attach(self):
        """Attach the loaded BPF program to every event matching the pattern.

        load() must have been called first, because it creates self.bpf.
        Raises Exception when nothing matched the pattern.
        """
        if self.type == "p":
            if self.library:
                self.bpf.attach_uprobe(name=self.library,
                                       sym_re=self.pattern,
                                       fn_name="trace_count",
                                       pid=self.pid or -1)
                self.matched = self.bpf.num_open_uprobes()
            else:
                self.bpf.attach_kprobe(event_re=self.pattern,
                                       fn_name="trace_count")
                self.matched = self.bpf.num_open_kprobes()
        elif self.type == "t":
            self.bpf.attach_tracepoint(tp_re=self.pattern,
                                       fn_name="trace_count")
            self.matched = self.bpf.num_open_tracepoints()
        elif self.type == "u":
            pass    # Nothing to do -- attach already happened in `load`

        if self.matched == 0:
            raise Exception("No functions matched by pattern %s" %
                            self.pattern)

    def load(self):
        """Generate the BPF C program for this probe and compile it.

        Builds the text of the `trace_count` handler (one renamed copy per
        enabled USDT probe), prints it when the module-level `debug` flag is
        set, and stores the resulting BPF object in self.bpf. For USDT probes
        the matching probes are also enabled here and self.matched is updated.
        """
        ctx_name = "ctx"
        stack_trace = ""
        if self.user_stack:
            stack_trace += """
                key.user_stack_id = stack_traces.get_stackid(
                  %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
                );""" % (ctx_name)
        else:
            stack_trace += "key.user_stack_id = -1;"
        if self.kernel_stack:
            stack_trace += """
                key.kernel_stack_id = stack_traces.get_stackid(
                  %s, BPF_F_REUSE_STACKID
                );""" % (ctx_name)
        else:
            stack_trace += "key.kernel_stack_id = -1;"

        trace_count_text = """
int trace_count(void *ctx) {
    FILTER
    struct key_t key = {};
    key.tgid = GET_TGID;
    STORE_COMM
    %s
    counts.increment(key);
    return 0;
}
        """
        trace_count_text = trace_count_text % (stack_trace)

        bpf_text = """#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

struct key_t {
    // no pid (thread ID) so that we do not needlessly split this key
    u32 tgid;
    int kernel_stack_id;
    int user_stack_id;
    char name[TASK_COMM_LEN];
};

BPF_HASH(counts, struct key_t);
BPF_STACK_TRACE(stack_traces, 1024);
        """

        # We really mean the tgid from the kernel's perspective, which is in
        # the top 32 bits of bpf_get_current_pid_tgid().
        if self.is_kernel_probe() and self.pid:
            trace_count_text = trace_count_text.replace('FILTER',
                ('u32 pid; pid = bpf_get_current_pid_tgid() >> 32; ' +
                 'if (pid != %d) { return 0; }') % (self.pid))
        else:
            trace_count_text = trace_count_text.replace('FILTER', '')

        # We need per-pid statistics when tracing a user-space process, because
        # the meaning of the symbols depends on the pid. We also need them if
        # per-pid statistics were requested with -P, or for user stacks.
        if self.per_pid or not self.is_kernel_probe() or self.user_stack:
            trace_count_text = trace_count_text.replace('GET_TGID',
                'bpf_get_current_pid_tgid() >> 32')
            trace_count_text = trace_count_text.replace('STORE_COMM',
                'bpf_get_current_comm(&key.name, sizeof(key.name));')
        else:
            # kernel stacks only. skip splitting on PID so these aggregate
            # together, and don't store the process name.
            trace_count_text = trace_count_text.replace(
                'GET_TGID', '0xffffffff')
            trace_count_text = trace_count_text.replace('STORE_COMM', '')

        self.usdt = None
        if self.type == "u":
            self.usdt = USDT(path=self.library, pid=self.pid)
            for probe in self.usdt.enumerate_probes():
                if not self.pid and (probe.bin_path != self.library):
                    continue
                if re.match(self.pattern, probe.name):
                    # This hack is required because the bpf_usdt_readarg
                    # functions generated need different function names for
                    # each attached probe. If we just stick to trace_count,
                    # we'd get multiple bpf_usdt_readarg helpers with the same
                    # name when enabling more than one USDT probe.
                    new_func = "trace_count_%d" % self.matched
                    bpf_text += trace_count_text.replace(
                        "trace_count", new_func)
                    self.usdt.enable_probe(probe.name, new_func)
                    self.matched += 1
            if debug:
                print(self.usdt.get_text())
        else:
            bpf_text += trace_count_text

        if debug:
            print(bpf_text)
        self.bpf = BPF(text=bpf_text,
                       usdt_contexts=[self.usdt] if self.usdt else [])


class Tool(object):
    """Command-line front end: parses arguments, runs the probe, prints."""

    def __init__(self):
        """Parse sys.argv and build the Probe described by the pattern.

        Exits with status 1 when -K and -U are both given.
        """
        examples = """examples:
    ./stackcount submit_bio # count kernel stack traces for submit_bio
    ./stackcount -d ip_output # include a user/kernel stack delimiter
    ./stackcount -s ip_output # show symbol offsets
    ./stackcount -sv ip_output # show offsets and raw addresses (verbose)
    ./stackcount 'tcp_send*' # count stacks for funcs matching tcp_send*
    ./stackcount -r '^tcp_send.*' # same as above, using regular expressions
    ./stackcount -Ti 5 ip_output # output every 5 seconds, with timestamps
    ./stackcount -p 185 ip_output # count ip_output stacks for PID 185 only
    ./stackcount -p 185 c:malloc # count stacks for malloc in PID 185
    ./stackcount t:sched:sched_fork # count stacks for sched_fork tracepoint
    ./stackcount -p 185 u:node:* # count stacks for all USDT probes in node
    ./stackcount -K t:sched:sched_switch # kernel stacks only
    ./stackcount -U t:sched:sched_switch # user stacks only
"""
        parser = argparse.ArgumentParser(
            description="Count events and their stack traces",
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog=examples)
        parser.add_argument("-p", "--pid", type=int,
            help="trace this PID only")
        parser.add_argument("-i", "--interval",
            help="summary interval, seconds")
        parser.add_argument("-D", "--duration",
            help="total duration of trace, seconds")
        parser.add_argument("-T", "--timestamp", action="store_true",
            help="include timestamp on output")
        parser.add_argument("-r", "--regexp", action="store_true",
            help="use regular expressions. Default is \"*\" wildcards only.")
        parser.add_argument("-s", "--offset", action="store_true",
            help="show address offsets")
        parser.add_argument("-P", "--perpid", action="store_true",
            help="display stacks separately for each process")
        parser.add_argument("-K", "--kernel-stacks-only",
            action="store_true", help="kernel stack only", default=False)
        parser.add_argument("-U", "--user-stacks-only",
            action="store_true", help="user stack only", default=False)
        parser.add_argument("-v", "--verbose", action="store_true",
            help="show raw addresses")
        parser.add_argument("-d", "--delimited", action="store_true",
            help="insert delimiter between kernel/user stacks")
        parser.add_argument("-f", "--folded", action="store_true",
            help="output folded format")
        parser.add_argument("--debug", action="store_true",
            help="print BPF program before starting (for debugging purposes)")
        parser.add_argument("pattern",
            help="search expression for events")
        self.args = parser.parse_args()
        global debug
        debug = self.args.debug

        # A duration without an interval means one summary at the very end.
        if self.args.duration and not self.args.interval:
            self.args.interval = self.args.duration
        if not self.args.interval:
            self.args.interval = 99999999

        kernel_only = self.args.kernel_stacks_only
        user_only = self.args.user_stacks_only
        if kernel_only and user_only:
            # Usage error: report on stderr and exit non-zero.
            print("ERROR: -K and -U are mutually exclusive. If you want " +
                  "both stacks, that is the default.", file=sys.stderr)
            sys.exit(1)
        # With neither flag both stacks are captured; with exactly one flag
        # only that stack is.
        self.kernel_stack = not user_only
        self.user_stack = not kernel_only

        self.probe = Probe(self.args.pattern,
                           self.kernel_stack, self.user_stack,
                           self.args.regexp, self.args.pid, self.args.perpid)
        self.need_delimiter = self.args.delimited and not (
            kernel_only or user_only)

    def _print_kframe(self, addr):
        """Print one kernel frame, honouring -v (address) and -s (offset)."""
        print(" ", end="")
        if self.args.verbose:
            print("%-16x " % addr, end="")
        if self.args.offset:
            sym = self.probe.bpf.ksym(addr, show_offset=True)
        else:
            sym = self.probe.bpf.ksym(addr)
        print("%s" % _to_text(sym))

    def _print_uframe(self, addr, pid):
        """Print one user frame of `pid`, honouring -v and -s."""
        print(" ", end="")
        if self.args.verbose:
            print("%-16x " % addr, end="")
        if self.args.offset:
            sym = self.probe.bpf.sym(addr, pid, show_offset=True)
        else:
            sym = self.probe.bpf.sym(addr, pid)
        print("%s" % _to_text(sym))

    @staticmethod
    def _signal_ignore(signum, frame):
        """SIGINT handler used during cleanup: just emit a blank line."""
        print()

    def _print_comm(self, comm, pid):
        """Print the process name and pid that a stack belongs to."""
        # comm comes from the BPF map as bytes; decode it so Python 3 does
        # not print a b'...' literal.
        print(" %s [%d]" % (_to_text(comm), pid))

    def _print_counts(self):
        """Print every counted stack, least frequent first, then reset."""
        bpf = self.probe.bpf
        counts = bpf["counts"]
        stack_traces = bpf["stack_traces"]
        for k, v in sorted(counts.items(), key=lambda item: item[1].value):
            # A negative stack id means that stack was not captured.
            user_stack = [] if k.user_stack_id < 0 else \
                stack_traces.walk(k.user_stack_id)
            kernel_stack = [] if k.kernel_stack_id < 0 else \
                stack_traces.walk(k.kernel_stack_id)

            if self.args.folded:
                # print folded stack output: name;user frames;kernel frames,
                # each stack reversed relative to the walk order
                frames = [_to_text(k.name)]
                frames += [_to_text(bpf.sym(addr, k.tgid))
                           for addr in reversed(list(user_stack))]
                if self.need_delimiter:
                    frames.append("-")
                frames += [_to_text(bpf.ksym(addr))
                           for addr in reversed(list(kernel_stack))]
                print("%s %d" % (";".join(frames), v.value))
            else:
                # print multi-line stack output
                for addr in kernel_stack:
                    self._print_kframe(addr)
                if self.need_delimiter:
                    print(" --")
                for addr in user_stack:
                    self._print_uframe(addr, k.tgid)
                # 0xffffffff is the placeholder tgid stored when stacks are
                # aggregated across processes (see Probe.load).
                if not self.args.pid and k.tgid != 0xffffffff:
                    self._print_comm(k.name, k.tgid)
                print(" %d\n" % v.value)
        counts.clear()

    def run(self):
        """Load and attach the probe, then print a summary every interval.

        Runs until Ctrl-C or until the -D duration has elapsed, then exits
        the process via sys.exit().
        """
        # Convert once up front; a non-numeric value raises ValueError here
        # instead of after the probes have already been attached.
        interval = int(self.args.interval)
        duration = int(self.args.duration) if self.args.duration else None

        self.probe.load()
        self.probe.attach()
        if not self.args.folded:
            print("Tracing %d functions for \"%s\"... Hit Ctrl-C to end." %
                  (self.probe.matched, self.args.pattern))
        exiting = not self.args.interval
        seconds = 0
        while True:
            # Never sleep past the requested total duration, so -D is
            # honoured even when it is not a multiple of the interval.
            nap = interval
            if duration is not None:
                nap = min(interval, duration - seconds)
            try:
                sleep(nap)
                seconds += nap
            except KeyboardInterrupt:
                exiting = True
                # as cleanup can take many seconds, trap Ctrl-C:
                signal.signal(signal.SIGINT, Tool._signal_ignore)
            if duration is not None and seconds >= duration:
                exiting = True

            if not self.args.folded:
                print()
            if self.args.timestamp:
                print("%-8s\n" % strftime("%H:%M:%S"), end="")

            self._print_counts()

            if exiting:
                if not self.args.folded:
                    print("Detaching...")
                sys.exit()


if __name__ == "__main__":
    try:
        Tool().run()
    except Exception as err:
        # SystemExit is not an Exception subclass, so normal exits pass
        # straight through. Report real failures on stderr (keeping stdout
        # clean for folded output) and signal them with a non-zero status.
        if debug:
            traceback.print_exc()
        else:
            print(err, file=sys.stderr)
        sys.exit(1)