#!/usr/bin/env python
#
# stackcount    Count events and their stack traces.
#               For Linux, uses BCC, eBPF.
#
# USAGE: stackcount.py [-h] [-p PID] [-i INTERVAL] [-D DURATION] [-T] [-r] [-s]
#                      [-P] [-K] [-U] [-v] [-d] [-f] [--debug]
#                      pattern
#
# The pattern is a string with optional '*' wildcards, similar to file
# globbing. If you'd prefer to use regular expressions, use the -r option.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 12-Jan-2016   Brendan Gregg       Created this.
# 09-Jul-2016   Sasha Goldshtein    Generalized for uprobes and tracepoints.
     17 
     18 from __future__ import print_function
     19 from bcc import BPF, USDT
     20 from time import sleep, strftime
     21 import argparse
     22 import re
     23 import signal
     24 import sys
     25 import traceback
     26 
# Global debug flag. Tool.__init__ overwrites it with the value of --debug.
# When true, Probe.load() prints the generated BPF program before compiling it
# and the top-level exception handler prints a full traceback instead of only
# the exception message.
debug = False
     28 
     29 class Probe(object):
     30     def __init__(self, pattern, kernel_stack, user_stack, use_regex=False,
     31                  pid=None, per_pid=False):
     32         """Init a new probe.
     33 
     34         Init the probe from the pattern provided by the user. The supported
     35         patterns mimic the 'trace' and 'argdist' tools, but are simpler because
     36         we don't have to distinguish between probes and retprobes.
     37 
     38             func            -- probe a kernel function
     39             lib:func        -- probe a user-space function in the library 'lib'
     40             p::func         -- same thing as 'func'
     41             p:lib:func      -- same thing as 'lib:func'
     42             t:cat:event     -- probe a kernel tracepoint
     43             u:lib:probe     -- probe a USDT tracepoint
     44         """
     45         self.kernel_stack = kernel_stack
     46         self.user_stack = user_stack
     47         parts = pattern.split(':')
     48         if len(parts) == 1:
     49             parts = ["p", "", parts[0]]
     50         elif len(parts) == 2:
     51             parts = ["p", parts[0], parts[1]]
     52         elif len(parts) == 3:
     53             if parts[0] == "t":
     54                 parts = ["t", "", "%s:%s" % tuple(parts[1:])]
     55             if parts[0] not in ["p", "t", "u"]:
     56                 raise Exception("Type must be 'p', 't', or 'u', but got %s" %
     57                                 parts[0])
     58         else:
     59             raise Exception("Too many ':'-separated components in pattern %s" %
     60                             pattern)
     61 
     62         (self.type, self.library, self.pattern) = parts
     63         if not use_regex:
     64             self.pattern = self.pattern.replace('*', '.*')
     65             self.pattern = '^' + self.pattern + '$'
     66 
     67         if (self.type == "p" and self.library) or self.type == "u":
     68             libpath = BPF.find_library(self.library)
     69             if libpath is None:
     70                 # This might be an executable (e.g. 'bash')
     71                 libpath = BPF.find_exe(self.library)
     72             if libpath is None or len(libpath) == 0:
     73                 raise Exception("unable to find library %s" % self.library)
     74             self.library = libpath
     75 
     76         self.pid = pid
     77         self.per_pid = per_pid
     78         self.matched = 0
     79 
     80     def is_kernel_probe(self):
     81         return self.type == "t" or (self.type == "p" and self.library == "")
     82 
     83     def attach(self):
     84         if self.type == "p":
     85             if self.library:
     86                 self.bpf.attach_uprobe(name=self.library,
     87                                        sym_re=self.pattern,
     88                                        fn_name="trace_count",
     89                                        pid=self.pid or -1)
     90                 self.matched = self.bpf.num_open_uprobes()
     91             else:
     92                 self.bpf.attach_kprobe(event_re=self.pattern,
     93                                        fn_name="trace_count")
     94                 self.matched = self.bpf.num_open_kprobes()
     95         elif self.type == "t":
     96             self.bpf.attach_tracepoint(tp_re=self.pattern,
     97                                        fn_name="trace_count")
     98             self.matched = self.bpf.num_open_tracepoints()
     99         elif self.type == "u":
    100             pass    # Nothing to do -- attach already happened in `load`
    101 
    102         if self.matched == 0:
    103             raise Exception("No functions matched by pattern %s" %
    104                             self.pattern)
    105 
    106     def load(self):
    107         ctx_name = "ctx"
    108         stack_trace = ""
    109         if self.user_stack:
    110                 stack_trace += """
    111                     key.user_stack_id = stack_traces.get_stackid(
    112                       %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
    113                     );""" % (ctx_name)
    114         else:
    115                 stack_trace += "key.user_stack_id = -1;"
    116         if self.kernel_stack:
    117                 stack_trace += """
    118                     key.kernel_stack_id = stack_traces.get_stackid(
    119                       %s, BPF_F_REUSE_STACKID
    120                     );""" % (ctx_name)
    121         else:
    122                 stack_trace += "key.kernel_stack_id = -1;"
    123 
    124         trace_count_text = """
    125 int trace_count(void *ctx) {
    126     FILTER
    127     struct key_t key = {};
    128     key.tgid = GET_TGID;
    129     STORE_COMM
    130     %s
    131     counts.increment(key);
    132     return 0;
    133 }
    134         """
    135         trace_count_text = trace_count_text % (stack_trace)
    136 
    137         bpf_text = """#include <uapi/linux/ptrace.h>
    138 #include <linux/sched.h>
    139 
    140 struct key_t {
    141     // no pid (thread ID) so that we do not needlessly split this key
    142     u32 tgid;
    143     int kernel_stack_id;
    144     int user_stack_id;
    145     char name[TASK_COMM_LEN];
    146 };
    147 
    148 BPF_HASH(counts, struct key_t);
    149 BPF_STACK_TRACE(stack_traces, 1024);
    150         """
    151 
    152         # We really mean the tgid from the kernel's perspective, which is in
    153         # the top 32 bits of bpf_get_current_pid_tgid().
    154         if self.is_kernel_probe() and self.pid:
    155             trace_count_text = trace_count_text.replace('FILTER',
    156                 ('u32 pid; pid = bpf_get_current_pid_tgid() >> 32; ' +
    157                 'if (pid != %d) { return 0; }') % (self.pid))
    158         else:
    159             trace_count_text = trace_count_text.replace('FILTER', '')
    160 
    161         # We need per-pid statistics when tracing a user-space process, because
    162         # the meaning of the symbols depends on the pid. We also need them if
    163         # per-pid statistics were requested with -P, or for user stacks.
    164         if self.per_pid or not self.is_kernel_probe() or self.user_stack:
    165             trace_count_text = trace_count_text.replace('GET_TGID',
    166                                         'bpf_get_current_pid_tgid() >> 32')
    167             trace_count_text = trace_count_text.replace('STORE_COMM',
    168                         'bpf_get_current_comm(&key.name, sizeof(key.name));')
    169         else:
    170             # kernel stacks only. skip splitting on PID so these aggregate
    171             # together, and don't store the process name.
    172             trace_count_text = trace_count_text.replace(
    173                                     'GET_TGID', '0xffffffff')
    174             trace_count_text = trace_count_text.replace('STORE_COMM', '')
    175 
    176         self.usdt = None
    177         if self.type == "u":
    178             self.usdt = USDT(path=self.library, pid=self.pid)
    179             for probe in self.usdt.enumerate_probes():
    180                 if not self.pid and (probe.bin_path != self.library):
    181                     continue
    182                 if re.match(self.pattern, probe.name):
    183                     # This hack is required because the bpf_usdt_readarg
    184                     # functions generated need different function names for
    185                     # each attached probe. If we just stick to trace_count,
    186                     # we'd get multiple bpf_usdt_readarg helpers with the same
    187                     # name when enabling more than one USDT probe.
    188                     new_func = "trace_count_%d" % self.matched
    189                     bpf_text += trace_count_text.replace(
    190                                             "trace_count", new_func)
    191                     self.usdt.enable_probe(probe.name, new_func)
    192                     self.matched += 1
    193             if debug:
    194                 print(self.usdt.get_text())
    195         else:
    196             bpf_text += trace_count_text
    197 
    198         if debug:
    199             print(bpf_text)
    200         self.bpf = BPF(text=bpf_text,
    201                        usdt_contexts=[self.usdt] if self.usdt else [])
    202 
    203 class Tool(object):
    204     def __init__(self):
    205         examples = """examples:
    206     ./stackcount submit_bio         # count kernel stack traces for submit_bio
    207     ./stackcount -d ip_output       # include a user/kernel stack delimiter
    208     ./stackcount -s ip_output       # show symbol offsets
    209     ./stackcount -sv ip_output      # show offsets and raw addresses (verbose)
    210     ./stackcount 'tcp_send*'        # count stacks for funcs matching tcp_send*
    211     ./stackcount -r '^tcp_send.*'   # same as above, using regular expressions
    212     ./stackcount -Ti 5 ip_output    # output every 5 seconds, with timestamps
    213     ./stackcount -p 185 ip_output   # count ip_output stacks for PID 185 only
    214     ./stackcount -p 185 c:malloc    # count stacks for malloc in PID 185
    215     ./stackcount t:sched:sched_fork # count stacks for sched_fork tracepoint
    216     ./stackcount -p 185 u:node:*    # count stacks for all USDT probes in node
    217     ./stackcount -K t:sched:sched_switch   # kernel stacks only
    218     ./stackcount -U t:sched:sched_switch   # user stacks only
    219         """
    220         parser = argparse.ArgumentParser(
    221             description="Count events and their stack traces",
    222             formatter_class=argparse.RawDescriptionHelpFormatter,
    223             epilog=examples)
    224         parser.add_argument("-p", "--pid", type=int,
    225             help="trace this PID only")
    226         parser.add_argument("-i", "--interval",
    227             help="summary interval, seconds")
    228         parser.add_argument("-D", "--duration",
    229             help="total duration of trace, seconds")
    230         parser.add_argument("-T", "--timestamp", action="store_true",
    231             help="include timestamp on output")
    232         parser.add_argument("-r", "--regexp", action="store_true",
    233             help="use regular expressions. Default is \"*\" wildcards only.")
    234         parser.add_argument("-s", "--offset", action="store_true",
    235             help="show address offsets")
    236         parser.add_argument("-P", "--perpid", action="store_true",
    237             help="display stacks separately for each process")
    238         parser.add_argument("-K", "--kernel-stacks-only",
    239             action="store_true", help="kernel stack only", default=False)
    240         parser.add_argument("-U", "--user-stacks-only",
    241             action="store_true", help="user stack only", default=False)
    242         parser.add_argument("-v", "--verbose", action="store_true",
    243             help="show raw addresses")
    244         parser.add_argument("-d", "--delimited", action="store_true",
    245             help="insert delimiter between kernel/user stacks")
    246         parser.add_argument("-f", "--folded", action="store_true",
    247             help="output folded format")
    248         parser.add_argument("--debug", action="store_true",
    249             help="print BPF program before starting (for debugging purposes)")
    250         parser.add_argument("pattern",
    251             help="search expression for events")
    252         self.args = parser.parse_args()
    253         global debug
    254         debug = self.args.debug
    255 
    256         if self.args.duration and not self.args.interval:
    257             self.args.interval = self.args.duration
    258         if not self.args.interval:
    259             self.args.interval = 99999999
    260 
    261         if self.args.kernel_stacks_only and self.args.user_stacks_only:
    262             print("ERROR: -K and -U are mutually exclusive. If you want " +
    263                 "both stacks, that is the default.")
    264             exit()
    265         if not self.args.kernel_stacks_only and not self.args.user_stacks_only:
    266             self.kernel_stack = True
    267             self.user_stack = True
    268         else:
    269             self.kernel_stack = self.args.kernel_stacks_only
    270             self.user_stack = self.args.user_stacks_only
    271 
    272         self.probe = Probe(self.args.pattern,
    273                            self.kernel_stack, self.user_stack,
    274                            self.args.regexp, self.args.pid, self.args.perpid)
    275         self.need_delimiter = self.args.delimited and not (
    276                     self.args.kernel_stacks_only or self.args.user_stacks_only)
    277 
    278     def _print_kframe(self, addr):
    279         print("  ", end="")
    280         if self.args.verbose:
    281             print("%-16x " % addr, end="")
    282         if self.args.offset:
    283             print("%s" % self.probe.bpf.ksym(addr, show_offset=True))
    284         else:
    285             print("%s" % self.probe.bpf.ksym(addr))
    286 
    287     def _print_uframe(self, addr, pid):
    288         print("  ", end="")
    289         if self.args.verbose:
    290             print("%-16x " % addr, end="")
    291         if self.args.offset:
    292             print("%s" % self.probe.bpf.sym(addr, pid, show_offset=True))
    293         else:
    294             print("%s" % self.probe.bpf.sym(addr, pid))
    295 
    296     @staticmethod
    297     def _signal_ignore(signal, frame):
    298         print()
    299 
    300     def _print_comm(self, comm, pid):
    301         print("    %s [%d]" % (comm, pid))
    302 
    303     def run(self):
    304         self.probe.load()
    305         self.probe.attach()
    306         if not self.args.folded:
    307             print("Tracing %d functions for \"%s\"... Hit Ctrl-C to end." %
    308                   (self.probe.matched, self.args.pattern))
    309         b = self.probe.bpf
    310         exiting = 0 if self.args.interval else 1
    311         seconds = 0
    312         while True:
    313             try:
    314                 sleep(int(self.args.interval))
    315                 seconds += int(self.args.interval)
    316             except KeyboardInterrupt:
    317                 exiting = 1
    318                 # as cleanup can take many seconds, trap Ctrl-C:
    319                 signal.signal(signal.SIGINT, Tool._signal_ignore)
    320             if self.args.duration and seconds >= int(self.args.duration):
    321                 exiting = 1
    322 
    323             if not self.args.folded:
    324                 print()
    325             if self.args.timestamp:
    326                 print("%-8s\n" % strftime("%H:%M:%S"), end="")
    327 
    328             counts = self.probe.bpf["counts"]
    329             stack_traces = self.probe.bpf["stack_traces"]
    330             self.comm_cache = {}
    331             for k, v in sorted(counts.items(),
    332                                key=lambda counts: counts[1].value):
    333                 user_stack = [] if k.user_stack_id < 0 else \
    334                     stack_traces.walk(k.user_stack_id)
    335                 kernel_stack = [] if k.kernel_stack_id < 0 else \
    336                     stack_traces.walk(k.kernel_stack_id)
    337 
    338                 if self.args.folded:
    339                     # print folded stack output
    340                     user_stack = list(user_stack)
    341                     kernel_stack = list(kernel_stack)
    342                     line = [k.name.decode('utf-8', 'replace')] + \
    343                         [b.sym(addr, k.tgid) for addr in
    344                         reversed(user_stack)] + \
    345                         (self.need_delimiter and ["-"] or []) + \
    346                         [b.ksym(addr) for addr in reversed(kernel_stack)]
    347                     print("%s %d" % (";".join(line), v.value))
    348                 else:
    349                     # print multi-line stack output
    350                     for addr in kernel_stack:
    351                         self._print_kframe(addr)
    352                     if self.need_delimiter:
    353                         print("    --")
    354                     for addr in user_stack:
    355                         self._print_uframe(addr, k.tgid)
    356                     if not self.args.pid and k.tgid != 0xffffffff:
    357                         self._print_comm(k.name, k.tgid)
    358                     print("    %d\n" % v.value)
    359             counts.clear()
    360 
    361             if exiting:
    362                 if not self.args.folded:
    363                     print("Detaching...")
    364                 exit()
    365 
    366 if __name__ == "__main__":
    367     try:
    368         Tool().run()
    369     except Exception:
    370         if debug:
    371             traceback.print_exc()
    372         elif sys.exc_info()[0] is not SystemExit:
    373             print(sys.exc_info()[1])
    374