Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/python
      2 # @lint-avoid-python-3-compatibility-imports
      3 #
      4 # tcpaccept Trace TCP accept()s.
      5 #           For Linux, uses BCC, eBPF. Embedded C.
      6 #
      7 # USAGE: tcpaccept [-h] [-t] [-p PID]
      8 #
      9 # This uses dynamic tracing of the kernel inet_csk_accept() socket function
     10 # (from tcp_prot.accept), and will need to be modified to match kernel changes.
     11 #
     12 # Copyright (c) 2015 Brendan Gregg.
     13 # Licensed under the Apache License, Version 2.0 (the "License")
     14 #
     15 # 13-Oct-2015   Brendan Gregg   Created this.
     16 # 14-Feb-2016      "      "     Switch to bpf_perf_output.
     17 
     18 from __future__ import print_function
     19 from bcc import BPF
     20 from socket import inet_ntop, AF_INET, AF_INET6
     21 from struct import pack
     22 import argparse
     23 import ctypes as ct
     24 
     25 # arguments
     26 examples = """examples:
     27     ./tcpaccept           # trace all TCP accept()s
     28     ./tcpaccept -t        # include timestamps
     29     ./tcpaccept -p 181    # only trace PID 181
     30 """
     31 parser = argparse.ArgumentParser(
     32     description="Trace TCP accepts",
     33     formatter_class=argparse.RawDescriptionHelpFormatter,
     34     epilog=examples)
     35 parser.add_argument("-t", "--timestamp", action="store_true",
     36     help="include timestamp on output")
     37 parser.add_argument("-p", "--pid",
     38     help="trace this PID only")
     39 parser.add_argument("--ebpf", action="store_true",
     40     help=argparse.SUPPRESS)
     41 args = parser.parse_args()
     42 debug = 0
     43 
# define BPF program
#
# Common prologue of the embedded C program: the kernel headers, the two event
# record layouts (one per address family) and the perf output channel for each.
# One of the probe bodies defined later in this file is appended to this text
# before it is handed to BPF().
#
# The ctypes classes Data_ipv4 / Data_ipv6 further down mirror these structs
# field for field, so any change to a struct here must be repeated there.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <net/sock.h>
#include <bcc/proto.h>

// separate data structs for ipv4 and ipv6
struct ipv4_data_t {
    u64 ts_us;
    u32 pid;
    u32 saddr;
    u32 daddr;
    u64 ip;
    u16 lport;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv4_events);

struct ipv6_data_t {
    u64 ts_us;
    u32 pid;
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u64 ip;
    u16 lport;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv6_events);
"""
     73 
     74 #
# The following is the code for older kernels (Linux pre-4.16).
# It attaches a kretprobe to inet_csk_accept(). On Linux 4.16 and later,
# the sock:inet_sock_set_state tracepoint should be used instead, as is
# done by the code that follows this.
     79 #
     80 bpf_text_kprobe = """
     81 int kretprobe__inet_csk_accept(struct pt_regs *ctx)
     82 {
     83     struct sock *newsk = (struct sock *)PT_REGS_RC(ctx);
     84     u32 pid = bpf_get_current_pid_tgid();
     85 
     86     if (newsk == NULL)
     87         return 0;
     88 
     89     // check this is TCP
     90     u8 protocol = 0;
     91     // workaround for reading the sk_protocol bitfield:
     92     
     93     // Following comments add by Joe Yin:
     94     // Unfortunately,it can not work since Linux 4.10,
     95     // because the sk_wmem_queued is not following the bitfield of sk_protocol.
     96     // And the following member is sk_gso_max_segs.
     97     // So, we can use this:
     98     // bpf_probe_read(&protocol, 1, (void *)((u64)&newsk->sk_gso_max_segs) - 3);
     99     // In order to  diff the pre-4.10 and 4.10+ ,introduce the variables gso_max_segs_offset,sk_lingertime, 
    100     // sk_lingertime is closed to the gso_max_segs_offset,and  
    101     // the offset between the two members is 4 
    102 
    103     int gso_max_segs_offset = offsetof(struct sock, sk_gso_max_segs);
    104     int sk_lingertime_offset = offsetof(struct sock, sk_lingertime);
    105 
    106     if (sk_lingertime_offset - gso_max_segs_offset == 4) 
    107         // 4.10+ with little endian
    108 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    109         protocol = *(u8 *)((u64)&newsk->sk_gso_max_segs - 3);
    110     else
    111         // pre-4.10 with little endian
    112         protocol = *(u8 *)((u64)&newsk->sk_wmem_queued - 3);
    113 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    114         // 4.10+ with big endian
    115         protocol = *(u8 *)((u64)&newsk->sk_gso_max_segs - 1);
    116     else
    117         // pre-4.10 with big endian
    118         protocol = *(u8 *)((u64)&newsk->sk_wmem_queued - 1);
    119 #else
    120 # error "Fix your compiler's __BYTE_ORDER__?!"
    121 #endif
    122 
    123     if (protocol != IPPROTO_TCP)
    124         return 0;
    125 
    126     // pull in details
    127     u16 family = 0, lport = 0;
    128     family = newsk->__sk_common.skc_family;
    129     lport = newsk->__sk_common.skc_num;
    130 
    131     if (family == AF_INET) {
    132         struct ipv4_data_t data4 = {.pid = pid, .ip = 4};
    133         data4.ts_us = bpf_ktime_get_ns() / 1000;
    134         data4.saddr = newsk->__sk_common.skc_rcv_saddr;
    135         data4.daddr = newsk->__sk_common.skc_daddr;
    136         data4.lport = lport;
    137         bpf_get_current_comm(&data4.task, sizeof(data4.task));
    138         ipv4_events.perf_submit(ctx, &data4, sizeof(data4));
    139 
    140     } else if (family == AF_INET6) {
    141         struct ipv6_data_t data6 = {.pid = pid, .ip = 6};
    142         data6.ts_us = bpf_ktime_get_ns() / 1000;
    143         bpf_probe_read(&data6.saddr, sizeof(data6.saddr),
    144             &newsk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
    145         bpf_probe_read(&data6.daddr, sizeof(data6.daddr),
    146             &newsk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
    147         data6.lport = lport;
    148         bpf_get_current_comm(&data6.task, sizeof(data6.task));
    149         ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
    150     }
    151     // else drop
    152 
    153     return 0;
    154 }
    155 """
    156 
    157 bpf_text_tracepoint = """
    158 TRACEPOINT_PROBE(sock, inet_sock_set_state)
    159 {
    160     if (args->protocol != IPPROTO_TCP)
    161         return 0;
    162     u32 pid = bpf_get_current_pid_tgid();
    163     // pull in details
    164     u16 family = 0, lport = 0;
    165     family = args->family;
    166     lport = args->sport;
    167 
    168     if (family == AF_INET) {
    169         struct ipv4_data_t data4 = {.pid = pid, .ip = 4};
    170         data4.ts_us = bpf_ktime_get_ns() / 1000;
    171         __builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr));
    172         __builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr));
    173         data4.lport = lport;
    174         bpf_get_current_comm(&data4.task, sizeof(data4.task));
    175         ipv4_events.perf_submit(args, &data4, sizeof(data4));
    176     } else if (family == AF_INET6) {
    177         struct ipv6_data_t data6 = {.pid = pid, .ip = 6};
    178         data6.ts_us = bpf_ktime_get_ns() / 1000;
    179         __builtin_memcpy(&data6.saddr, args->saddr, sizeof(data6.saddr));
    180         __builtin_memcpy(&data6.daddr, args->daddr, sizeof(data6.daddr));
    181         data6.lport = lport;
    182         bpf_get_current_comm(&data6.task, sizeof(data6.task));
    183         ipv6_events.perf_submit(args, &data6, sizeof(data6));
    184     }
    185     // else drop
    186 
    187     return 0;
    188 }
    189 """
    190 
    191 if (BPF.tracepoint_exists("sock", "inet_sock_set_state")):
    192     bpf_text += bpf_text_tracepoint
    193 else:
    194     bpf_text += bpf_text_kprobe
    195 
    196 
    197 # code substitutions
    198 if args.pid:
    199     bpf_text = bpf_text.replace('FILTER',
    200         'if (pid != %s) { return 0; }' % args.pid)
    201 else:
    202     bpf_text = bpf_text.replace('FILTER', '')
    203 if debug or args.ebpf:
    204     print(bpf_text)
    205     if args.ebpf:
    206         exit()
    207 
    208 # event data
    209 TASK_COMM_LEN = 16      # linux/sched.h
    210 
    211 class Data_ipv4(ct.Structure):
    212     _fields_ = [
    213         ("ts_us", ct.c_ulonglong),
    214         ("pid", ct.c_uint),
    215         ("saddr", ct.c_uint),
    216         ("daddr", ct.c_uint),
    217         ("ip", ct.c_ulonglong),
    218         ("lport", ct.c_ushort),
    219         ("task", ct.c_char * TASK_COMM_LEN)
    220     ]
    221 
    222 class Data_ipv6(ct.Structure):
    223     _fields_ = [
    224         ("ts_us", ct.c_ulonglong),
    225         ("pid", ct.c_uint),
    226         ("saddr", (ct.c_ulonglong * 2)),
    227         ("daddr", (ct.c_ulonglong * 2)),
    228         ("ip", ct.c_ulonglong),
    229         ("lport", ct.c_ushort),
    230         ("task", ct.c_char * TASK_COMM_LEN)
    231     ]
    232 
    233 # process event
    234 def print_ipv4_event(cpu, data, size):
    235     event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
    236     global start_ts
    237     if args.timestamp:
    238         if start_ts == 0:
    239             start_ts = event.ts_us
    240         print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), end="")
    241     print("%-6d %-12.12s %-2d %-16s %-16s %-4d" % (event.pid,
    242         event.task.decode('utf-8', 'replace'), event.ip,
    243         inet_ntop(AF_INET, pack("I", event.daddr)),
    244         inet_ntop(AF_INET, pack("I", event.saddr)), event.lport))
    245 
    246 def print_ipv6_event(cpu, data, size):
    247     event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
    248     global start_ts
    249     if args.timestamp:
    250         if start_ts == 0:
    251             start_ts = event.ts_us
    252         print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), end="")
    253     print("%-6d %-12.12s %-2d %-16s %-16s %-4d" % (event.pid,
    254         event.task.decode('utf-8', 'replace'), event.ip,
    255         inet_ntop(AF_INET6, event.daddr),inet_ntop(AF_INET6, event.saddr),
    256         event.lport))
    257 
    258 # initialize BPF
    259 b = BPF(text=bpf_text)
    260 
    261 # header
    262 if args.timestamp:
    263     print("%-9s" % ("TIME(s)"), end="")
    264 print("%-6s %-12s %-2s %-16s %-16s %-4s" % ("PID", "COMM", "IP", "RADDR",
    265     "LADDR", "LPORT"))
    266 
    267 start_ts = 0
    268 
    269 # read events
    270 b["ipv4_events"].open_perf_buffer(print_ipv4_event)
    271 b["ipv6_events"].open_perf_buffer(print_ipv6_event)
    272 while 1:
    273     b.perf_buffer_poll()
    274