#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
#
# tcpaccept Trace TCP accept()s.
#           For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: tcpaccept [-h] [-t] [-p PID]
#
# This uses dynamic tracing of the kernel inet_csk_accept() socket function
# (from tcp_prot.accept), and will need to be modified to match kernel changes.
#
# Copyright (c) 2015 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 13-Oct-2015   Brendan Gregg   Created this.
# 14-Feb-2016      "      "     Switch to bpf_perf_output.

from __future__ import print_function
from bcc import BPF
from socket import inet_ntop, AF_INET, AF_INET6
from struct import pack
import argparse
import ctypes as ct

# arguments
examples = """examples:
    ./tcpaccept           # trace all TCP accept()s
    ./tcpaccept -t        # include timestamps
    ./tcpaccept -p 181    # only trace PID 181
"""
parser = argparse.ArgumentParser(
    description="Trace TCP accepts",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-t", "--timestamp", action="store_true",
    help="include timestamp on output")
parser.add_argument("-p", "--pid",
    help="trace this PID only")
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()
debug = 0

# Token in the BPF programs below that is replaced by the optional PID check.
# It is deliberately distinctive so the substitution cannot hit other text.
PID_FILTER_TOKEN = '##FILTER_PID##'

# define BPF program
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <net/sock.h>
#include <bcc/proto.h>

// separate data structs for ipv4 and ipv6
struct ipv4_data_t {
    u64 ts_us;
    u32 pid;
    u32 saddr;
    u32 daddr;
    u64 ip;
    u16 lport;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv4_events);

struct ipv6_data_t {
    u64 ts_us;
    u32 pid;
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u64 ip;
    u16 lport;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv6_events);
"""

#
# The following is the code for older kernels (Linux pre-4.16).
# It uses kprobes to instrument inet_csk_accept(). On Linux 4.16 and
# later, the sock:inet_sock_set_state tracepoint should be used instead, as
# is done by the code that follows this.
#
bpf_text_kprobe = """
int kretprobe__inet_csk_accept(struct pt_regs *ctx)
{
    struct sock *newsk = (struct sock *)PT_REGS_RC(ctx);
    // the upper 32 bits hold the process ID (tgid); the lower 32 bits are
    // the thread ID
    u32 pid = bpf_get_current_pid_tgid() >> 32;

    ##FILTER_PID##

    if (newsk == NULL)
        return 0;

    // check this is TCP
    u8 protocol = 0;
    // workaround for reading the sk_protocol bitfield:

    // The following comments were added by Joe Yin:
    // Unfortunately, this does not work since Linux 4.10, because
    // sk_wmem_queued no longer follows the sk_protocol bitfield; the
    // member that follows it is now sk_gso_max_segs.
    // So, we can use this:
    // bpf_probe_read(&protocol, 1, (void *)((u64)&newsk->sk_gso_max_segs) - 3);
    // To tell pre-4.10 from 4.10+, compare the offsets of sk_gso_max_segs
    // and sk_lingertime: on 4.10+ sk_lingertime is close to
    // sk_gso_max_segs and the offset between the two members is 4.

    int gso_max_segs_offset = offsetof(struct sock, sk_gso_max_segs);
    int sk_lingertime_offset = offsetof(struct sock, sk_lingertime);

    if (sk_lingertime_offset - gso_max_segs_offset == 4)
        // 4.10+ with little endian
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        protocol = *(u8 *)((u64)&newsk->sk_gso_max_segs - 3);
    else
        // pre-4.10 with little endian
        protocol = *(u8 *)((u64)&newsk->sk_wmem_queued - 3);
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        // 4.10+ with big endian
        protocol = *(u8 *)((u64)&newsk->sk_gso_max_segs - 1);
    else
        // pre-4.10 with big endian
        protocol = *(u8 *)((u64)&newsk->sk_wmem_queued - 1);
#else
# error "Fix your compiler's __BYTE_ORDER__?!"
#endif

    if (protocol != IPPROTO_TCP)
        return 0;

    // pull in details
    u16 family = 0, lport = 0;
    family = newsk->__sk_common.skc_family;
    lport = newsk->__sk_common.skc_num;

    if (family == AF_INET) {
        struct ipv4_data_t data4 = {.pid = pid, .ip = 4};
        data4.ts_us = bpf_ktime_get_ns() / 1000;
        data4.saddr = newsk->__sk_common.skc_rcv_saddr;
        data4.daddr = newsk->__sk_common.skc_daddr;
        data4.lport = lport;
        bpf_get_current_comm(&data4.task, sizeof(data4.task));
        ipv4_events.perf_submit(ctx, &data4, sizeof(data4));

    } else if (family == AF_INET6) {
        struct ipv6_data_t data6 = {.pid = pid, .ip = 6};
        data6.ts_us = bpf_ktime_get_ns() / 1000;
        bpf_probe_read(&data6.saddr, sizeof(data6.saddr),
            &newsk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
        bpf_probe_read(&data6.daddr, sizeof(data6.daddr),
            &newsk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
        data6.lport = lport;
        bpf_get_current_comm(&data6.task, sizeof(data6.task));
        ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
    }
    // else drop

    return 0;
}
"""

#
# Tracepoint variant for Linux 4.16+.
#
# NOTE(review): sock:inet_sock_set_state may fire outside the context of the
# process that later calls accept() (for example from softirq), in which case
# the pid and comm recorded here, and therefore the -p check, refer to
# whatever task happened to be running. Confirm on the target kernel.
#
bpf_text_tracepoint = """
TRACEPOINT_PROBE(sock, inet_sock_set_state)
{
    if (args->protocol != IPPROTO_TCP)
        return 0;

    // The tracepoint fires on every state change. A passively opened
    // (accepted) connection is the one that goes SYN_RECV -> ESTABLISHED.
    if (args->oldstate != TCP_SYN_RECV || args->newstate != TCP_ESTABLISHED)
        return 0;

    // the upper 32 bits hold the process ID (tgid)
    u32 pid = bpf_get_current_pid_tgid() >> 32;

    ##FILTER_PID##

    // pull in details
    u16 family = 0, lport = 0;
    family = args->family;
    lport = args->sport;

    if (family == AF_INET) {
        struct ipv4_data_t data4 = {.pid = pid, .ip = 4};
        data4.ts_us = bpf_ktime_get_ns() / 1000;
        __builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr));
        __builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr));
        data4.lport = lport;
        bpf_get_current_comm(&data4.task, sizeof(data4.task));
        ipv4_events.perf_submit(args, &data4, sizeof(data4));
    } else if (family == AF_INET6) {
        struct ipv6_data_t data6 = {.pid = pid, .ip = 6};
        data6.ts_us = bpf_ktime_get_ns() / 1000;
        // the 16-byte IPv6 addresses live in the *_v6 fields; saddr and
        // daddr are only 4 bytes wide
        __builtin_memcpy(&data6.saddr, args->saddr_v6, sizeof(data6.saddr));
        __builtin_memcpy(&data6.daddr, args->daddr_v6, sizeof(data6.daddr));
        data6.lport = lport;
        bpf_get_current_comm(&data6.task, sizeof(data6.task));
        ipv6_events.perf_submit(args, &data6, sizeof(data6));
    }
    // else drop

    return 0;
}
"""

if BPF.tracepoint_exists("sock", "inet_sock_set_state"):
    bpf_text += bpf_text_tracepoint
else:
    bpf_text += bpf_text_kprobe


# code substitutions
if args.pid:
    bpf_text = bpf_text.replace(PID_FILTER_TOKEN,
        'if (pid != %s) { return 0; }' % args.pid)
else:
    bpf_text = bpf_text.replace(PID_FILTER_TOKEN, '')
if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# event data
TASK_COMM_LEN = 16      # linux/sched.h


class Data_ipv4(ct.Structure):
    """ctypes mirror of struct ipv4_data_t in the BPF program."""
    _fields_ = [
        ("ts_us", ct.c_ulonglong),
        ("pid", ct.c_uint),
        ("saddr", ct.c_uint),
        ("daddr", ct.c_uint),
        ("ip", ct.c_ulonglong),
        ("lport", ct.c_ushort),
        ("task", ct.c_char * TASK_COMM_LEN)
    ]


class Data_ipv6(ct.Structure):
    """ctypes mirror of struct ipv6_data_t in the BPF program."""
    _fields_ = [
        ("ts_us", ct.c_ulonglong),
        ("pid", ct.c_uint),
        ("saddr", (ct.c_ulonglong * 2)),
        ("daddr", (ct.c_ulonglong * 2)),
        ("ip", ct.c_ulonglong),
        ("lport", ct.c_ushort),
        ("task", ct.c_char * TASK_COMM_LEN)
    ]


# process event
def _print_event(event, raddr, laddr):
    """Print one output row for *event* with pre-formatted addresses.

    When -t was given, the row is prefixed with the seconds elapsed since
    the first event seen (tracked in the module-level start_ts).
    """
    global start_ts
    if args.timestamp:
        if start_ts == 0:
            start_ts = event.ts_us
        print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), end="")
    print("%-6d %-12.12s %-2d %-16s %-16s %-4d" % (event.pid,
        event.task.decode('utf-8', 'replace'), event.ip,
        raddr, laddr, event.lport))


def print_ipv4_event(cpu, data, size):
    """Perf-buffer callback for ipv4_events: decode and print one record."""
    event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
    # The u32 addresses were stored as raw bytes by the probe; repacking
    # them with native layout hands inet_ntop the same bytes back.
    _print_event(event,
        inet_ntop(AF_INET, pack("I", event.daddr)),
        inet_ntop(AF_INET, pack("I", event.saddr)))


def print_ipv6_event(cpu, data, size):
    """Perf-buffer callback for ipv6_events: decode and print one record."""
    event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
    _print_event(event,
        inet_ntop(AF_INET6, event.daddr),
        inet_ntop(AF_INET6, event.saddr))


# initialize BPF
b = BPF(text=bpf_text)

# header
if args.timestamp:
    print("%-9s" % ("TIME(s)"), end="")
print("%-6s %-12s %-2s %-16s %-16s %-4s" % ("PID", "COMM", "IP", "RADDR",
    "LADDR", "LPORT"))

start_ts = 0

# read events
b["ipv4_events"].open_perf_buffer(print_ipv4_event)
b["ipv6_events"].open_perf_buffer(print_ipv6_event)
while 1:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop tracing; leave without a traceback
        exit()