1 // Copyright 2016 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 // This file is shared between executor and csource package. 5 6 #include <stdlib.h> 7 #include <sys/syscall.h> 8 #include <sys/types.h> 9 #include <unistd.h> 10 11 #if SYZ_EXECUTOR 12 struct cover_t; 13 static void cover_reset(cover_t* cov); 14 #endif 15 16 #if SYZ_EXECUTOR || SYZ_THREADED 17 #include <linux/futex.h> 18 #include <pthread.h> 19 20 typedef struct { 21 int state; 22 } event_t; 23 24 static void event_init(event_t* ev) 25 { 26 ev->state = 0; 27 } 28 29 static void event_reset(event_t* ev) 30 { 31 ev->state = 0; 32 } 33 34 static void event_set(event_t* ev) 35 { 36 if (ev->state) 37 fail("event already set"); 38 __atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE); 39 syscall(SYS_futex, &ev->state, FUTEX_WAKE); 40 } 41 42 static void event_wait(event_t* ev) 43 { 44 while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE)) 45 syscall(SYS_futex, &ev->state, FUTEX_WAIT, 0, 0); 46 } 47 48 static int event_isset(event_t* ev) 49 { 50 return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE); 51 } 52 53 static int event_timedwait(event_t* ev, uint64 timeout) 54 { 55 uint64 start = current_time_ms(); 56 uint64 now = start; 57 for (;;) { 58 uint64 remain = timeout - (now - start); 59 struct timespec ts; 60 ts.tv_sec = remain / 1000; 61 ts.tv_nsec = (remain % 1000) * 1000 * 1000; 62 syscall(SYS_futex, &ev->state, FUTEX_WAIT, 0, &ts); 63 if (__atomic_load_n(&ev->state, __ATOMIC_RELAXED)) 64 return 1; 65 now = current_time_ms(); 66 if (now - start > timeout) 67 return 0; 68 } 69 } 70 #endif 71 72 #if SYZ_EXECUTOR || SYZ_TUN_ENABLE || SYZ_ENABLE_NETDEV 73 #include <stdarg.h> 74 #include <stdbool.h> 75 #include <string.h> 76 77 static void vsnprintf_check(char* str, size_t size, const char* format, va_list args) 78 { 79 int rv; 80 81 rv = vsnprintf(str, size, format, args); 82 if (rv < 0) 83 fail("tun: 
snprintf failed"); 84 if ((size_t)rv >= size) 85 fail("tun: string '%s...' doesn't fit into buffer", str); 86 } 87 88 #define COMMAND_MAX_LEN 128 89 #define PATH_PREFIX "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin " 90 #define PATH_PREFIX_LEN (sizeof(PATH_PREFIX) - 1) 91 92 static void execute_command(bool panic, const char* format, ...) 93 { 94 va_list args; 95 char command[PATH_PREFIX_LEN + COMMAND_MAX_LEN]; 96 int rv; 97 98 va_start(args, format); 99 // Executor process does not have any env, including PATH. 100 // On some distributions, system/shell adds a minimal PATH, on some it does not. 101 // Set own standard PATH to make it work across distributions. 102 memcpy(command, PATH_PREFIX, PATH_PREFIX_LEN); 103 vsnprintf_check(command + PATH_PREFIX_LEN, COMMAND_MAX_LEN, format, args); 104 va_end(args); 105 rv = system(command); 106 if (rv) { 107 if (panic) 108 fail("command '%s' failed: %d", &command[0], rv); 109 debug("command '%s': %d\n", &command[0], rv); 110 } 111 } 112 #endif 113 114 #if SYZ_EXECUTOR || SYZ_TUN_ENABLE 115 #include <arpa/inet.h> 116 #include <errno.h> 117 #include <fcntl.h> 118 #include <linux/if.h> 119 #include <linux/if_ether.h> 120 #include <linux/if_tun.h> 121 #include <linux/ip.h> 122 #include <linux/tcp.h> 123 #include <net/if_arp.h> 124 #include <stdarg.h> 125 #include <stdbool.h> 126 #include <sys/ioctl.h> 127 #include <sys/stat.h> 128 129 static int tunfd = -1; 130 static int tun_frags_enabled; 131 132 // We just need this to be large enough to hold headers that we parse (ethernet/ip/tcp). 133 // Rest of the packet (if any) will be silently truncated which is fine. 
134 #define SYZ_TUN_MAX_PACKET_SIZE 1000 135 136 #define TUN_IFACE "syz_tun" 137 138 #define LOCAL_MAC "aa:aa:aa:aa:aa:aa" 139 #define REMOTE_MAC "aa:aa:aa:aa:aa:bb" 140 141 #define LOCAL_IPV4 "172.20.20.170" 142 #define REMOTE_IPV4 "172.20.20.187" 143 144 #define LOCAL_IPV6 "fe80::aa" 145 #define REMOTE_IPV6 "fe80::bb" 146 147 #ifndef IFF_NAPI 148 #define IFF_NAPI 0x0010 149 #endif 150 #ifndef IFF_NAPI_FRAGS 151 #define IFF_NAPI_FRAGS 0x0020 152 #endif 153 154 static void initialize_tun(void) 155 { 156 #if SYZ_EXECUTOR 157 if (!flag_enable_tun) 158 return; 159 #endif 160 tunfd = open("/dev/net/tun", O_RDWR | O_NONBLOCK); 161 if (tunfd == -1) { 162 #if SYZ_EXECUTOR 163 fail("tun: can't open /dev/net/tun\n"); 164 #else 165 printf("tun: can't open /dev/net/tun: please enable CONFIG_TUN=y\n"); 166 printf("otherwise fuzzing or reproducing might not work as intended\n"); 167 return; 168 #endif 169 } 170 // Remap tun onto higher fd number to hide it from fuzzer and to keep 171 // fd numbers stable regardless of whether tun is opened or not (also see kMaxFd). 172 const int kTunFd = 240; 173 if (dup2(tunfd, kTunFd) < 0) 174 fail("dup2(tunfd, kTunFd) failed"); 175 close(tunfd); 176 tunfd = kTunFd; 177 178 struct ifreq ifr; 179 memset(&ifr, 0, sizeof(ifr)); 180 strncpy(ifr.ifr_name, TUN_IFACE, IFNAMSIZ); 181 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_NAPI | IFF_NAPI_FRAGS; 182 if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0) { 183 // IFF_NAPI_FRAGS requires root, so try without it. 184 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 185 if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0) 186 fail("tun: ioctl(TUNSETIFF) failed"); 187 } 188 // If IFF_NAPI_FRAGS is not supported it will be silently dropped, 189 // so query the effective flags. 
190 if (ioctl(tunfd, TUNGETIFF, (void*)&ifr) < 0) 191 fail("tun: ioctl(TUNGETIFF) failed"); 192 tun_frags_enabled = (ifr.ifr_flags & IFF_NAPI_FRAGS) != 0; 193 debug("tun_frags_enabled=%d\n", tun_frags_enabled); 194 195 // Disable IPv6 DAD, otherwise the address remains unusable until DAD completes. 196 // Don't panic because this is an optional config. 197 execute_command(0, "sysctl -w net.ipv6.conf.%s.accept_dad=0", TUN_IFACE); 198 199 // Disable IPv6 router solicitation to prevent IPv6 spam. 200 // Don't panic because this is an optional config. 201 execute_command(0, "sysctl -w net.ipv6.conf.%s.router_solicitations=0", TUN_IFACE); 202 // There seems to be no way to disable IPv6 MTD to prevent more IPv6 spam. 203 204 execute_command(1, "ip link set dev %s address %s", TUN_IFACE, LOCAL_MAC); 205 execute_command(1, "ip addr add %s/24 dev %s", LOCAL_IPV4, TUN_IFACE); 206 execute_command(1, "ip neigh add %s lladdr %s dev %s nud permanent", 207 REMOTE_IPV4, REMOTE_MAC, TUN_IFACE); 208 // Don't panic because ipv6 may be not enabled in kernel. 209 execute_command(0, "ip -6 addr add %s/120 dev %s", LOCAL_IPV6, TUN_IFACE); 210 execute_command(0, "ip -6 neigh add %s lladdr %s dev %s nud permanent", 211 REMOTE_IPV6, REMOTE_MAC, TUN_IFACE); 212 execute_command(1, "ip link set dev %s up", TUN_IFACE); 213 } 214 #endif 215 216 #if SYZ_EXECUTOR || SYZ_ENABLE_NETDEV 217 #include <arpa/inet.h> 218 #include <errno.h> 219 #include <fcntl.h> 220 #include <linux/if.h> 221 #include <linux/if_ether.h> 222 #include <linux/if_tun.h> 223 #include <linux/ip.h> 224 #include <linux/tcp.h> 225 #include <net/if_arp.h> 226 #include <stdarg.h> 227 #include <stdbool.h> 228 #include <sys/ioctl.h> 229 #include <sys/stat.h> 230 #include <sys/uio.h> 231 232 // Addresses are chosen to be in the same subnet as tun addresses. 
233 #define DEV_IPV4 "172.20.20.%d" 234 #define DEV_IPV6 "fe80::%02hx" 235 #define DEV_MAC "aa:aa:aa:aa:aa:%02hx" 236 237 static void snprintf_check(char* str, size_t size, const char* format, ...) 238 { 239 va_list args; 240 241 va_start(args, format); 242 vsnprintf_check(str, size, format, args); 243 va_end(args); 244 } 245 246 // We test in a separate namespace, which does not have any network devices initially (even lo). 247 // Create/up as many as we can. 248 static void initialize_netdevices(void) 249 { 250 #if SYZ_EXECUTOR 251 if (!flag_enable_net_dev) 252 return; 253 #endif 254 unsigned i; 255 const char* devtypes[] = {"ip6gretap", "bridge", "vcan", "bond", "team"}; 256 // If you extend this array, also update netdev_addr_id in vnet.txt. 257 const char* devnames[] = {"lo", "sit0", "bridge0", "vcan0", "tunl0", 258 "gre0", "gretap0", "ip_vti0", "ip6_vti0", 259 "ip6tnl0", "ip6gre0", "ip6gretap0", 260 "erspan0", "bond0", "veth0", "veth1", "team0", 261 "veth0_to_bridge", "veth1_to_bridge", 262 "veth0_to_bond", "veth1_to_bond", 263 "veth0_to_team", "veth1_to_team"}; 264 const char* devmasters[] = {"bridge", "bond", "team"}; 265 266 for (i = 0; i < sizeof(devtypes) / (sizeof(devtypes[0])); i++) 267 execute_command(0, "ip link add dev %s0 type %s", devtypes[i], devtypes[i]); 268 // This adds connected veth0 and veth1 devices. 269 execute_command(0, "ip link add type veth"); 270 271 // This creates connected bridge/bond/team_slave devices of type veth, 272 // and makes them slaves of bridge/bond/team devices, respectively. 273 // Note: slave devices don't need MAC/IP addresses, only master devices. 274 // veth0_to_* is not slave devices, which still need ip addresses. 
275 for (i = 0; i < sizeof(devmasters) / (sizeof(devmasters[0])); i++) { 276 execute_command(0, "ip link add name %s_slave_0 type veth peer name veth0_to_%s", devmasters[i], devmasters[i]); 277 execute_command(0, "ip link add name %s_slave_1 type veth peer name veth1_to_%s", devmasters[i], devmasters[i]); 278 execute_command(0, "ip link set %s_slave_0 master %s0", devmasters[i], devmasters[i]); 279 execute_command(0, "ip link set %s_slave_1 master %s0", devmasters[i], devmasters[i]); 280 execute_command(0, "ip link set veth0_to_%s up", devmasters[i]); 281 execute_command(0, "ip link set veth1_to_%s up", devmasters[i]); 282 } 283 // bond/team_slave_* will set up automatically when set their master. 284 // But bridge_slave_* need to set up manually. 285 execute_command(0, "ip link set bridge_slave_0 up"); 286 execute_command(0, "ip link set bridge_slave_1 up"); 287 288 for (i = 0; i < sizeof(devnames) / (sizeof(devnames[0])); i++) { 289 char addr[32]; 290 // Assign some unique address to devices. Some devices won't up without this. 291 // Devices that don't need these addresses will simply ignore them. 292 // Shift addresses by 10 because 0 subnet address can mean special things. 
293 snprintf_check(addr, sizeof(addr), DEV_IPV4, i + 10); 294 execute_command(0, "ip -4 addr add %s/24 dev %s", addr, devnames[i]); 295 snprintf_check(addr, sizeof(addr), DEV_IPV6, i + 10); 296 execute_command(0, "ip -6 addr add %s/120 dev %s", addr, devnames[i]); 297 snprintf_check(addr, sizeof(addr), DEV_MAC, i + 10); 298 execute_command(0, "ip link set dev %s address %s", devnames[i], addr); 299 execute_command(0, "ip link set dev %s up", devnames[i]); 300 } 301 } 302 #endif 303 304 #if SYZ_EXECUTOR || SYZ_TUN_ENABLE && (__NR_syz_extract_tcp_res || SYZ_REPEAT) 305 #include <errno.h> 306 307 static int read_tun(char* data, int size) 308 { 309 if (tunfd < 0) 310 return -1; 311 312 int rv = read(tunfd, data, size); 313 if (rv < 0) { 314 if (errno == EAGAIN) 315 return -1; 316 // Tun sometimes returns this, unclear if it's a kernel bug or not. 317 if (errno == EBADFD) 318 return -1; 319 fail("tun: read failed with %d", rv); 320 } 321 return rv; 322 } 323 #endif 324 325 #if SYZ_EXECUTOR || __NR_syz_emit_ethernet && SYZ_TUN_ENABLE 326 #include <stdbool.h> 327 #include <sys/uio.h> 328 329 #define MAX_FRAGS 4 330 struct vnet_fragmentation { 331 uint32 full; 332 uint32 count; 333 uint32 frags[MAX_FRAGS]; 334 }; 335 336 static long syz_emit_ethernet(long a0, long a1, long a2) 337 { 338 // syz_emit_ethernet(len len[packet], packet ptr[in, eth_packet], frags ptr[in, vnet_fragmentation, opt]) 339 // vnet_fragmentation { 340 // full int32[0:1] 341 // count int32[1:4] 342 // frags array[int32[0:4096], 4] 343 // } 344 if (tunfd < 0) 345 return (uintptr_t)-1; 346 347 uint32 length = a0; 348 char* data = (char*)a1; 349 debug_dump_data(data, length); 350 351 struct vnet_fragmentation* frags = (struct vnet_fragmentation*)a2; 352 struct iovec vecs[MAX_FRAGS + 1]; 353 uint32 nfrags = 0; 354 if (!tun_frags_enabled || frags == NULL) { 355 vecs[nfrags].iov_base = data; 356 vecs[nfrags].iov_len = length; 357 nfrags++; 358 } else { 359 bool full = true; 360 uint32 i, count = 0; 361 
NONFAILING(full = frags->full); 362 NONFAILING(count = frags->count); 363 if (count > MAX_FRAGS) 364 count = MAX_FRAGS; 365 for (i = 0; i < count && length != 0; i++) { 366 uint32 size = 0; 367 NONFAILING(size = frags->frags[i]); 368 if (size > length) 369 size = length; 370 vecs[nfrags].iov_base = data; 371 vecs[nfrags].iov_len = size; 372 nfrags++; 373 data += size; 374 length -= size; 375 } 376 if (length != 0 && (full || nfrags == 0)) { 377 vecs[nfrags].iov_base = data; 378 vecs[nfrags].iov_len = length; 379 nfrags++; 380 } 381 } 382 return writev(tunfd, vecs, nfrags); 383 } 384 #endif 385 386 #if SYZ_EXECUTOR || SYZ_REPEAT && SYZ_TUN_ENABLE 387 static void flush_tun() 388 { 389 #if SYZ_EXECUTOR 390 if (!flag_enable_tun) 391 return; 392 #endif 393 char data[SYZ_TUN_MAX_PACKET_SIZE]; 394 while (read_tun(&data[0], sizeof(data)) != -1) { 395 } 396 } 397 #endif 398 399 #if SYZ_EXECUTOR || __NR_syz_extract_tcp_res && SYZ_TUN_ENABLE 400 #ifndef __ANDROID__ 401 // Can't include <linux/ipv6.h>, since it causes 402 // conflicts due to some structs redefinition. 
403 struct ipv6hdr { 404 __u8 priority : 4, 405 version : 4; 406 __u8 flow_lbl[3]; 407 408 __be16 payload_len; 409 __u8 nexthdr; 410 __u8 hop_limit; 411 412 struct in6_addr saddr; 413 struct in6_addr daddr; 414 }; 415 #endif 416 417 struct tcp_resources { 418 uint32 seq; 419 uint32 ack; 420 }; 421 422 static long syz_extract_tcp_res(long a0, long a1, long a2) 423 { 424 // syz_extract_tcp_res(res ptr[out, tcp_resources], seq_inc int32, ack_inc int32) 425 426 if (tunfd < 0) 427 return (uintptr_t)-1; 428 429 char data[SYZ_TUN_MAX_PACKET_SIZE]; 430 int rv = read_tun(&data[0], sizeof(data)); 431 if (rv == -1) 432 return (uintptr_t)-1; 433 size_t length = rv; 434 debug_dump_data(data, length); 435 436 struct tcphdr* tcphdr; 437 438 if (length < sizeof(struct ethhdr)) 439 return (uintptr_t)-1; 440 struct ethhdr* ethhdr = (struct ethhdr*)&data[0]; 441 442 if (ethhdr->h_proto == htons(ETH_P_IP)) { 443 if (length < sizeof(struct ethhdr) + sizeof(struct iphdr)) 444 return (uintptr_t)-1; 445 struct iphdr* iphdr = (struct iphdr*)&data[sizeof(struct ethhdr)]; 446 if (iphdr->protocol != IPPROTO_TCP) 447 return (uintptr_t)-1; 448 if (length < sizeof(struct ethhdr) + iphdr->ihl * 4 + sizeof(struct tcphdr)) 449 return (uintptr_t)-1; 450 tcphdr = (struct tcphdr*)&data[sizeof(struct ethhdr) + iphdr->ihl * 4]; 451 } else { 452 if (length < sizeof(struct ethhdr) + sizeof(struct ipv6hdr)) 453 return (uintptr_t)-1; 454 struct ipv6hdr* ipv6hdr = (struct ipv6hdr*)&data[sizeof(struct ethhdr)]; 455 // TODO: parse and skip extension headers. 
456 if (ipv6hdr->nexthdr != IPPROTO_TCP) 457 return (uintptr_t)-1; 458 if (length < sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) 459 return (uintptr_t)-1; 460 tcphdr = (struct tcphdr*)&data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr)]; 461 } 462 463 struct tcp_resources* res = (struct tcp_resources*)a0; 464 NONFAILING(res->seq = htonl((ntohl(tcphdr->seq) + (uint32)a1))); 465 NONFAILING(res->ack = htonl((ntohl(tcphdr->ack_seq) + (uint32)a2))); 466 467 debug("extracted seq: %08x\n", res->seq); 468 debug("extracted ack: %08x\n", res->ack); 469 470 return 0; 471 } 472 #endif 473 474 #if SYZ_EXECUTOR || __NR_syz_open_dev 475 #include <fcntl.h> 476 #include <string.h> 477 #include <sys/stat.h> 478 #include <sys/types.h> 479 480 static long syz_open_dev(long a0, long a1, long a2) 481 { 482 if (a0 == 0xc || a0 == 0xb) { 483 // syz_open_dev$char(dev const[0xc], major intptr, minor intptr) fd 484 // syz_open_dev$block(dev const[0xb], major intptr, minor intptr) fd 485 char buf[128]; 486 sprintf(buf, "/dev/%s/%d:%d", a0 == 0xc ? "char" : "block", (uint8)a1, (uint8)a2); 487 return open(buf, O_RDWR, 0); 488 } else { 489 // syz_open_dev(dev strconst, id intptr, flags flags[open_flags]) fd 490 char buf[1024]; 491 char* hash; 492 NONFAILING(strncpy(buf, (char*)a0, sizeof(buf) - 1)); 493 buf[sizeof(buf) - 1] = 0; 494 while ((hash = strchr(buf, '#'))) { 495 *hash = '0' + (char)(a1 % 10); // 10 devices should be enough for everyone. 
496 a1 /= 10; 497 } 498 return open(buf, a2, 0); 499 } 500 } 501 #endif 502 503 #if SYZ_EXECUTOR || __NR_syz_open_procfs 504 #include <fcntl.h> 505 #include <string.h> 506 #include <sys/stat.h> 507 #include <sys/types.h> 508 509 static long syz_open_procfs(long a0, long a1) 510 { 511 // syz_open_procfs(pid pid, file ptr[in, string[procfs_file]]) fd 512 513 char buf[128]; 514 memset(buf, 0, sizeof(buf)); 515 if (a0 == 0) { 516 NONFAILING(snprintf(buf, sizeof(buf), "/proc/self/%s", (char*)a1)); 517 } else if (a0 == -1) { 518 NONFAILING(snprintf(buf, sizeof(buf), "/proc/thread-self/%s", (char*)a1)); 519 } else { 520 NONFAILING(snprintf(buf, sizeof(buf), "/proc/self/task/%d/%s", (int)a0, (char*)a1)); 521 } 522 int fd = open(buf, O_RDWR); 523 if (fd == -1) 524 fd = open(buf, O_RDONLY); 525 return fd; 526 } 527 #endif 528 529 #if SYZ_EXECUTOR || __NR_syz_open_pts 530 #include <fcntl.h> 531 #include <sys/ioctl.h> 532 #include <sys/stat.h> 533 #include <sys/types.h> 534 535 static long syz_open_pts(long a0, long a1) 536 { 537 // syz_openpts(fd fd[tty], flags flags[open_flags]) fd[tty] 538 int ptyno = 0; 539 if (ioctl(a0, TIOCGPTN, &ptyno)) 540 return -1; 541 char buf[128]; 542 sprintf(buf, "/dev/pts/%d", ptyno); 543 return open(buf, a1, 0); 544 } 545 #endif 546 547 #if SYZ_EXECUTOR || __NR_syz_init_net_socket 548 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE 549 #include <fcntl.h> 550 #include <sched.h> 551 #include <sys/stat.h> 552 #include <sys/types.h> 553 #include <unistd.h> 554 555 const int kInitNetNsFd = 239; // see kMaxFd 556 // syz_init_net_socket opens a socket in init net namespace. 557 // Used for families that can only be created in init net namespace. 
558 static long syz_init_net_socket(long domain, long type, long proto) 559 { 560 int netns = open("/proc/self/ns/net", O_RDONLY); 561 if (netns == -1) 562 return netns; 563 if (setns(kInitNetNsFd, 0)) 564 return -1; 565 int sock = syscall(__NR_socket, domain, type, proto); 566 int err = errno; 567 if (setns(netns, 0)) 568 fail("setns(netns) failed"); 569 close(netns); 570 errno = err; 571 return sock; 572 } 573 #else 574 static long syz_init_net_socket(long domain, long type, long proto) 575 { 576 return syscall(__NR_socket, domain, type, proto); 577 } 578 #endif 579 #endif 580 581 #if SYZ_EXECUTOR || __NR_syz_genetlink_get_family_id 582 #include <errno.h> 583 #include <linux/genetlink.h> 584 #include <linux/netlink.h> 585 #include <sys/socket.h> 586 #include <sys/types.h> 587 588 static long syz_genetlink_get_family_id(long name) 589 { 590 char buf[512] = {0}; 591 struct nlmsghdr* hdr = (struct nlmsghdr*)buf; 592 struct genlmsghdr* genlhdr = (struct genlmsghdr*)NLMSG_DATA(hdr); 593 struct nlattr* attr = (struct nlattr*)(genlhdr + 1); 594 hdr->nlmsg_len = sizeof(*hdr) + sizeof(*genlhdr) + sizeof(*attr) + GENL_NAMSIZ; 595 hdr->nlmsg_type = GENL_ID_CTRL; 596 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 597 genlhdr->cmd = CTRL_CMD_GETFAMILY; 598 attr->nla_type = CTRL_ATTR_FAMILY_NAME; 599 attr->nla_len = sizeof(*attr) + GENL_NAMSIZ; 600 NONFAILING(strncpy((char*)(attr + 1), (char*)name, GENL_NAMSIZ)); 601 struct iovec iov = {hdr, hdr->nlmsg_len}; 602 struct sockaddr_nl addr = {0}; 603 addr.nl_family = AF_NETLINK; 604 debug("syz_genetlink_get_family_id(%s)\n", (char*)(attr + 1)); 605 int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 606 if (fd == -1) { 607 debug("syz_genetlink_get_family_id: socket failed: %d\n", errno); 608 return -1; 609 } 610 struct msghdr msg = {&addr, sizeof(addr), &iov, 1, NULL, 0, 0}; 611 if (sendmsg(fd, &msg, 0) == -1) { 612 debug("syz_genetlink_get_family_id: sendmsg failed: %d\n", errno); 613 close(fd); 614 return -1; 615 } 616 
ssize_t n = recv(fd, buf, sizeof(buf), 0); 617 close(fd); 618 if (n <= 0) { 619 debug("syz_genetlink_get_family_id: recv failed: %d\n", errno); 620 return -1; 621 } 622 if (hdr->nlmsg_type != GENL_ID_CTRL) { 623 debug("syz_genetlink_get_family_id: wrong reply type: %d\n", hdr->nlmsg_type); 624 return -1; 625 } 626 for (; (char*)attr < buf + n; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) { 627 if (attr->nla_type == CTRL_ATTR_FAMILY_ID) 628 return *(uint16*)(attr + 1); 629 } 630 debug("syz_genetlink_get_family_id: no CTRL_ATTR_FAMILY_ID attr\n"); 631 return -1; 632 } 633 #endif 634 635 #if SYZ_EXECUTOR || __NR_syz_mount_image || __NR_syz_read_part_table 636 #include <errno.h> 637 #include <fcntl.h> 638 #include <linux/loop.h> 639 #include <sys/ioctl.h> 640 #include <sys/stat.h> 641 #include <sys/types.h> 642 643 struct fs_image_segment { 644 void* data; 645 uintptr_t size; 646 uintptr_t offset; 647 }; 648 649 #define IMAGE_MAX_SEGMENTS 4096 650 #define IMAGE_MAX_SIZE (129 << 20) 651 652 #if GOARCH_386 653 #define SYZ_memfd_create 356 654 #elif GOARCH_amd64 655 #define SYZ_memfd_create 319 656 #elif GOARCH_arm 657 #define SYZ_memfd_create 385 658 #elif GOARCH_arm64 659 #define SYZ_memfd_create 279 660 #elif GOARCH_ppc64le 661 #define SYZ_memfd_create 360 662 #endif 663 #endif 664 665 #if SYZ_EXECUTOR || __NR_syz_read_part_table 666 // syz_read_part_table(size intptr, nsegs len[segments], segments ptr[in, array[fs_image_segment]]) 667 static long syz_read_part_table(unsigned long size, unsigned long nsegs, long segments) 668 { 669 char loopname[64], linkname[64]; 670 int loopfd, err = 0, res = -1; 671 unsigned long i, j; 672 // See the comment in syz_mount_image. 
673 struct fs_image_segment* segs = (struct fs_image_segment*)segments; 674 675 if (nsegs > IMAGE_MAX_SEGMENTS) 676 nsegs = IMAGE_MAX_SEGMENTS; 677 for (i = 0; i < nsegs; i++) { 678 if (segs[i].size > IMAGE_MAX_SIZE) 679 segs[i].size = IMAGE_MAX_SIZE; 680 segs[i].offset %= IMAGE_MAX_SIZE; 681 if (segs[i].offset > IMAGE_MAX_SIZE - segs[i].size) 682 segs[i].offset = IMAGE_MAX_SIZE - segs[i].size; 683 if (size < segs[i].offset + segs[i].offset) 684 size = segs[i].offset + segs[i].offset; 685 } 686 if (size > IMAGE_MAX_SIZE) 687 size = IMAGE_MAX_SIZE; 688 int memfd = syscall(SYZ_memfd_create, "syz_read_part_table", 0); 689 if (memfd == -1) { 690 err = errno; 691 goto error; 692 } 693 if (ftruncate(memfd, size)) { 694 err = errno; 695 goto error_close_memfd; 696 } 697 for (i = 0; i < nsegs; i++) { 698 if (pwrite(memfd, segs[i].data, segs[i].size, segs[i].offset) < 0) { 699 debug("syz_read_part_table: pwrite[%u] failed: %d\n", (int)i, errno); 700 } 701 } 702 snprintf(loopname, sizeof(loopname), "/dev/loop%llu", procid); 703 loopfd = open(loopname, O_RDWR); 704 if (loopfd == -1) { 705 err = errno; 706 goto error_close_memfd; 707 } 708 if (ioctl(loopfd, LOOP_SET_FD, memfd)) { 709 if (errno != EBUSY) { 710 err = errno; 711 goto error_close_loop; 712 } 713 ioctl(loopfd, LOOP_CLR_FD, 0); 714 usleep(1000); 715 if (ioctl(loopfd, LOOP_SET_FD, memfd)) { 716 err = errno; 717 goto error_close_loop; 718 } 719 } 720 struct loop_info64 info; 721 if (ioctl(loopfd, LOOP_GET_STATUS64, &info)) { 722 err = errno; 723 goto error_clear_loop; 724 } 725 #if SYZ_EXECUTOR 726 cover_reset(0); 727 #endif 728 info.lo_flags |= LO_FLAGS_PARTSCAN; 729 if (ioctl(loopfd, LOOP_SET_STATUS64, &info)) { 730 err = errno; 731 goto error_clear_loop; 732 } 733 res = 0; 734 // If we managed to parse some partitions, symlink them into our work dir. 
735 for (i = 1, j = 0; i < 8; i++) { 736 snprintf(loopname, sizeof(loopname), "/dev/loop%llup%d", procid, (int)i); 737 struct stat statbuf; 738 if (stat(loopname, &statbuf) == 0) { 739 snprintf(linkname, sizeof(linkname), "./file%d", (int)j++); 740 if (symlink(loopname, linkname)) { 741 debug("syz_read_part_table: symlink(%s, %s) failed: %d\n", loopname, linkname, errno); 742 } 743 } 744 } 745 error_clear_loop: 746 ioctl(loopfd, LOOP_CLR_FD, 0); 747 error_close_loop: 748 close(loopfd); 749 error_close_memfd: 750 close(memfd); 751 error: 752 errno = err; 753 return res; 754 } 755 #endif 756 757 #if SYZ_EXECUTOR || __NR_syz_mount_image 758 #include <string.h> 759 #include <sys/mount.h> 760 761 //syz_mount_image(fs ptr[in, string[disk_filesystems]], dir ptr[in, filename], size intptr, nsegs len[segments], segments ptr[in, array[fs_image_segment]], flags flags[mount_flags], opts ptr[in, fs_options[vfat_options]]) 762 //fs_image_segment { 763 // data ptr[in, array[int8]] 764 // size len[data, intptr] 765 // offset intptr 766 //} 767 static long syz_mount_image(long fsarg, long dir, unsigned long size, unsigned long nsegs, long segments, long flags, long optsarg) 768 { 769 char loopname[64], fs[32], opts[256]; 770 int loopfd, err = 0, res = -1; 771 unsigned long i; 772 // Strictly saying we ought to do a nonfailing copyout of segments into a local var. 773 // But some filesystems have large number of segments (2000+), 774 // we can't allocate that much on stack and allocating elsewhere is problematic, 775 // so we just use the memory allocated by fuzzer. 
776 struct fs_image_segment* segs = (struct fs_image_segment*)segments; 777 778 if (nsegs > IMAGE_MAX_SEGMENTS) 779 nsegs = IMAGE_MAX_SEGMENTS; 780 for (i = 0; i < nsegs; i++) { 781 if (segs[i].size > IMAGE_MAX_SIZE) 782 segs[i].size = IMAGE_MAX_SIZE; 783 segs[i].offset %= IMAGE_MAX_SIZE; 784 if (segs[i].offset > IMAGE_MAX_SIZE - segs[i].size) 785 segs[i].offset = IMAGE_MAX_SIZE - segs[i].size; 786 if (size < segs[i].offset + segs[i].offset) 787 size = segs[i].offset + segs[i].offset; 788 } 789 if (size > IMAGE_MAX_SIZE) 790 size = IMAGE_MAX_SIZE; 791 int memfd = syscall(SYZ_memfd_create, "syz_mount_image", 0); 792 if (memfd == -1) { 793 err = errno; 794 goto error; 795 } 796 if (ftruncate(memfd, size)) { 797 err = errno; 798 goto error_close_memfd; 799 } 800 for (i = 0; i < nsegs; i++) { 801 if (pwrite(memfd, segs[i].data, segs[i].size, segs[i].offset) < 0) { 802 debug("syz_mount_image: pwrite[%u] failed: %d\n", (int)i, errno); 803 } 804 } 805 snprintf(loopname, sizeof(loopname), "/dev/loop%llu", procid); 806 loopfd = open(loopname, O_RDWR); 807 if (loopfd == -1) { 808 err = errno; 809 goto error_close_memfd; 810 } 811 if (ioctl(loopfd, LOOP_SET_FD, memfd)) { 812 if (errno != EBUSY) { 813 err = errno; 814 goto error_close_loop; 815 } 816 ioctl(loopfd, LOOP_CLR_FD, 0); 817 usleep(1000); 818 if (ioctl(loopfd, LOOP_SET_FD, memfd)) { 819 err = errno; 820 goto error_close_loop; 821 } 822 } 823 mkdir((char*)dir, 0777); 824 memset(fs, 0, sizeof(fs)); 825 NONFAILING(strncpy(fs, (char*)fsarg, sizeof(fs) - 1)); 826 memset(opts, 0, sizeof(opts)); 827 // Leave some space for the additional options we append below. 828 NONFAILING(strncpy(opts, (char*)optsarg, sizeof(opts) - 32)); 829 if (strcmp(fs, "iso9660") == 0) { 830 flags |= MS_RDONLY; 831 } else if (strncmp(fs, "ext", 3) == 0) { 832 // For ext2/3/4 we have to have errors=continue because the image 833 // can contain errors=panic flag and can legally crash kernel. 
834 if (strstr(opts, "errors=panic") || strstr(opts, "errors=remount-ro") == 0) 835 strcat(opts, ",errors=continue"); 836 } else if (strcmp(fs, "xfs") == 0) { 837 // For xfs we need nouuid because xfs has a global uuids table 838 // and if two parallel executors mounts fs with the same uuid, second mount fails. 839 strcat(opts, ",nouuid"); 840 } 841 debug("syz_mount_image: size=%llu segs=%llu loop='%s' dir='%s' fs='%s' flags=%llu opts='%s'\n", (uint64)size, (uint64)nsegs, loopname, (char*)dir, fs, (uint64)flags, opts); 842 #if SYZ_EXECUTOR 843 cover_reset(0); 844 #endif 845 if (mount(loopname, (char*)dir, fs, flags, opts)) { 846 err = errno; 847 goto error_clear_loop; 848 } 849 res = 0; 850 error_clear_loop: 851 ioctl(loopfd, LOOP_CLR_FD, 0); 852 error_close_loop: 853 close(loopfd); 854 error_close_memfd: 855 close(memfd); 856 error: 857 errno = err; 858 return res; 859 } 860 #endif 861 862 #if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu 863 #include <errno.h> 864 #include <fcntl.h> 865 #include <linux/kvm.h> 866 #include <stdarg.h> 867 #include <stddef.h> 868 #include <sys/ioctl.h> 869 #include <sys/stat.h> 870 871 #if defined(__x86_64__) 872 #include "common_kvm_amd64.h" 873 #elif defined(__aarch64__) 874 #include "common_kvm_arm64.h" 875 #else 876 static long syz_kvm_setup_cpu(long a0, long a1, long a2, long a3, long a4, long a5, long a6, long a7) 877 { 878 return 0; 879 } 880 #endif 881 #endif 882 883 #if SYZ_EXECUTOR || SYZ_FAULT_INJECTION || SYZ_SANDBOX_NAMESPACE || SYZ_ENABLE_CGROUPS 884 #include <errno.h> 885 #include <fcntl.h> 886 #include <stdarg.h> 887 #include <stdbool.h> 888 #include <string.h> 889 #include <sys/stat.h> 890 #include <sys/types.h> 891 892 static bool write_file(const char* file, const char* what, ...) 
893 { 894 char buf[1024]; 895 va_list args; 896 va_start(args, what); 897 vsnprintf(buf, sizeof(buf), what, args); 898 va_end(args); 899 buf[sizeof(buf) - 1] = 0; 900 int len = strlen(buf); 901 902 int fd = open(file, O_WRONLY | O_CLOEXEC); 903 if (fd == -1) 904 return false; 905 if (write(fd, buf, len) != len) { 906 int err = errno; 907 close(fd); 908 errno = err; 909 return false; 910 } 911 close(fd); 912 return true; 913 } 914 #endif 915 916 #if SYZ_EXECUTOR || SYZ_RESET_NET_NAMESPACE 917 #include <errno.h> 918 #include <linux/net.h> 919 #include <netinet/in.h> 920 #include <string.h> 921 #include <sys/socket.h> 922 923 // checkpoint/reset_net_namespace partially resets net namespace to initial state 924 // after each test. Currently it resets only ipv4 netfilter state. 925 // Ideally, we just create a new net namespace for each test, 926 // however it's too slow (1-1.5 seconds per namespace, not parallelizable). 927 928 // Linux headers do not compile for C++, so we have to define the structs manualy. 
// Capacity of the entry blob we are able to checkpoint, and the maximum number
// of entries (each entry has one counter) per table.
#define XT_TABLE_SIZE 1536
#define XT_MAX_ENTRIES 10

struct xt_counters {
	uint64 pcnt, bcnt;
};

// Argument of IPT_SO_GET_INFO.
struct ipt_getinfo {
	char name[32];
	unsigned int valid_hooks;
	unsigned int hook_entry[5];
	unsigned int underflow[5];
	unsigned int num_entries;
	unsigned int size;
};

// Argument of IPT_SO_GET_ENTRIES; entrytable has a fixed capacity of XT_TABLE_SIZE bytes.
struct ipt_get_entries {
	char name[32];
	unsigned int size;
	void* entrytable[XT_TABLE_SIZE / sizeof(void*)];
};

// Argument of IPT_SO_SET_REPLACE.
struct ipt_replace {
	char name[32];
	unsigned int valid_hooks;
	unsigned int num_entries;
	unsigned int size;
	unsigned int hook_entry[5];
	unsigned int underflow[5];
	unsigned int num_counters;
	struct xt_counters* counters;
	char entrytable[XT_TABLE_SIZE];
};

// Per-table checkpoint: info is the state captured by checkpoint_iptables,
// replace is the pre-built request that reset_iptables uses to restore it.
struct ipt_table_desc {
	const char* name;
	struct ipt_getinfo info;
	struct ipt_replace replace;
};

static struct ipt_table_desc ipv4_tables[] = {
    {.name = "filter"},
    {.name = "nat"},
    {.name = "mangle"},
    {.name = "raw"},
    {.name = "security"},
};

static struct ipt_table_desc ipv6_tables[] = {
    {.name = "filter"},
    {.name = "nat"},
    {.name = "mangle"},
    {.name = "raw"},
    {.name = "security"},
};

// Note: the set and get option values overlap,
// SET_REPLACE is used with setsockopt while GET_INFO/GET_ENTRIES are used with getsockopt.
#define IPT_BASE_CTL 64
#define IPT_SO_SET_REPLACE (IPT_BASE_CTL)
#define IPT_SO_GET_INFO (IPT_BASE_CTL)
#define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1)

// The arpt_* structs parallel the ipt_* ones above, but have 3 hooks instead of 5.
struct arpt_getinfo {
	char name[32];
	unsigned int valid_hooks;
	unsigned int hook_entry[3];
	unsigned int underflow[3];
	unsigned int num_entries;
	unsigned int size;
};

struct arpt_get_entries {
	char name[32];
	unsigned int size;
	void* entrytable[XT_TABLE_SIZE / sizeof(void*)];
};

struct arpt_replace {
	char name[32];
	unsigned int valid_hooks;
	unsigned int num_entries;
	unsigned int size;
	unsigned int hook_entry[3];
	unsigned int underflow[3];
	unsigned int num_counters;
	struct xt_counters* counters;
	char entrytable[XT_TABLE_SIZE];
};

struct arpt_table_desc {
	const char* name;
	struct arpt_getinfo info;
	struct arpt_replace replace;
};

static struct arpt_table_desc arpt_tables[] = {
    {.name = "filter"},
};

#define ARPT_BASE_CTL 96
#define ARPT_SO_SET_REPLACE (ARPT_BASE_CTL)
#define ARPT_SO_GET_INFO (ARPT_BASE_CTL)
#define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1)

// Captures the current state of every table in tables[0..num_tables) for the given
// socket family/sockopt level: table->info receives the table info and table->replace
// is filled with everything needed to restore the table later.
// Tables the kernel does not provide (EPERM/ENOENT/ENOPROTOOPT) are skipped and an
// unsupported family makes the whole function a no-op; all other errors are fatal.
static void checkpoint_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level)
{
	struct ipt_get_entries entries;
	socklen_t optlen;
	int fd, i;

	fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
	if (fd == -1) {
		switch (errno) {
		case EAFNOSUPPORT:
		case ENOPROTOOPT:
			return;
		}
		fail("iptable checkpoint %d: socket failed", family);
	}
	for (i = 0; i < num_tables; i++) {
		struct ipt_table_desc* table = &tables[i];
		strcpy(table->info.name, table->name);
		strcpy(table->replace.name, table->name);
		optlen = sizeof(table->info);
		if (getsockopt(fd, level, IPT_SO_GET_INFO, &table->info, &optlen)) {
			switch (errno) {
			case EPERM:
			case ENOENT:
			case ENOPROTOOPT:
				continue;
			}
			fail("iptable checkpoint %s/%d: getsockopt(IPT_SO_GET_INFO)", table->name, family);
		}
		debug("iptable checkpoint %s/%d: checkpoint entries=%d hooks=%x size=%d\n",
		      table->name, family, table->info.num_entries,
		      table->info.valid_hooks, table->info.size);
		// The table has to fit into our fixed-size buffers
		// (entry blob here, counters array in reset_iptables).
		if (table->info.size > sizeof(table->replace.entrytable))
			fail("iptable checkpoint %s/%d: table size is too large: %u",
			     table->name, family, table->info.size);
		if (table->info.num_entries > XT_MAX_ENTRIES)
			fail("iptable checkpoint %s/%d: too many counters: %u",
			     table->name, family, table->info.num_entries);
		memset(&entries, 0, sizeof(entries));
		strcpy(entries.name, table->name);
		entries.size = table->info.size;
		// Option length is the header plus exactly info.size bytes of entries.
		optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
		if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
			fail("iptable checkpoint %s/%d: getsockopt(IPT_SO_GET_ENTRIES)",
			     table->name, family);
		table->replace.valid_hooks = table->info.valid_hooks;
		table->replace.num_entries = table->info.num_entries;
		table->replace.size = table->info.size;
		memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry));
		memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow));
		memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
	}
	close(fd);
}

// Restores every checkpointed table in tables[0..num_tables) whose current info or
// entries differ from the checkpoint. Tables that were not checkpointed
// (valid_hooks == 0) are skipped. All errors except an unsupported family are fatal.
static void reset_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level)
{
	struct xt_counters counters[XT_MAX_ENTRIES];
	struct ipt_get_entries entries;
	struct ipt_getinfo info;
	socklen_t optlen;
	int fd, i;

	fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
	if (fd == -1) {
		switch (errno) {
		case EAFNOSUPPORT:
		case ENOPROTOOPT:
			return;
		}
		fail("iptable %d: socket failed", family);
	}
	for (i = 0; i < num_tables; i++) {
		struct ipt_table_desc* table = &tables[i];
		if (table->info.valid_hooks == 0)
			continue;
		memset(&info, 0, sizeof(info));
		strcpy(info.name, table->name);
		optlen = sizeof(info);
		if (getsockopt(fd, level, IPT_SO_GET_INFO, &info, &optlen))
			fail("iptable %s/%d: getsockopt(IPT_SO_GET_INFO)", table->name, family);
		// Fetch and compare the entries only if the info matches the checkpoint;
		// if the entries match too, the table is unchanged and nothing needs to be done.
		if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
			memset(&entries, 0, sizeof(entries));
			strcpy(entries.name, table->name);
			entries.size = table->info.size;
			optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
			if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
				fail("iptable %s/%d: getsockopt(IPT_SO_GET_ENTRIES)", table->name, family);
			if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0)
				continue;
		}
		debug("iptable %s/%d: resetting\n", table->name, family);
		// num_counters is taken from the current table (info), not from the checkpoint.
		table->replace.num_counters = info.num_entries;
		table->replace.counters = counters;
		optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size;
		if (setsockopt(fd, level, IPT_SO_SET_REPLACE, &table->replace, optlen))
			fail("iptable %s/%d: setsockopt(IPT_SO_SET_REPLACE)", table->name, family);
	}
	close(fd);
}

// arptables counterpart of checkpoint_iptables operating on arpt_tables;
// the visible part follows the same steps using an AF_INET socket and SOL_IP.
static void checkpoint_arptables(void)
{
	struct arpt_get_entries entries;
	socklen_t optlen;
	unsigned i;
	int fd;

	fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (fd == -1) {
		switch (errno) {
		case EAFNOSUPPORT:
		case ENOPROTOOPT:
			return;
		}
		fail("arptable checkpoint: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
	}
	for (i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) {
		struct arpt_table_desc* table = &arpt_tables[i];
		strcpy(table->info.name, table->name);
		strcpy(table->replace.name, table->name);
		optlen = sizeof(table->info);
		if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &table->info, &optlen)) {
			switch (errno) {
			case EPERM:
			case ENOENT:
			case ENOPROTOOPT:
				continue;
			}
			fail("arptable checkpoint %s: getsockopt(ARPT_SO_GET_INFO)", table->name);
		}
		debug("arptable checkpoint %s: entries=%d hooks=%x size=%d\n",
		      table->name, table->info.num_entries, table->info.valid_hooks, table->info.size);
		if (table->info.size > sizeof(table->replace.entrytable))
			fail("arptable checkpoint %s: table size is too large: %u",
			     table->name, table->info.size);
		if (table->info.num_entries > XT_MAX_ENTRIES)
			fail("arptable checkpoint %s: too many counters: %u",
			     table->name, table->info.num_entries);
1171 memset(&entries, 0, sizeof(entries)); 1172 strcpy(entries.name, table->name); 1173 entries.size = table->info.size; 1174 optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size; 1175 if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen)) 1176 fail("arptable checkpoint %s: getsockopt(ARPT_SO_GET_ENTRIES)", table->name); 1177 table->replace.valid_hooks = table->info.valid_hooks; 1178 table->replace.num_entries = table->info.num_entries; 1179 table->replace.size = table->info.size; 1180 memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry)); 1181 memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow)); 1182 memcpy(table->replace.entrytable, entries.entrytable, table->info.size); 1183 } 1184 close(fd); 1185 } 1186 1187 static void reset_arptables() 1188 { 1189 struct xt_counters counters[XT_MAX_ENTRIES]; 1190 struct arpt_get_entries entries; 1191 struct arpt_getinfo info; 1192 socklen_t optlen; 1193 unsigned i; 1194 int fd; 1195 1196 fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 1197 if (fd == -1) { 1198 switch (errno) { 1199 case EAFNOSUPPORT: 1200 case ENOPROTOOPT: 1201 return; 1202 } 1203 fail("arptable: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)"); 1204 } 1205 for (i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) { 1206 struct arpt_table_desc* table = &arpt_tables[i]; 1207 if (table->info.valid_hooks == 0) 1208 continue; 1209 memset(&info, 0, sizeof(info)); 1210 strcpy(info.name, table->name); 1211 optlen = sizeof(info); 1212 if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &info, &optlen)) 1213 fail("arptable %s:getsockopt(ARPT_SO_GET_INFO)", table->name); 1214 if (memcmp(&table->info, &info, sizeof(table->info)) == 0) { 1215 memset(&entries, 0, sizeof(entries)); 1216 strcpy(entries.name, table->name); 1217 entries.size = table->info.size; 1218 optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size; 1219 if (getsockopt(fd, SOL_IP, 
ARPT_SO_GET_ENTRIES, &entries, &optlen)) 1220 fail("arptable %s: getsockopt(ARPT_SO_GET_ENTRIES)", table->name); 1221 if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0) 1222 continue; 1223 debug("arptable %s: data changed\n", table->name); 1224 } else { 1225 debug("arptable %s: header changed\n", table->name); 1226 } 1227 debug("arptable %s: resetting\n", table->name); 1228 table->replace.num_counters = info.num_entries; 1229 table->replace.counters = counters; 1230 optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size; 1231 if (setsockopt(fd, SOL_IP, ARPT_SO_SET_REPLACE, &table->replace, optlen)) 1232 fail("arptable %s: setsockopt(ARPT_SO_SET_REPLACE)", table->name); 1233 } 1234 close(fd); 1235 } 1236 1237 #include <linux/if.h> 1238 #include <linux/netfilter_bridge/ebtables.h> 1239 1240 struct ebt_table_desc { 1241 const char* name; 1242 struct ebt_replace replace; 1243 char entrytable[XT_TABLE_SIZE]; 1244 }; 1245 1246 static struct ebt_table_desc ebt_tables[] = { 1247 {.name = "filter"}, 1248 {.name = "nat"}, 1249 {.name = "broute"}, 1250 }; 1251 1252 static void checkpoint_ebtables(void) 1253 { 1254 socklen_t optlen; 1255 unsigned i; 1256 int fd; 1257 1258 fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 1259 if (fd == -1) { 1260 switch (errno) { 1261 case EAFNOSUPPORT: 1262 case ENOPROTOOPT: 1263 return; 1264 } 1265 fail("ebtable checkpoint: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)"); 1266 } 1267 for (i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) { 1268 struct ebt_table_desc* table = &ebt_tables[i]; 1269 strcpy(table->replace.name, table->name); 1270 optlen = sizeof(table->replace); 1271 if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_INFO, &table->replace, &optlen)) { 1272 switch (errno) { 1273 case EPERM: 1274 case ENOENT: 1275 case ENOPROTOOPT: 1276 continue; 1277 } 1278 fail("ebtable checkpoint %s: getsockopt(EBT_SO_GET_INIT_INFO)", table->name); 1279 } 1280 debug("ebtable 
checkpoint %s: entries=%d hooks=%x size=%d\n", 1281 table->name, table->replace.nentries, table->replace.valid_hooks, 1282 table->replace.entries_size); 1283 if (table->replace.entries_size > sizeof(table->entrytable)) 1284 fail("ebtable checkpoint %s: table size is too large: %u", 1285 table->name, table->replace.entries_size); 1286 table->replace.num_counters = 0; 1287 table->replace.entries = table->entrytable; 1288 optlen = sizeof(table->replace) + table->replace.entries_size; 1289 if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_ENTRIES, &table->replace, &optlen)) 1290 fail("ebtable checkpoint %s: getsockopt(EBT_SO_GET_INIT_ENTRIES)", table->name); 1291 } 1292 close(fd); 1293 } 1294 1295 static void reset_ebtables() 1296 { 1297 struct ebt_replace replace; 1298 char entrytable[XT_TABLE_SIZE]; 1299 socklen_t optlen; 1300 unsigned i, j, h; 1301 int fd; 1302 1303 fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 1304 if (fd == -1) { 1305 switch (errno) { 1306 case EAFNOSUPPORT: 1307 case ENOPROTOOPT: 1308 return; 1309 } 1310 fail("ebtable: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)"); 1311 } 1312 for (i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) { 1313 struct ebt_table_desc* table = &ebt_tables[i]; 1314 if (table->replace.valid_hooks == 0) 1315 continue; 1316 memset(&replace, 0, sizeof(replace)); 1317 strcpy(replace.name, table->name); 1318 optlen = sizeof(replace); 1319 if (getsockopt(fd, SOL_IP, EBT_SO_GET_INFO, &replace, &optlen)) 1320 fail("ebtable %s: getsockopt(EBT_SO_GET_INFO)", table->name); 1321 replace.num_counters = 0; 1322 table->replace.entries = 0; 1323 for (h = 0; h < NF_BR_NUMHOOKS; h++) 1324 table->replace.hook_entry[h] = 0; 1325 if (memcmp(&table->replace, &replace, sizeof(table->replace)) == 0) { 1326 memset(&entrytable, 0, sizeof(entrytable)); 1327 replace.entries = entrytable; 1328 optlen = sizeof(replace) + replace.entries_size; 1329 if (getsockopt(fd, SOL_IP, EBT_SO_GET_ENTRIES, &replace, &optlen)) 1330 fail("ebtable %s: 
getsockopt(EBT_SO_GET_ENTRIES)", table->name); 1331 if (memcmp(table->entrytable, entrytable, replace.entries_size) == 0) 1332 continue; 1333 } 1334 debug("ebtable %s: resetting\n", table->name); 1335 // Kernel does not seem to return actual entry points (wat?). 1336 for (j = 0, h = 0; h < NF_BR_NUMHOOKS; h++) { 1337 if (table->replace.valid_hooks & (1 << h)) { 1338 table->replace.hook_entry[h] = (struct ebt_entries*)table->entrytable + j; 1339 j++; 1340 } 1341 } 1342 table->replace.entries = table->entrytable; 1343 optlen = sizeof(table->replace) + table->replace.entries_size; 1344 if (setsockopt(fd, SOL_IP, EBT_SO_SET_ENTRIES, &table->replace, optlen)) 1345 fail("ebtable %s: setsockopt(EBT_SO_SET_ENTRIES)", table->name); 1346 } 1347 close(fd); 1348 } 1349 1350 static void checkpoint_net_namespace(void) 1351 { 1352 #if SYZ_EXECUTOR 1353 if (flag_sandbox == sandbox_setuid) 1354 return; 1355 #endif 1356 checkpoint_ebtables(); 1357 checkpoint_arptables(); 1358 checkpoint_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP); 1359 checkpoint_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6); 1360 } 1361 1362 static void reset_net_namespace(void) 1363 { 1364 #if SYZ_EXECUTOR 1365 if (flag_sandbox == sandbox_setuid) 1366 return; 1367 #endif 1368 reset_ebtables(); 1369 reset_arptables(); 1370 reset_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP); 1371 reset_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6); 1372 } 1373 #endif 1374 1375 #if SYZ_EXECUTOR || SYZ_ENABLE_CGROUPS 1376 #include <fcntl.h> 1377 #include <sys/mount.h> 1378 #include <sys/stat.h> 1379 #include <sys/types.h> 1380 1381 static void setup_cgroups() 1382 { 1383 if (mkdir("/syzcgroup", 0777)) { 1384 debug("mkdir(/syzcgroup) failed: %d\n", errno); 1385 } 1386 if (mkdir("/syzcgroup/unified", 0777)) { 1387 debug("mkdir(/syzcgroup/unified) failed: %d\n", errno); 1388 
} 1389 if (mount("none", "/syzcgroup/unified", "cgroup2", 0, NULL)) { 1390 debug("mount(cgroup2) failed: %d\n", errno); 1391 } 1392 if (chmod("/syzcgroup/unified", 0777)) { 1393 debug("chmod(/syzcgroup/unified) failed: %d\n", errno); 1394 } 1395 if (!write_file("/syzcgroup/unified/cgroup.subtree_control", "+cpu +memory +io +pids +rdma")) { 1396 debug("write(cgroup.subtree_control) failed: %d\n", errno); 1397 } 1398 if (mkdir("/syzcgroup/cpu", 0777)) { 1399 debug("mkdir(/syzcgroup/cpu) failed: %d\n", errno); 1400 } 1401 if (mount("none", "/syzcgroup/cpu", "cgroup", 0, "cpuset,cpuacct,perf_event,hugetlb")) { 1402 debug("mount(cgroup cpu) failed: %d\n", errno); 1403 } 1404 if (!write_file("/syzcgroup/cpu/cgroup.clone_children", "1")) { 1405 debug("write(/syzcgroup/cpu/cgroup.clone_children) failed: %d\n", errno); 1406 } 1407 if (chmod("/syzcgroup/cpu", 0777)) { 1408 debug("chmod(/syzcgroup/cpu) failed: %d\n", errno); 1409 } 1410 if (mkdir("/syzcgroup/net", 0777)) { 1411 debug("mkdir(/syzcgroup/net) failed: %d\n", errno); 1412 } 1413 if (mount("none", "/syzcgroup/net", "cgroup", 0, "net_cls,net_prio,devices,freezer")) { 1414 debug("mount(cgroup net) failed: %d\n", errno); 1415 } 1416 if (chmod("/syzcgroup/net", 0777)) { 1417 debug("chmod(/syzcgroup/net) failed: %d\n", errno); 1418 } 1419 } 1420 1421 // TODO(dvyukov): this should be under a separate define for separate minimization, 1422 // but for now we bundle this with cgroups. 
1423 static void setup_binfmt_misc() 1424 { 1425 if (mount(0, "/proc/sys/fs/binfmt_misc", "binfmt_misc", 0, 0)) { 1426 debug("mount(binfmt_misc) failed: %d\n", errno); 1427 } 1428 if (!write_file("/proc/sys/fs/binfmt_misc/register", ":syz0:M:0:\x01::./file0:")) { 1429 debug("write(/proc/sys/fs/binfmt_misc/register, syz0) failed: %d\n", errno); 1430 } 1431 if (!write_file("/proc/sys/fs/binfmt_misc/register", ":syz1:M:1:\x02::./file0:POC")) { 1432 debug("write(/proc/sys/fs/binfmt_misc/register, syz1) failed: %d\n", errno); 1433 } 1434 } 1435 #endif 1436 1437 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE 1438 #include <errno.h> 1439 #include <sys/mount.h> 1440 1441 static void setup_common() 1442 { 1443 if (mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0)) { 1444 debug("mount(fusectl) failed: %d\n", errno); 1445 } 1446 #if SYZ_EXECUTOR || SYZ_ENABLE_CGROUPS 1447 setup_cgroups(); 1448 setup_binfmt_misc(); 1449 #endif 1450 } 1451 #endif 1452 1453 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE 1454 #include <sched.h> 1455 #include <sys/prctl.h> 1456 #include <sys/resource.h> 1457 #include <sys/time.h> 1458 #include <sys/wait.h> 1459 1460 static void loop(); 1461 1462 static void sandbox_common() 1463 { 1464 prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); 1465 setpgrp(); 1466 setsid(); 1467 1468 #if SYZ_EXECUTOR || __NR_syz_init_net_socket 1469 int netns = open("/proc/self/ns/net", O_RDONLY); 1470 if (netns == -1) 1471 fail("open(/proc/self/ns/net) failed"); 1472 if (dup2(netns, kInitNetNsFd) < 0) 1473 fail("dup2(netns, kInitNetNsFd) failed"); 1474 close(netns); 1475 #endif 1476 1477 struct rlimit rlim; 1478 rlim.rlim_cur = rlim.rlim_max = 160 << 20; 1479 setrlimit(RLIMIT_AS, &rlim); 1480 rlim.rlim_cur = rlim.rlim_max = 8 << 20; 1481 setrlimit(RLIMIT_MEMLOCK, &rlim); 1482 rlim.rlim_cur = rlim.rlim_max = 136 << 20; 1483 setrlimit(RLIMIT_FSIZE, &rlim); 1484 rlim.rlim_cur = rlim.rlim_max = 1 << 20; 
1485 setrlimit(RLIMIT_STACK, &rlim); 1486 rlim.rlim_cur = rlim.rlim_max = 0; 1487 setrlimit(RLIMIT_CORE, &rlim); 1488 rlim.rlim_cur = rlim.rlim_max = 256; // see kMaxFd 1489 setrlimit(RLIMIT_NOFILE, &rlim); 1490 1491 // CLONE_NEWNS/NEWCGROUP cause EINVAL on some systems, 1492 // so we do them separately of clone in do_sandbox_namespace. 1493 if (unshare(CLONE_NEWNS)) { 1494 debug("unshare(CLONE_NEWNS): %d\n", errno); 1495 } 1496 if (unshare(CLONE_NEWIPC)) { 1497 debug("unshare(CLONE_NEWIPC): %d\n", errno); 1498 } 1499 if (unshare(0x02000000)) { 1500 debug("unshare(CLONE_NEWCGROUP): %d\n", errno); 1501 } 1502 if (unshare(CLONE_NEWUTS)) { 1503 debug("unshare(CLONE_NEWUTS): %d\n", errno); 1504 } 1505 if (unshare(CLONE_SYSVSEM)) { 1506 debug("unshare(CLONE_SYSVSEM): %d\n", errno); 1507 } 1508 } 1509 1510 int wait_for_loop(int pid) 1511 { 1512 if (pid < 0) 1513 fail("sandbox fork failed"); 1514 debug("spawned loop pid %d\n", pid); 1515 int status = 0; 1516 while (waitpid(-1, &status, __WALL) != pid) { 1517 } 1518 return WEXITSTATUS(status); 1519 } 1520 #endif 1521 1522 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE 1523 #include <sched.h> 1524 #include <sys/types.h> 1525 1526 static int do_sandbox_none(void) 1527 { 1528 // CLONE_NEWPID takes effect for the first child of the current process, 1529 // so we do it before fork to make the loop "init" process of the namespace. 1530 // We ought to do fail here, but sandbox=none is used in pkg/ipc tests 1531 // and they are usually run under non-root. 1532 // Also since debug is stripped by pkg/csource, we need to do {} 1533 // even though we generally don't do {} around single statements. 
1534 if (unshare(CLONE_NEWPID)) { 1535 debug("unshare(CLONE_NEWPID): %d\n", errno); 1536 } 1537 int pid = fork(); 1538 if (pid != 0) 1539 return wait_for_loop(pid); 1540 1541 setup_common(); 1542 sandbox_common(); 1543 if (unshare(CLONE_NEWNET)) { 1544 debug("unshare(CLONE_NEWNET): %d\n", errno); 1545 } 1546 #if SYZ_EXECUTOR || SYZ_TUN_ENABLE 1547 initialize_tun(); 1548 #endif 1549 #if SYZ_EXECUTOR || SYZ_ENABLE_NETDEV 1550 initialize_netdevices(); 1551 #endif 1552 loop(); 1553 doexit(1); 1554 } 1555 #endif 1556 1557 #if SYZ_EXECUTOR || SYZ_SANDBOX_SETUID 1558 #include <grp.h> 1559 #include <sched.h> 1560 #include <sys/prctl.h> 1561 1562 static int do_sandbox_setuid(void) 1563 { 1564 if (unshare(CLONE_NEWPID)) { 1565 debug("unshare(CLONE_NEWPID): %d\n", errno); 1566 } 1567 int pid = fork(); 1568 if (pid != 0) 1569 return wait_for_loop(pid); 1570 1571 setup_common(); 1572 sandbox_common(); 1573 if (unshare(CLONE_NEWNET)) { 1574 debug("unshare(CLONE_NEWNET): %d\n", errno); 1575 } 1576 #if SYZ_EXECUTOR || SYZ_TUN_ENABLE 1577 initialize_tun(); 1578 #endif 1579 #if SYZ_EXECUTOR || SYZ_ENABLE_NETDEV 1580 initialize_netdevices(); 1581 #endif 1582 1583 const int nobody = 65534; 1584 if (setgroups(0, NULL)) 1585 fail("failed to setgroups"); 1586 if (syscall(SYS_setresgid, nobody, nobody, nobody)) 1587 fail("failed to setresgid"); 1588 if (syscall(SYS_setresuid, nobody, nobody, nobody)) 1589 fail("failed to setresuid"); 1590 1591 // This is required to open /proc/self/* files. 1592 // Otherwise they are owned by root and we can't open them after setuid. 1593 // See task_dump_owner function in kernel. 
1594 prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); 1595 1596 loop(); 1597 doexit(1); 1598 } 1599 #endif 1600 1601 #if SYZ_EXECUTOR || SYZ_SANDBOX_NAMESPACE 1602 #include <linux/capability.h> 1603 #include <sched.h> 1604 #include <sys/mman.h> 1605 #include <sys/mount.h> 1606 1607 static int real_uid; 1608 static int real_gid; 1609 __attribute__((aligned(64 << 10))) static char sandbox_stack[1 << 20]; 1610 1611 static int namespace_sandbox_proc(void* arg) 1612 { 1613 sandbox_common(); 1614 1615 // /proc/self/setgroups is not present on some systems, ignore error. 1616 write_file("/proc/self/setgroups", "deny"); 1617 if (!write_file("/proc/self/uid_map", "0 %d 1\n", real_uid)) 1618 fail("write of /proc/self/uid_map failed"); 1619 if (!write_file("/proc/self/gid_map", "0 %d 1\n", real_gid)) 1620 fail("write of /proc/self/gid_map failed"); 1621 1622 // CLONE_NEWNET must always happen before tun setup, 1623 // because we want the tun device in the test namespace. 1624 if (unshare(CLONE_NEWNET)) 1625 fail("unshare(CLONE_NEWNET)"); 1626 #if SYZ_EXECUTOR || SYZ_TUN_ENABLE 1627 // We setup tun here as it needs to be in the test net namespace, 1628 // which in turn needs to be in the test user namespace. 1629 // However, IFF_NAPI_FRAGS will fail as we are not root already. 1630 // There does not seem to be a call sequence that would satisfy all of that. 
1631 initialize_tun(); 1632 #endif 1633 #if SYZ_EXECUTOR || SYZ_ENABLE_NETDEV 1634 initialize_netdevices(); 1635 #endif 1636 1637 if (mkdir("./syz-tmp", 0777)) 1638 fail("mkdir(syz-tmp) failed"); 1639 if (mount("", "./syz-tmp", "tmpfs", 0, NULL)) 1640 fail("mount(tmpfs) failed"); 1641 if (mkdir("./syz-tmp/newroot", 0777)) 1642 fail("mkdir failed"); 1643 if (mkdir("./syz-tmp/newroot/dev", 0700)) 1644 fail("mkdir failed"); 1645 unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE; 1646 if (mount("/dev", "./syz-tmp/newroot/dev", NULL, bind_mount_flags, NULL)) 1647 fail("mount(dev) failed"); 1648 if (mkdir("./syz-tmp/newroot/proc", 0700)) 1649 fail("mkdir failed"); 1650 if (mount(NULL, "./syz-tmp/newroot/proc", "proc", 0, NULL)) 1651 fail("mount(proc) failed"); 1652 if (mkdir("./syz-tmp/newroot/selinux", 0700)) 1653 fail("mkdir failed"); 1654 // selinux mount used to be at /selinux, but then moved to /sys/fs/selinux. 1655 const char* selinux_path = "./syz-tmp/newroot/selinux"; 1656 if (mount("/selinux", selinux_path, NULL, bind_mount_flags, NULL)) { 1657 if (errno != ENOENT) 1658 fail("mount(/selinux) failed"); 1659 if (mount("/sys/fs/selinux", selinux_path, NULL, bind_mount_flags, NULL) && errno != ENOENT) 1660 fail("mount(/sys/fs/selinux) failed"); 1661 } 1662 if (mkdir("./syz-tmp/newroot/sys", 0700)) 1663 fail("mkdir failed"); 1664 if (mount("/sys", "./syz-tmp/newroot/sys", 0, bind_mount_flags, NULL)) 1665 fail("mount(sysfs) failed"); 1666 #if SYZ_EXECUTOR || SYZ_ENABLE_CGROUPS 1667 if (mkdir("./syz-tmp/newroot/syzcgroup", 0700)) 1668 fail("mkdir failed"); 1669 if (mkdir("./syz-tmp/newroot/syzcgroup/unified", 0700)) 1670 fail("mkdir failed"); 1671 if (mkdir("./syz-tmp/newroot/syzcgroup/cpu", 0700)) 1672 fail("mkdir failed"); 1673 if (mkdir("./syz-tmp/newroot/syzcgroup/net", 0700)) 1674 fail("mkdir failed"); 1675 if (mount("/syzcgroup/unified", "./syz-tmp/newroot/syzcgroup/unified", NULL, bind_mount_flags, NULL)) { 1676 debug("mount(cgroup2, MS_BIND) failed: 
%d\n", errno); 1677 } 1678 if (mount("/syzcgroup/cpu", "./syz-tmp/newroot/syzcgroup/cpu", NULL, bind_mount_flags, NULL)) { 1679 debug("mount(cgroup/cpu, MS_BIND) failed: %d\n", errno); 1680 } 1681 if (mount("/syzcgroup/net", "./syz-tmp/newroot/syzcgroup/net", NULL, bind_mount_flags, NULL)) { 1682 debug("mount(cgroup/net, MS_BIND) failed: %d\n", errno); 1683 } 1684 #endif 1685 if (mkdir("./syz-tmp/pivot", 0777)) 1686 fail("mkdir failed"); 1687 if (syscall(SYS_pivot_root, "./syz-tmp", "./syz-tmp/pivot")) { 1688 debug("pivot_root failed\n"); 1689 if (chdir("./syz-tmp")) 1690 fail("chdir failed"); 1691 } else { 1692 debug("pivot_root OK\n"); 1693 if (chdir("/")) 1694 fail("chdir failed"); 1695 if (umount2("./pivot", MNT_DETACH)) 1696 fail("umount failed"); 1697 } 1698 if (chroot("./newroot")) 1699 fail("chroot failed"); 1700 if (chdir("/")) 1701 fail("chdir failed"); 1702 1703 // Drop CAP_SYS_PTRACE so that test processes can't attach to parent processes. 1704 // Previously it lead to hangs because the loop process stopped due to SIGSTOP. 1705 // Note that a process can always ptrace its direct children, which is enough 1706 // for testing purposes. 
1707 struct __user_cap_header_struct cap_hdr = {}; 1708 struct __user_cap_data_struct cap_data[2] = {}; 1709 cap_hdr.version = _LINUX_CAPABILITY_VERSION_3; 1710 cap_hdr.pid = getpid(); 1711 if (syscall(SYS_capget, &cap_hdr, &cap_data)) 1712 fail("capget failed"); 1713 cap_data[0].effective &= ~(1 << CAP_SYS_PTRACE); 1714 cap_data[0].permitted &= ~(1 << CAP_SYS_PTRACE); 1715 cap_data[0].inheritable &= ~(1 << CAP_SYS_PTRACE); 1716 if (syscall(SYS_capset, &cap_hdr, &cap_data)) 1717 fail("capset failed"); 1718 1719 loop(); 1720 doexit(1); 1721 } 1722 1723 static int do_sandbox_namespace(void) 1724 { 1725 int pid; 1726 1727 setup_common(); 1728 real_uid = getuid(); 1729 real_gid = getgid(); 1730 mprotect(sandbox_stack, 4096, PROT_NONE); // to catch stack underflows 1731 pid = clone(namespace_sandbox_proc, &sandbox_stack[sizeof(sandbox_stack) - 64], 1732 CLONE_NEWUSER | CLONE_NEWPID, 0); 1733 return wait_for_loop(pid); 1734 } 1735 #endif 1736 1737 #if SYZ_EXECUTOR || SYZ_REPEAT && SYZ_USE_TMP_DIR 1738 #include <dirent.h> 1739 #include <errno.h> 1740 #include <string.h> 1741 #include <sys/ioctl.h> 1742 #include <sys/mount.h> 1743 1744 #define FS_IOC_SETFLAGS _IOW('f', 2, long) 1745 1746 // One does not simply remove a directory. 1747 // There can be mounts, so we need to try to umount. 1748 // Moreover, a mount can be mounted several times, so we need to try to umount in a loop. 1749 // Moreover, after umount a dir can become non-empty again, so we need another loop. 1750 // Moreover, a mount can be re-mounted as read-only and then we will fail to make a dir empty. 1751 static void remove_dir(const char* dir) 1752 { 1753 DIR* dp; 1754 struct dirent* ep; 1755 int iter = 0; 1756 retry: 1757 while (umount2(dir, MNT_DETACH) == 0) { 1758 debug("umount(%s)\n", dir); 1759 } 1760 dp = opendir(dir); 1761 if (dp == NULL) { 1762 if (errno == EMFILE) { 1763 // This happens when the test process casts prlimit(NOFILE) on us. 
1764 // Ideally we somehow prevent test processes from messing with parent processes. 1765 // But full sandboxing is expensive, so let's ignore this error for now. 1766 exitf("opendir(%s) failed due to NOFILE, exiting", dir); 1767 } 1768 exitf("opendir(%s) failed", dir); 1769 } 1770 while ((ep = readdir(dp))) { 1771 if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0) 1772 continue; 1773 char filename[FILENAME_MAX]; 1774 snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name); 1775 // If it's 9p mount with broken transport, lstat will fail. 1776 // So try to umount first. 1777 while (umount2(filename, MNT_DETACH) == 0) { 1778 debug("umount(%s)\n", filename); 1779 } 1780 struct stat st; 1781 if (lstat(filename, &st)) 1782 exitf("lstat(%s) failed", filename); 1783 if (S_ISDIR(st.st_mode)) { 1784 remove_dir(filename); 1785 continue; 1786 } 1787 int i; 1788 for (i = 0;; i++) { 1789 debug("unlink(%s)\n", filename); 1790 if (unlink(filename) == 0) 1791 break; 1792 if (errno == EPERM) { 1793 // Try to reset FS_XFLAG_IMMUTABLE. 1794 int fd = open(filename, O_RDONLY); 1795 if (fd != -1) { 1796 long flags = 0; 1797 if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) 1798 debug("reset FS_XFLAG_IMMUTABLE\n"); 1799 close(fd); 1800 continue; 1801 } 1802 } 1803 if (errno == EROFS) { 1804 debug("ignoring EROFS\n"); 1805 break; 1806 } 1807 if (errno != EBUSY || i > 100) 1808 exitf("unlink(%s) failed", filename); 1809 debug("umount(%s)\n", filename); 1810 if (umount2(filename, MNT_DETACH)) 1811 exitf("umount(%s) failed", filename); 1812 } 1813 } 1814 closedir(dp); 1815 int i; 1816 for (i = 0;; i++) { 1817 debug("rmdir(%s)\n", dir); 1818 if (rmdir(dir) == 0) 1819 break; 1820 if (i < 100) { 1821 if (errno == EPERM) { 1822 // Try to reset FS_XFLAG_IMMUTABLE. 
1823 int fd = open(dir, O_RDONLY); 1824 if (fd != -1) { 1825 long flags = 0; 1826 if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) 1827 debug("reset FS_XFLAG_IMMUTABLE\n"); 1828 close(fd); 1829 continue; 1830 } 1831 } 1832 if (errno == EROFS) { 1833 debug("ignoring EROFS\n"); 1834 break; 1835 } 1836 if (errno == EBUSY) { 1837 debug("umount(%s)\n", dir); 1838 if (umount2(dir, MNT_DETACH)) 1839 exitf("umount(%s) failed", dir); 1840 continue; 1841 } 1842 if (errno == ENOTEMPTY) { 1843 if (iter < 100) { 1844 iter++; 1845 goto retry; 1846 } 1847 } 1848 } 1849 exitf("rmdir(%s) failed", dir); 1850 } 1851 } 1852 #endif 1853 1854 #if SYZ_EXECUTOR || SYZ_FAULT_INJECTION 1855 #include <fcntl.h> 1856 #include <string.h> 1857 #include <sys/stat.h> 1858 #include <sys/types.h> 1859 1860 static int inject_fault(int nth) 1861 { 1862 int fd; 1863 char buf[16]; 1864 1865 fd = open("/proc/thread-self/fail-nth", O_RDWR); 1866 // We treat errors here as temporal/non-critical because we see 1867 // occasional ENOENT/EACCES errors returned. It seems that fuzzer 1868 // somehow gets its hands to it. 
1869 if (fd == -1) 1870 exitf("failed to open /proc/thread-self/fail-nth"); 1871 sprintf(buf, "%d", nth + 1); 1872 if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf)) 1873 exitf("failed to write /proc/thread-self/fail-nth"); 1874 return fd; 1875 } 1876 #endif 1877 1878 #if SYZ_EXECUTOR 1879 static int fault_injected(int fail_fd) 1880 { 1881 char buf[16]; 1882 int n = read(fail_fd, buf, sizeof(buf) - 1); 1883 if (n <= 0) 1884 exitf("failed to read /proc/thread-self/fail-nth"); 1885 int res = n == 2 && buf[0] == '0' && buf[1] == '\n'; 1886 buf[0] = '0'; 1887 if (write(fail_fd, buf, 1) != 1) 1888 exitf("failed to write /proc/thread-self/fail-nth"); 1889 close(fail_fd); 1890 return res; 1891 } 1892 #endif 1893 1894 #if SYZ_EXECUTOR || SYZ_REPEAT 1895 #include <dirent.h> 1896 #include <errno.h> 1897 #include <fcntl.h> 1898 #include <signal.h> 1899 #include <string.h> 1900 #include <sys/stat.h> 1901 #include <sys/types.h> 1902 #include <sys/wait.h> 1903 1904 static void kill_and_wait(int pid, int* status) 1905 { 1906 kill(-pid, SIGKILL); 1907 kill(pid, SIGKILL); 1908 int i; 1909 // First, give it up to 100 ms to surrender. 1910 for (i = 0; i < 100; i++) { 1911 if (waitpid(-1, status, WNOHANG | __WALL) == pid) 1912 return; 1913 usleep(1000); 1914 } 1915 // Now, try to abort fuse connections as they cause deadlocks, 1916 // see Documentation/filesystems/fuse.txt for details. 1917 // There is no good way to figure out the right connections 1918 // provided that the process could use unshare(CLONE_NEWNS), 1919 // so we abort all. 
1920 debug("kill is not working\n"); 1921 DIR* dir = opendir("/sys/fs/fuse/connections"); 1922 if (dir) { 1923 for (;;) { 1924 struct dirent* ent = readdir(dir); 1925 if (!ent) 1926 break; 1927 if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) 1928 continue; 1929 char abort[300]; 1930 snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort", ent->d_name); 1931 int fd = open(abort, O_WRONLY); 1932 if (fd == -1) { 1933 debug("failed to open %s: %d\n", abort, errno); 1934 continue; 1935 } 1936 debug("aborting fuse conn %s\n", ent->d_name); 1937 if (write(fd, abort, 1) < 0) { 1938 debug("failed to abort: %d\n", errno); 1939 } 1940 close(fd); 1941 } 1942 closedir(dir); 1943 } else { 1944 debug("failed to open /sys/fs/fuse/connections: %d\n", errno); 1945 } 1946 // Now, just wait, no other options. 1947 while (waitpid(-1, status, __WALL) != pid) { 1948 } 1949 } 1950 #endif 1951 1952 #if SYZ_EXECUTOR || SYZ_REPEAT && (SYZ_ENABLE_CGROUPS || SYZ_RESET_NET_NAMESPACE) 1953 #include <fcntl.h> 1954 #include <sys/ioctl.h> 1955 #include <sys/stat.h> 1956 #include <sys/types.h> 1957 #include <unistd.h> 1958 1959 #define SYZ_HAVE_SETUP_LOOP 1 1960 static void setup_loop() 1961 { 1962 #if SYZ_EXECUTOR || SYZ_ENABLE_CGROUPS 1963 int pid = getpid(); 1964 char cgroupdir[64]; 1965 char procs_file[128]; 1966 snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid); 1967 if (mkdir(cgroupdir, 0777)) { 1968 debug("mkdir(%s) failed: %d\n", cgroupdir, errno); 1969 } 1970 snprintf(procs_file, sizeof(procs_file), "%s/cgroup.procs", cgroupdir); 1971 if (!write_file(procs_file, "%d", pid)) { 1972 debug("write(%s) failed: %d\n", procs_file, errno); 1973 } 1974 snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid); 1975 if (mkdir(cgroupdir, 0777)) { 1976 debug("mkdir(%s) failed: %d\n", cgroupdir, errno); 1977 } 1978 snprintf(procs_file, sizeof(procs_file), "%s/cgroup.procs", cgroupdir); 1979 if (!write_file(procs_file, "%d", pid)) 
{ 1980 debug("write(%s) failed: %d\n", procs_file, errno); 1981 } 1982 snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid); 1983 if (mkdir(cgroupdir, 0777)) { 1984 debug("mkdir(%s) failed: %d\n", cgroupdir, errno); 1985 } 1986 snprintf(procs_file, sizeof(procs_file), "%s/cgroup.procs", cgroupdir); 1987 if (!write_file(procs_file, "%d", pid)) { 1988 debug("write(%s) failed: %d\n", procs_file, errno); 1989 } 1990 #endif 1991 #if SYZ_EXECUTOR || SYZ_RESET_NET_NAMESPACE 1992 checkpoint_net_namespace(); 1993 #endif 1994 } 1995 #endif 1996 1997 #if SYZ_EXECUTOR || SYZ_REPEAT && (SYZ_RESET_NET_NAMESPACE || __NR_syz_mount_image || __NR_syz_read_part_table) 1998 #define SYZ_HAVE_RESET_LOOP 1 1999 static void reset_loop() 2000 { 2001 #if SYZ_EXECUTOR || __NR_syz_mount_image || __NR_syz_read_part_table 2002 char buf[64]; 2003 snprintf(buf, sizeof(buf), "/dev/loop%llu", procid); 2004 int loopfd = open(buf, O_RDWR); 2005 if (loopfd != -1) { 2006 ioctl(loopfd, LOOP_CLR_FD, 0); 2007 close(loopfd); 2008 } 2009 #endif 2010 #if SYZ_EXECUTOR || SYZ_RESET_NET_NAMESPACE 2011 reset_net_namespace(); 2012 #endif 2013 } 2014 #endif 2015 2016 #if SYZ_EXECUTOR || SYZ_REPEAT 2017 #include <sys/prctl.h> 2018 2019 #define SYZ_HAVE_SETUP_TEST 1 2020 static void setup_test() 2021 { 2022 prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); 2023 setpgrp(); 2024 #if SYZ_EXECUTOR || SYZ_ENABLE_CGROUPS 2025 char cgroupdir[64]; 2026 snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid); 2027 if (symlink(cgroupdir, "./cgroup")) { 2028 debug("symlink(%s, ./cgroup) failed: %d\n", cgroupdir, errno); 2029 } 2030 snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid); 2031 if (symlink(cgroupdir, "./cgroup.cpu")) { 2032 debug("symlink(%s, ./cgroup.cpu) failed: %d\n", cgroupdir, errno); 2033 } 2034 snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid); 2035 if (symlink(cgroupdir, "./cgroup.net")) { 2036 debug("symlink(%s, ./cgroup.net) 
failed: %d\n", cgroupdir, errno); 2037 } 2038 #endif 2039 #if SYZ_EXECUTOR || SYZ_TUN_ENABLE 2040 // Read all remaining packets from tun to better 2041 // isolate consequently executing programs. 2042 flush_tun(); 2043 #endif 2044 } 2045 2046 #define SYZ_HAVE_RESET_TEST 1 2047 static void reset_test() 2048 { 2049 // Keeping a 9p transport pipe open will hang the proccess dead, 2050 // so close all opened file descriptors. 2051 int fd; 2052 for (fd = 3; fd < 30; fd++) 2053 close(fd); 2054 } 2055 #endif 2056