// Copyright 2017 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

// +build

#include <algorithm>
#include <errno.h>
#include <signal.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include "defs.h"

#if defined(__GNUC__)
#define SYSCALLAPI
#define NORETURN __attribute__((noreturn))
#define ALIGNED(N) __attribute__((aligned(N)))
#define PRINTF __attribute__((format(printf, 1, 2)))
#else
// Assuming windows/cl.
#define SYSCALLAPI WINAPI
#define NORETURN __declspec(noreturn)
#define ALIGNED(N) __declspec(align(N))
#define PRINTF
#endif

#ifndef GIT_REVISION
#define GIT_REVISION "unknown"
#endif

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

// uint64_t is impossible to printf without using the clumsy and verbose "%" PRId64,
// so we define and use uint64 instead. Note: pkg/csource does s/uint64/uint64_t/.
// uint32/16/8 are defined for consistency.
typedef unsigned long long uint64;
typedef unsigned int uint32;
typedef unsigned short uint16;
typedef unsigned char uint8;

// exit/_exit do not necessarily work (e.g. if the fuzzer sets a seccomp filter that prohibits exit_group).
// Use doexit instead. We must redefine exit to something that exists in stdlib
// (some standard libraries contain "using ::exit;") but has a different signature,
// so that any accidental call to exit fails to compile.
#define exit vsnprintf

// Note: zircon max fd is 256.
// Some common_OS.h files know about this constant for RLIMIT_NOFILE.
const int kMaxFd = 250;
const int kMaxThreads = 16;
const int kInPipeFd = kMaxFd - 1; // remapped from stdin
const int kOutPipeFd = kMaxFd - 2; // remapped from stdout
const int kCoverFd = kOutPipeFd - kMaxThreads;
const int kMaxArgs = 9;
const int kCoverSize = 256 << 10;
const int kFailStatus = 67;
const int kRetryStatus = 69;
const int kErrorStatus = 68;

// Logical error (e.g. invalid input program), use as an assert() alternative.
NORETURN PRINTF void fail(const char* msg, ...);
// Kernel error (e.g. wrong syscall return value).
NORETURN PRINTF void error(const char* msg, ...);
// Just exit (e.g. due to a transient ENOMEM error).
NORETURN PRINTF void exitf(const char* msg, ...);
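// Exit-status protocol with the parent (see the definitions at the bottom of
// this file): fail produces kFailStatus, or kRetryStatus when errno is
// ENOMEM/EAGAIN; error produces kErrorStatus; exitf produces kRetryStatus.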
// Print debug output; unlike the functions above, it does not append \n to msg.
PRINTF void debug(const char* msg, ...);
void debug_dump_data(const char* data, int length);
NORETURN void doexit(int status);

static void receive_execute();
static void reply_execute(int status);

#if GOOS_akaros
static void resend_execute(int fd);
#endif

#if SYZ_EXECUTOR_USES_FORK_SERVER
static void receive_handshake();
static void reply_handshake();
#endif

#if SYZ_EXECUTOR_USES_SHMEM
const int kMaxOutput = 16 << 20;
const int kInFd = 3;
const int kOutFd = 4;
uint32* output_data;
uint32* output_pos;
static uint32* write_output(uint32 v);
static void write_completed(uint32 completed);
static uint32 hash(uint32 a);
static bool dedup(uint32 sig);
#endif

enum sandbox_type {
	sandbox_none,
	sandbox_setuid,
	sandbox_namespace,
};

bool flag_debug;
bool flag_cover;
bool flag_sandbox_privs;
sandbox_type flag_sandbox;
bool flag_enable_tun;
bool flag_enable_net_dev;
bool flag_enable_fault_injection;

bool flag_collect_cover;
bool flag_dedup_cover;
bool flag_threaded;
bool flag_collide;

// If true, the executor should write the comparisons data to the fuzzer.
bool flag_collect_comps;

// Inject a fault into the flag_fault_nth-th operation in the flag_fault_call-th syscall.
bool flag_inject_fault;
int flag_fault_call;
int flag_fault_nth;

#define SYZ_EXECUTOR 1
#include "common.h"

const int kMaxCommands = 1000;
const int kMaxInput = 2 << 20;

const uint64 instr_eof = -1;
const uint64 instr_copyin = -2;
const uint64 instr_copyout = -3;

const uint64 arg_const = 0;
const uint64 arg_result = 1;
const uint64 arg_data = 2;
const uint64 arg_csum = 3;

const uint64 binary_format_native = 0;
const uint64 binary_format_bigendian = 1;
const uint64 binary_format_strdec = 2;
const uint64 binary_format_strhex = 3;
const uint64 binary_format_stroct = 4;

const uint64 no_copyout = -1;

int running;
uint32 completed;
bool collide;
bool is_kernel_64_bit = true;

ALIGNED(64 << 10)
char input_data[kMaxInput];
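// input_data holds the program as a stream of uint64 words (parsed in
// execute_one below): each item is either a syscall
// (call_num, copyout_index, num_args, args...), an instr_copyin
// (addr, arg kind, kind-specific payload), or an instr_copyout
// (index, addr, size); the stream is terminated by instr_eof.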
// Checksum kinds.
const uint64 arg_csum_inet = 0;

// Checksum chunk kinds.
const uint64 arg_csum_chunk_data = 0;
const uint64 arg_csum_chunk_const = 1;

typedef long(SYSCALLAPI* syscall_t)(long, long, long, long, long, long, long, long, long);

struct call_t {
	const char* name;
	int sys_nr;
	syscall_t call;
};

struct cover_t {
	int fd;
	uint32 size;
	char* data;
	char* data_end;
};

struct thread_t {
	int id;
	bool created;
	event_t ready;
	event_t done;
	uint64* copyout_pos;
	uint64 copyout_index;
	bool colliding;
	bool executing;
	int call_index;
	int call_num;
	int num_args;
	long args[kMaxArgs];
	long res;
	uint32 reserrno;
	bool fault_injected;
	cover_t cov;
};

static thread_t threads[kMaxThreads];
static thread_t* last_scheduled;

struct res_t {
	bool executed;
	uint64 val;
};

res_t results[kMaxCommands];

const uint64 kInMagic = 0xbadc0ffeebadface;
const uint32 kOutMagic = 0xbadf00d;

struct handshake_req {
	uint64 magic;
	uint64 flags; // env flags
	uint64 pid;
};

struct handshake_reply {
	uint32 magic;
};

struct execute_req {
	uint64 magic;
	uint64 env_flags;
	uint64 exec_flags;
	uint64 pid;
	uint64 fault_call;
	uint64 fault_nth;
	uint64 prog_size;
};

struct execute_reply {
	uint32 magic;
	uint32 done;
	uint32 status;
};

// call_reply.flags
const uint32 call_flag_executed = 1 << 0;
const uint32 call_flag_finished = 1 << 1;
const uint32 call_flag_blocked = 1 << 2;
const uint32 call_flag_fault_injected = 1 << 3;

struct call_reply {
	execute_reply header;
	uint32 call_index;
	uint32 call_num;
	uint32 reserrno;
	uint32 flags;
	uint32 signal_size;
	uint32 cover_size;
	uint32 comps_size;
	// signal/cover/comps follow
};

enum {
	KCOV_CMP_CONST = 1,
	KCOV_CMP_SIZE1 = 0,
	KCOV_CMP_SIZE2 = 2,
	KCOV_CMP_SIZE4 = 4,
	KCOV_CMP_SIZE8 = 6,
	KCOV_CMP_SIZE_MASK = 6,
};
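// Example: type = KCOV_CMP_CONST | KCOV_CMP_SIZE4 (= 5) denotes a comparison
// of two 4-byte operands where one operand is a compile-time constant.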
struct kcov_comparison_t {
	// Note: comparisons are always 64 bits regardless of kernel bitness.
	uint64 type;
	uint64 arg1;
	uint64 arg2;
	uint64 pc;

	bool ignore() const;
	void write();
	bool operator==(const struct kcov_comparison_t& other) const;
	bool operator<(const struct kcov_comparison_t& other) const;
};

static thread_t* schedule_call(int call_index, int call_num, bool colliding, uint64 copyout_index, uint64 num_args, uint64* args, uint64* pos);
static void handle_completion(thread_t* th);
static void copyout_call_results(thread_t* th);
static void write_call_output(thread_t* th, bool finished);
static void execute_call(thread_t* th);
static void thread_create(thread_t* th, int id);
static void* worker_thread(void* arg);
static uint64 read_input(uint64** input_posp, bool peek = false);
static uint64 read_arg(uint64** input_posp);
static uint64 read_const_arg(uint64** input_posp, uint64* size_p, uint64* bf, uint64* bf_off_p, uint64* bf_len_p);
static uint64 read_result(uint64** input_posp);
static void copyin(char* addr, uint64 val, uint64 size, uint64 bf, uint64 bf_off, uint64 bf_len);
static bool copyout(char* addr, uint64 size, uint64* res);
static void setup_control_pipes();

#include "syscalls.h"

#if GOOS_linux
#include "executor_linux.h"
#elif GOOS_fuchsia
#include "executor_fuchsia.h"
#elif GOOS_akaros
#include "executor_akaros.h"
#elif GOOS_freebsd || GOOS_netbsd
#include "executor_bsd.h"
#elif GOOS_windows
#include "executor_windows.h"
#elif GOOS_test
#include "executor_test.h"
#else
#error "unknown OS"
#endif

#include "test.h"

int main(int argc, char** argv)
{
	if (argc == 2 && strcmp(argv[1], "version") == 0) {
		puts(GOOS " " GOARCH " " SYZ_REVISION " " GIT_REVISION);
		return 0;
	}
	if (argc == 2 && strcmp(argv[1], "test") == 0)
		return run_tests();

	os_init(argc, argv, (void*)SYZ_DATA_OFFSET, SYZ_NUM_PAGES * SYZ_PAGE_SIZE);

#if SYZ_EXECUTOR_USES_SHMEM
	if (mmap(&input_data[0], kMaxInput, PROT_READ, MAP_PRIVATE | MAP_FIXED, kInFd, 0) != &input_data[0])
		fail("mmap of input file failed");
	// The output region is the only thing in the executor process for which consistency matters.
	// If it is corrupted, the ipc package will fail to parse its contents and panic.
	// But the fuzzer constantly invents new ways to corrupt the region,
	// so we map the region at a (hopefully) hard to guess address with a random offset,
	// surrounded by unmapped pages.
	// The address chosen must also work on 32-bit kernels with 1GB user address space.
	void* preferred = (void*)(0x1b2bc20000ull + (1 << 20) * (getpid() % 128));
	output_data = (uint32*)mmap(preferred, kMaxOutput,
				    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, kOutFd, 0);
	if (output_data != preferred)
		fail("mmap of output file failed");
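	// Layout of the shared output region: output_data[0] holds the number of
	// completed calls (see write_completed), followed by one variable-length
	// record per call (see write_call_output).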
	// Prevent test programs from messing with these fds.
	// Due to races in collider mode, a program can e.g. ftruncate one of these fds,
	// which will cause the fuzzer to crash.
	close(kInFd);
	close(kOutFd);
#endif

	use_temporary_dir();
	install_segv_handler();
	setup_control_pipes();
#if SYZ_EXECUTOR_USES_FORK_SERVER
	receive_handshake();
#else
	receive_execute();
#endif
	if (flag_cover) {
		for (int i = 0; i < kMaxThreads; i++) {
			threads[i].cov.fd = kCoverFd + i;
			cover_open(&threads[i].cov);
		}
	}

	int status = 0;
	switch (flag_sandbox) {
	case sandbox_none:
		status = do_sandbox_none();
		break;
	case sandbox_setuid:
		status = do_sandbox_setuid();
		break;
	case sandbox_namespace:
		status = do_sandbox_namespace();
		break;
	default:
		fail("unknown sandbox type");
	}
#if SYZ_EXECUTOR_USES_FORK_SERVER
	// Other statuses happen when a fuzzer process manages to kill the loop.
	if (status != kFailStatus && status != kErrorStatus)
		status = kRetryStatus;
	// If an external sandbox process wraps the executor, the out pipe will be closed
	// before the sandbox process exits, which will make the ipc package kill the sandbox.
	// As a result, the sandbox process will exit with status 9 instead of the executor
	// exit status (notably kRetryStatus). Consequently, ipc will treat it as a hard
	// failure rather than a transient one. So we duplicate the exit status on the pipe.
	reply_execute(status);
	errno = 0;
	if (status == kFailStatus)
		fail("loop failed");
	if (status == kErrorStatus)
		error("loop errored");
	// The loop can be killed by a test process with e.g.:
	// ptrace(PTRACE_SEIZE, 1, 0, 0x100040)
	// This is unfortunate, but I don't have a better solution than ignoring it for now.
	exitf("loop exited with status %d", status);
	// Unreachable.
	return 1;
#else
	reply_execute(status);
	return status;
#endif
}

void setup_control_pipes()
{
	if (dup2(0, kInPipeFd) < 0)
		fail("dup2(0, kInPipeFd) failed");
	if (dup2(1, kOutPipeFd) < 0)
		fail("dup2(1, kOutPipeFd) failed");
	if (dup2(2, 1) < 0)
		fail("dup2(2, 1) failed");
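	// After the two dup2 calls above, the control pipes live at kInPipeFd and
	// kOutPipeFd and stdout aliases stderr; the dup2 below aliases stdin to
	// stderr as well, so fds 0-2 all end up pointing at the log.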
	// We used to close(0), but now we dup stderr to stdin to keep fd numbers
	// stable across the executor and C programs generated by pkg/csource.
	if (dup2(2, 0) < 0)
		fail("dup2(2, 0) failed");
}

void parse_env_flags(uint64 flags)
{
	flag_debug = flags & (1 << 0);
	flag_cover = flags & (1 << 1);
	flag_sandbox = sandbox_none;
	if (flags & (1 << 2))
		flag_sandbox = sandbox_setuid;
	else if (flags & (1 << 3))
		flag_sandbox = sandbox_namespace;
	flag_enable_tun = flags & (1 << 4);
	flag_enable_net_dev = flags & (1 << 5);
	flag_enable_fault_injection = flags & (1 << 6);
}

#if SYZ_EXECUTOR_USES_FORK_SERVER
void receive_handshake()
{
	handshake_req req = {};
	int n = read(kInPipeFd, &req, sizeof(req));
	if (n != sizeof(req))
		fail("handshake read failed: %d", n);
	if (req.magic != kInMagic)
		fail("bad handshake magic 0x%llx", req.magic);
	parse_env_flags(req.flags);
	procid = req.pid;
}

void reply_handshake()
{
	handshake_reply reply = {};
	reply.magic = kOutMagic;
	if (write(kOutPipeFd, &reply, sizeof(reply)) != sizeof(reply))
		fail("control pipe write failed");
}
#endif

static execute_req last_execute_req;

void receive_execute()
{
	execute_req& req = last_execute_req;
	if (read(kInPipeFd, &req, sizeof(req)) != (ssize_t)sizeof(req))
		fail("control pipe read failed");
	if (req.magic != kInMagic)
		fail("bad execute request magic 0x%llx", req.magic);
	if (req.prog_size > kMaxInput)
		fail("bad execute prog size 0x%llx", req.prog_size);
	parse_env_flags(req.env_flags);
	procid = req.pid;
	flag_collect_cover = req.exec_flags & (1 << 0);
	flag_dedup_cover = req.exec_flags & (1 << 1);
	flag_inject_fault = req.exec_flags & (1 << 2);
	flag_collect_comps = req.exec_flags & (1 << 3);
	flag_threaded = req.exec_flags & (1 << 4);
	flag_collide = req.exec_flags & (1 << 5);
	flag_fault_call = req.fault_call;
	flag_fault_nth = req.fault_nth;
	if (!flag_threaded)
		flag_collide = false;
	debug("exec opts: pid=%llu threaded=%d collide=%d cover=%d comps=%d dedup=%d fault=%d/%d/%d prog=%llu\n",
	      procid, flag_threaded, flag_collide, flag_collect_cover, flag_collect_comps,
	      flag_dedup_cover, flag_inject_fault, flag_fault_call, flag_fault_nth,
	      req.prog_size);
	if (SYZ_EXECUTOR_USES_SHMEM) {
		// The program comes via shmem, so prog_size must be 0.
		if (req.prog_size)
			fail("need_prog: unexpected program size 0x%llx in shmem mode", req.prog_size);
		return;
	}
	if (req.prog_size == 0)
		fail("need_prog: no program");
	uint64 pos = 0;
	for (;;) {
		ssize_t rv = read(kInPipeFd, input_data + pos, sizeof(input_data) - pos);
		if (rv < 0)
			fail("read failed");
		pos += rv;
		if (rv == 0 || pos >= req.prog_size)
			break;
	}
	if (pos != req.prog_size)
		fail("bad input size %lld, want %lld", pos, req.prog_size);
}

#if GOOS_akaros
void resend_execute(int fd)
{
	execute_req& req = last_execute_req;
	if (write(fd, &req, sizeof(req)) != sizeof(req))
		fail("child pipe header write failed");
	if (write(fd, input_data, req.prog_size) != (ssize_t)req.prog_size)
		fail("child pipe program write failed");
}
#endif

void reply_execute(int status)
{
	execute_reply reply = {};
	reply.magic = kOutMagic;
	reply.done = true;
	reply.status = status;
	if (write(kOutPipeFd, &reply, sizeof(reply)) != sizeof(reply))
		fail("control pipe write failed");
}
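// Example decoding of exec_flags in receive_execute above:
// exec_flags = 0x13 sets collect_cover (1<<0), dedup_cover (1<<1)
// and threaded (1<<4); collide stays off.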
// execute_one executes the program stored in input_data.
void execute_one()
{
	// Duplicate the global collide variable on the stack.
	// The fuzzer once came up with ioctl(fd, FIONREAD, 0x920000),
	// where 0x920000 was exactly the address of collide, so every iteration reset collide to 0.
	bool colliding = false;
#if SYZ_EXECUTOR_USES_SHMEM
	output_pos = output_data;
	write_output(0); // Number of executed syscalls (updated later).
#endif
	uint64 start = current_time_ms();

retry:
	uint64* input_pos = (uint64*)input_data;

	if (flag_cover && !colliding && !flag_threaded)
		cover_enable(&threads[0].cov, flag_collect_comps);

	int call_index = 0;
	for (;;) {
		uint64 call_num = read_input(&input_pos);
		if (call_num == instr_eof)
			break;
		if (call_num == instr_copyin) {
			char* addr = (char*)read_input(&input_pos);
			uint64 typ = read_input(&input_pos);
			switch (typ) {
			case arg_const: {
				uint64 size, bf, bf_off, bf_len;
				uint64 arg = read_const_arg(&input_pos, &size, &bf, &bf_off, &bf_len);
				copyin(addr, arg, size, bf, bf_off, bf_len);
				break;
			}
			case arg_result: {
				uint64 meta = read_input(&input_pos);
				uint64 size = meta & 0xff;
				uint64 bf = meta >> 8;
				uint64 val = read_result(&input_pos);
				copyin(addr, val, size, bf, 0, 0);
				break;
			}
			case arg_data: {
				uint64 size = read_input(&input_pos);
				NONFAILING(memcpy(addr, input_pos, size));
				// Read out the data.
				for (uint64 i = 0; i < (size + 7) / 8; i++)
					read_input(&input_pos);
				break;
			}
			case arg_csum: {
				debug("checksum found at %p\n", addr);
				uint64 size = read_input(&input_pos);
				char* csum_addr = addr;
				uint64 csum_kind = read_input(&input_pos);
				switch (csum_kind) {
				case arg_csum_inet: {
					if (size != 2)
						fail("inet checksum must be 2 bytes, not %llu", size);
					debug("calculating checksum for %p\n", csum_addr);
					struct csum_inet csum;
					csum_inet_init(&csum);
					uint64 chunks_num = read_input(&input_pos);
					uint64 chunk;
					for (chunk = 0; chunk < chunks_num; chunk++) {
						uint64 chunk_kind = read_input(&input_pos);
						uint64 chunk_value = read_input(&input_pos);
						uint64 chunk_size = read_input(&input_pos);
						switch (chunk_kind) {
						case arg_csum_chunk_data:
							debug("#%lld: data chunk, addr: %llx, size: %llu\n", chunk, chunk_value, chunk_size);
							NONFAILING(csum_inet_update(&csum, (const uint8*)chunk_value, chunk_size));
							break;
						case arg_csum_chunk_const:
							if (chunk_size != 2 && chunk_size != 4 && chunk_size != 8)
								fail("bad checksum const chunk size %lld", chunk_size);
							// Here we assume that const values come to us big endian.
							debug("#%lld: const chunk, value: %llx, size: %llu\n", chunk, chunk_value, chunk_size);
							csum_inet_update(&csum, (const uint8*)&chunk_value, chunk_size);
							break;
						default:
							fail("bad checksum chunk kind %llu", chunk_kind);
						}
					}
					uint16 csum_value = csum_inet_digest(&csum);
					debug("writing inet checksum %hx to %p\n", csum_value, csum_addr);
					copyin(csum_addr, csum_value, 2, binary_format_native, 0, 0);
					break;
				}
				default:
					fail("bad checksum kind %llu", csum_kind);
				}
				break;
			}
			default:
				fail("bad argument type %llu", typ);
			}
			continue;
		}
		if (call_num == instr_copyout) {
			read_input(&input_pos); // index
			read_input(&input_pos); // addr
			read_input(&input_pos); // size
			// The copyout will happen when/if the call completes.
			continue;
		}
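		// Example instr_copyin encoding for arg_data: the word sequence
		// (instr_copyin, addr, arg_data, 5, w0) copies the first 5 bytes
		// of the 8-byte word w0 to addr; the payload is padded to whole
		// uint64 words, hence the (size + 7) / 8 reads above.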
		// Normal syscall.
		if (call_num >= ARRAY_SIZE(syscalls))
			fail("invalid command number %llu", call_num);
		uint64 copyout_index = read_input(&input_pos);
		uint64 num_args = read_input(&input_pos);
		if (num_args > kMaxArgs)
			fail("command has bad number of arguments %llu", num_args);
		uint64 args[kMaxArgs] = {};
		for (uint64 i = 0; i < num_args; i++)
			args[i] = read_arg(&input_pos);
		for (uint64 i = num_args; i < kMaxArgs; i++)
			args[i] = 0;
		thread_t* th = schedule_call(call_index++, call_num, colliding, copyout_index,
					     num_args, args, input_pos);

		if (colliding && (call_index % 2) == 0) {
			// Don't wait for every other call.
			// We already have results from the previous execution.
		} else if (flag_threaded) {
			// Wait for call completion.
			// Note: sys knows about this timeout when it generates timespec/timeval values.
			const uint64 timeout_ms = flag_debug ? 1000 : 45;
			if (event_timedwait(&th->done, timeout_ms))
				handle_completion(th);
			// Check if any of the previous calls have completed.
			for (int i = 0; i < kMaxThreads; i++) {
				th = &threads[i];
				if (th->executing && event_isset(&th->done))
					handle_completion(th);
			}
		} else {
			// Execute directly.
			if (th != &threads[0])
				fail("using non-main thread in non-thread mode");
			event_reset(&th->ready);
			execute_call(th);
			event_set(&th->done);
			handle_completion(th);
		}
	}

	if (!colliding && !collide && running > 0) {
		// Give unfinished syscalls some additional time.
		last_scheduled = 0;
		uint64 wait = 100;
		uint64 wait_start = current_time_ms();
		uint64 wait_end = wait_start + wait;
		if (wait_end < start + 800)
			wait_end = start + 800;
		while (running > 0 && current_time_ms() <= wait_end) {
			sleep_ms(1);
			for (int i = 0; i < kMaxThreads; i++) {
				thread_t* th = &threads[i];
				if (th->executing && event_isset(&th->done))
					handle_completion(th);
			}
		}
		// Write output coverage for unfinished calls.
		if (running > 0) {
			for (int i = 0; i < kMaxThreads; i++) {
				thread_t* th = &threads[i];
				if (th->executing) {
					if (flag_cover)
						cover_collect(&th->cov);
					write_call_output(th, false);
				}
			}
		}
	}

	if (flag_collide && !flag_inject_fault && !colliding && !collide) {
		debug("enabling collider\n");
		collide = colliding = true;
		goto retry;
	}
}
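// In collide mode the same program is re-executed, but the loop above only
// waits on every other call, so adjacent calls race with each other; this is
// a cheap way to provoke data races between related syscalls.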
thread_t* schedule_call(int call_index, int call_num, bool colliding, uint64 copyout_index, uint64 num_args, uint64* args, uint64* pos)
{
	// Find a spare thread to execute the call.
	int i;
	for (i = 0; i < kMaxThreads; i++) {
		thread_t* th = &threads[i];
		if (!th->created)
			thread_create(th, i);
		if (event_isset(&th->done)) {
			if (th->executing)
				handle_completion(th);
			break;
		}
	}
	if (i == kMaxThreads)
		exitf("out of threads");
	thread_t* th = &threads[i];
	debug("scheduling call %d [%s] on thread %d\n", call_index, syscalls[call_num].name, th->id);
	if (event_isset(&th->ready) || !event_isset(&th->done) || th->executing)
		fail("bad thread state in schedule: ready=%d done=%d executing=%d",
		     event_isset(&th->ready), event_isset(&th->done), th->executing);
	last_scheduled = th;
	th->colliding = colliding;
	th->copyout_pos = pos;
	th->copyout_index = copyout_index;
	event_reset(&th->done);
	th->executing = true;
	th->call_index = call_index;
	th->call_num = call_num;
	th->num_args = num_args;
	for (int i = 0; i < kMaxArgs; i++)
		th->args[i] = args[i];
	event_set(&th->ready);
	running++;
	return th;
}

#if SYZ_EXECUTOR_USES_SHMEM
template <typename cover_t>
void write_coverage_signal(thread_t* th, uint32* signal_count_pos, uint32* cover_count_pos)
{
	// Write out feedback signals.
	// Currently these are code edges computed as the xor of two subsequent basic block PCs.
	cover_t* cover_data = ((cover_t*)th->cov.data) + 1;
	uint32 nsig = 0;
	cover_t prev = 0;
	for (uint32 i = 0; i < th->cov.size; i++) {
		cover_t pc = cover_data[i];
		if (!cover_check(pc)) {
			debug("got bad pc: 0x%llx\n", (uint64)pc);
			doexit(0);
		}
		cover_t sig = pc ^ prev;
		prev = hash(pc);
		if (dedup(sig))
			continue;
		write_output(sig);
		nsig++;
	}
	// Write out the number of signals.
	*signal_count_pos = nsig;
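	// Example: for a trace [A, B] the loop emits the signals A ^ 0 and
	// B ^ hash(A); hashing the previous PC makes the signal direction-sensitive,
	// so the edge A->B produces a different signal than B->A.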
	if (!flag_collect_cover)
		return;
	// Write out real coverage (basic block PCs).
	uint32 cover_size = th->cov.size;
	if (flag_dedup_cover) {
		cover_t* end = cover_data + cover_size;
		std::sort(cover_data, end);
		cover_size = std::unique(cover_data, end) - cover_data;
	}
	// Truncate PCs to uint32, assuming that they fit into 32 bits.
	// True for x86_64 and arm64 without KASLR.
	for (uint32 i = 0; i < cover_size; i++)
		write_output(cover_data[i]);
	*cover_count_pos = cover_size;
}
#endif

void handle_completion(thread_t* th)
{
	debug("completion of call %d [%s] on thread %d\n", th->call_index, syscalls[th->call_num].name, th->id);
	if (event_isset(&th->ready) || !event_isset(&th->done) || !th->executing)
		fail("bad thread state in completion: ready=%d done=%d executing=%d",
		     event_isset(&th->ready), event_isset(&th->done), th->executing);
	if (th->res != (long)-1)
		copyout_call_results(th);
	if (!collide && !th->colliding)
		write_call_output(th, true);
	th->executing = false;
	running--;
	if (running < 0)
		fail("running = %d", running);
}

void copyout_call_results(thread_t* th)
{
	if (th->copyout_index != no_copyout) {
		if (th->copyout_index >= kMaxCommands)
			fail("result idx %lld overflows kMaxCommands", th->copyout_index);
		results[th->copyout_index].executed = true;
		results[th->copyout_index].val = th->res;
	}
	for (bool done = false; !done;) {
		uint64 instr = read_input(&th->copyout_pos);
		switch (instr) {
		case instr_copyout: {
			uint64 index = read_input(&th->copyout_pos);
			if (index >= kMaxCommands)
				fail("result idx %lld overflows kMaxCommands", index);
			char* addr = (char*)read_input(&th->copyout_pos);
			uint64 size = read_input(&th->copyout_pos);
			uint64 val = 0;
			if (copyout(addr, size, &val)) {
				results[index].executed = true;
				results[index].val = val;
			}
			debug("copyout 0x%llx from %p\n", val, addr);
			break;
		}
		default:
			done = true;
			break;
		}
	}
}

void write_call_output(thread_t* th, bool finished)
{
	uint32 reserrno = 999;
	uint32 call_flags = call_flag_executed;
	const bool blocked = th != last_scheduled;
	if (finished) {
		reserrno = th->res != -1 ? 0 : th->reserrno;
		call_flags |= call_flag_finished |
			      (blocked ? call_flag_blocked : 0) |
			      (th->fault_injected ? call_flag_fault_injected : 0);
	}
#if SYZ_EXECUTOR_USES_SHMEM
	write_output(th->call_index);
	write_output(th->call_num);
	write_output(reserrno);
	write_output(call_flags);
	uint32* signal_count_pos = write_output(0); // filled in later
	uint32* cover_count_pos = write_output(0); // filled in later
	uint32* comps_count_pos = write_output(0); // filled in later
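	// Record layout so far: call_index, call_num, reserrno, flags, then three
	// counts that are back-filled below once the signal/cover/comps payloads
	// have been written after them.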

	if (flag_collect_comps) {
		// Collect only the comparisons.
		uint32 ncomps = th->cov.size;
		kcov_comparison_t* start = (kcov_comparison_t*)(th->cov.data + sizeof(uint64));
		kcov_comparison_t* end = start + ncomps;
		if ((char*)end > th->cov.data_end)
			fail("too many comparisons %u", ncomps);
		std::sort(start, end);
		ncomps = std::unique(start, end) - start;
		uint32 comps_size = 0;
		for (uint32 i = 0; i < ncomps; ++i) {
			if (start[i].ignore())
				continue;
			comps_size++;
			start[i].write();
		}
		// Write out the number of comparisons.
		*comps_count_pos = comps_size;
	} else if (flag_cover) {
		if (is_kernel_64_bit)
			write_coverage_signal<uint64>(th, signal_count_pos, cover_count_pos);
		else
			write_coverage_signal<uint32>(th, signal_count_pos, cover_count_pos);
	}
	debug("out #%u: index=%u num=%u errno=%d finished=%d blocked=%d sig=%u cover=%u comps=%u\n",
	      completed, th->call_index, th->call_num, reserrno, finished, blocked,
	      *signal_count_pos, *cover_count_pos, *comps_count_pos);
	completed++;
	write_completed(completed);
#else
	call_reply reply;
	reply.header.magic = kOutMagic;
	reply.header.done = 0;
	reply.header.status = 0;
	reply.call_index = th->call_index;
	reply.call_num = th->call_num;
	reply.reserrno = reserrno;
	reply.flags = call_flags;
	reply.signal_size = 0;
	reply.cover_size = 0;
	reply.comps_size = 0;
	if (write(kOutPipeFd, &reply, sizeof(reply)) != sizeof(reply))
		fail("control pipe call write failed");
	debug("out: index=%u num=%u errno=%d finished=%d blocked=%d\n",
	      th->call_index, th->call_num, reserrno, finished, blocked);
#endif
}

void thread_create(thread_t* th, int id)
{
	th->created = true;
	th->id = id;
	th->executing = false;
	event_init(&th->ready);
	event_init(&th->done);
	event_set(&th->done);
	if (flag_threaded)
		thread_start(worker_thread, th);
}

void* worker_thread(void* arg)
{
	thread_t* th = (thread_t*)arg;

	if (flag_cover)
		cover_enable(&th->cov, flag_collect_comps);
	for (;;) {
		event_wait(&th->ready);
		event_reset(&th->ready);
		execute_call(th);
		event_set(&th->done);
	}
	return 0;
}

void execute_call(thread_t* th)
{
	const call_t* call = &syscalls[th->call_num];
	debug("#%d: %s(", th->id, call->name);
	for (int i = 0; i < th->num_args; i++) {
		if (i != 0)
			debug(", ");
		debug("0x%lx", th->args[i]);
	}
	debug(")\n");

	int fail_fd = -1;
	if (flag_inject_fault && th->call_index == flag_fault_call) {
		if (collide)
			fail("both collide and fault injection are enabled");
		debug("injecting fault into %d-th operation\n", flag_fault_nth);
		fail_fd = inject_fault(flag_fault_nth);
	}

	if (flag_cover)
		cover_reset(&th->cov);
	errno = 0;
	th->res = execute_syscall(call, th->args);
	th->reserrno = errno;
	if (th->res == -1 && th->reserrno == 0)
		th->reserrno = EINVAL; // our syz syscalls may misbehave
	if (flag_cover) {
		cover_collect(&th->cov);
		debug("#%d: read cover size = %u\n", th->id, th->cov.size);
		if (th->cov.size >= kCoverSize)
			fail("#%d: too much cover %u", th->id, th->cov.size);
	}
	th->fault_injected = false;

	if (flag_inject_fault && th->call_index == flag_fault_call) {
		th->fault_injected = fault_injected(fail_fd);
		debug("fault injected: %d\n", th->fault_injected);
	}

	if (th->res == -1)
		debug("#%d: %s = errno(%d)\n", th->id, call->name, th->reserrno);
	else
		debug("#%d: %s = 0x%lx\n", th->id, call->name, th->res);
}

#if SYZ_EXECUTOR_USES_SHMEM
static uint32 hash(uint32 a)
{
	a = (a ^ 61) ^ (a >> 16);
	a = a + (a << 3);
	a = a ^ (a >> 4);
	a = a * 0x27d4eb2d;
	a = a ^ (a >> 15);
	return a;
}
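// hash() is a Wang-style 32-bit integer mixing function; write_coverage_signal
// hashes the previous PC so that the xor-based edge signal depends on the
// order of the two PCs, not just on the unordered pair.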
const uint32 dedup_table_size = 8 << 10;
uint32 dedup_table[dedup_table_size];

// Poor man's best-effort hashmap-based deduplication.
// The hashmap is global, which means that we deduplicate across different calls.
// This is OK because we are interested only in new signals.
static bool dedup(uint32 sig)
{
	for (uint32 i = 0; i < 4; i++) {
		uint32 pos = (sig + i) % dedup_table_size;
		if (dedup_table[pos] == sig)
			return true;
		if (dedup_table[pos] == 0) {
			dedup_table[pos] = sig;
			return false;
		}
	}
	dedup_table[sig % dedup_table_size] = sig;
	return false;
}
#endif

void copyin(char* addr, uint64 val, uint64 size, uint64 bf, uint64 bf_off, uint64 bf_len)
{
	if (bf != binary_format_native && (bf_off != 0 || bf_len != 0))
		fail("bitmask for string format %llu/%llu", bf_off, bf_len);
	switch (bf) {
	case binary_format_native:
		NONFAILING(switch (size) {
			case 1:
				STORE_BY_BITMASK(uint8, addr, val, bf_off, bf_len);
				break;
			case 2:
				STORE_BY_BITMASK(uint16, addr, val, bf_off, bf_len);
				break;
			case 4:
				STORE_BY_BITMASK(uint32, addr, val, bf_off, bf_len);
				break;
			case 8:
				STORE_BY_BITMASK(uint64, addr, val, bf_off, bf_len);
				break;
			default:
				fail("copyin: bad argument size %llu", size);
		});
		break;
	case binary_format_strdec:
		if (size != 20)
			fail("bad strdec size %llu", size);
		NONFAILING(sprintf((char*)addr, "%020llu", val));
		break;
	case binary_format_strhex:
		if (size != 18)
			fail("bad strhex size %llu", size);
		NONFAILING(sprintf((char*)addr, "0x%016llx", val));
		break;
	case binary_format_stroct:
		if (size != 23)
			fail("bad stroct size %llu", size);
		NONFAILING(sprintf((char*)addr, "%023llo", val));
		break;
	default:
		fail("unknown binary format %llu", bf);
	}
}

bool copyout(char* addr, uint64 size, uint64* res)
{
	bool ok = false;
	NONFAILING(
	    switch (size) {
		    case 1:
			    *res = *(uint8*)addr;
			    break;
		    case 2:
			    *res = *(uint16*)addr;
			    break;
		    case 4:
			    *res = *(uint32*)addr;
			    break;
		    case 8:
			    *res = *(uint64*)addr;
			    break;
		    default:
			    fail("copyout: bad argument size %llu", size);
	    } __atomic_store_n(&ok, true, __ATOMIC_RELEASE););
	return ok;
}

uint64 read_arg(uint64** input_posp)
{
	uint64 typ = read_input(input_posp);
	switch (typ) {
	case arg_const: {
		uint64 size, bf, bf_off, bf_len;
		uint64 val = read_const_arg(input_posp, &size, &bf, &bf_off, &bf_len);
		if (bf != binary_format_native)
			fail("bad argument binary format %llu", bf);
		if (bf_off != 0 || bf_len != 0)
			fail("bad argument bitfield %llu/%llu", bf_off, bf_len);
		return val;
	}
	case arg_result: {
		uint64 meta = read_input(input_posp);
		uint64 bf = meta >> 8;
		if (bf != binary_format_native)
			fail("bad result argument format %llu", bf);
		return read_result(input_posp);
	}
	default:
		fail("bad argument type %llu", typ);
	}
}
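// A const arg is encoded as two words: a meta word packing
// size | bf<<8 | bf_off<<16 | bf_len<<24 | pid_stride<<32, followed by the value.
// E.g. meta = 0x0000000200000004 denotes a 4-byte native value with pid stride 2,
// so each executor process gets val + 2*procid.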
uint64 read_const_arg(uint64** input_posp, uint64* size_p, uint64* bf_p, uint64* bf_off_p, uint64* bf_len_p)
{
	uint64 meta = read_input(input_posp);
	uint64 val = read_input(input_posp);
	*size_p = meta & 0xff;
	uint64 bf = (meta >> 8) & 0xff;
	*bf_off_p = (meta >> 16) & 0xff;
	*bf_len_p = (meta >> 24) & 0xff;
	uint64 pid_stride = meta >> 32;
	val += pid_stride * procid;
	if (bf == binary_format_bigendian) {
		bf = binary_format_native;
		switch (*size_p) {
		case 2:
			val = htobe16(val);
			break;
		case 4:
			val = htobe32(val);
			break;
		case 8:
			val = htobe64(val);
			break;
		default:
			fail("bad big-endian int size %llu", *size_p);
		}
	}
	*bf_p = bf;
	return val;
}

uint64 read_result(uint64** input_posp)
{
	uint64 idx = read_input(input_posp);
	uint64 op_div = read_input(input_posp);
	uint64 op_add = read_input(input_posp);
	uint64 arg = read_input(input_posp);
	if (idx >= kMaxCommands)
		fail("command refers to bad result %lld", idx);
	if (results[idx].executed) {
		arg = results[idx].val;
		if (op_div != 0)
			arg = arg / op_div;
		arg += op_add;
	}
	return arg;
}

uint64 read_input(uint64** input_posp, bool peek)
{
	uint64* input_pos = *input_posp;
	if ((char*)input_pos >= input_data + kMaxInput)
		fail("input command overflows input %p: [%p:%p)", input_pos, input_data, input_data + kMaxInput);
	if (!peek)
		*input_posp = input_pos + 1;
	return *input_pos;
}

#if SYZ_EXECUTOR_USES_SHMEM
uint32* write_output(uint32 v)
{
	if (output_pos < output_data || (char*)output_pos >= (char*)output_data + kMaxOutput)
		fail("output overflow: pos=%p region=[%p:%p]",
		     output_pos, output_data, (char*)output_data + kMaxOutput);
	*output_pos = v;
	return output_pos++;
}

void write_completed(uint32 completed)
{
	__atomic_store_n(output_data, completed, __ATOMIC_RELEASE);
}
#endif

#if SYZ_EXECUTOR_USES_SHMEM
void kcov_comparison_t::write()
{
	// Write order: type arg1 arg2 (pc is not written, see operator== below).
	write_output((uint32)type);

	// KCOV converts all arguments of size x first to uintx_t and then to
	// uint64. We want to properly extend signed values, e.g. we want
	// int8 c = 0xfe to be represented as 0xfffffffffffffffe.
	// Note that uint8 c = 0xfe will be represented the same way.
	// This is ok because during hints processing we will anyway try
	// the value 0x00000000000000fe.
	switch (type & KCOV_CMP_SIZE_MASK) {
	case KCOV_CMP_SIZE1:
		arg1 = (uint64)(long long)(signed char)arg1;
		arg2 = (uint64)(long long)(signed char)arg2;
		break;
	case KCOV_CMP_SIZE2:
		arg1 = (uint64)(long long)(short)arg1;
		arg2 = (uint64)(long long)(short)arg2;
		break;
	case KCOV_CMP_SIZE4:
		arg1 = (uint64)(long long)(int)arg1;
		arg2 = (uint64)(long long)(int)arg2;
		break;
	}
	bool is_size_8 = (type & KCOV_CMP_SIZE_MASK) == KCOV_CMP_SIZE8;
	if (!is_size_8) {
		write_output((uint32)arg1);
		write_output((uint32)arg2);
		return;
	}
	// 64-bit arguments are written as two 32-bit halves, low half first (little-endian).
	write_output((uint32)(arg1 & 0xFFFFFFFF));
	write_output((uint32)(arg1 >> 32));
	write_output((uint32)(arg2 & 0xFFFFFFFF));
	write_output((uint32)(arg2 >> 32));
}
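// Note: comparison records are variable-length; the consumer recovers the
// length from the size bits of the leading type word (2 or 4 operand words
// follow it).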
bool kcov_comparison_t::ignore() const
{
	// Comparisons with 0 are not interesting; the fuzzer should be able to guess 0's without help.
	if (arg1 == 0 && (arg2 == 0 || (type & KCOV_CMP_CONST)))
		return true;
	if ((type & KCOV_CMP_SIZE_MASK) == KCOV_CMP_SIZE8) {
		// This can be a pointer (assuming a 64-bit kernel).
		// First of all, we want to steer the fuzzer away from our output region.
		// Without this, the fuzzer manages to discover and corrupt it.
		uint64 out_start = (uint64)output_data;
		uint64 out_end = out_start + kMaxOutput;
		if (arg1 >= out_start && arg1 <= out_end)
			return true;
		if (arg2 >= out_start && arg2 <= out_end)
			return true;
#if GOOS_linux
		// Filter out kernel physical memory addresses.
		// These are internal kernel comparisons and should not be interesting.
		// The range covers the first 1TB of the physical mapping.
		uint64 kmem_start = (uint64)0xffff880000000000ull;
		uint64 kmem_end = (uint64)0xffff890000000000ull;
		bool kptr1 = arg1 >= kmem_start && arg1 <= kmem_end;
		bool kptr2 = arg2 >= kmem_start && arg2 <= kmem_end;
		if (kptr1 && kptr2)
			return true;
		if (kptr1 && arg2 == 0)
			return true;
		if (kptr2 && arg1 == 0)
			return true;
#endif
	}
	return false;
}

bool kcov_comparison_t::operator==(const struct kcov_comparison_t& other) const
{
	// We don't check for PC equality now, because it is not used.
	return type == other.type && arg1 == other.arg1 && arg2 == other.arg2;
}

bool kcov_comparison_t::operator<(const struct kcov_comparison_t& other) const
{
	if (type != other.type)
		return type < other.type;
	if (arg1 != other.arg1)
		return arg1 < other.arg1;
	// We don't check for PC equality now, because it is not used.
	return arg2 < other.arg2;
}
#endif

void fail(const char* msg, ...)
{
	int e = errno;
	va_list args;
	va_start(args, msg);
	vfprintf(stderr, msg, args);
	va_end(args);
	fprintf(stderr, " (errno %d)\n", e);
	// ENOMEM/EAGAIN is a frequent cause of failures in a fuzzing context,
	// so handle it here as a non-fatal error.
	doexit((e == ENOMEM || e == EAGAIN) ? kRetryStatus : kFailStatus);
}

void error(const char* msg, ...)
{
	va_list args;
	va_start(args, msg);
	vfprintf(stderr, msg, args);
	va_end(args);
	fprintf(stderr, "\n");
	doexit(kErrorStatus);
}

void exitf(const char* msg, ...)
{
	int e = errno;
	va_list args;
	va_start(args, msg);
	vfprintf(stderr, msg, args);
	va_end(args);
	fprintf(stderr, " (errno %d)\n", e);
	doexit(kRetryStatus);
}

void debug(const char* msg, ...)
{
	if (!flag_debug)
		return;
	va_list args;
	va_start(args, msg);
	vfprintf(stderr, msg, args);
	va_end(args);
	fflush(stderr);
}

void debug_dump_data(const char* data, int length)
{
	if (!flag_debug)
		return;
	for (int i = 0; i < length; i++) {
		debug("%02x ", data[i] & 0xff);
		if (i % 16 == 15)
			debug("\n");
	}
	debug("\n");
}