1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in 12 * the documentation and/or other materials provided with the 13 * distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include "debuggerd/handler.h" 30 31 #include <errno.h> 32 #include <fcntl.h> 33 #include <inttypes.h> 34 #include <linux/futex.h> 35 #include <pthread.h> 36 #include <sched.h> 37 #include <signal.h> 38 #include <stddef.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <sys/capability.h> 43 #include <sys/mman.h> 44 #include <sys/prctl.h> 45 #include <sys/socket.h> 46 #include <sys/syscall.h> 47 #include <sys/un.h> 48 #include <sys/wait.h> 49 #include <unistd.h> 50 51 #include <async_safe/log.h> 52 53 #include "dump_type.h" 54 55 // see man(2) prctl, specifically the section about PR_GET_NAME 56 #define MAX_TASK_NAME_LEN (16) 57 58 #if defined(__LP64__) 59 #define CRASH_DUMP_NAME "crash_dump64" 60 #else 61 #define CRASH_DUMP_NAME "crash_dump32" 62 #endif 63 64 #define CRASH_DUMP_PATH "/system/bin/" CRASH_DUMP_NAME 65 66 // Wrappers that directly invoke the respective syscalls, in case the cached values are invalid. 67 #pragma GCC poison getpid gettid 68 static pid_t __getpid() { 69 return syscall(__NR_getpid); 70 } 71 72 static pid_t __gettid() { 73 return syscall(__NR_gettid); 74 } 75 76 static inline void futex_wait(volatile void* ftx, int value) { 77 syscall(__NR_futex, ftx, FUTEX_WAIT, value, nullptr, nullptr, 0); 78 } 79 80 class ErrnoRestorer { 81 public: 82 ErrnoRestorer() : saved_errno_(errno) { 83 } 84 85 ~ErrnoRestorer() { 86 errno = saved_errno_; 87 } 88 89 private: 90 int saved_errno_; 91 }; 92 93 extern "C" void debuggerd_fallback_handler(siginfo_t*, ucontext_t*, void*); 94 95 static debuggerd_callbacks_t g_callbacks; 96 97 // Mutex to ensure only one crashing thread dumps itself. 98 static pthread_mutex_t crash_mutex = PTHREAD_MUTEX_INITIALIZER; 99 100 // Don't use async_safe_fatal because it exits via abort, which might put us back into 101 // a signal handler. 102 static void __noreturn __printflike(1, 2) fatal(const char* fmt, ...) { 103 va_list args; 104 va_start(args, fmt); 105 async_safe_format_log_va_list(ANDROID_LOG_FATAL, "libc", fmt, args); 106 _exit(1); 107 } 108 109 static void __noreturn __printflike(1, 2) fatal_errno(const char* fmt, ...) { 110 int err = errno; 111 va_list args; 112 va_start(args, fmt); 113 114 char buf[4096]; 115 async_safe_format_buffer_va_list(buf, sizeof(buf), fmt, args); 116 fatal("%s: %s", buf, strerror(err)); 117 } 118 119 static bool get_main_thread_name(char* buf, size_t len) { 120 int fd = open("/proc/self/comm", O_RDONLY | O_CLOEXEC); 121 if (fd == -1) { 122 return false; 123 } 124 125 ssize_t rc = read(fd, buf, len); 126 close(fd); 127 if (rc == -1) { 128 return false; 129 } else if (rc == 0) { 130 // Should never happen? 131 return false; 132 } 133 134 // There's a trailing newline, replace it with a NUL. 135 buf[rc - 1] = '\0'; 136 return true; 137 } 138 139 /* 140 * Writes a summary of the signal to the log file. We do this so that, if 141 * for some reason we're not able to contact debuggerd, there is still some 142 * indication of the failure in the log. 143 * 144 * We could be here as a result of native heap corruption, or while a 145 * mutex is being held, so we don't want to use any libc functions that 146 * could allocate memory or hold a lock. 147 */ 148 static void log_signal_summary(int signum, const siginfo_t* info) { 149 char thread_name[MAX_TASK_NAME_LEN + 1]; // one more for termination 150 if (prctl(PR_GET_NAME, reinterpret_cast<unsigned long>(thread_name), 0, 0, 0) != 0) { 151 strcpy(thread_name, "<name unknown>"); 152 } else { 153 // short names are null terminated by prctl, but the man page 154 // implies that 16 byte names are not. 155 thread_name[MAX_TASK_NAME_LEN] = 0; 156 } 157 158 if (signum == DEBUGGER_SIGNAL) { 159 async_safe_format_log(ANDROID_LOG_INFO, "libc", "Requested dump for tid %d (%s)", __gettid(), 160 thread_name); 161 return; 162 } 163 164 const char* signal_name = "???"; 165 bool has_address = false; 166 switch (signum) { 167 case SIGABRT: 168 signal_name = "SIGABRT"; 169 break; 170 case SIGBUS: 171 signal_name = "SIGBUS"; 172 has_address = true; 173 break; 174 case SIGFPE: 175 signal_name = "SIGFPE"; 176 has_address = true; 177 break; 178 case SIGILL: 179 signal_name = "SIGILL"; 180 has_address = true; 181 break; 182 case SIGSEGV: 183 signal_name = "SIGSEGV"; 184 has_address = true; 185 break; 186 #if defined(SIGSTKFLT) 187 case SIGSTKFLT: 188 signal_name = "SIGSTKFLT"; 189 break; 190 #endif 191 case SIGSYS: 192 signal_name = "SIGSYS"; 193 break; 194 case SIGTRAP: 195 signal_name = "SIGTRAP"; 196 break; 197 } 198 199 // "info" will be null if the siginfo_t information was not available. 200 // Many signals don't have an address or a code. 201 char code_desc[32]; // ", code -6" 202 char addr_desc[32]; // ", fault addr 0x1234" 203 addr_desc[0] = code_desc[0] = 0; 204 if (info != nullptr) { 205 async_safe_format_buffer(code_desc, sizeof(code_desc), ", code %d", info->si_code); 206 if (has_address) { 207 async_safe_format_buffer(addr_desc, sizeof(addr_desc), ", fault addr %p", info->si_addr); 208 } 209 } 210 211 char main_thread_name[MAX_TASK_NAME_LEN + 1]; 212 if (!get_main_thread_name(main_thread_name, sizeof(main_thread_name))) { 213 strncpy(main_thread_name, "<unknown>", sizeof(main_thread_name)); 214 } 215 216 async_safe_format_log( 217 ANDROID_LOG_FATAL, "libc", "Fatal signal %d (%s)%s%s in tid %d (%s), pid %d (%s)", signum, 218 signal_name, code_desc, addr_desc, __gettid(), thread_name, __getpid(), main_thread_name); 219 } 220 221 /* 222 * Returns true if the handler for signal "signum" has SA_SIGINFO set. 223 */ 224 static bool have_siginfo(int signum) { 225 struct sigaction old_action; 226 if (sigaction(signum, nullptr, &old_action) < 0) { 227 async_safe_format_log(ANDROID_LOG_WARN, "libc", "Failed testing for SA_SIGINFO: %s", 228 strerror(errno)); 229 return false; 230 } 231 return (old_action.sa_flags & SA_SIGINFO) != 0; 232 } 233 234 static void raise_caps() { 235 // Raise CapInh to match CapPrm, so that we can set the ambient bits. 236 __user_cap_header_struct capheader; 237 memset(&capheader, 0, sizeof(capheader)); 238 capheader.version = _LINUX_CAPABILITY_VERSION_3; 239 capheader.pid = 0; 240 241 __user_cap_data_struct capdata[2]; 242 if (capget(&capheader, &capdata[0]) == -1) { 243 fatal_errno("capget failed"); 244 } 245 246 if (capdata[0].permitted != capdata[0].inheritable || 247 capdata[1].permitted != capdata[1].inheritable) { 248 capdata[0].inheritable = capdata[0].permitted; 249 capdata[1].inheritable = capdata[1].permitted; 250 251 if (capset(&capheader, &capdata[0]) == -1) { 252 async_safe_format_log(ANDROID_LOG_ERROR, "libc", "capset failed: %s", strerror(errno)); 253 } 254 } 255 256 // Set the ambient capability bits so that crash_dump gets all of our caps and can ptrace us. 257 uint64_t capmask = capdata[0].inheritable; 258 capmask |= static_cast<uint64_t>(capdata[1].inheritable) << 32; 259 for (unsigned long i = 0; i < 64; ++i) { 260 if (capmask & (1ULL << i)) { 261 if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, i, 0, 0) != 0) { 262 async_safe_format_log(ANDROID_LOG_ERROR, "libc", 263 "failed to raise ambient capability %lu: %s", i, strerror(errno)); 264 } 265 } 266 } 267 } 268 269 struct debugger_thread_info { 270 bool crash_dump_started; 271 pid_t crashing_tid; 272 pid_t pseudothread_tid; 273 int signal_number; 274 siginfo_t* info; 275 }; 276 277 // Logging and contacting debuggerd requires free file descriptors, which we might not have. 278 // Work around this by spawning a "thread" that shares its parent's address space, but not its file 279 // descriptor table, so that we can close random file descriptors without affecting the original 280 // process. Note that this doesn't go through pthread_create, so TLS is shared with the spawning 281 // process. 282 static void* pseudothread_stack; 283 284 static DebuggerdDumpType get_dump_type(const debugger_thread_info* thread_info) { 285 if (thread_info->signal_number == DEBUGGER_SIGNAL && thread_info->info->si_value.sival_int) { 286 return kDebuggerdNativeBacktrace; 287 } 288 289 return kDebuggerdTombstone; 290 } 291 292 static int debuggerd_dispatch_pseudothread(void* arg) { 293 debugger_thread_info* thread_info = static_cast<debugger_thread_info*>(arg); 294 295 for (int i = 0; i < 1024; ++i) { 296 close(i); 297 } 298 299 int devnull = TEMP_FAILURE_RETRY(open("/dev/null", O_RDWR)); 300 301 // devnull will be 0. 302 TEMP_FAILURE_RETRY(dup2(devnull, STDOUT_FILENO)); 303 TEMP_FAILURE_RETRY(dup2(devnull, STDERR_FILENO)); 304 305 int pipefds[2]; 306 if (pipe(pipefds) != 0) { 307 fatal_errno("failed to create pipe"); 308 } 309 310 // Don't use fork(2) to avoid calling pthread_atfork handlers. 311 int forkpid = clone(nullptr, nullptr, 0, nullptr); 312 if (forkpid == -1) { 313 async_safe_format_log(ANDROID_LOG_FATAL, "libc", 314 "failed to fork in debuggerd signal handler: %s", strerror(errno)); 315 } else if (forkpid == 0) { 316 TEMP_FAILURE_RETRY(dup2(pipefds[1], STDOUT_FILENO)); 317 close(pipefds[0]); 318 close(pipefds[1]); 319 320 raise_caps(); 321 322 char main_tid[10]; 323 char pseudothread_tid[10]; 324 char debuggerd_dump_type[10]; 325 async_safe_format_buffer(main_tid, sizeof(main_tid), "%d", thread_info->crashing_tid); 326 async_safe_format_buffer(pseudothread_tid, sizeof(pseudothread_tid), "%d", 327 thread_info->pseudothread_tid); 328 async_safe_format_buffer(debuggerd_dump_type, sizeof(debuggerd_dump_type), "%d", 329 get_dump_type(thread_info)); 330 331 execl(CRASH_DUMP_PATH, CRASH_DUMP_NAME, main_tid, pseudothread_tid, debuggerd_dump_type, 332 nullptr); 333 334 fatal_errno("exec failed"); 335 } else { 336 close(pipefds[1]); 337 char buf[4]; 338 ssize_t rc = TEMP_FAILURE_RETRY(read(pipefds[0], &buf, sizeof(buf))); 339 if (rc == -1) { 340 async_safe_format_log(ANDROID_LOG_FATAL, "libc", "read of IPC pipe failed: %s", 341 strerror(errno)); 342 } else if (rc == 0) { 343 async_safe_format_log(ANDROID_LOG_FATAL, "libc", "crash_dump helper failed to exec"); 344 } else if (rc != 1) { 345 async_safe_format_log(ANDROID_LOG_FATAL, "libc", 346 "read of IPC pipe returned unexpected value: %zd", rc); 347 } else { 348 if (buf[0] != '\1') { 349 async_safe_format_log(ANDROID_LOG_FATAL, "libc", "crash_dump helper reported failure"); 350 } else { 351 thread_info->crash_dump_started = true; 352 } 353 } 354 close(pipefds[0]); 355 356 // Don't leave a zombie child. 357 int status; 358 if (TEMP_FAILURE_RETRY(waitpid(forkpid, &status, 0)) == -1) { 359 async_safe_format_log(ANDROID_LOG_FATAL, "libc", "failed to wait for crash_dump helper: %s", 360 strerror(errno)); 361 } else if (WIFSTOPPED(status) || WIFSIGNALED(status)) { 362 async_safe_format_log(ANDROID_LOG_FATAL, "libc", "crash_dump helper crashed or stopped"); 363 thread_info->crash_dump_started = false; 364 } 365 } 366 367 syscall(__NR_exit, 0); 368 return 0; 369 } 370 371 static void resend_signal(siginfo_t* info, bool crash_dump_started) { 372 // Signals can either be fatal or nonfatal. 373 // For fatal signals, crash_dump will send us the signal we crashed with 374 // before resuming us, so that processes using waitpid on us will see that we 375 // exited with the correct exit status (e.g. so that sh will report 376 // "Segmentation fault" instead of "Killed"). For this to work, we need 377 // to deregister our signal handler for that signal before continuing. 378 if (info->si_signo != DEBUGGER_SIGNAL) { 379 signal(info->si_signo, SIG_DFL); 380 } 381 382 // We need to return from our signal handler so that crash_dump can see the 383 // signal via ptrace and dump the thread that crashed. However, returning 384 // does not guarantee that the signal will be thrown again, even for SIGSEGV 385 // and friends, since the signal could have been sent manually. We blocked 386 // all signals when registering the handler, so resending the signal (using 387 // rt_tgsigqueueinfo(2) to preserve SA_SIGINFO) will cause it to be delivered 388 // when our signal handler returns. 389 if (crash_dump_started || info->si_signo != DEBUGGER_SIGNAL) { 390 int rc = syscall(SYS_rt_tgsigqueueinfo, __getpid(), __gettid(), info->si_signo, info); 391 if (rc != 0) { 392 fatal_errno("failed to resend signal during crash"); 393 } 394 } 395 } 396 397 // Handler that does crash dumping by forking and doing the processing in the child. 398 // Do this by ptracing the relevant thread, and then execing debuggerd to do the actual dump. 399 static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void* context) { 400 // Make sure we don't change the value of errno, in case a signal comes in between the process 401 // making a syscall and checking errno. 402 ErrnoRestorer restorer; 403 404 // It's possible somebody cleared the SA_SIGINFO flag, which would mean 405 // our "info" arg holds an undefined value. 406 if (!have_siginfo(signal_number)) { 407 info = nullptr; 408 } 409 410 struct siginfo si = {}; 411 if (!info) { 412 memset(&si, 0, sizeof(si)); 413 si.si_signo = signal_number; 414 si.si_code = SI_USER; 415 si.si_pid = __getpid(); 416 si.si_uid = getuid(); 417 info = &si; 418 } else if (info->si_code >= 0 || info->si_code == SI_TKILL) { 419 // rt_tgsigqueueinfo(2)'s documentation appears to be incorrect on kernels 420 // that contain commit 66dd34a (3.9+). The manpage claims to only allow 421 // negative si_code values that are not SI_TKILL, but 66dd34a changed the 422 // check to allow all si_code values in calls coming from inside the house. 423 } 424 425 void* abort_message = nullptr; 426 if (g_callbacks.get_abort_message) { 427 abort_message = g_callbacks.get_abort_message(); 428 } 429 430 if (prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) == 1) { 431 // This check might be racy if another thread sets NO_NEW_PRIVS, but this should be unlikely, 432 // you can only set NO_NEW_PRIVS to 1, and the effect should be at worst a single missing 433 // ANR trace. 434 debuggerd_fallback_handler(info, static_cast<ucontext_t*>(context), abort_message); 435 resend_signal(info, false); 436 return; 437 } 438 439 // Only allow one thread to handle a signal at a time. 440 int ret = pthread_mutex_lock(&crash_mutex); 441 if (ret != 0) { 442 async_safe_format_log(ANDROID_LOG_INFO, "libc", "pthread_mutex_lock failed: %s", strerror(ret)); 443 return; 444 } 445 446 log_signal_summary(signal_number, info); 447 448 // If this was a fatal crash, populate si_value with the abort message address if possible. 449 // Note that applications can set an abort message without aborting. 450 if (abort_message && signal_number != DEBUGGER_SIGNAL) { 451 info->si_value.sival_ptr = abort_message; 452 } 453 454 debugger_thread_info thread_info = { 455 .crash_dump_started = false, 456 .pseudothread_tid = -1, 457 .crashing_tid = __gettid(), 458 .signal_number = signal_number, 459 .info = info 460 }; 461 462 // Set PR_SET_DUMPABLE to 1, so that crash_dump can ptrace us. 463 int orig_dumpable = prctl(PR_GET_DUMPABLE); 464 if (prctl(PR_SET_DUMPABLE, 1) != 0) { 465 fatal_errno("failed to set dumpable"); 466 } 467 468 // Essentially pthread_create without CLONE_FILES (see debuggerd_dispatch_pseudothread). 469 pid_t child_pid = 470 clone(debuggerd_dispatch_pseudothread, pseudothread_stack, 471 CLONE_THREAD | CLONE_SIGHAND | CLONE_VM | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID, 472 &thread_info, nullptr, nullptr, &thread_info.pseudothread_tid); 473 if (child_pid == -1) { 474 fatal_errno("failed to spawn debuggerd dispatch thread"); 475 } 476 477 // Wait for the child to start... 478 futex_wait(&thread_info.pseudothread_tid, -1); 479 480 // and then wait for it to finish. 481 futex_wait(&thread_info.pseudothread_tid, child_pid); 482 483 // Restore PR_SET_DUMPABLE to its original value. 484 if (prctl(PR_SET_DUMPABLE, orig_dumpable) != 0) { 485 fatal_errno("failed to restore dumpable"); 486 } 487 488 // Signals can either be fatal or nonfatal. 489 // For fatal signals, crash_dump will PTRACE_CONT us with the signal we 490 // crashed with, so that processes using waitpid on us will see that we 491 // exited with the correct exit status (e.g. so that sh will report 492 // "Segmentation fault" instead of "Killed"). For this to work, we need 493 // to deregister our signal handler for that signal before continuing. 494 if (signal_number != DEBUGGER_SIGNAL) { 495 signal(signal_number, SIG_DFL); 496 } 497 498 resend_signal(info, thread_info.crash_dump_started); 499 if (info->si_signo == DEBUGGER_SIGNAL) { 500 // If the signal is fatal, don't unlock the mutex to prevent other crashing threads from 501 // starting to dump right before our death. 502 pthread_mutex_unlock(&crash_mutex); 503 } 504 } 505 506 void debuggerd_init(debuggerd_callbacks_t* callbacks) { 507 if (callbacks) { 508 g_callbacks = *callbacks; 509 } 510 511 void* thread_stack_allocation = 512 mmap(nullptr, PAGE_SIZE * 3, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); 513 if (thread_stack_allocation == MAP_FAILED) { 514 fatal_errno("failed to allocate debuggerd thread stack"); 515 } 516 517 char* stack = static_cast<char*>(thread_stack_allocation) + PAGE_SIZE; 518 if (mprotect(stack, PAGE_SIZE, PROT_READ | PROT_WRITE) != 0) { 519 fatal_errno("failed to mprotect debuggerd thread stack"); 520 } 521 522 // Stack grows negatively, set it to the last byte in the page... 523 stack = (stack + PAGE_SIZE - 1); 524 // and align it. 525 stack -= 15; 526 pseudothread_stack = stack; 527 528 struct sigaction action; 529 memset(&action, 0, sizeof(action)); 530 sigfillset(&action.sa_mask); 531 action.sa_sigaction = debuggerd_signal_handler; 532 action.sa_flags = SA_RESTART | SA_SIGINFO; 533 534 // Use the alternate signal stack if available so we can catch stack overflows. 535 action.sa_flags |= SA_ONSTACK; 536 debuggerd_register_handlers(&action); 537 } 538