Home | History | Annotate | Download | only in handler
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  *  * Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  *  * Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in
     12  *    the documentation and/or other materials provided with the
     13  *    distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     16  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     17  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     18  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
     19  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
     22  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
     25  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 #include "debuggerd/handler.h"
     30 
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <linux/futex.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/capability.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <unistd.h>
     50 
     51 #include <async_safe/log.h>
     52 
     53 #include "dump_type.h"
     54 
     55 // see man(2) prctl, specifically the section about PR_GET_NAME
     56 #define MAX_TASK_NAME_LEN (16)
     57 
     58 #if defined(__LP64__)
     59 #define CRASH_DUMP_NAME "crash_dump64"
     60 #else
     61 #define CRASH_DUMP_NAME "crash_dump32"
     62 #endif
     63 
     64 #define CRASH_DUMP_PATH "/system/bin/" CRASH_DUMP_NAME
     65 
     66 // Wrappers that directly invoke the respective syscalls, in case the cached values are invalid.
     67 #pragma GCC poison getpid gettid
     68 static pid_t __getpid() {
     69   return syscall(__NR_getpid);
     70 }
     71 
     72 static pid_t __gettid() {
     73   return syscall(__NR_gettid);
     74 }
     75 
     76 static inline void futex_wait(volatile void* ftx, int value) {
     77   syscall(__NR_futex, ftx, FUTEX_WAIT, value, nullptr, nullptr, 0);
     78 }
     79 
     80 class ErrnoRestorer {
     81  public:
     82   ErrnoRestorer() : saved_errno_(errno) {
     83   }
     84 
     85   ~ErrnoRestorer() {
     86     errno = saved_errno_;
     87   }
     88 
     89  private:
     90   int saved_errno_;
     91 };
     92 
// Handler invoked by debuggerd_signal_handler instead of spawning crash_dump when the
// process has NO_NEW_PRIVS set. Only declared here; its definition is outside the portion
// of the code shown in this file.
extern "C" void debuggerd_fallback_handler(siginfo_t*, ucontext_t*, void*);

// Copy of the callbacks passed to debuggerd_init(). The signal handler calls
// get_abort_message through it when that pointer is non-null.
static debuggerd_callbacks_t g_callbacks;

// Mutex to ensure only one crashing thread dumps itself.
static pthread_mutex_t crash_mutex = PTHREAD_MUTEX_INITIALIZER;
     99 
    100 // Don't use async_safe_fatal because it exits via abort, which might put us back into
    101 // a signal handler.
    102 static void __noreturn __printflike(1, 2) fatal(const char* fmt, ...) {
    103   va_list args;
    104   va_start(args, fmt);
    105   async_safe_format_log_va_list(ANDROID_LOG_FATAL, "libc", fmt, args);
    106   _exit(1);
    107 }
    108 
    109 static void __noreturn __printflike(1, 2) fatal_errno(const char* fmt, ...) {
    110   int err = errno;
    111   va_list args;
    112   va_start(args, fmt);
    113 
    114   char buf[4096];
    115   async_safe_format_buffer_va_list(buf, sizeof(buf), fmt, args);
    116   fatal("%s: %s", buf, strerror(err));
    117 }
    118 
    119 static bool get_main_thread_name(char* buf, size_t len) {
    120   int fd = open("/proc/self/comm", O_RDONLY | O_CLOEXEC);
    121   if (fd == -1) {
    122     return false;
    123   }
    124 
    125   ssize_t rc = read(fd, buf, len);
    126   close(fd);
    127   if (rc == -1) {
    128     return false;
    129   } else if (rc == 0) {
    130     // Should never happen?
    131     return false;
    132   }
    133 
    134   // There's a trailing newline, replace it with a NUL.
    135   buf[rc - 1] = '\0';
    136   return true;
    137 }
    138 
    139 /*
    140  * Writes a summary of the signal to the log file.  We do this so that, if
    141  * for some reason we're not able to contact debuggerd, there is still some
    142  * indication of the failure in the log.
    143  *
    144  * We could be here as a result of native heap corruption, or while a
    145  * mutex is being held, so we don't want to use any libc functions that
    146  * could allocate memory or hold a lock.
    147  */
    148 static void log_signal_summary(int signum, const siginfo_t* info) {
    149   char thread_name[MAX_TASK_NAME_LEN + 1];  // one more for termination
    150   if (prctl(PR_GET_NAME, reinterpret_cast<unsigned long>(thread_name), 0, 0, 0) != 0) {
    151     strcpy(thread_name, "<name unknown>");
    152   } else {
    153     // short names are null terminated by prctl, but the man page
    154     // implies that 16 byte names are not.
    155     thread_name[MAX_TASK_NAME_LEN] = 0;
    156   }
    157 
    158   if (signum == DEBUGGER_SIGNAL) {
    159     async_safe_format_log(ANDROID_LOG_INFO, "libc", "Requested dump for tid %d (%s)", __gettid(),
    160                           thread_name);
    161     return;
    162   }
    163 
    164   const char* signal_name = "???";
    165   bool has_address = false;
    166   switch (signum) {
    167     case SIGABRT:
    168       signal_name = "SIGABRT";
    169       break;
    170     case SIGBUS:
    171       signal_name = "SIGBUS";
    172       has_address = true;
    173       break;
    174     case SIGFPE:
    175       signal_name = "SIGFPE";
    176       has_address = true;
    177       break;
    178     case SIGILL:
    179       signal_name = "SIGILL";
    180       has_address = true;
    181       break;
    182     case SIGSEGV:
    183       signal_name = "SIGSEGV";
    184       has_address = true;
    185       break;
    186 #if defined(SIGSTKFLT)
    187     case SIGSTKFLT:
    188       signal_name = "SIGSTKFLT";
    189       break;
    190 #endif
    191     case SIGSYS:
    192       signal_name = "SIGSYS";
    193       break;
    194     case SIGTRAP:
    195       signal_name = "SIGTRAP";
    196       break;
    197   }
    198 
    199   // "info" will be null if the siginfo_t information was not available.
    200   // Many signals don't have an address or a code.
    201   char code_desc[32];  // ", code -6"
    202   char addr_desc[32];  // ", fault addr 0x1234"
    203   addr_desc[0] = code_desc[0] = 0;
    204   if (info != nullptr) {
    205     async_safe_format_buffer(code_desc, sizeof(code_desc), ", code %d", info->si_code);
    206     if (has_address) {
    207       async_safe_format_buffer(addr_desc, sizeof(addr_desc), ", fault addr %p", info->si_addr);
    208     }
    209   }
    210 
    211   char main_thread_name[MAX_TASK_NAME_LEN + 1];
    212   if (!get_main_thread_name(main_thread_name, sizeof(main_thread_name))) {
    213     strncpy(main_thread_name, "<unknown>", sizeof(main_thread_name));
    214   }
    215 
    216   async_safe_format_log(
    217       ANDROID_LOG_FATAL, "libc", "Fatal signal %d (%s)%s%s in tid %d (%s), pid %d (%s)", signum,
    218       signal_name, code_desc, addr_desc, __gettid(), thread_name, __getpid(), main_thread_name);
    219 }
    220 
    221 /*
    222  * Returns true if the handler for signal "signum" has SA_SIGINFO set.
    223  */
    224 static bool have_siginfo(int signum) {
    225   struct sigaction old_action;
    226   if (sigaction(signum, nullptr, &old_action) < 0) {
    227     async_safe_format_log(ANDROID_LOG_WARN, "libc", "Failed testing for SA_SIGINFO: %s",
    228                           strerror(errno));
    229     return false;
    230   }
    231   return (old_action.sa_flags & SA_SIGINFO) != 0;
    232 }
    233 
    234 static void raise_caps() {
    235   // Raise CapInh to match CapPrm, so that we can set the ambient bits.
    236   __user_cap_header_struct capheader;
    237   memset(&capheader, 0, sizeof(capheader));
    238   capheader.version = _LINUX_CAPABILITY_VERSION_3;
    239   capheader.pid = 0;
    240 
    241   __user_cap_data_struct capdata[2];
    242   if (capget(&capheader, &capdata[0]) == -1) {
    243     fatal_errno("capget failed");
    244   }
    245 
    246   if (capdata[0].permitted != capdata[0].inheritable ||
    247       capdata[1].permitted != capdata[1].inheritable) {
    248     capdata[0].inheritable = capdata[0].permitted;
    249     capdata[1].inheritable = capdata[1].permitted;
    250 
    251     if (capset(&capheader, &capdata[0]) == -1) {
    252       async_safe_format_log(ANDROID_LOG_ERROR, "libc", "capset failed: %s", strerror(errno));
    253     }
    254   }
    255 
    256   // Set the ambient capability bits so that crash_dump gets all of our caps and can ptrace us.
    257   uint64_t capmask = capdata[0].inheritable;
    258   capmask |= static_cast<uint64_t>(capdata[1].inheritable) << 32;
    259   for (unsigned long i = 0; i < 64; ++i) {
    260     if (capmask & (1ULL << i)) {
    261       if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, i, 0, 0) != 0) {
    262         async_safe_format_log(ANDROID_LOG_ERROR, "libc",
    263                               "failed to raise ambient capability %lu: %s", i, strerror(errno));
    264       }
    265     }
    266   }
    267 }
    268 
// State shared between the signal handler running on the crashing thread and the
// pseudothread it spawns to launch crash_dump.
struct debugger_thread_info {
  // Set to true by the pseudothread once the crash_dump helper reports success over
  // the pipe; reset to false if the helper is then found to have crashed or stopped.
  bool crash_dump_started;
  // Thread id of the thread that is handling the signal.
  pid_t crashing_tid;
  // Initialized to -1 by the handler. Its address is handed to clone() as the child-tid
  // word, and the handler futex-waits on it to follow the pseudothread's start and exit.
  pid_t pseudothread_tid;
  // Signal number the handler was invoked with.
  int signal_number;
  // siginfo for the signal; the handler substitutes a synthesized one when the real
  // one is not available.
  siginfo_t* info;
};
    276 
// Logging and contacting debuggerd requires free file descriptors, which we might not have.
// Work around this by spawning a "thread" that shares its parent's address space, but not its file
// descriptor table, so that we can close random file descriptors without affecting the original
// process. Note that this doesn't go through pthread_create, so TLS is shared with the spawning
// process.
//
// This is the initial stack pointer passed to clone() for that thread. It is set up once
// by debuggerd_init and reused for every dump.
static void* pseudothread_stack;
    283 
    284 static DebuggerdDumpType get_dump_type(const debugger_thread_info* thread_info) {
    285   if (thread_info->signal_number == DEBUGGER_SIGNAL && thread_info->info->si_value.sival_int) {
    286     return kDebuggerdNativeBacktrace;
    287   }
    288 
    289   return kDebuggerdTombstone;
    290 }
    291 
// Entry point of the pseudothread spawned by debuggerd_signal_handler.
//
// Frees up file descriptors, forks a child that execs crash_dump, waits for the child to
// report over a pipe whether it started successfully, records the outcome in
// thread_info->crash_dump_started, reaps the child, and finally exits this thread.
//
//   arg: pointer to the debugger_thread_info owned by the signal handler.
//
// Does not return normally: the thread ends via the exit syscall (or fatal_errno).
static int debuggerd_dispatch_pseudothread(void* arg) {
  debugger_thread_info* thread_info = static_cast<debugger_thread_info*>(arg);

  // Close the first 1024 descriptors. This thread was cloned without CLONE_FILES, so
  // only its own descriptor table is affected.
  for (int i = 0; i < 1024; ++i) {
    close(i);
  }

  int devnull = TEMP_FAILURE_RETRY(open("/dev/null", O_RDWR));

  // Everything below 1024 was just closed, so devnull is expected to be 0 (stdin);
  // duplicate it onto stdout and stderr as well. The result of open() is not checked.
  TEMP_FAILURE_RETRY(dup2(devnull, STDOUT_FILENO));
  TEMP_FAILURE_RETRY(dup2(devnull, STDERR_FILENO));

  // Pipe over which the crash_dump helper reports its status back to us.
  int pipefds[2];
  if (pipe(pipefds) != 0) {
    fatal_errno("failed to create pipe");
  }

  // Don't use fork(2) to avoid calling pthread_atfork handlers.
  // The code below treats a return value of 0 as "running in the child", i.e. this
  // clone() call is used like fork().
  int forkpid = clone(nullptr, nullptr, 0, nullptr);
  if (forkpid == -1) {
    async_safe_format_log(ANDROID_LOG_FATAL, "libc",
                          "failed to fork in debuggerd signal handler: %s", strerror(errno));
  } else if (forkpid == 0) {
    // Child: make the write end of the pipe our stdout, then drop the original pipe fds.
    TEMP_FAILURE_RETRY(dup2(pipefds[1], STDOUT_FILENO));
    close(pipefds[0]);
    close(pipefds[1]);

    raise_caps();

    // crash_dump takes its parameters as decimal strings on the command line.
    char main_tid[10];
    char pseudothread_tid[10];
    char debuggerd_dump_type[10];
    async_safe_format_buffer(main_tid, sizeof(main_tid), "%d", thread_info->crashing_tid);
    async_safe_format_buffer(pseudothread_tid, sizeof(pseudothread_tid), "%d",
                             thread_info->pseudothread_tid);
    async_safe_format_buffer(debuggerd_dump_type, sizeof(debuggerd_dump_type), "%d",
                             get_dump_type(thread_info));

    execl(CRASH_DUMP_PATH, CRASH_DUMP_NAME, main_tid, pseudothread_tid, debuggerd_dump_type,
          nullptr);

    // Only reached if execl() returned, i.e. failed.
    fatal_errno("exec failed");
  } else {
    // Parent: close our copy of the write end so that read() sees EOF if the child
    // dies without writing anything.
    close(pipefds[1]);
    char buf[4];
    ssize_t rc = TEMP_FAILURE_RETRY(read(pipefds[0], &buf, sizeof(buf)));
    if (rc == -1) {
      async_safe_format_log(ANDROID_LOG_FATAL, "libc", "read of IPC pipe failed: %s",
                            strerror(errno));
    } else if (rc == 0) {
      async_safe_format_log(ANDROID_LOG_FATAL, "libc", "crash_dump helper failed to exec");
    } else if (rc != 1) {
      async_safe_format_log(ANDROID_LOG_FATAL, "libc",
                            "read of IPC pipe returned unexpected value: %zd", rc);
    } else {
      // Exactly one byte was read; only the value '\1' counts as success.
      if (buf[0] != '\1') {
        async_safe_format_log(ANDROID_LOG_FATAL, "libc", "crash_dump helper reported failure");
      } else {
        thread_info->crash_dump_started = true;
      }
    }
    close(pipefds[0]);

    // Don't leave a zombie child.
    int status;
    if (TEMP_FAILURE_RETRY(waitpid(forkpid, &status, 0)) == -1) {
      async_safe_format_log(ANDROID_LOG_FATAL, "libc", "failed to wait for crash_dump helper: %s",
                            strerror(errno));
    } else if (WIFSTOPPED(status) || WIFSIGNALED(status)) {
      async_safe_format_log(ANDROID_LOG_FATAL, "libc", "crash_dump helper crashed or stopped");
      thread_info->crash_dump_started = false;
    }
  }

  // Exit only this thread via the raw exit syscall; the return below is not expected
  // to be reached.
  syscall(__NR_exit, 0);
  return 0;
}
    370 
    371 static void resend_signal(siginfo_t* info, bool crash_dump_started) {
    372   // Signals can either be fatal or nonfatal.
    373   // For fatal signals, crash_dump will send us the signal we crashed with
    374   // before resuming us, so that processes using waitpid on us will see that we
    375   // exited with the correct exit status (e.g. so that sh will report
    376   // "Segmentation fault" instead of "Killed"). For this to work, we need
    377   // to deregister our signal handler for that signal before continuing.
    378   if (info->si_signo != DEBUGGER_SIGNAL) {
    379     signal(info->si_signo, SIG_DFL);
    380   }
    381 
    382   // We need to return from our signal handler so that crash_dump can see the
    383   // signal via ptrace and dump the thread that crashed. However, returning
    384   // does not guarantee that the signal will be thrown again, even for SIGSEGV
    385   // and friends, since the signal could have been sent manually. We blocked
    386   // all signals when registering the handler, so resending the signal (using
    387   // rt_tgsigqueueinfo(2) to preserve SA_SIGINFO) will cause it to be delivered
    388   // when our signal handler returns.
    389   if (crash_dump_started || info->si_signo != DEBUGGER_SIGNAL) {
    390     int rc = syscall(SYS_rt_tgsigqueueinfo, __getpid(), __gettid(), info->si_signo, info);
    391     if (rc != 0) {
    392       fatal_errno("failed to resend signal during crash");
    393     }
    394   }
    395 }
    396 
    397 // Handler that does crash dumping by forking and doing the processing in the child.
    398 // Do this by ptracing the relevant thread, and then execing debuggerd to do the actual dump.
    399 static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void* context) {
    400   // Make sure we don't change the value of errno, in case a signal comes in between the process
    401   // making a syscall and checking errno.
    402   ErrnoRestorer restorer;
    403 
    404   // It's possible somebody cleared the SA_SIGINFO flag, which would mean
    405   // our "info" arg holds an undefined value.
    406   if (!have_siginfo(signal_number)) {
    407     info = nullptr;
    408   }
    409 
    410   struct siginfo si = {};
    411   if (!info) {
    412     memset(&si, 0, sizeof(si));
    413     si.si_signo = signal_number;
    414     si.si_code = SI_USER;
    415     si.si_pid = __getpid();
    416     si.si_uid = getuid();
    417     info = &si;
    418   } else if (info->si_code >= 0 || info->si_code == SI_TKILL) {
    419     // rt_tgsigqueueinfo(2)'s documentation appears to be incorrect on kernels
    420     // that contain commit 66dd34a (3.9+). The manpage claims to only allow
    421     // negative si_code values that are not SI_TKILL, but 66dd34a changed the
    422     // check to allow all si_code values in calls coming from inside the house.
    423   }
    424 
    425   void* abort_message = nullptr;
    426   if (g_callbacks.get_abort_message) {
    427     abort_message = g_callbacks.get_abort_message();
    428   }
    429 
    430   if (prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) == 1) {
    431     // This check might be racy if another thread sets NO_NEW_PRIVS, but this should be unlikely,
    432     // you can only set NO_NEW_PRIVS to 1, and the effect should be at worst a single missing
    433     // ANR trace.
    434     debuggerd_fallback_handler(info, static_cast<ucontext_t*>(context), abort_message);
    435     resend_signal(info, false);
    436     return;
    437   }
    438 
    439   // Only allow one thread to handle a signal at a time.
    440   int ret = pthread_mutex_lock(&crash_mutex);
    441   if (ret != 0) {
    442     async_safe_format_log(ANDROID_LOG_INFO, "libc", "pthread_mutex_lock failed: %s", strerror(ret));
    443     return;
    444   }
    445 
    446   log_signal_summary(signal_number, info);
    447 
    448   // If this was a fatal crash, populate si_value with the abort message address if possible.
    449   // Note that applications can set an abort message without aborting.
    450   if (abort_message && signal_number != DEBUGGER_SIGNAL) {
    451     info->si_value.sival_ptr = abort_message;
    452   }
    453 
    454   debugger_thread_info thread_info = {
    455     .crash_dump_started = false,
    456     .pseudothread_tid = -1,
    457     .crashing_tid = __gettid(),
    458     .signal_number = signal_number,
    459     .info = info
    460   };
    461 
    462   // Set PR_SET_DUMPABLE to 1, so that crash_dump can ptrace us.
    463   int orig_dumpable = prctl(PR_GET_DUMPABLE);
    464   if (prctl(PR_SET_DUMPABLE, 1) != 0) {
    465     fatal_errno("failed to set dumpable");
    466   }
    467 
    468   // Essentially pthread_create without CLONE_FILES (see debuggerd_dispatch_pseudothread).
    469   pid_t child_pid =
    470     clone(debuggerd_dispatch_pseudothread, pseudothread_stack,
    471           CLONE_THREAD | CLONE_SIGHAND | CLONE_VM | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID,
    472           &thread_info, nullptr, nullptr, &thread_info.pseudothread_tid);
    473   if (child_pid == -1) {
    474     fatal_errno("failed to spawn debuggerd dispatch thread");
    475   }
    476 
    477   // Wait for the child to start...
    478   futex_wait(&thread_info.pseudothread_tid, -1);
    479 
    480   // and then wait for it to finish.
    481   futex_wait(&thread_info.pseudothread_tid, child_pid);
    482 
    483   // Restore PR_SET_DUMPABLE to its original value.
    484   if (prctl(PR_SET_DUMPABLE, orig_dumpable) != 0) {
    485     fatal_errno("failed to restore dumpable");
    486   }
    487 
    488   // Signals can either be fatal or nonfatal.
    489   // For fatal signals, crash_dump will PTRACE_CONT us with the signal we
    490   // crashed with, so that processes using waitpid on us will see that we
    491   // exited with the correct exit status (e.g. so that sh will report
    492   // "Segmentation fault" instead of "Killed"). For this to work, we need
    493   // to deregister our signal handler for that signal before continuing.
    494   if (signal_number != DEBUGGER_SIGNAL) {
    495     signal(signal_number, SIG_DFL);
    496   }
    497 
    498   resend_signal(info, thread_info.crash_dump_started);
    499   if (info->si_signo == DEBUGGER_SIGNAL) {
    500     // If the signal is fatal, don't unlock the mutex to prevent other crashing threads from
    501     // starting to dump right before our death.
    502     pthread_mutex_unlock(&crash_mutex);
    503   }
    504 }
    505 
    506 void debuggerd_init(debuggerd_callbacks_t* callbacks) {
    507   if (callbacks) {
    508     g_callbacks = *callbacks;
    509   }
    510 
    511   void* thread_stack_allocation =
    512     mmap(nullptr, PAGE_SIZE * 3, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    513   if (thread_stack_allocation == MAP_FAILED) {
    514     fatal_errno("failed to allocate debuggerd thread stack");
    515   }
    516 
    517   char* stack = static_cast<char*>(thread_stack_allocation) + PAGE_SIZE;
    518   if (mprotect(stack, PAGE_SIZE, PROT_READ | PROT_WRITE) != 0) {
    519     fatal_errno("failed to mprotect debuggerd thread stack");
    520   }
    521 
    522   // Stack grows negatively, set it to the last byte in the page...
    523   stack = (stack + PAGE_SIZE - 1);
    524   // and align it.
    525   stack -= 15;
    526   pseudothread_stack = stack;
    527 
    528   struct sigaction action;
    529   memset(&action, 0, sizeof(action));
    530   sigfillset(&action.sa_mask);
    531   action.sa_sigaction = debuggerd_signal_handler;
    532   action.sa_flags = SA_RESTART | SA_SIGINFO;
    533 
    534   // Use the alternate signal stack if available so we can catch stack overflows.
    535   action.sa_flags |= SA_ONSTACK;
    536   debuggerd_register_handlers(&action);
    537 }
    538