Home | History | Annotate | Download | only in debuggerd
      1 /*
      2  * Copyright 2016, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <arpa/inet.h>
     18 #include <dirent.h>
     19 #include <fcntl.h>
     20 #include <stdlib.h>
     21 #include <sys/prctl.h>
     22 #include <sys/ptrace.h>
     23 #include <sys/types.h>
     24 #include <sys/un.h>
     25 #include <sys/wait.h>
     26 #include <syscall.h>
     27 #include <unistd.h>
     28 
     29 #include <limits>
     30 #include <map>
     31 #include <memory>
     32 #include <set>
     33 #include <vector>
     34 
     35 #include <android-base/file.h>
     36 #include <android-base/logging.h>
     37 #include <android-base/parseint.h>
     38 #include <android-base/properties.h>
     39 #include <android-base/stringprintf.h>
     40 #include <android-base/strings.h>
     41 #include <android-base/unique_fd.h>
     42 #include <cutils/sockets.h>
     43 #include <log/log.h>
     44 #include <private/android_filesystem_config.h>
     45 #include <procinfo/process.h>
     46 
     47 #define ATRACE_TAG ATRACE_TAG_BIONIC
     48 #include <utils/Trace.h>
     49 
     50 #include <unwindstack/Regs.h>
     51 
     52 #include "libdebuggerd/backtrace.h"
     53 #include "libdebuggerd/tombstone.h"
     54 #include "libdebuggerd/utility.h"
     55 
     56 #include "debuggerd/handler.h"
     57 #include "tombstoned/tombstoned.h"
     58 
     59 #include "protocol.h"
     60 #include "util.h"
     61 
     62 using android::base::unique_fd;
     63 using android::base::StringPrintf;
     64 
     65 using unwindstack::Regs;
     66 
     67 static bool pid_contains_tid(int pid_proc_fd, pid_t tid) {
     68   struct stat st;
     69   std::string task_path = StringPrintf("task/%d", tid);
     70   return fstatat(pid_proc_fd, task_path.c_str(), &st, 0) == 0;
     71 }
     72 
     73 static pid_t get_tracer(pid_t tracee) {
     74   // Check to see if the thread is being ptraced by another process.
     75   android::procinfo::ProcessInfo process_info;
     76   if (android::procinfo::GetProcessInfo(tracee, &process_info)) {
     77     return process_info.tracer;
     78   }
     79   return -1;
     80 }
     81 
     82 // Attach to a thread, and verify that it's still a member of the given process
     83 static bool ptrace_seize_thread(int pid_proc_fd, pid_t tid, std::string* error, int flags = 0) {
     84   if (ptrace(PTRACE_SEIZE, tid, 0, flags) != 0) {
     85     if (errno == EPERM) {
     86       pid_t tracer = get_tracer(tid);
     87       if (tracer != -1) {
     88         *error = StringPrintf("failed to attach to thread %d, already traced by %d (%s)", tid,
     89                               tracer, get_process_name(tracer).c_str());
     90         return false;
     91       }
     92     }
     93 
     94     *error = StringPrintf("failed to attach to thread %d: %s", tid, strerror(errno));
     95     return false;
     96   }
     97 
     98   // Make sure that the task we attached to is actually part of the pid we're dumping.
     99   if (!pid_contains_tid(pid_proc_fd, tid)) {
    100     if (ptrace(PTRACE_DETACH, tid, 0, 0) != 0) {
    101       PLOG(WARNING) << "failed to detach from thread " << tid;
    102     }
    103     *error = StringPrintf("thread %d is not in process", tid);
    104     return false;
    105   }
    106 
    107   return true;
    108 }
    109 
    110 static bool wait_for_stop(pid_t tid, int* received_signal) {
    111   while (true) {
    112     int status;
    113     pid_t result = waitpid(tid, &status, __WALL);
    114     if (result != tid) {
    115       PLOG(ERROR) << "waitpid failed on " << tid << " while detaching";
    116       return false;
    117     }
    118 
    119     if (WIFSTOPPED(status)) {
    120       if (status >> 16 == PTRACE_EVENT_STOP) {
    121         *received_signal = 0;
    122       } else {
    123         *received_signal = WSTOPSIG(status);
    124       }
    125       return true;
    126     }
    127   }
    128 }
    129 
    130 // Interrupt a process and wait for it to be interrupted.
    131 static bool ptrace_interrupt(pid_t tid, int* received_signal) {
    132   if (ptrace(PTRACE_INTERRUPT, tid, 0, 0) == 0) {
    133     return wait_for_stop(tid, received_signal);
    134   }
    135 
    136   PLOG(ERROR) << "failed to interrupt " << tid << " to detach";
    137   return false;
    138 }
    139 
    140 static bool activity_manager_notify(pid_t pid, int signal, const std::string& amfd_data) {
    141   ATRACE_CALL();
    142   android::base::unique_fd amfd(socket_local_client(
    143       "/data/system/ndebugsocket", ANDROID_SOCKET_NAMESPACE_FILESYSTEM, SOCK_STREAM));
    144   if (amfd.get() == -1) {
    145     PLOG(ERROR) << "unable to connect to activity manager";
    146     return false;
    147   }
    148 
    149   struct timeval tv = {
    150     .tv_sec = 1,
    151     .tv_usec = 0,
    152   };
    153   if (setsockopt(amfd.get(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) == -1) {
    154     PLOG(ERROR) << "failed to set send timeout on activity manager socket";
    155     return false;
    156   }
    157   tv.tv_sec = 3;  // 3 seconds on handshake read
    158   if (setsockopt(amfd.get(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) == -1) {
    159     PLOG(ERROR) << "failed to set receive timeout on activity manager socket";
    160     return false;
    161   }
    162 
    163   // Activity Manager protocol: binary 32-bit network-byte-order ints for the
    164   // pid and signal number, followed by the raw text of the dump, culminating
    165   // in a zero byte that marks end-of-data.
    166   uint32_t datum = htonl(pid);
    167   if (!android::base::WriteFully(amfd, &datum, 4)) {
    168     PLOG(ERROR) << "AM pid write failed";
    169     return false;
    170   }
    171   datum = htonl(signal);
    172   if (!android::base::WriteFully(amfd, &datum, 4)) {
    173     PLOG(ERROR) << "AM signal write failed";
    174     return false;
    175   }
    176   if (!android::base::WriteFully(amfd, amfd_data.c_str(), amfd_data.size() + 1)) {
    177     PLOG(ERROR) << "AM data write failed";
    178     return false;
    179   }
    180 
    181   // 3 sec timeout reading the ack; we're fine if the read fails.
    182   char ack;
    183   android::base::ReadFully(amfd, &ack, 1);
    184   return true;
    185 }
    186 
    187 // Globals used by the abort handler.
    188 static pid_t g_target_thread = -1;
    189 static bool g_tombstoned_connected = false;
    190 static unique_fd g_tombstoned_socket;
    191 static unique_fd g_output_fd;
    192 
    193 static void DefuseSignalHandlers() {
    194   // Don't try to dump ourselves.
    195   struct sigaction action = {};
    196   action.sa_handler = SIG_DFL;
    197   debuggerd_register_handlers(&action);
    198 
    199   sigset_t mask;
    200   sigemptyset(&mask);
    201   if (sigprocmask(SIG_SETMASK, &mask, nullptr) != 0) {
    202     PLOG(FATAL) << "failed to set signal mask";
    203   }
    204 }
    205 
    206 static void Initialize(char** argv) {
    207   android::base::InitLogging(argv);
    208   android::base::SetAborter([](const char* abort_msg) {
    209     // If we abort before we get an output fd, contact tombstoned to let any
    210     // potential listeners know that we failed.
    211     if (!g_tombstoned_connected) {
    212       if (!tombstoned_connect(g_target_thread, &g_tombstoned_socket, &g_output_fd,
    213                               kDebuggerdAnyIntercept)) {
    214         // We failed to connect, not much we can do.
    215         LOG(ERROR) << "failed to connected to tombstoned to report failure";
    216         _exit(1);
    217       }
    218     }
    219 
    220     dprintf(g_output_fd.get(), "crash_dump failed to dump process");
    221     if (g_target_thread != 1) {
    222       dprintf(g_output_fd.get(), " %d: %s\n", g_target_thread, abort_msg);
    223     } else {
    224       dprintf(g_output_fd.get(), ": %s\n", abort_msg);
    225     }
    226 
    227     _exit(1);
    228   });
    229 }
    230 
    231 static void ParseArgs(int argc, char** argv, pid_t* pseudothread_tid, DebuggerdDumpType* dump_type) {
    232   if (argc != 4) {
    233     LOG(FATAL) << "wrong number of args: " << argc << " (expected 4)";
    234   }
    235 
    236   if (!android::base::ParseInt(argv[1], &g_target_thread, 1, std::numeric_limits<pid_t>::max())) {
    237     LOG(FATAL) << "invalid target tid: " << argv[1];
    238   }
    239 
    240   if (!android::base::ParseInt(argv[2], pseudothread_tid, 1, std::numeric_limits<pid_t>::max())) {
    241     LOG(FATAL) << "invalid pseudothread tid: " << argv[2];
    242   }
    243 
    244   int dump_type_int;
    245   if (!android::base::ParseInt(argv[3], &dump_type_int, 0, 1)) {
    246     LOG(FATAL) << "invalid requested dump type: " << argv[3];
    247   }
    248   *dump_type = static_cast<DebuggerdDumpType>(dump_type_int);
    249 }
    250 
    251 static void ReadCrashInfo(unique_fd& fd, siginfo_t* siginfo,
    252                           std::unique_ptr<unwindstack::Regs>* regs, uintptr_t* abort_address) {
    253   std::aligned_storage<sizeof(CrashInfo) + 1, alignof(CrashInfo)>::type buf;
    254   ssize_t rc = TEMP_FAILURE_RETRY(read(fd.get(), &buf, sizeof(buf)));
    255   if (rc == -1) {
    256     PLOG(FATAL) << "failed to read target ucontext";
    257   } else if (rc != sizeof(CrashInfo)) {
    258     LOG(FATAL) << "read " << rc << " bytes when reading target crash information, expected "
    259                << sizeof(CrashInfo);
    260   }
    261 
    262   CrashInfo* crash_info = reinterpret_cast<CrashInfo*>(&buf);
    263   if (crash_info->version != 1) {
    264     LOG(FATAL) << "version mismatch, expected 1, received " << crash_info->version;
    265   }
    266 
    267   *siginfo = crash_info->siginfo;
    268   regs->reset(Regs::CreateFromUcontext(Regs::CurrentArch(), &crash_info->ucontext));
    269   *abort_address = crash_info->abort_msg_address;
    270 }
    271 
    272 // Wait for a process to clone and return the child's pid.
    273 // Note: this leaves the parent in PTRACE_EVENT_STOP.
    274 static pid_t wait_for_clone(pid_t pid, bool resume_child) {
    275   int status;
    276   pid_t result = TEMP_FAILURE_RETRY(waitpid(pid, &status, __WALL));
    277   if (result == -1) {
    278     PLOG(FATAL) << "failed to waitpid";
    279   }
    280 
    281   if (WIFEXITED(status)) {
    282     LOG(FATAL) << "traced process exited with status " << WEXITSTATUS(status);
    283   } else if (WIFSIGNALED(status)) {
    284     LOG(FATAL) << "traced process exited with signal " << WTERMSIG(status);
    285   } else if (!WIFSTOPPED(status)) {
    286     LOG(FATAL) << "process didn't stop? (status = " << status << ")";
    287   }
    288 
    289   if (status >> 8 != (SIGTRAP | (PTRACE_EVENT_CLONE << 8))) {
    290     LOG(FATAL) << "process didn't stop due to PTRACE_O_TRACECLONE (status = " << status << ")";
    291   }
    292 
    293   pid_t child;
    294   if (ptrace(PTRACE_GETEVENTMSG, pid, 0, &child) != 0) {
    295     PLOG(FATAL) << "failed to get child pid via PTRACE_GETEVENTMSG";
    296   }
    297 
    298   int stop_signal;
    299   if (!wait_for_stop(child, &stop_signal)) {
    300     PLOG(FATAL) << "failed to waitpid on child";
    301   }
    302 
    303   CHECK_EQ(0, stop_signal);
    304 
    305   if (resume_child) {
    306     if (ptrace(PTRACE_CONT, child, 0, 0) != 0) {
    307       PLOG(FATAL) << "failed to resume child (pid = " << child << ")";
    308     }
    309   }
    310 
    311   return child;
    312 }
    313 
    314 static pid_t wait_for_vm_process(pid_t pseudothread_tid) {
    315   // The pseudothread will double-fork, we want its grandchild.
    316   pid_t intermediate = wait_for_clone(pseudothread_tid, true);
    317   pid_t vm_pid = wait_for_clone(intermediate, false);
    318   if (ptrace(PTRACE_DETACH, intermediate, 0, 0) != 0) {
    319     PLOG(FATAL) << "failed to detach from intermediate vm process";
    320   }
    321 
    322   return vm_pid;
    323 }
    324 
    325 int main(int argc, char** argv) {
    326   DefuseSignalHandlers();
    327 
    328   atrace_begin(ATRACE_TAG, "before reparent");
    329   pid_t target_process = getppid();
    330 
    331   // Open /proc/`getppid()` before we daemonize.
    332   std::string target_proc_path = "/proc/" + std::to_string(target_process);
    333   int target_proc_fd = open(target_proc_path.c_str(), O_DIRECTORY | O_RDONLY);
    334   if (target_proc_fd == -1) {
    335     PLOG(FATAL) << "failed to open " << target_proc_path;
    336   }
    337 
    338   // Make sure getppid() hasn't changed.
    339   if (getppid() != target_process) {
    340     LOG(FATAL) << "parent died";
    341   }
    342   atrace_end(ATRACE_TAG);
    343 
    344   // Reparent ourselves to init, so that the signal handler can waitpid on the
    345   // original process to avoid leaving a zombie for non-fatal dumps.
    346   // Move the input/output pipes off of stdout/stderr, out of paranoia.
    347   unique_fd output_pipe(dup(STDOUT_FILENO));
    348   unique_fd input_pipe(dup(STDIN_FILENO));
    349 
    350   unique_fd fork_exit_read, fork_exit_write;
    351   if (!Pipe(&fork_exit_read, &fork_exit_write)) {
    352     PLOG(FATAL) << "failed to create pipe";
    353   }
    354 
    355   pid_t forkpid = fork();
    356   if (forkpid == -1) {
    357     PLOG(FATAL) << "fork failed";
    358   } else if (forkpid == 0) {
    359     fork_exit_read.reset();
    360   } else {
    361     // We need the pseudothread to live until we get around to verifying the vm pid against it.
    362     // The last thing it does is block on a waitpid on us, so wait until our child tells us to die.
    363     fork_exit_write.reset();
    364     char buf;
    365     TEMP_FAILURE_RETRY(read(fork_exit_read.get(), &buf, sizeof(buf)));
    366     _exit(0);
    367   }
    368 
    369   ATRACE_NAME("after reparent");
    370   pid_t pseudothread_tid;
    371   DebuggerdDumpType dump_type;
    372   uintptr_t abort_address = 0;
    373 
    374   Initialize(argv);
    375   ParseArgs(argc, argv, &pseudothread_tid, &dump_type);
    376 
    377   // Die if we take too long.
    378   //
    379   // Note: processes with many threads and minidebug-info can take a bit to
    380   //       unwind, do not make this too small. b/62828735
    381   alarm(30);
    382 
    383   // Get the process name (aka cmdline).
    384   std::string process_name = get_process_name(g_target_thread);
    385 
    386   // Collect the list of open files.
    387   OpenFilesList open_files;
    388   {
    389     ATRACE_NAME("open files");
    390     populate_open_files_list(g_target_thread, &open_files);
    391   }
    392 
    393   // In order to reduce the duration that we pause the process for, we ptrace
    394   // the threads, fetch their registers and associated information, and then
    395   // fork a separate process as a snapshot of the process's address space.
    396   std::set<pid_t> threads;
    397   if (!android::procinfo::GetProcessTids(g_target_thread, &threads)) {
    398     PLOG(FATAL) << "failed to get process threads";
    399   }
    400 
    401   std::map<pid_t, ThreadInfo> thread_info;
    402   siginfo_t siginfo;
    403   std::string error;
    404 
    405   {
    406     ATRACE_NAME("ptrace");
    407     for (pid_t thread : threads) {
    408       // Trace the pseudothread separately, so we can use different options.
    409       if (thread == pseudothread_tid) {
    410         continue;
    411       }
    412 
    413       if (!ptrace_seize_thread(target_proc_fd, thread, &error)) {
    414         bool fatal = thread == g_target_thread;
    415         LOG(fatal ? FATAL : WARNING) << error;
    416       }
    417 
    418       ThreadInfo info;
    419       info.pid = target_process;
    420       info.tid = thread;
    421       info.process_name = process_name;
    422       info.thread_name = get_thread_name(thread);
    423 
    424       if (!ptrace_interrupt(thread, &info.signo)) {
    425         PLOG(WARNING) << "failed to ptrace interrupt thread " << thread;
    426         ptrace(PTRACE_DETACH, thread, 0, 0);
    427         continue;
    428       }
    429 
    430       if (thread == g_target_thread) {
    431         // Read the thread's registers along with the rest of the crash info out of the pipe.
    432         ReadCrashInfo(input_pipe, &siginfo, &info.registers, &abort_address);
    433         info.siginfo = &siginfo;
    434         info.signo = info.siginfo->si_signo;
    435       } else {
    436         info.registers.reset(Regs::RemoteGet(thread));
    437         if (!info.registers) {
    438           PLOG(WARNING) << "failed to fetch registers for thread " << thread;
    439           ptrace(PTRACE_DETACH, thread, 0, 0);
    440           continue;
    441         }
    442       }
    443 
    444       thread_info[thread] = std::move(info);
    445     }
    446   }
    447 
    448   // Trace the pseudothread with PTRACE_O_TRACECLONE and tell it to fork.
    449   if (!ptrace_seize_thread(target_proc_fd, pseudothread_tid, &error, PTRACE_O_TRACECLONE)) {
    450     LOG(FATAL) << "failed to seize pseudothread: " << error;
    451   }
    452 
    453   if (TEMP_FAILURE_RETRY(write(output_pipe.get(), "\1", 1)) != 1) {
    454     PLOG(FATAL) << "failed to write to pseudothread";
    455   }
    456 
    457   pid_t vm_pid = wait_for_vm_process(pseudothread_tid);
    458   if (ptrace(PTRACE_DETACH, pseudothread_tid, 0, 0) != 0) {
    459     PLOG(FATAL) << "failed to detach from pseudothread";
    460   }
    461 
    462   // The pseudothread can die now.
    463   fork_exit_write.reset();
    464 
    465   // Defer the message until later, for readability.
    466   bool wait_for_gdb = android::base::GetBoolProperty("debug.debuggerd.wait_for_gdb", false);
    467   if (siginfo.si_signo == DEBUGGER_SIGNAL) {
    468     wait_for_gdb = false;
    469   }
    470 
    471   // Detach from all of our attached threads before resuming.
    472   for (const auto& [tid, thread] : thread_info) {
    473     int resume_signal = thread.signo == DEBUGGER_SIGNAL ? 0 : thread.signo;
    474     if (wait_for_gdb) {
    475       resume_signal = 0;
    476       if (tgkill(target_process, tid, SIGSTOP) != 0) {
    477         PLOG(WARNING) << "failed to send SIGSTOP to " << tid;
    478       }
    479     }
    480 
    481     LOG(DEBUG) << "detaching from thread " << tid;
    482     if (ptrace(PTRACE_DETACH, tid, 0, resume_signal) != 0) {
    483       PLOG(ERROR) << "failed to detach from thread " << tid;
    484     }
    485   }
    486 
    487   // Drop our capabilities now that we've fetched all of the information we need.
    488   drop_capabilities();
    489 
    490   {
    491     ATRACE_NAME("tombstoned_connect");
    492     LOG(INFO) << "obtaining output fd from tombstoned, type: " << dump_type;
    493     g_tombstoned_connected =
    494         tombstoned_connect(g_target_thread, &g_tombstoned_socket, &g_output_fd, dump_type);
    495   }
    496 
    497   if (g_tombstoned_connected) {
    498     if (TEMP_FAILURE_RETRY(dup2(g_output_fd.get(), STDOUT_FILENO)) == -1) {
    499       PLOG(ERROR) << "failed to dup2 output fd (" << g_output_fd.get() << ") to STDOUT_FILENO";
    500     }
    501   } else {
    502     unique_fd devnull(TEMP_FAILURE_RETRY(open("/dev/null", O_RDWR)));
    503     TEMP_FAILURE_RETRY(dup2(devnull.get(), STDOUT_FILENO));
    504     g_output_fd = std::move(devnull);
    505   }
    506 
    507   LOG(INFO) << "performing dump of process " << target_process << " (target tid = " << g_target_thread
    508             << ")";
    509 
    510   int signo = siginfo.si_signo;
    511   bool fatal_signal = signo != DEBUGGER_SIGNAL;
    512   bool backtrace = false;
    513 
    514   // si_value is special when used with DEBUGGER_SIGNAL.
    515   //   0: dump tombstone
    516   //   1: dump backtrace
    517   if (!fatal_signal) {
    518     int si_val = siginfo.si_value.sival_int;
    519     if (si_val == 0) {
    520       backtrace = false;
    521     } else if (si_val == 1) {
    522       backtrace = true;
    523     } else {
    524       LOG(WARNING) << "unknown si_value value " << si_val;
    525     }
    526   }
    527 
    528   // TODO: Use seccomp to lock ourselves down.
    529   std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(vm_pid, false));
    530   if (!map) {
    531     LOG(FATAL) << "failed to create backtrace map";
    532   }
    533 
    534   std::shared_ptr<unwindstack::Memory> process_memory = map->GetProcessMemory();
    535   if (!process_memory) {
    536     LOG(FATAL) << "failed to get unwindstack::Memory handle";
    537   }
    538 
    539   std::string amfd_data;
    540   if (backtrace) {
    541     ATRACE_NAME("dump_backtrace");
    542     dump_backtrace(std::move(g_output_fd), map.get(), thread_info, g_target_thread);
    543   } else {
    544     ATRACE_NAME("engrave_tombstone");
    545     engrave_tombstone(std::move(g_output_fd), map.get(), process_memory.get(), thread_info,
    546                       g_target_thread, abort_address, &open_files, &amfd_data);
    547   }
    548 
    549   if (fatal_signal) {
    550     // Don't try to notify ActivityManager if it just crashed, or we might hang until timeout.
    551     if (thread_info[target_process].thread_name != "system_server") {
    552       activity_manager_notify(target_process, signo, amfd_data);
    553     }
    554   }
    555 
    556   if (wait_for_gdb) {
    557     // Use ALOGI to line up with output from engrave_tombstone.
    558     ALOGI(
    559         "***********************************************************\n"
    560         "* Process %d has been suspended while crashing.\n"
    561         "* To attach gdbserver and start gdb, run this on the host:\n"
    562         "*\n"
    563         "*     gdbclient.py -p %d\n"
    564         "*\n"
    565         "***********************************************************",
    566         target_process, target_process);
    567   }
    568 
    569   // Close stdout before we notify tombstoned of completion.
    570   close(STDOUT_FILENO);
    571   if (g_tombstoned_connected && !tombstoned_notify_completion(g_tombstoned_socket.get())) {
    572     LOG(ERROR) << "failed to notify tombstoned of completion";
    573   }
    574 
    575   return 0;
    576 }
    577