Home | History | Annotate | Download | only in debuggerd
      1 /*
      2  * Copyright 2016, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <arpa/inet.h>
     18 #include <dirent.h>
     19 #include <fcntl.h>
     20 #include <stdlib.h>
     21 #include <sys/capability.h>
     22 #include <sys/prctl.h>
     23 #include <sys/ptrace.h>
     24 #include <sys/types.h>
     25 #include <sys/un.h>
     26 #include <syscall.h>
     27 #include <unistd.h>
     28 
     29 #include <limits>
     30 #include <map>
     31 #include <memory>
     32 #include <set>
     33 #include <vector>
     34 
     35 #include <android-base/file.h>
     36 #include <android-base/logging.h>
     37 #include <android-base/parseint.h>
     38 #include <android-base/properties.h>
     39 #include <android-base/stringprintf.h>
     40 #include <android-base/strings.h>
     41 #include <android-base/unique_fd.h>
     42 #include <cutils/sockets.h>
     43 #include <log/log.h>
     44 #include <private/android_filesystem_config.h>
     45 #include <procinfo/process.h>
     46 #include <selinux/selinux.h>
     47 
     48 #include "backtrace.h"
     49 #include "tombstone.h"
     50 #include "utility.h"
     51 
     52 #include "debuggerd/handler.h"
     53 #include "debuggerd/protocol.h"
     54 #include "debuggerd/tombstoned.h"
     55 #include "debuggerd/util.h"
     56 
     57 using android::base::unique_fd;
     58 using android::base::ReadFileToString;
     59 using android::base::StringPrintf;
     60 using android::base::Trim;
     61 
     62 static std::string get_process_name(pid_t pid) {
     63   std::string result = "<unknown>";
     64   ReadFileToString(StringPrintf("/proc/%d/cmdline", pid), &result);
     65   return result;
     66 }
     67 
     68 static std::string get_thread_name(pid_t tid) {
     69   std::string result = "<unknown>";
     70   ReadFileToString(StringPrintf("/proc/%d/comm", tid), &result);
     71   return Trim(result);
     72 }
     73 
     74 static bool pid_contains_tid(int pid_proc_fd, pid_t tid) {
     75   struct stat st;
     76   std::string task_path = StringPrintf("task/%d", tid);
     77   return fstatat(pid_proc_fd, task_path.c_str(), &st, 0) == 0;
     78 }
     79 
     80 // Attach to a thread, and verify that it's still a member of the given process
     81 static bool ptrace_seize_thread(int pid_proc_fd, pid_t tid, std::string* error) {
     82   if (ptrace(PTRACE_SEIZE, tid, 0, 0) != 0) {
     83     *error = StringPrintf("failed to attach to thread %d: %s", tid, strerror(errno));
     84     return false;
     85   }
     86 
     87   // Make sure that the task we attached to is actually part of the pid we're dumping.
     88   if (!pid_contains_tid(pid_proc_fd, tid)) {
     89     if (ptrace(PTRACE_DETACH, tid, 0, 0) != 0) {
     90       PLOG(FATAL) << "failed to detach from thread " << tid;
     91     }
     92     *error = StringPrintf("thread %d is not in process", tid);
     93     return false;
     94   }
     95 
     96   // Put the task into ptrace-stop state.
     97   if (ptrace(PTRACE_INTERRUPT, tid, 0, 0) != 0) {
     98     PLOG(FATAL) << "failed to interrupt thread " << tid;
     99   }
    100 
    101   return true;
    102 }
    103 
    104 static bool activity_manager_notify(pid_t pid, int signal, const std::string& amfd_data) {
    105   android::base::unique_fd amfd(socket_local_client(
    106       "/data/system/ndebugsocket", ANDROID_SOCKET_NAMESPACE_FILESYSTEM, SOCK_STREAM));
    107   if (amfd.get() == -1) {
    108     PLOG(ERROR) << "unable to connect to activity manager";
    109     return false;
    110   }
    111 
    112   struct timeval tv = {
    113     .tv_sec = 1,
    114     .tv_usec = 0,
    115   };
    116   if (setsockopt(amfd.get(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) == -1) {
    117     PLOG(ERROR) << "failed to set send timeout on activity manager socket";
    118     return false;
    119   }
    120   tv.tv_sec = 3;  // 3 seconds on handshake read
    121   if (setsockopt(amfd.get(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) == -1) {
    122     PLOG(ERROR) << "failed to set receive timeout on activity manager socket";
    123     return false;
    124   }
    125 
    126   // Activity Manager protocol: binary 32-bit network-byte-order ints for the
    127   // pid and signal number, followed by the raw text of the dump, culminating
    128   // in a zero byte that marks end-of-data.
    129   uint32_t datum = htonl(pid);
    130   if (!android::base::WriteFully(amfd, &datum, 4)) {
    131     PLOG(ERROR) << "AM pid write failed";
    132     return false;
    133   }
    134   datum = htonl(signal);
    135   if (!android::base::WriteFully(amfd, &datum, 4)) {
    136     PLOG(ERROR) << "AM signal write failed";
    137     return false;
    138   }
    139   if (!android::base::WriteFully(amfd, amfd_data.c_str(), amfd_data.size() + 1)) {
    140     PLOG(ERROR) << "AM data write failed";
    141     return false;
    142   }
    143 
    144   // 3 sec timeout reading the ack; we're fine if the read fails.
    145   char ack;
    146   android::base::ReadFully(amfd, &ack, 1);
    147   return true;
    148 }
    149 
    150 static void signal_handler(int) {
    151   // We can't log easily, because the heap might be corrupt.
    152   // Just die and let the surrounding log context explain things.
    153   _exit(1);
    154 }
    155 
    156 static void abort_handler(pid_t target, const bool& tombstoned_connected,
    157                           unique_fd& tombstoned_socket, unique_fd& output_fd,
    158                           const char* abort_msg) {
    159   // If we abort before we get an output fd, contact tombstoned to let any
    160   // potential listeners know that we failed.
    161   if (!tombstoned_connected) {
    162     if (!tombstoned_connect(target, &tombstoned_socket, &output_fd)) {
    163       // We failed to connect, not much we can do.
    164       LOG(ERROR) << "failed to connected to tombstoned to report failure";
    165       _exit(1);
    166     }
    167   }
    168 
    169   dprintf(output_fd.get(), "crash_dump failed to dump process");
    170   if (target != 1) {
    171     dprintf(output_fd.get(), " %d: %s\n", target, abort_msg);
    172   } else {
    173     dprintf(output_fd.get(), ": %s\n", abort_msg);
    174   }
    175 
    176   _exit(1);
    177 }
    178 
    179 static void drop_capabilities() {
    180   __user_cap_header_struct capheader;
    181   memset(&capheader, 0, sizeof(capheader));
    182   capheader.version = _LINUX_CAPABILITY_VERSION_3;
    183   capheader.pid = 0;
    184 
    185   __user_cap_data_struct capdata[2];
    186   memset(&capdata, 0, sizeof(capdata));
    187 
    188   if (capset(&capheader, &capdata[0]) == -1) {
    189     PLOG(FATAL) << "failed to drop capabilities";
    190   }
    191 
    192   if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) != 0) {
    193     PLOG(FATAL) << "failed to set PR_SET_NO_NEW_PRIVS";
    194   }
    195 }
    196 
    197 int main(int argc, char** argv) {
    198   pid_t target = getppid();
    199   bool tombstoned_connected = false;
    200   unique_fd tombstoned_socket;
    201   unique_fd output_fd;
    202 
    203   android::base::InitLogging(argv);
    204   android::base::SetAborter([&](const char* abort_msg) {
    205     abort_handler(target, tombstoned_connected, tombstoned_socket, output_fd, abort_msg);
    206   });
    207 
    208   // Don't try to dump ourselves.
    209   struct sigaction action = {};
    210   action.sa_handler = signal_handler;
    211   debuggerd_register_handlers(&action);
    212 
    213   if (argc != 3) {
    214     return 1;
    215   }
    216 
    217   sigset_t mask;
    218   sigemptyset(&mask);
    219   if (sigprocmask(SIG_SETMASK, &mask, nullptr) != 0) {
    220     PLOG(FATAL) << "failed to set signal mask";
    221   }
    222 
    223   pid_t main_tid;
    224   pid_t pseudothread_tid;
    225 
    226   if (!android::base::ParseInt(argv[1], &main_tid, 1, std::numeric_limits<pid_t>::max())) {
    227     LOG(FATAL) << "invalid main tid: " << argv[1];
    228   }
    229 
    230   if (!android::base::ParseInt(argv[2], &pseudothread_tid, 1, std::numeric_limits<pid_t>::max())) {
    231     LOG(FATAL) << "invalid pseudothread tid: " << argv[2];
    232   }
    233 
    234   if (target == 1) {
    235     LOG(FATAL) << "target died before we could attach (received main tid = " << main_tid << ")";
    236   }
    237 
    238   android::procinfo::ProcessInfo target_info;
    239   if (!android::procinfo::GetProcessInfo(main_tid, &target_info)) {
    240     LOG(FATAL) << "failed to fetch process info for target " << main_tid;
    241   }
    242 
    243   if (main_tid != target_info.tid || target != target_info.pid) {
    244     LOG(FATAL) << "target info mismatch, expected pid " << target << ", tid " << main_tid
    245                << ", received pid " << target_info.pid << ", tid " << target_info.tid;
    246   }
    247 
    248   // Open /proc/`getppid()` in the original process, and pass it down to the forked child.
    249   std::string target_proc_path = "/proc/" + std::to_string(target);
    250   int target_proc_fd = open(target_proc_path.c_str(), O_DIRECTORY | O_RDONLY);
    251   if (target_proc_fd == -1) {
    252     PLOG(FATAL) << "failed to open " << target_proc_path;
    253   }
    254 
    255   // Make sure our parent didn't die.
    256   if (getppid() != target) {
    257     PLOG(FATAL) << "parent died";
    258   }
    259 
    260   // Reparent ourselves to init, so that the signal handler can waitpid on the
    261   // original process to avoid leaving a zombie for non-fatal dumps.
    262   pid_t forkpid = fork();
    263   if (forkpid == -1) {
    264     PLOG(FATAL) << "fork failed";
    265   } else if (forkpid != 0) {
    266     exit(0);
    267   }
    268 
    269   // Die if we take too long.
    270   //
    271   // Note: processes with many threads and minidebug-info can take a bit to
    272   //       unwind, do not make this too small. b/62828735
    273   alarm(5);
    274 
    275   std::string attach_error;
    276 
    277   // Seize the main thread.
    278   if (!ptrace_seize_thread(target_proc_fd, main_tid, &attach_error)) {
    279     LOG(FATAL) << attach_error;
    280   }
    281 
    282   // Seize the siblings.
    283   std::map<pid_t, std::string> threads;
    284   {
    285     std::set<pid_t> siblings;
    286     if (!android::procinfo::GetProcessTids(target, &siblings)) {
    287       PLOG(FATAL) << "failed to get process siblings";
    288     }
    289 
    290     // but not the already attached main thread.
    291     siblings.erase(main_tid);
    292     // or the handler pseudothread.
    293     siblings.erase(pseudothread_tid);
    294 
    295     for (pid_t sibling_tid : siblings) {
    296       if (!ptrace_seize_thread(target_proc_fd, sibling_tid, &attach_error)) {
    297         LOG(WARNING) << attach_error;
    298       } else {
    299         threads.emplace(sibling_tid, get_thread_name(sibling_tid));
    300       }
    301     }
    302   }
    303 
    304   // Collect the backtrace map, open files, and process/thread names, while we still have caps.
    305   std::unique_ptr<BacktraceMap> backtrace_map(BacktraceMap::Create(main_tid));
    306   if (!backtrace_map) {
    307     LOG(FATAL) << "failed to create backtrace map";
    308   }
    309 
    310   // Collect the list of open files.
    311   OpenFilesList open_files;
    312   populate_open_files_list(target, &open_files);
    313 
    314   std::string process_name = get_process_name(main_tid);
    315   threads.emplace(main_tid, get_thread_name(main_tid));
    316 
    317   // Drop our capabilities now that we've attached to the threads we care about.
    318   drop_capabilities();
    319 
    320   LOG(INFO) << "obtaining output fd from tombstoned";
    321   tombstoned_connected = tombstoned_connect(target, &tombstoned_socket, &output_fd);
    322 
    323   // Write a '\1' to stdout to tell the crashing process to resume.
    324   // It also restores the value of PR_SET_DUMPABLE at this point.
    325   if (TEMP_FAILURE_RETRY(write(STDOUT_FILENO, "\1", 1)) == -1) {
    326     PLOG(ERROR) << "failed to communicate to target process";
    327   }
    328 
    329   if (tombstoned_connected) {
    330     if (TEMP_FAILURE_RETRY(dup2(output_fd.get(), STDOUT_FILENO)) == -1) {
    331       PLOG(ERROR) << "failed to dup2 output fd (" << output_fd.get() << ") to STDOUT_FILENO";
    332     }
    333   } else {
    334     unique_fd devnull(TEMP_FAILURE_RETRY(open("/dev/null", O_RDWR)));
    335     TEMP_FAILURE_RETRY(dup2(devnull.get(), STDOUT_FILENO));
    336     output_fd = std::move(devnull);
    337   }
    338 
    339   LOG(INFO) << "performing dump of process " << target << " (target tid = " << main_tid << ")";
    340 
    341   // At this point, the thread that made the request has been attached and is
    342   // in ptrace-stopped state. After resumption, the triggering signal that has
    343   // been queued will be delivered.
    344   if (ptrace(PTRACE_CONT, main_tid, 0, 0) != 0) {
    345     PLOG(ERROR) << "PTRACE_CONT(" << main_tid << ") failed";
    346     exit(1);
    347   }
    348 
    349   siginfo_t siginfo = {};
    350   if (!wait_for_signal(main_tid, &siginfo)) {
    351     printf("failed to wait for signal in tid %d: %s\n", main_tid, strerror(errno));
    352     exit(1);
    353   }
    354 
    355   int signo = siginfo.si_signo;
    356   bool fatal_signal = signo != DEBUGGER_SIGNAL;
    357   bool backtrace = false;
    358   uintptr_t abort_address = 0;
    359 
    360   // si_value can represent three things:
    361   //   0: dump tombstone
    362   //   1: dump backtrace
    363   //   everything else: abort message address (implies dump tombstone)
    364   if (siginfo.si_value.sival_int == 1) {
    365     backtrace = true;
    366   } else if (siginfo.si_value.sival_ptr != nullptr) {
    367     abort_address = reinterpret_cast<uintptr_t>(siginfo.si_value.sival_ptr);
    368   }
    369 
    370   // TODO: Use seccomp to lock ourselves down.
    371 
    372   std::string amfd_data;
    373   if (backtrace) {
    374     dump_backtrace(output_fd.get(), backtrace_map.get(), target, main_tid, process_name, threads, 0);
    375   } else {
    376     engrave_tombstone(output_fd.get(), backtrace_map.get(), &open_files, target, main_tid,
    377                       process_name, threads, abort_address, fatal_signal ? &amfd_data : nullptr);
    378   }
    379 
    380   // We don't actually need to PTRACE_DETACH, as long as our tracees aren't in
    381   // group-stop state, which is true as long as no stopping signals are sent.
    382 
    383   bool wait_for_gdb = android::base::GetBoolProperty("debug.debuggerd.wait_for_gdb", false);
    384   if (!fatal_signal || siginfo.si_code == SI_USER) {
    385     // Don't wait_for_gdb when the process didn't actually crash.
    386     wait_for_gdb = false;
    387   }
    388 
    389   // If the process crashed or we need to send it SIGSTOP for wait_for_gdb,
    390   // get it in a state where it can receive signals, and then send the relevant
    391   // signal.
    392   if (wait_for_gdb || fatal_signal) {
    393     if (ptrace(PTRACE_INTERRUPT, main_tid, 0, 0) != 0) {
    394       PLOG(ERROR) << "failed to use PTRACE_INTERRUPT on " << main_tid;
    395     }
    396 
    397     if (tgkill(target, main_tid, wait_for_gdb ? SIGSTOP : signo) != 0) {
    398       PLOG(ERROR) << "failed to resend signal " << signo << " to " << main_tid;
    399     }
    400   }
    401 
    402   if (wait_for_gdb) {
    403     // Use ALOGI to line up with output from engrave_tombstone.
    404     ALOGI(
    405       "***********************************************************\n"
    406       "* Process %d has been suspended while crashing.\n"
    407       "* To attach gdbserver and start gdb, run this on the host:\n"
    408       "*\n"
    409       "*     gdbclient.py -p %d\n"
    410       "*\n"
    411       "***********************************************************",
    412       target, main_tid);
    413   }
    414 
    415   if (fatal_signal) {
    416     // Don't try to notify ActivityManager if it just crashed, or we might hang until timeout.
    417     if (target_info.name != "system_server" || target_info.uid != AID_SYSTEM) {
    418       activity_manager_notify(target, signo, amfd_data);
    419     }
    420   }
    421 
    422   // Close stdout before we notify tombstoned of completion.
    423   close(STDOUT_FILENO);
    424   if (tombstoned_connected && !tombstoned_notify_completion(tombstoned_socket.get())) {
    425     LOG(ERROR) << "failed to notify tombstoned of completion";
    426   }
    427 
    428   return 0;
    429 }
    430