1 /* 2 * Copyright 2016, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <arpa/inet.h> 18 #include <dirent.h> 19 #include <fcntl.h> 20 #include <stdlib.h> 21 #include <sys/prctl.h> 22 #include <sys/ptrace.h> 23 #include <sys/types.h> 24 #include <sys/un.h> 25 #include <sys/wait.h> 26 #include <syscall.h> 27 #include <unistd.h> 28 29 #include <limits> 30 #include <map> 31 #include <memory> 32 #include <set> 33 #include <vector> 34 35 #include <android-base/file.h> 36 #include <android-base/logging.h> 37 #include <android-base/parseint.h> 38 #include <android-base/properties.h> 39 #include <android-base/stringprintf.h> 40 #include <android-base/strings.h> 41 #include <android-base/unique_fd.h> 42 #include <cutils/sockets.h> 43 #include <log/log.h> 44 #include <private/android_filesystem_config.h> 45 #include <procinfo/process.h> 46 47 #define ATRACE_TAG ATRACE_TAG_BIONIC 48 #include <utils/Trace.h> 49 50 #include <unwindstack/Regs.h> 51 52 #include "libdebuggerd/backtrace.h" 53 #include "libdebuggerd/tombstone.h" 54 #include "libdebuggerd/utility.h" 55 56 #include "debuggerd/handler.h" 57 #include "tombstoned/tombstoned.h" 58 59 #include "protocol.h" 60 #include "util.h" 61 62 using android::base::unique_fd; 63 using android::base::StringPrintf; 64 65 using unwindstack::Regs; 66 67 static bool pid_contains_tid(int pid_proc_fd, pid_t tid) { 68 struct stat st; 69 std::string task_path = StringPrintf("task/%d", tid); 70 return fstatat(pid_proc_fd, task_path.c_str(), &st, 0) == 0; 71 } 72 73 static pid_t get_tracer(pid_t tracee) { 74 // Check to see if the thread is being ptraced by another process. 75 android::procinfo::ProcessInfo process_info; 76 if (android::procinfo::GetProcessInfo(tracee, &process_info)) { 77 return process_info.tracer; 78 } 79 return -1; 80 } 81 82 // Attach to a thread, and verify that it's still a member of the given process 83 static bool ptrace_seize_thread(int pid_proc_fd, pid_t tid, std::string* error, int flags = 0) { 84 if (ptrace(PTRACE_SEIZE, tid, 0, flags) != 0) { 85 if (errno == EPERM) { 86 pid_t tracer = get_tracer(tid); 87 if (tracer != -1) { 88 *error = StringPrintf("failed to attach to thread %d, already traced by %d (%s)", tid, 89 tracer, get_process_name(tracer).c_str()); 90 return false; 91 } 92 } 93 94 *error = StringPrintf("failed to attach to thread %d: %s", tid, strerror(errno)); 95 return false; 96 } 97 98 // Make sure that the task we attached to is actually part of the pid we're dumping. 99 if (!pid_contains_tid(pid_proc_fd, tid)) { 100 if (ptrace(PTRACE_DETACH, tid, 0, 0) != 0) { 101 PLOG(WARNING) << "failed to detach from thread " << tid; 102 } 103 *error = StringPrintf("thread %d is not in process", tid); 104 return false; 105 } 106 107 return true; 108 } 109 110 static bool wait_for_stop(pid_t tid, int* received_signal) { 111 while (true) { 112 int status; 113 pid_t result = waitpid(tid, &status, __WALL); 114 if (result != tid) { 115 PLOG(ERROR) << "waitpid failed on " << tid << " while detaching"; 116 return false; 117 } 118 119 if (WIFSTOPPED(status)) { 120 if (status >> 16 == PTRACE_EVENT_STOP) { 121 *received_signal = 0; 122 } else { 123 *received_signal = WSTOPSIG(status); 124 } 125 return true; 126 } 127 } 128 } 129 130 // Interrupt a process and wait for it to be interrupted. 131 static bool ptrace_interrupt(pid_t tid, int* received_signal) { 132 if (ptrace(PTRACE_INTERRUPT, tid, 0, 0) == 0) { 133 return wait_for_stop(tid, received_signal); 134 } 135 136 PLOG(ERROR) << "failed to interrupt " << tid << " to detach"; 137 return false; 138 } 139 140 static bool activity_manager_notify(pid_t pid, int signal, const std::string& amfd_data) { 141 ATRACE_CALL(); 142 android::base::unique_fd amfd(socket_local_client( 143 "/data/system/ndebugsocket", ANDROID_SOCKET_NAMESPACE_FILESYSTEM, SOCK_STREAM)); 144 if (amfd.get() == -1) { 145 PLOG(ERROR) << "unable to connect to activity manager"; 146 return false; 147 } 148 149 struct timeval tv = { 150 .tv_sec = 1, 151 .tv_usec = 0, 152 }; 153 if (setsockopt(amfd.get(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) == -1) { 154 PLOG(ERROR) << "failed to set send timeout on activity manager socket"; 155 return false; 156 } 157 tv.tv_sec = 3; // 3 seconds on handshake read 158 if (setsockopt(amfd.get(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) == -1) { 159 PLOG(ERROR) << "failed to set receive timeout on activity manager socket"; 160 return false; 161 } 162 163 // Activity Manager protocol: binary 32-bit network-byte-order ints for the 164 // pid and signal number, followed by the raw text of the dump, culminating 165 // in a zero byte that marks end-of-data. 166 uint32_t datum = htonl(pid); 167 if (!android::base::WriteFully(amfd, &datum, 4)) { 168 PLOG(ERROR) << "AM pid write failed"; 169 return false; 170 } 171 datum = htonl(signal); 172 if (!android::base::WriteFully(amfd, &datum, 4)) { 173 PLOG(ERROR) << "AM signal write failed"; 174 return false; 175 } 176 if (!android::base::WriteFully(amfd, amfd_data.c_str(), amfd_data.size() + 1)) { 177 PLOG(ERROR) << "AM data write failed"; 178 return false; 179 } 180 181 // 3 sec timeout reading the ack; we're fine if the read fails. 182 char ack; 183 android::base::ReadFully(amfd, &ack, 1); 184 return true; 185 } 186 187 // Globals used by the abort handler. 188 static pid_t g_target_thread = -1; 189 static bool g_tombstoned_connected = false; 190 static unique_fd g_tombstoned_socket; 191 static unique_fd g_output_fd; 192 193 static void DefuseSignalHandlers() { 194 // Don't try to dump ourselves. 195 struct sigaction action = {}; 196 action.sa_handler = SIG_DFL; 197 debuggerd_register_handlers(&action); 198 199 sigset_t mask; 200 sigemptyset(&mask); 201 if (sigprocmask(SIG_SETMASK, &mask, nullptr) != 0) { 202 PLOG(FATAL) << "failed to set signal mask"; 203 } 204 } 205 206 static void Initialize(char** argv) { 207 android::base::InitLogging(argv); 208 android::base::SetAborter([](const char* abort_msg) { 209 // If we abort before we get an output fd, contact tombstoned to let any 210 // potential listeners know that we failed. 211 if (!g_tombstoned_connected) { 212 if (!tombstoned_connect(g_target_thread, &g_tombstoned_socket, &g_output_fd, 213 kDebuggerdAnyIntercept)) { 214 // We failed to connect, not much we can do. 215 LOG(ERROR) << "failed to connected to tombstoned to report failure"; 216 _exit(1); 217 } 218 } 219 220 dprintf(g_output_fd.get(), "crash_dump failed to dump process"); 221 if (g_target_thread != 1) { 222 dprintf(g_output_fd.get(), " %d: %s\n", g_target_thread, abort_msg); 223 } else { 224 dprintf(g_output_fd.get(), ": %s\n", abort_msg); 225 } 226 227 _exit(1); 228 }); 229 } 230 231 static void ParseArgs(int argc, char** argv, pid_t* pseudothread_tid, DebuggerdDumpType* dump_type) { 232 if (argc != 4) { 233 LOG(FATAL) << "wrong number of args: " << argc << " (expected 4)"; 234 } 235 236 if (!android::base::ParseInt(argv[1], &g_target_thread, 1, std::numeric_limits<pid_t>::max())) { 237 LOG(FATAL) << "invalid target tid: " << argv[1]; 238 } 239 240 if (!android::base::ParseInt(argv[2], pseudothread_tid, 1, std::numeric_limits<pid_t>::max())) { 241 LOG(FATAL) << "invalid pseudothread tid: " << argv[2]; 242 } 243 244 int dump_type_int; 245 if (!android::base::ParseInt(argv[3], &dump_type_int, 0, 1)) { 246 LOG(FATAL) << "invalid requested dump type: " << argv[3]; 247 } 248 *dump_type = static_cast<DebuggerdDumpType>(dump_type_int); 249 } 250 251 static void ReadCrashInfo(unique_fd& fd, siginfo_t* siginfo, 252 std::unique_ptr<unwindstack::Regs>* regs, uintptr_t* abort_address) { 253 std::aligned_storage<sizeof(CrashInfo) + 1, alignof(CrashInfo)>::type buf; 254 ssize_t rc = TEMP_FAILURE_RETRY(read(fd.get(), &buf, sizeof(buf))); 255 if (rc == -1) { 256 PLOG(FATAL) << "failed to read target ucontext"; 257 } else if (rc != sizeof(CrashInfo)) { 258 LOG(FATAL) << "read " << rc << " bytes when reading target crash information, expected " 259 << sizeof(CrashInfo); 260 } 261 262 CrashInfo* crash_info = reinterpret_cast<CrashInfo*>(&buf); 263 if (crash_info->version != 1) { 264 LOG(FATAL) << "version mismatch, expected 1, received " << crash_info->version; 265 } 266 267 *siginfo = crash_info->siginfo; 268 regs->reset(Regs::CreateFromUcontext(Regs::CurrentArch(), &crash_info->ucontext)); 269 *abort_address = crash_info->abort_msg_address; 270 } 271 272 // Wait for a process to clone and return the child's pid. 273 // Note: this leaves the parent in PTRACE_EVENT_STOP. 274 static pid_t wait_for_clone(pid_t pid, bool resume_child) { 275 int status; 276 pid_t result = TEMP_FAILURE_RETRY(waitpid(pid, &status, __WALL)); 277 if (result == -1) { 278 PLOG(FATAL) << "failed to waitpid"; 279 } 280 281 if (WIFEXITED(status)) { 282 LOG(FATAL) << "traced process exited with status " << WEXITSTATUS(status); 283 } else if (WIFSIGNALED(status)) { 284 LOG(FATAL) << "traced process exited with signal " << WTERMSIG(status); 285 } else if (!WIFSTOPPED(status)) { 286 LOG(FATAL) << "process didn't stop? (status = " << status << ")"; 287 } 288 289 if (status >> 8 != (SIGTRAP | (PTRACE_EVENT_CLONE << 8))) { 290 LOG(FATAL) << "process didn't stop due to PTRACE_O_TRACECLONE (status = " << status << ")"; 291 } 292 293 pid_t child; 294 if (ptrace(PTRACE_GETEVENTMSG, pid, 0, &child) != 0) { 295 PLOG(FATAL) << "failed to get child pid via PTRACE_GETEVENTMSG"; 296 } 297 298 int stop_signal; 299 if (!wait_for_stop(child, &stop_signal)) { 300 PLOG(FATAL) << "failed to waitpid on child"; 301 } 302 303 CHECK_EQ(0, stop_signal); 304 305 if (resume_child) { 306 if (ptrace(PTRACE_CONT, child, 0, 0) != 0) { 307 PLOG(FATAL) << "failed to resume child (pid = " << child << ")"; 308 } 309 } 310 311 return child; 312 } 313 314 static pid_t wait_for_vm_process(pid_t pseudothread_tid) { 315 // The pseudothread will double-fork, we want its grandchild. 316 pid_t intermediate = wait_for_clone(pseudothread_tid, true); 317 pid_t vm_pid = wait_for_clone(intermediate, false); 318 if (ptrace(PTRACE_DETACH, intermediate, 0, 0) != 0) { 319 PLOG(FATAL) << "failed to detach from intermediate vm process"; 320 } 321 322 return vm_pid; 323 } 324 325 int main(int argc, char** argv) { 326 DefuseSignalHandlers(); 327 328 atrace_begin(ATRACE_TAG, "before reparent"); 329 pid_t target_process = getppid(); 330 331 // Open /proc/`getppid()` before we daemonize. 332 std::string target_proc_path = "/proc/" + std::to_string(target_process); 333 int target_proc_fd = open(target_proc_path.c_str(), O_DIRECTORY | O_RDONLY); 334 if (target_proc_fd == -1) { 335 PLOG(FATAL) << "failed to open " << target_proc_path; 336 } 337 338 // Make sure getppid() hasn't changed. 339 if (getppid() != target_process) { 340 LOG(FATAL) << "parent died"; 341 } 342 atrace_end(ATRACE_TAG); 343 344 // Reparent ourselves to init, so that the signal handler can waitpid on the 345 // original process to avoid leaving a zombie for non-fatal dumps. 346 // Move the input/output pipes off of stdout/stderr, out of paranoia. 347 unique_fd output_pipe(dup(STDOUT_FILENO)); 348 unique_fd input_pipe(dup(STDIN_FILENO)); 349 350 unique_fd fork_exit_read, fork_exit_write; 351 if (!Pipe(&fork_exit_read, &fork_exit_write)) { 352 PLOG(FATAL) << "failed to create pipe"; 353 } 354 355 pid_t forkpid = fork(); 356 if (forkpid == -1) { 357 PLOG(FATAL) << "fork failed"; 358 } else if (forkpid == 0) { 359 fork_exit_read.reset(); 360 } else { 361 // We need the pseudothread to live until we get around to verifying the vm pid against it. 362 // The last thing it does is block on a waitpid on us, so wait until our child tells us to die. 363 fork_exit_write.reset(); 364 char buf; 365 TEMP_FAILURE_RETRY(read(fork_exit_read.get(), &buf, sizeof(buf))); 366 _exit(0); 367 } 368 369 ATRACE_NAME("after reparent"); 370 pid_t pseudothread_tid; 371 DebuggerdDumpType dump_type; 372 uintptr_t abort_address = 0; 373 374 Initialize(argv); 375 ParseArgs(argc, argv, &pseudothread_tid, &dump_type); 376 377 // Die if we take too long. 378 // 379 // Note: processes with many threads and minidebug-info can take a bit to 380 // unwind, do not make this too small. b/62828735 381 alarm(30); 382 383 // Get the process name (aka cmdline). 384 std::string process_name = get_process_name(g_target_thread); 385 386 // Collect the list of open files. 387 OpenFilesList open_files; 388 { 389 ATRACE_NAME("open files"); 390 populate_open_files_list(g_target_thread, &open_files); 391 } 392 393 // In order to reduce the duration that we pause the process for, we ptrace 394 // the threads, fetch their registers and associated information, and then 395 // fork a separate process as a snapshot of the process's address space. 396 std::set<pid_t> threads; 397 if (!android::procinfo::GetProcessTids(g_target_thread, &threads)) { 398 PLOG(FATAL) << "failed to get process threads"; 399 } 400 401 std::map<pid_t, ThreadInfo> thread_info; 402 siginfo_t siginfo; 403 std::string error; 404 405 { 406 ATRACE_NAME("ptrace"); 407 for (pid_t thread : threads) { 408 // Trace the pseudothread separately, so we can use different options. 409 if (thread == pseudothread_tid) { 410 continue; 411 } 412 413 if (!ptrace_seize_thread(target_proc_fd, thread, &error)) { 414 bool fatal = thread == g_target_thread; 415 LOG(fatal ? FATAL : WARNING) << error; 416 } 417 418 ThreadInfo info; 419 info.pid = target_process; 420 info.tid = thread; 421 info.process_name = process_name; 422 info.thread_name = get_thread_name(thread); 423 424 if (!ptrace_interrupt(thread, &info.signo)) { 425 PLOG(WARNING) << "failed to ptrace interrupt thread " << thread; 426 ptrace(PTRACE_DETACH, thread, 0, 0); 427 continue; 428 } 429 430 if (thread == g_target_thread) { 431 // Read the thread's registers along with the rest of the crash info out of the pipe. 432 ReadCrashInfo(input_pipe, &siginfo, &info.registers, &abort_address); 433 info.siginfo = &siginfo; 434 info.signo = info.siginfo->si_signo; 435 } else { 436 info.registers.reset(Regs::RemoteGet(thread)); 437 if (!info.registers) { 438 PLOG(WARNING) << "failed to fetch registers for thread " << thread; 439 ptrace(PTRACE_DETACH, thread, 0, 0); 440 continue; 441 } 442 } 443 444 thread_info[thread] = std::move(info); 445 } 446 } 447 448 // Trace the pseudothread with PTRACE_O_TRACECLONE and tell it to fork. 449 if (!ptrace_seize_thread(target_proc_fd, pseudothread_tid, &error, PTRACE_O_TRACECLONE)) { 450 LOG(FATAL) << "failed to seize pseudothread: " << error; 451 } 452 453 if (TEMP_FAILURE_RETRY(write(output_pipe.get(), "\1", 1)) != 1) { 454 PLOG(FATAL) << "failed to write to pseudothread"; 455 } 456 457 pid_t vm_pid = wait_for_vm_process(pseudothread_tid); 458 if (ptrace(PTRACE_DETACH, pseudothread_tid, 0, 0) != 0) { 459 PLOG(FATAL) << "failed to detach from pseudothread"; 460 } 461 462 // The pseudothread can die now. 463 fork_exit_write.reset(); 464 465 // Defer the message until later, for readability. 466 bool wait_for_gdb = android::base::GetBoolProperty("debug.debuggerd.wait_for_gdb", false); 467 if (siginfo.si_signo == DEBUGGER_SIGNAL) { 468 wait_for_gdb = false; 469 } 470 471 // Detach from all of our attached threads before resuming. 472 for (const auto& [tid, thread] : thread_info) { 473 int resume_signal = thread.signo == DEBUGGER_SIGNAL ? 0 : thread.signo; 474 if (wait_for_gdb) { 475 resume_signal = 0; 476 if (tgkill(target_process, tid, SIGSTOP) != 0) { 477 PLOG(WARNING) << "failed to send SIGSTOP to " << tid; 478 } 479 } 480 481 LOG(DEBUG) << "detaching from thread " << tid; 482 if (ptrace(PTRACE_DETACH, tid, 0, resume_signal) != 0) { 483 PLOG(ERROR) << "failed to detach from thread " << tid; 484 } 485 } 486 487 // Drop our capabilities now that we've fetched all of the information we need. 488 drop_capabilities(); 489 490 { 491 ATRACE_NAME("tombstoned_connect"); 492 LOG(INFO) << "obtaining output fd from tombstoned, type: " << dump_type; 493 g_tombstoned_connected = 494 tombstoned_connect(g_target_thread, &g_tombstoned_socket, &g_output_fd, dump_type); 495 } 496 497 if (g_tombstoned_connected) { 498 if (TEMP_FAILURE_RETRY(dup2(g_output_fd.get(), STDOUT_FILENO)) == -1) { 499 PLOG(ERROR) << "failed to dup2 output fd (" << g_output_fd.get() << ") to STDOUT_FILENO"; 500 } 501 } else { 502 unique_fd devnull(TEMP_FAILURE_RETRY(open("/dev/null", O_RDWR))); 503 TEMP_FAILURE_RETRY(dup2(devnull.get(), STDOUT_FILENO)); 504 g_output_fd = std::move(devnull); 505 } 506 507 LOG(INFO) << "performing dump of process " << target_process << " (target tid = " << g_target_thread 508 << ")"; 509 510 int signo = siginfo.si_signo; 511 bool fatal_signal = signo != DEBUGGER_SIGNAL; 512 bool backtrace = false; 513 514 // si_value is special when used with DEBUGGER_SIGNAL. 515 // 0: dump tombstone 516 // 1: dump backtrace 517 if (!fatal_signal) { 518 int si_val = siginfo.si_value.sival_int; 519 if (si_val == 0) { 520 backtrace = false; 521 } else if (si_val == 1) { 522 backtrace = true; 523 } else { 524 LOG(WARNING) << "unknown si_value value " << si_val; 525 } 526 } 527 528 // TODO: Use seccomp to lock ourselves down. 529 std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(vm_pid, false)); 530 if (!map) { 531 LOG(FATAL) << "failed to create backtrace map"; 532 } 533 534 std::shared_ptr<unwindstack::Memory> process_memory = map->GetProcessMemory(); 535 if (!process_memory) { 536 LOG(FATAL) << "failed to get unwindstack::Memory handle"; 537 } 538 539 std::string amfd_data; 540 if (backtrace) { 541 ATRACE_NAME("dump_backtrace"); 542 dump_backtrace(std::move(g_output_fd), map.get(), thread_info, g_target_thread); 543 } else { 544 ATRACE_NAME("engrave_tombstone"); 545 engrave_tombstone(std::move(g_output_fd), map.get(), process_memory.get(), thread_info, 546 g_target_thread, abort_address, &open_files, &amfd_data); 547 } 548 549 if (fatal_signal) { 550 // Don't try to notify ActivityManager if it just crashed, or we might hang until timeout. 551 if (thread_info[target_process].thread_name != "system_server") { 552 activity_manager_notify(target_process, signo, amfd_data); 553 } 554 } 555 556 if (wait_for_gdb) { 557 // Use ALOGI to line up with output from engrave_tombstone. 558 ALOGI( 559 "***********************************************************\n" 560 "* Process %d has been suspended while crashing.\n" 561 "* To attach gdbserver and start gdb, run this on the host:\n" 562 "*\n" 563 "* gdbclient.py -p %d\n" 564 "*\n" 565 "***********************************************************", 566 target_process, target_process); 567 } 568 569 // Close stdout before we notify tombstoned of completion. 570 close(STDOUT_FILENO); 571 if (g_tombstoned_connected && !tombstoned_notify_completion(g_tombstoned_socket.get())) { 572 LOG(ERROR) << "failed to notify tombstoned of completion"; 573 } 574 575 return 0; 576 } 577