/*
 * Copyright 2016, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <array>
#include <deque>
#include <limits>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>

#include <event2/event.h>
#include <event2/listener.h>
#include <event2/thread.h>

#include <android-base/logging.h>
#include <android-base/properties.h>
#include <android-base/stringprintf.h>
#include <android-base/unique_fd.h>
#include <cutils/sockets.h>

#include "debuggerd/handler.h"
#include "dump_type.h"
#include "protocol.h"
#include "util.h"

#include "intercept_manager.h"

using android::base::GetIntProperty;
using android::base::StringPrintf;
using android::base::unique_fd;

static InterceptManager* intercept_manager;

enum CrashStatus {
  kCrashStatusRunning,
  kCrashStatusQueued,
};

// Ownership of Crash is a bit messy.
// It's either owned by an active event that must have a timeout, or owned by
// queued_requests, in the case that multiple crashes come in at the same time.
61 struct Crash { 62 ~Crash() { event_free(crash_event); } 63 64 unique_fd crash_fd; 65 pid_t crash_pid; 66 event* crash_event = nullptr; 67 std::string crash_path; 68 69 DebuggerdDumpType crash_type; 70 }; 71 72 class CrashQueue { 73 public: 74 CrashQueue(const std::string& dir_path, const std::string& file_name_prefix, size_t max_artifacts, 75 size_t max_concurrent_dumps) 76 : file_name_prefix_(file_name_prefix), 77 dir_path_(dir_path), 78 dir_fd_(open(dir_path.c_str(), O_DIRECTORY | O_RDONLY | O_CLOEXEC)), 79 max_artifacts_(max_artifacts), 80 next_artifact_(0), 81 max_concurrent_dumps_(max_concurrent_dumps), 82 num_concurrent_dumps_(0) { 83 if (dir_fd_ == -1) { 84 PLOG(FATAL) << "failed to open directory: " << dir_path; 85 } 86 87 // NOTE: If max_artifacts_ <= max_concurrent_dumps_, then theoretically the 88 // same filename could be handed out to multiple processes. 89 CHECK(max_artifacts_ > max_concurrent_dumps_); 90 91 find_oldest_artifact(); 92 } 93 94 static CrashQueue* for_crash(const Crash* crash) { 95 return (crash->crash_type == kDebuggerdJavaBacktrace) ? for_anrs() : for_tombstones(); 96 } 97 98 static CrashQueue* for_tombstones() { 99 static CrashQueue queue("/data/tombstones", "tombstone_" /* file_name_prefix */, 100 GetIntProperty("tombstoned.max_tombstone_count", 10), 101 1 /* max_concurrent_dumps */); 102 return &queue; 103 } 104 105 static CrashQueue* for_anrs() { 106 static CrashQueue queue("/data/anr", "trace_" /* file_name_prefix */, 107 GetIntProperty("tombstoned.max_anr_count", 64), 108 4 /* max_concurrent_dumps */); 109 return &queue; 110 } 111 112 std::pair<unique_fd, std::string> get_output() { 113 unique_fd result; 114 std::string file_name = StringPrintf("%s%02d", file_name_prefix_.c_str(), next_artifact_); 115 116 // Unlink and create the file, instead of using O_TRUNC, to avoid two processes 117 // interleaving their output in case we ever get into that situation. 
118 if (unlinkat(dir_fd_, file_name.c_str(), 0) != 0 && errno != ENOENT) { 119 PLOG(FATAL) << "failed to unlink tombstone at " << dir_path_ << "/" << file_name; 120 } 121 122 result.reset(openat(dir_fd_, file_name.c_str(), 123 O_CREAT | O_EXCL | O_WRONLY | O_APPEND | O_CLOEXEC, 0640)); 124 if (result == -1) { 125 PLOG(FATAL) << "failed to create tombstone at " << dir_path_ << "/" << file_name; 126 } 127 128 next_artifact_ = (next_artifact_ + 1) % max_artifacts_; 129 return {std::move(result), dir_path_ + "/" + file_name}; 130 } 131 132 bool maybe_enqueue_crash(Crash* crash) { 133 if (num_concurrent_dumps_ == max_concurrent_dumps_) { 134 queued_requests_.push_back(crash); 135 return true; 136 } 137 138 return false; 139 } 140 141 void maybe_dequeue_crashes(void (*handler)(Crash* crash)) { 142 while (!queued_requests_.empty() && num_concurrent_dumps_ < max_concurrent_dumps_) { 143 Crash* next_crash = queued_requests_.front(); 144 queued_requests_.pop_front(); 145 handler(next_crash); 146 } 147 } 148 149 void on_crash_started() { ++num_concurrent_dumps_; } 150 151 void on_crash_completed() { --num_concurrent_dumps_; } 152 153 private: 154 void find_oldest_artifact() { 155 size_t oldest_tombstone = 0; 156 time_t oldest_time = std::numeric_limits<time_t>::max(); 157 158 for (size_t i = 0; i < max_artifacts_; ++i) { 159 std::string path = StringPrintf("%s/%s%02zu", dir_path_.c_str(), file_name_prefix_.c_str(), i); 160 struct stat st; 161 if (stat(path.c_str(), &st) != 0) { 162 if (errno == ENOENT) { 163 oldest_tombstone = i; 164 break; 165 } else { 166 PLOG(ERROR) << "failed to stat " << path; 167 continue; 168 } 169 } 170 171 if (st.st_mtime < oldest_time) { 172 oldest_tombstone = i; 173 oldest_time = st.st_mtime; 174 } 175 } 176 177 next_artifact_ = oldest_tombstone; 178 } 179 180 const std::string file_name_prefix_; 181 182 const std::string dir_path_; 183 const int dir_fd_; 184 185 const size_t max_artifacts_; 186 int next_artifact_; 187 188 const size_t 
max_concurrent_dumps_; 189 size_t num_concurrent_dumps_; 190 191 std::deque<Crash*> queued_requests_; 192 193 DISALLOW_COPY_AND_ASSIGN(CrashQueue); 194 }; 195 196 // Whether java trace dumps are produced via tombstoned. 197 static constexpr bool kJavaTraceDumpsEnabled = true; 198 199 // Forward declare the callbacks so they can be placed in a sensible order. 200 static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int, void*); 201 static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg); 202 static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg); 203 204 static void perform_request(Crash* crash) { 205 unique_fd output_fd; 206 if (!intercept_manager->GetIntercept(crash->crash_pid, crash->crash_type, &output_fd)) { 207 std::tie(output_fd, crash->crash_path) = CrashQueue::for_crash(crash)->get_output(); 208 } 209 210 TombstonedCrashPacket response = { 211 .packet_type = CrashPacketType::kPerformDump 212 }; 213 ssize_t rc = send_fd(crash->crash_fd, &response, sizeof(response), std::move(output_fd)); 214 if (rc == -1) { 215 PLOG(WARNING) << "failed to send response to CrashRequest"; 216 goto fail; 217 } else if (rc != sizeof(response)) { 218 PLOG(WARNING) << "crash socket write returned short"; 219 goto fail; 220 } else { 221 // TODO: Make this configurable by the interceptor? 
222 struct timeval timeout = { 10, 0 }; 223 224 event_base* base = event_get_base(crash->crash_event); 225 event_assign(crash->crash_event, base, crash->crash_fd, EV_TIMEOUT | EV_READ, 226 crash_completed_cb, crash); 227 event_add(crash->crash_event, &timeout); 228 } 229 230 CrashQueue::for_crash(crash)->on_crash_started(); 231 return; 232 233 fail: 234 delete crash; 235 } 236 237 static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int, 238 void*) { 239 event_base* base = evconnlistener_get_base(listener); 240 Crash* crash = new Crash(); 241 242 // TODO: Make sure that only java crashes come in on the java socket 243 // and only native crashes on the native socket. 244 struct timeval timeout = { 1, 0 }; 245 event* crash_event = event_new(base, sockfd, EV_TIMEOUT | EV_READ, crash_request_cb, crash); 246 crash->crash_fd.reset(sockfd); 247 crash->crash_event = crash_event; 248 event_add(crash_event, &timeout); 249 } 250 251 static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg) { 252 ssize_t rc; 253 Crash* crash = static_cast<Crash*>(arg); 254 255 TombstonedCrashPacket request = {}; 256 257 if ((ev & EV_TIMEOUT) != 0) { 258 LOG(WARNING) << "crash request timed out"; 259 goto fail; 260 } else if ((ev & EV_READ) == 0) { 261 LOG(WARNING) << "tombstoned received unexpected event from crash socket"; 262 goto fail; 263 } 264 265 rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request))); 266 if (rc == -1) { 267 PLOG(WARNING) << "failed to read from crash socket"; 268 goto fail; 269 } else if (rc != sizeof(request)) { 270 LOG(WARNING) << "crash socket received short read of length " << rc << " (expected " 271 << sizeof(request) << ")"; 272 goto fail; 273 } 274 275 if (request.packet_type != CrashPacketType::kDumpRequest) { 276 LOG(WARNING) << "unexpected crash packet type, expected kDumpRequest, received " 277 << StringPrintf("%#2hhX", request.packet_type); 278 goto fail; 279 } 280 281 crash->crash_type = 
request.packet.dump_request.dump_type; 282 if (crash->crash_type < 0 || crash->crash_type > kDebuggerdAnyIntercept) { 283 LOG(WARNING) << "unexpected crash dump type: " << crash->crash_type; 284 goto fail; 285 } 286 287 if (crash->crash_type != kDebuggerdJavaBacktrace) { 288 crash->crash_pid = request.packet.dump_request.pid; 289 } else { 290 // Requests for java traces are sent from untrusted processes, so we 291 // must not trust the PID sent down with the request. Instead, we ask the 292 // kernel. 293 ucred cr = {}; 294 socklen_t len = sizeof(cr); 295 int ret = getsockopt(sockfd, SOL_SOCKET, SO_PEERCRED, &cr, &len); 296 if (ret != 0) { 297 PLOG(ERROR) << "Failed to getsockopt(..SO_PEERCRED)"; 298 goto fail; 299 } 300 301 crash->crash_pid = cr.pid; 302 } 303 304 LOG(INFO) << "received crash request for pid " << crash->crash_pid; 305 306 if (CrashQueue::for_crash(crash)->maybe_enqueue_crash(crash)) { 307 LOG(INFO) << "enqueueing crash request for pid " << crash->crash_pid; 308 } else { 309 perform_request(crash); 310 } 311 312 return; 313 314 fail: 315 delete crash; 316 } 317 318 static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg) { 319 ssize_t rc; 320 Crash* crash = static_cast<Crash*>(arg); 321 TombstonedCrashPacket request = {}; 322 323 CrashQueue::for_crash(crash)->on_crash_completed(); 324 325 if ((ev & EV_READ) == 0) { 326 goto fail; 327 } 328 329 rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request))); 330 if (rc == -1) { 331 PLOG(WARNING) << "failed to read from crash socket"; 332 goto fail; 333 } else if (rc != sizeof(request)) { 334 LOG(WARNING) << "crash socket received short read of length " << rc << " (expected " 335 << sizeof(request) << ")"; 336 goto fail; 337 } 338 339 if (request.packet_type != CrashPacketType::kCompletedDump) { 340 LOG(WARNING) << "unexpected crash packet type, expected kCompletedDump, received " 341 << uint32_t(request.packet_type); 342 goto fail; 343 } 344 345 if (!crash->crash_path.empty()) { 
346 if (crash->crash_type == kDebuggerdJavaBacktrace) { 347 LOG(ERROR) << "Traces for pid " << crash->crash_pid << " written to: " << crash->crash_path; 348 } else { 349 // NOTE: Several tools parse this log message to figure out where the 350 // tombstone associated with a given native crash was written. Any changes 351 // to this message must be carefully considered. 352 LOG(ERROR) << "Tombstone written to: " << crash->crash_path; 353 } 354 } 355 356 fail: 357 CrashQueue* queue = CrashQueue::for_crash(crash); 358 delete crash; 359 360 // If there's something queued up, let them proceed. 361 queue->maybe_dequeue_crashes(perform_request); 362 } 363 364 int main(int, char* []) { 365 umask(0137); 366 367 // Don't try to connect to ourselves if we crash. 368 struct sigaction action = {}; 369 action.sa_handler = [](int signal) { 370 LOG(ERROR) << "received fatal signal " << signal; 371 _exit(1); 372 }; 373 debuggerd_register_handlers(&action); 374 375 int intercept_socket = android_get_control_socket(kTombstonedInterceptSocketName); 376 int crash_socket = android_get_control_socket(kTombstonedCrashSocketName); 377 378 if (intercept_socket == -1 || crash_socket == -1) { 379 PLOG(FATAL) << "failed to get socket from init"; 380 } 381 382 evutil_make_socket_nonblocking(intercept_socket); 383 evutil_make_socket_nonblocking(crash_socket); 384 385 event_base* base = event_base_new(); 386 if (!base) { 387 LOG(FATAL) << "failed to create event_base"; 388 } 389 390 intercept_manager = new InterceptManager(base, intercept_socket); 391 392 evconnlistener* tombstone_listener = evconnlistener_new( 393 base, crash_accept_cb, CrashQueue::for_tombstones(), -1, LEV_OPT_CLOSE_ON_FREE, crash_socket); 394 if (!tombstone_listener) { 395 LOG(FATAL) << "failed to create evconnlistener for tombstones."; 396 } 397 398 if (kJavaTraceDumpsEnabled) { 399 const int java_trace_socket = android_get_control_socket(kTombstonedJavaTraceSocketName); 400 if (java_trace_socket == -1) { 401 PLOG(FATAL) << 
"failed to get socket from init"; 402 } 403 404 evutil_make_socket_nonblocking(java_trace_socket); 405 evconnlistener* java_trace_listener = evconnlistener_new( 406 base, crash_accept_cb, CrashQueue::for_anrs(), -1, LEV_OPT_CLOSE_ON_FREE, java_trace_socket); 407 if (!java_trace_listener) { 408 LOG(FATAL) << "failed to create evconnlistener for java traces."; 409 } 410 } 411 412 LOG(INFO) << "tombstoned successfully initialized"; 413 event_base_dispatch(base); 414 } 415