Home | History | Annotate | Download | only in tombstoned
      1 /*
      2  * Copyright 2016, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <array>
#include <deque>
#include <limits>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>

#include <event2/event.h>
#include <event2/listener.h>
#include <event2/thread.h>

#include <android-base/logging.h>
#include <android-base/properties.h>
#include <android-base/stringprintf.h>
#include <android-base/unique_fd.h>
#include <cutils/sockets.h>

#include "debuggerd/handler.h"
#include "dump_type.h"
#include "protocol.h"
#include "util.h"

#include "intercept_manager.h"
     46 
     47 using android::base::GetIntProperty;
     48 using android::base::StringPrintf;
     49 using android::base::unique_fd;
     50 
     51 static InterceptManager* intercept_manager;
     52 
     53 enum CrashStatus {
     54   kCrashStatusRunning,
     55   kCrashStatusQueued,
     56 };
     57 
     58 // Ownership of Crash is a bit messy.
     59 // It's either owned by an active event that must have a timeout, or owned by
     60 // queued_requests, in the case that multiple crashes come in at the same time.
     61 struct Crash {
     62   ~Crash() { event_free(crash_event); }
     63 
     64   std::string crash_tombstone_path;
     65   unique_fd crash_tombstone_fd;
     66   unique_fd crash_socket_fd;
     67   pid_t crash_pid;
     68   event* crash_event = nullptr;
     69 
     70   DebuggerdDumpType crash_type;
     71 };
     72 
     73 class CrashQueue {
     74  public:
     75   CrashQueue(const std::string& dir_path, const std::string& file_name_prefix, size_t max_artifacts,
     76              size_t max_concurrent_dumps)
     77       : file_name_prefix_(file_name_prefix),
     78         dir_path_(dir_path),
     79         dir_fd_(open(dir_path.c_str(), O_DIRECTORY | O_RDONLY | O_CLOEXEC)),
     80         max_artifacts_(max_artifacts),
     81         next_artifact_(0),
     82         max_concurrent_dumps_(max_concurrent_dumps),
     83         num_concurrent_dumps_(0) {
     84     if (dir_fd_ == -1) {
     85       PLOG(FATAL) << "failed to open directory: " << dir_path;
     86     }
     87 
     88     // NOTE: If max_artifacts_ <= max_concurrent_dumps_, then theoretically the
     89     // same filename could be handed out to multiple processes.
     90     CHECK(max_artifacts_ > max_concurrent_dumps_);
     91 
     92     find_oldest_artifact();
     93   }
     94 
     95   static CrashQueue* for_crash(const Crash* crash) {
     96     return (crash->crash_type == kDebuggerdJavaBacktrace) ? for_anrs() : for_tombstones();
     97   }
     98 
     99   static CrashQueue* for_tombstones() {
    100     static CrashQueue queue("/data/tombstones", "tombstone_" /* file_name_prefix */,
    101                             GetIntProperty("tombstoned.max_tombstone_count", 10),
    102                             1 /* max_concurrent_dumps */);
    103     return &queue;
    104   }
    105 
    106   static CrashQueue* for_anrs() {
    107     static CrashQueue queue("/data/anr", "trace_" /* file_name_prefix */,
    108                             GetIntProperty("tombstoned.max_anr_count", 64),
    109                             4 /* max_concurrent_dumps */);
    110     return &queue;
    111   }
    112 
    113   std::pair<std::string, unique_fd> get_output() {
    114     std::string path;
    115     unique_fd result(openat(dir_fd_, ".", O_WRONLY | O_APPEND | O_TMPFILE | O_CLOEXEC, 0640));
    116     if (result == -1) {
    117       // We might not have O_TMPFILE. Try creating with an arbitrary filename instead.
    118       static size_t counter = 0;
    119       std::string tmp_filename = StringPrintf(".temporary%zu", counter++);
    120       result.reset(openat(dir_fd_, tmp_filename.c_str(),
    121                           O_WRONLY | O_APPEND | O_CREAT | O_TRUNC | O_CLOEXEC, 0640));
    122       if (result == -1) {
    123         PLOG(FATAL) << "failed to create temporary tombstone in " << dir_path_;
    124       }
    125 
    126       path = StringPrintf("%s/%s", dir_path_.c_str(), tmp_filename.c_str());
    127     }
    128     return std::make_pair(std::move(path), std::move(result));
    129   }
    130 
    131   std::string get_next_artifact_path() {
    132     std::string file_name =
    133         StringPrintf("%s/%s%02d", dir_path_.c_str(), file_name_prefix_.c_str(), next_artifact_);
    134     next_artifact_ = (next_artifact_ + 1) % max_artifacts_;
    135     return file_name;
    136   }
    137 
    138   bool maybe_enqueue_crash(Crash* crash) {
    139     if (num_concurrent_dumps_ == max_concurrent_dumps_) {
    140       queued_requests_.push_back(crash);
    141       return true;
    142     }
    143 
    144     return false;
    145   }
    146 
    147   void maybe_dequeue_crashes(void (*handler)(Crash* crash)) {
    148     while (!queued_requests_.empty() && num_concurrent_dumps_ < max_concurrent_dumps_) {
    149       Crash* next_crash = queued_requests_.front();
    150       queued_requests_.pop_front();
    151       handler(next_crash);
    152     }
    153   }
    154 
    155   void on_crash_started() { ++num_concurrent_dumps_; }
    156 
    157   void on_crash_completed() { --num_concurrent_dumps_; }
    158 
    159  private:
    160   void find_oldest_artifact() {
    161     size_t oldest_tombstone = 0;
    162     time_t oldest_time = std::numeric_limits<time_t>::max();
    163 
    164     for (size_t i = 0; i < max_artifacts_; ++i) {
    165       std::string path = StringPrintf("%s/%s%02zu", dir_path_.c_str(), file_name_prefix_.c_str(), i);
    166       struct stat st;
    167       if (stat(path.c_str(), &st) != 0) {
    168         if (errno == ENOENT) {
    169           oldest_tombstone = i;
    170           break;
    171         } else {
    172           PLOG(ERROR) << "failed to stat " << path;
    173           continue;
    174         }
    175       }
    176 
    177       if (st.st_mtime < oldest_time) {
    178         oldest_tombstone = i;
    179         oldest_time = st.st_mtime;
    180       }
    181     }
    182 
    183     next_artifact_ = oldest_tombstone;
    184   }
    185 
    186   const std::string file_name_prefix_;
    187 
    188   const std::string dir_path_;
    189   const int dir_fd_;
    190 
    191   const size_t max_artifacts_;
    192   int next_artifact_;
    193 
    194   const size_t max_concurrent_dumps_;
    195   size_t num_concurrent_dumps_;
    196 
    197   std::deque<Crash*> queued_requests_;
    198 
    199   DISALLOW_COPY_AND_ASSIGN(CrashQueue);
    200 };
    201 
    202 // Whether java trace dumps are produced via tombstoned.
    203 static constexpr bool kJavaTraceDumpsEnabled = true;
    204 
    205 // Forward declare the callbacks so they can be placed in a sensible order.
    206 static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int, void*);
    207 static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg);
    208 static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg);
    209 
    210 static void perform_request(Crash* crash) {
    211   unique_fd output_fd;
    212   bool intercepted =
    213       intercept_manager->GetIntercept(crash->crash_pid, crash->crash_type, &output_fd);
    214   if (!intercepted) {
    215     std::tie(crash->crash_tombstone_path, output_fd) = CrashQueue::for_crash(crash)->get_output();
    216     crash->crash_tombstone_fd.reset(dup(output_fd.get()));
    217   }
    218 
    219   TombstonedCrashPacket response = {
    220     .packet_type = CrashPacketType::kPerformDump
    221   };
    222   ssize_t rc = send_fd(crash->crash_socket_fd, &response, sizeof(response), std::move(output_fd));
    223   if (rc == -1) {
    224     PLOG(WARNING) << "failed to send response to CrashRequest";
    225     goto fail;
    226   } else if (rc != sizeof(response)) {
    227     PLOG(WARNING) << "crash socket write returned short";
    228     goto fail;
    229   } else {
    230     // TODO: Make this configurable by the interceptor?
    231     struct timeval timeout = { 10, 0 };
    232 
    233     event_base* base = event_get_base(crash->crash_event);
    234     event_assign(crash->crash_event, base, crash->crash_socket_fd, EV_TIMEOUT | EV_READ,
    235                  crash_completed_cb, crash);
    236     event_add(crash->crash_event, &timeout);
    237   }
    238 
    239   CrashQueue::for_crash(crash)->on_crash_started();
    240   return;
    241 
    242 fail:
    243   delete crash;
    244 }
    245 
    246 static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int,
    247                             void*) {
    248   event_base* base = evconnlistener_get_base(listener);
    249   Crash* crash = new Crash();
    250 
    251   // TODO: Make sure that only java crashes come in on the java socket
    252   // and only native crashes on the native socket.
    253   struct timeval timeout = { 1, 0 };
    254   event* crash_event = event_new(base, sockfd, EV_TIMEOUT | EV_READ, crash_request_cb, crash);
    255   crash->crash_socket_fd.reset(sockfd);
    256   crash->crash_event = crash_event;
    257   event_add(crash_event, &timeout);
    258 }
    259 
    260 static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg) {
    261   ssize_t rc;
    262   Crash* crash = static_cast<Crash*>(arg);
    263 
    264   TombstonedCrashPacket request = {};
    265 
    266   if ((ev & EV_TIMEOUT) != 0) {
    267     LOG(WARNING) << "crash request timed out";
    268     goto fail;
    269   } else if ((ev & EV_READ) == 0) {
    270     LOG(WARNING) << "tombstoned received unexpected event from crash socket";
    271     goto fail;
    272   }
    273 
    274   rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
    275   if (rc == -1) {
    276     PLOG(WARNING) << "failed to read from crash socket";
    277     goto fail;
    278   } else if (rc != sizeof(request)) {
    279     LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
    280                  << sizeof(request) << ")";
    281     goto fail;
    282   }
    283 
    284   if (request.packet_type != CrashPacketType::kDumpRequest) {
    285     LOG(WARNING) << "unexpected crash packet type, expected kDumpRequest, received  "
    286                  << StringPrintf("%#2hhX", request.packet_type);
    287     goto fail;
    288   }
    289 
    290   crash->crash_type = request.packet.dump_request.dump_type;
    291   if (crash->crash_type < 0 || crash->crash_type > kDebuggerdAnyIntercept) {
    292     LOG(WARNING) << "unexpected crash dump type: " << crash->crash_type;
    293     goto fail;
    294   }
    295 
    296   if (crash->crash_type != kDebuggerdJavaBacktrace) {
    297     crash->crash_pid = request.packet.dump_request.pid;
    298   } else {
    299     // Requests for java traces are sent from untrusted processes, so we
    300     // must not trust the PID sent down with the request. Instead, we ask the
    301     // kernel.
    302     ucred cr = {};
    303     socklen_t len = sizeof(cr);
    304     int ret = getsockopt(sockfd, SOL_SOCKET, SO_PEERCRED, &cr, &len);
    305     if (ret != 0) {
    306       PLOG(ERROR) << "Failed to getsockopt(..SO_PEERCRED)";
    307       goto fail;
    308     }
    309 
    310     crash->crash_pid = cr.pid;
    311   }
    312 
    313   LOG(INFO) << "received crash request for pid " << crash->crash_pid;
    314 
    315   if (CrashQueue::for_crash(crash)->maybe_enqueue_crash(crash)) {
    316     LOG(INFO) << "enqueueing crash request for pid " << crash->crash_pid;
    317   } else {
    318     perform_request(crash);
    319   }
    320 
    321   return;
    322 
    323 fail:
    324   delete crash;
    325 }
    326 
    327 static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg) {
    328   ssize_t rc;
    329   Crash* crash = static_cast<Crash*>(arg);
    330   TombstonedCrashPacket request = {};
    331 
    332   CrashQueue::for_crash(crash)->on_crash_completed();
    333 
    334   if ((ev & EV_READ) == 0) {
    335     goto fail;
    336   }
    337 
    338   rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
    339   if (rc == -1) {
    340     PLOG(WARNING) << "failed to read from crash socket";
    341     goto fail;
    342   } else if (rc != sizeof(request)) {
    343     LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
    344                  << sizeof(request) << ")";
    345     goto fail;
    346   }
    347 
    348   if (request.packet_type != CrashPacketType::kCompletedDump) {
    349     LOG(WARNING) << "unexpected crash packet type, expected kCompletedDump, received "
    350                  << uint32_t(request.packet_type);
    351     goto fail;
    352   }
    353 
    354   if (crash->crash_tombstone_fd != -1) {
    355     std::string fd_path = StringPrintf("/proc/self/fd/%d", crash->crash_tombstone_fd.get());
    356     std::string tombstone_path = CrashQueue::for_crash(crash)->get_next_artifact_path();
    357 
    358     // linkat doesn't let us replace a file, so we need to unlink first.
    359     int rc = unlink(tombstone_path.c_str());
    360     if (rc != 0 && errno != ENOENT) {
    361       PLOG(ERROR) << "failed to unlink tombstone at " << tombstone_path;
    362       goto fail;
    363     }
    364 
    365     rc = linkat(AT_FDCWD, fd_path.c_str(), AT_FDCWD, tombstone_path.c_str(), AT_SYMLINK_FOLLOW);
    366     if (rc != 0) {
    367       PLOG(ERROR) << "failed to link tombstone";
    368     } else {
    369       if (crash->crash_type == kDebuggerdJavaBacktrace) {
    370         LOG(ERROR) << "Traces for pid " << crash->crash_pid << " written to: " << tombstone_path;
    371       } else {
    372         // NOTE: Several tools parse this log message to figure out where the
    373         // tombstone associated with a given native crash was written. Any changes
    374         // to this message must be carefully considered.
    375         LOG(ERROR) << "Tombstone written to: " << tombstone_path;
    376       }
    377     }
    378 
    379     // If we don't have O_TMPFILE, we need to clean up after ourselves.
    380     if (!crash->crash_tombstone_path.empty()) {
    381       rc = unlink(crash->crash_tombstone_path.c_str());
    382       if (rc != 0) {
    383         PLOG(ERROR) << "failed to unlink temporary tombstone at " << crash->crash_tombstone_path;
    384       }
    385     }
    386   }
    387 
    388 fail:
    389   CrashQueue* queue = CrashQueue::for_crash(crash);
    390   delete crash;
    391 
    392   // If there's something queued up, let them proceed.
    393   queue->maybe_dequeue_crashes(perform_request);
    394 }
    395 
    396 int main(int, char* []) {
    397   umask(0137);
    398 
    399   // Don't try to connect to ourselves if we crash.
    400   struct sigaction action = {};
    401   action.sa_handler = [](int signal) {
    402     LOG(ERROR) << "received fatal signal " << signal;
    403     _exit(1);
    404   };
    405   debuggerd_register_handlers(&action);
    406 
    407   int intercept_socket = android_get_control_socket(kTombstonedInterceptSocketName);
    408   int crash_socket = android_get_control_socket(kTombstonedCrashSocketName);
    409 
    410   if (intercept_socket == -1 || crash_socket == -1) {
    411     PLOG(FATAL) << "failed to get socket from init";
    412   }
    413 
    414   evutil_make_socket_nonblocking(intercept_socket);
    415   evutil_make_socket_nonblocking(crash_socket);
    416 
    417   event_base* base = event_base_new();
    418   if (!base) {
    419     LOG(FATAL) << "failed to create event_base";
    420   }
    421 
    422   intercept_manager = new InterceptManager(base, intercept_socket);
    423 
    424   evconnlistener* tombstone_listener =
    425       evconnlistener_new(base, crash_accept_cb, CrashQueue::for_tombstones(), LEV_OPT_CLOSE_ON_FREE,
    426                          -1 /* backlog */, crash_socket);
    427   if (!tombstone_listener) {
    428     LOG(FATAL) << "failed to create evconnlistener for tombstones.";
    429   }
    430 
    431   if (kJavaTraceDumpsEnabled) {
    432     const int java_trace_socket = android_get_control_socket(kTombstonedJavaTraceSocketName);
    433     if (java_trace_socket == -1) {
    434       PLOG(FATAL) << "failed to get socket from init";
    435     }
    436 
    437     evutil_make_socket_nonblocking(java_trace_socket);
    438     evconnlistener* java_trace_listener =
    439         evconnlistener_new(base, crash_accept_cb, CrashQueue::for_anrs(), LEV_OPT_CLOSE_ON_FREE,
    440                            -1 /* backlog */, java_trace_socket);
    441     if (!java_trace_listener) {
    442       LOG(FATAL) << "failed to create evconnlistener for java traces.";
    443     }
    444   }
    445 
    446   LOG(INFO) << "tombstoned successfully initialized";
    447   event_base_dispatch(base);
    448 }
    449