Home | History | Annotate | Download | only in tombstoned
      1 /*
      2  * Copyright 2016, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <fcntl.h>
     18 #include <stdio.h>
     19 #include <stdlib.h>
     20 #include <sys/stat.h>
     21 #include <sys/types.h>
     22 #include <unistd.h>
     23 
     24 #include <array>
     25 #include <deque>
     26 #include <string>
     27 #include <unordered_map>
     28 #include <utility>
     29 
     30 #include <event2/event.h>
     31 #include <event2/listener.h>
     32 #include <event2/thread.h>
     33 
     34 #include <android-base/logging.h>
     35 #include <android-base/properties.h>
     36 #include <android-base/stringprintf.h>
     37 #include <android-base/unique_fd.h>
     38 #include <cutils/sockets.h>
     39 
     40 #include "debuggerd/handler.h"
     41 #include "dump_type.h"
     42 #include "protocol.h"
     43 #include "util.h"
     44 
     45 #include "intercept_manager.h"
     46 
     47 using android::base::GetIntProperty;
     48 using android::base::StringPrintf;
     49 using android::base::unique_fd;
     50 
// Process-wide intercept manager. Created once in main() and consulted by
// perform_request() to check whether a dump should go to an intercept instead
// of a file handed out by a CrashQueue.
static InterceptManager* intercept_manager;
     52 
// State of a crash request.
// NOTE(review): neither enumerator is referenced in the visible part of this
// file; presumably "running" means a dump is in progress and "queued" means
// the request is waiting in a CrashQueue -- confirm before relying on it.
enum CrashStatus {
  kCrashStatusRunning,
  kCrashStatusQueued,
};
     57 
     58 // Ownership of Crash is a bit messy.
     59 // It's either owned by an active event that must have a timeout, or owned by
     60 // queued_requests, in the case that multiple crashes come in at the same time.
     61 struct Crash {
     62   ~Crash() { event_free(crash_event); }
     63 
     64   unique_fd crash_fd;
     65   pid_t crash_pid;
     66   event* crash_event = nullptr;
     67   std::string crash_path;
     68 
     69   DebuggerdDumpType crash_type;
     70 };
     71 
     72 class CrashQueue {
     73  public:
     74   CrashQueue(const std::string& dir_path, const std::string& file_name_prefix, size_t max_artifacts,
     75              size_t max_concurrent_dumps)
     76       : file_name_prefix_(file_name_prefix),
     77         dir_path_(dir_path),
     78         dir_fd_(open(dir_path.c_str(), O_DIRECTORY | O_RDONLY | O_CLOEXEC)),
     79         max_artifacts_(max_artifacts),
     80         next_artifact_(0),
     81         max_concurrent_dumps_(max_concurrent_dumps),
     82         num_concurrent_dumps_(0) {
     83     if (dir_fd_ == -1) {
     84       PLOG(FATAL) << "failed to open directory: " << dir_path;
     85     }
     86 
     87     // NOTE: If max_artifacts_ <= max_concurrent_dumps_, then theoretically the
     88     // same filename could be handed out to multiple processes.
     89     CHECK(max_artifacts_ > max_concurrent_dumps_);
     90 
     91     find_oldest_artifact();
     92   }
     93 
     94   static CrashQueue* for_crash(const Crash* crash) {
     95     return (crash->crash_type == kDebuggerdJavaBacktrace) ? for_anrs() : for_tombstones();
     96   }
     97 
     98   static CrashQueue* for_tombstones() {
     99     static CrashQueue queue("/data/tombstones", "tombstone_" /* file_name_prefix */,
    100                             GetIntProperty("tombstoned.max_tombstone_count", 10),
    101                             1 /* max_concurrent_dumps */);
    102     return &queue;
    103   }
    104 
    105   static CrashQueue* for_anrs() {
    106     static CrashQueue queue("/data/anr", "trace_" /* file_name_prefix */,
    107                             GetIntProperty("tombstoned.max_anr_count", 64),
    108                             4 /* max_concurrent_dumps */);
    109     return &queue;
    110   }
    111 
    112   std::pair<unique_fd, std::string> get_output() {
    113     unique_fd result;
    114     std::string file_name = StringPrintf("%s%02d", file_name_prefix_.c_str(), next_artifact_);
    115 
    116     // Unlink and create the file, instead of using O_TRUNC, to avoid two processes
    117     // interleaving their output in case we ever get into that situation.
    118     if (unlinkat(dir_fd_, file_name.c_str(), 0) != 0 && errno != ENOENT) {
    119       PLOG(FATAL) << "failed to unlink tombstone at " << dir_path_ << "/" << file_name;
    120     }
    121 
    122     result.reset(openat(dir_fd_, file_name.c_str(),
    123                         O_CREAT | O_EXCL | O_WRONLY | O_APPEND | O_CLOEXEC, 0640));
    124     if (result == -1) {
    125       PLOG(FATAL) << "failed to create tombstone at " << dir_path_ << "/" << file_name;
    126     }
    127 
    128     next_artifact_ = (next_artifact_ + 1) % max_artifacts_;
    129     return {std::move(result), dir_path_ + "/" + file_name};
    130   }
    131 
    132   bool maybe_enqueue_crash(Crash* crash) {
    133     if (num_concurrent_dumps_ == max_concurrent_dumps_) {
    134       queued_requests_.push_back(crash);
    135       return true;
    136     }
    137 
    138     return false;
    139   }
    140 
    141   void maybe_dequeue_crashes(void (*handler)(Crash* crash)) {
    142     while (!queued_requests_.empty() && num_concurrent_dumps_ < max_concurrent_dumps_) {
    143       Crash* next_crash = queued_requests_.front();
    144       queued_requests_.pop_front();
    145       handler(next_crash);
    146     }
    147   }
    148 
    149   void on_crash_started() { ++num_concurrent_dumps_; }
    150 
    151   void on_crash_completed() { --num_concurrent_dumps_; }
    152 
    153  private:
    154   void find_oldest_artifact() {
    155     size_t oldest_tombstone = 0;
    156     time_t oldest_time = std::numeric_limits<time_t>::max();
    157 
    158     for (size_t i = 0; i < max_artifacts_; ++i) {
    159       std::string path = StringPrintf("%s/%s%02zu", dir_path_.c_str(), file_name_prefix_.c_str(), i);
    160       struct stat st;
    161       if (stat(path.c_str(), &st) != 0) {
    162         if (errno == ENOENT) {
    163           oldest_tombstone = i;
    164           break;
    165         } else {
    166           PLOG(ERROR) << "failed to stat " << path;
    167           continue;
    168         }
    169       }
    170 
    171       if (st.st_mtime < oldest_time) {
    172         oldest_tombstone = i;
    173         oldest_time = st.st_mtime;
    174       }
    175     }
    176 
    177     next_artifact_ = oldest_tombstone;
    178   }
    179 
    180   const std::string file_name_prefix_;
    181 
    182   const std::string dir_path_;
    183   const int dir_fd_;
    184 
    185   const size_t max_artifacts_;
    186   int next_artifact_;
    187 
    188   const size_t max_concurrent_dumps_;
    189   size_t num_concurrent_dumps_;
    190 
    191   std::deque<Crash*> queued_requests_;
    192 
    193   DISALLOW_COPY_AND_ASSIGN(CrashQueue);
    194 };
    195 
// Whether java trace dumps are produced via tombstoned.
// When true, main() also listens on the java trace control socket in addition
// to the native crash socket.
static constexpr bool kJavaTraceDumpsEnabled = true;
    198 
// Forward declare the callbacks so they can be placed in a sensible order.
// crash_accept_cb:    listener callback, invoked for every accepted connection.
// crash_request_cb:   fires when the initial request packet arrives or times out.
// crash_completed_cb: fires when the completion packet arrives or the dump times out.
static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int, void*);
static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg);
static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg);
    203 
    204 static void perform_request(Crash* crash) {
    205   unique_fd output_fd;
    206   if (!intercept_manager->GetIntercept(crash->crash_pid, crash->crash_type, &output_fd)) {
    207     std::tie(output_fd, crash->crash_path) = CrashQueue::for_crash(crash)->get_output();
    208   }
    209 
    210   TombstonedCrashPacket response = {
    211     .packet_type = CrashPacketType::kPerformDump
    212   };
    213   ssize_t rc = send_fd(crash->crash_fd, &response, sizeof(response), std::move(output_fd));
    214   if (rc == -1) {
    215     PLOG(WARNING) << "failed to send response to CrashRequest";
    216     goto fail;
    217   } else if (rc != sizeof(response)) {
    218     PLOG(WARNING) << "crash socket write returned short";
    219     goto fail;
    220   } else {
    221     // TODO: Make this configurable by the interceptor?
    222     struct timeval timeout = { 10, 0 };
    223 
    224     event_base* base = event_get_base(crash->crash_event);
    225     event_assign(crash->crash_event, base, crash->crash_fd, EV_TIMEOUT | EV_READ,
    226                  crash_completed_cb, crash);
    227     event_add(crash->crash_event, &timeout);
    228   }
    229 
    230   CrashQueue::for_crash(crash)->on_crash_started();
    231   return;
    232 
    233 fail:
    234   delete crash;
    235 }
    236 
    237 static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int,
    238                             void*) {
    239   event_base* base = evconnlistener_get_base(listener);
    240   Crash* crash = new Crash();
    241 
    242   // TODO: Make sure that only java crashes come in on the java socket
    243   // and only native crashes on the native socket.
    244   struct timeval timeout = { 1, 0 };
    245   event* crash_event = event_new(base, sockfd, EV_TIMEOUT | EV_READ, crash_request_cb, crash);
    246   crash->crash_fd.reset(sockfd);
    247   crash->crash_event = crash_event;
    248   event_add(crash_event, &timeout);
    249 }
    250 
    251 static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg) {
    252   ssize_t rc;
    253   Crash* crash = static_cast<Crash*>(arg);
    254 
    255   TombstonedCrashPacket request = {};
    256 
    257   if ((ev & EV_TIMEOUT) != 0) {
    258     LOG(WARNING) << "crash request timed out";
    259     goto fail;
    260   } else if ((ev & EV_READ) == 0) {
    261     LOG(WARNING) << "tombstoned received unexpected event from crash socket";
    262     goto fail;
    263   }
    264 
    265   rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
    266   if (rc == -1) {
    267     PLOG(WARNING) << "failed to read from crash socket";
    268     goto fail;
    269   } else if (rc != sizeof(request)) {
    270     LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
    271                  << sizeof(request) << ")";
    272     goto fail;
    273   }
    274 
    275   if (request.packet_type != CrashPacketType::kDumpRequest) {
    276     LOG(WARNING) << "unexpected crash packet type, expected kDumpRequest, received  "
    277                  << StringPrintf("%#2hhX", request.packet_type);
    278     goto fail;
    279   }
    280 
    281   crash->crash_type = request.packet.dump_request.dump_type;
    282   if (crash->crash_type < 0 || crash->crash_type > kDebuggerdAnyIntercept) {
    283     LOG(WARNING) << "unexpected crash dump type: " << crash->crash_type;
    284     goto fail;
    285   }
    286 
    287   if (crash->crash_type != kDebuggerdJavaBacktrace) {
    288     crash->crash_pid = request.packet.dump_request.pid;
    289   } else {
    290     // Requests for java traces are sent from untrusted processes, so we
    291     // must not trust the PID sent down with the request. Instead, we ask the
    292     // kernel.
    293     ucred cr = {};
    294     socklen_t len = sizeof(cr);
    295     int ret = getsockopt(sockfd, SOL_SOCKET, SO_PEERCRED, &cr, &len);
    296     if (ret != 0) {
    297       PLOG(ERROR) << "Failed to getsockopt(..SO_PEERCRED)";
    298       goto fail;
    299     }
    300 
    301     crash->crash_pid = cr.pid;
    302   }
    303 
    304   LOG(INFO) << "received crash request for pid " << crash->crash_pid;
    305 
    306   if (CrashQueue::for_crash(crash)->maybe_enqueue_crash(crash)) {
    307     LOG(INFO) << "enqueueing crash request for pid " << crash->crash_pid;
    308   } else {
    309     perform_request(crash);
    310   }
    311 
    312   return;
    313 
    314 fail:
    315   delete crash;
    316 }
    317 
    318 static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg) {
    319   ssize_t rc;
    320   Crash* crash = static_cast<Crash*>(arg);
    321   TombstonedCrashPacket request = {};
    322 
    323   CrashQueue::for_crash(crash)->on_crash_completed();
    324 
    325   if ((ev & EV_READ) == 0) {
    326     goto fail;
    327   }
    328 
    329   rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
    330   if (rc == -1) {
    331     PLOG(WARNING) << "failed to read from crash socket";
    332     goto fail;
    333   } else if (rc != sizeof(request)) {
    334     LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
    335                  << sizeof(request) << ")";
    336     goto fail;
    337   }
    338 
    339   if (request.packet_type != CrashPacketType::kCompletedDump) {
    340     LOG(WARNING) << "unexpected crash packet type, expected kCompletedDump, received "
    341                  << uint32_t(request.packet_type);
    342     goto fail;
    343   }
    344 
    345   if (!crash->crash_path.empty()) {
    346     if (crash->crash_type == kDebuggerdJavaBacktrace) {
    347       LOG(ERROR) << "Traces for pid " << crash->crash_pid << " written to: " << crash->crash_path;
    348     } else {
    349       // NOTE: Several tools parse this log message to figure out where the
    350       // tombstone associated with a given native crash was written. Any changes
    351       // to this message must be carefully considered.
    352       LOG(ERROR) << "Tombstone written to: " << crash->crash_path;
    353     }
    354   }
    355 
    356 fail:
    357   CrashQueue* queue = CrashQueue::for_crash(crash);
    358   delete crash;
    359 
    360   // If there's something queued up, let them proceed.
    361   queue->maybe_dequeue_crashes(perform_request);
    362 }
    363 
    364 int main(int, char* []) {
    365   umask(0137);
    366 
    367   // Don't try to connect to ourselves if we crash.
    368   struct sigaction action = {};
    369   action.sa_handler = [](int signal) {
    370     LOG(ERROR) << "received fatal signal " << signal;
    371     _exit(1);
    372   };
    373   debuggerd_register_handlers(&action);
    374 
    375   int intercept_socket = android_get_control_socket(kTombstonedInterceptSocketName);
    376   int crash_socket = android_get_control_socket(kTombstonedCrashSocketName);
    377 
    378   if (intercept_socket == -1 || crash_socket == -1) {
    379     PLOG(FATAL) << "failed to get socket from init";
    380   }
    381 
    382   evutil_make_socket_nonblocking(intercept_socket);
    383   evutil_make_socket_nonblocking(crash_socket);
    384 
    385   event_base* base = event_base_new();
    386   if (!base) {
    387     LOG(FATAL) << "failed to create event_base";
    388   }
    389 
    390   intercept_manager = new InterceptManager(base, intercept_socket);
    391 
    392   evconnlistener* tombstone_listener = evconnlistener_new(
    393       base, crash_accept_cb, CrashQueue::for_tombstones(), -1, LEV_OPT_CLOSE_ON_FREE, crash_socket);
    394   if (!tombstone_listener) {
    395     LOG(FATAL) << "failed to create evconnlistener for tombstones.";
    396   }
    397 
    398   if (kJavaTraceDumpsEnabled) {
    399     const int java_trace_socket = android_get_control_socket(kTombstonedJavaTraceSocketName);
    400     if (java_trace_socket == -1) {
    401       PLOG(FATAL) << "failed to get socket from init";
    402     }
    403 
    404     evutil_make_socket_nonblocking(java_trace_socket);
    405     evconnlistener* java_trace_listener = evconnlistener_new(
    406         base, crash_accept_cb, CrashQueue::for_anrs(), -1, LEV_OPT_CLOSE_ON_FREE, java_trace_socket);
    407     if (!java_trace_listener) {
    408       LOG(FATAL) << "failed to create evconnlistener for java traces.";
    409     }
    410   }
    411 
    412   LOG(INFO) << "tombstoned successfully initialized";
    413   event_base_dispatch(base);
    414 }
    415