Home | History | Annotate | Download | only in tombstoned
      1 /*
      2  * Copyright 2016, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <array>
#include <deque>
#include <limits>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
     27 
     28 #include <event2/event.h>
     29 #include <event2/listener.h>
     30 #include <event2/thread.h>
     31 
     32 #include <android-base/logging.h>
     33 #include <android-base/stringprintf.h>
     34 #include <android-base/unique_fd.h>
     35 #include <cutils/sockets.h>
     36 
     37 #include "debuggerd/handler.h"
     38 #include "debuggerd/protocol.h"
     39 #include "debuggerd/util.h"
     40 
     41 #include "intercept_manager.h"
     42 
     43 using android::base::StringPrintf;
     44 using android::base::unique_fd;
     45 
     46 static InterceptManager* intercept_manager;
     47 
     48 enum CrashStatus {
     49   kCrashStatusRunning,
     50   kCrashStatusQueued,
     51 };
     52 
     53 // Ownership of Crash is a bit messy.
     54 // It's either owned by an active event that must have a timeout, or owned by
     55 // queued_requests, in the case that multiple crashes come in at the same time.
     56 struct Crash {
     57   ~Crash() {
     58     event_free(crash_event);
     59   }
     60 
     61   unique_fd crash_fd;
     62   pid_t crash_pid;
     63   event* crash_event = nullptr;
     64   std::string crash_path;
     65 };
     66 
     67 static constexpr char kTombstoneDirectory[] = "/data/tombstones/";
     68 static constexpr size_t kTombstoneCount = 10;
     69 static int tombstone_directory_fd = -1;
     70 static int next_tombstone = 0;
     71 
     72 static constexpr size_t kMaxConcurrentDumps = 1;
     73 static size_t num_concurrent_dumps = 0;
     74 
     75 static std::deque<Crash*> queued_requests;
     76 
     77 // Forward declare the callbacks so they can be placed in a sensible order.
     78 static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int, void*);
     79 static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg);
     80 static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg);
     81 
     82 static void find_oldest_tombstone() {
     83   size_t oldest_tombstone = 0;
     84   time_t oldest_time = std::numeric_limits<time_t>::max();
     85 
     86   for (size_t i = 0; i < kTombstoneCount; ++i) {
     87     std::string path = android::base::StringPrintf("%stombstone_%02zu", kTombstoneDirectory, i);
     88     struct stat st;
     89     if (stat(path.c_str(), &st) != 0) {
     90       if (errno == ENOENT) {
     91         oldest_tombstone = i;
     92         break;
     93       } else {
     94         PLOG(ERROR) << "failed to stat " << path;
     95         continue;
     96       }
     97     }
     98 
     99     if (st.st_mtime < oldest_time) {
    100       oldest_tombstone = i;
    101       oldest_time = st.st_mtime;
    102     }
    103   }
    104 
    105   next_tombstone = oldest_tombstone;
    106 }
    107 
    108 static std::pair<unique_fd, std::string> get_tombstone() {
    109   // If kMaxConcurrentDumps is greater than 1, then theoretically the same
    110   // filename could be handed out to multiple processes. Unlink and create the
    111   // file, instead of using O_TRUNC, to avoid two processes interleaving their
    112   // output.
    113   unique_fd result;
    114   std::string file_name = StringPrintf("tombstone_%02d", next_tombstone);
    115   if (unlinkat(tombstone_directory_fd, file_name.c_str(), 0) != 0 && errno != ENOENT) {
    116     PLOG(FATAL) << "failed to unlink tombstone at " << kTombstoneDirectory << "/" << file_name;
    117   }
    118 
    119   result.reset(openat(tombstone_directory_fd, file_name.c_str(),
    120                       O_CREAT | O_EXCL | O_WRONLY | O_APPEND | O_CLOEXEC, 0640));
    121   if (result == -1) {
    122     PLOG(FATAL) << "failed to create tombstone at " << kTombstoneDirectory << "/" << file_name;
    123   }
    124 
    125   next_tombstone = (next_tombstone + 1) % kTombstoneCount;
    126   return {std::move(result), std::string(kTombstoneDirectory) + "/" + file_name};
    127 }
    128 
    129 static void perform_request(Crash* crash) {
    130   unique_fd output_fd;
    131   if (!intercept_manager->GetIntercept(crash->crash_pid, &output_fd)) {
    132     std::tie(output_fd, crash->crash_path) = get_tombstone();
    133   }
    134 
    135   TombstonedCrashPacket response = {
    136     .packet_type = CrashPacketType::kPerformDump
    137   };
    138   ssize_t rc = send_fd(crash->crash_fd, &response, sizeof(response), std::move(output_fd));
    139   if (rc == -1) {
    140     PLOG(WARNING) << "failed to send response to CrashRequest";
    141     goto fail;
    142   } else if (rc != sizeof(response)) {
    143     PLOG(WARNING) << "crash socket write returned short";
    144     goto fail;
    145   } else {
    146     // TODO: Make this configurable by the interceptor?
    147     struct timeval timeout = { 10, 0 };
    148 
    149     event_base* base = event_get_base(crash->crash_event);
    150     event_assign(crash->crash_event, base, crash->crash_fd, EV_TIMEOUT | EV_READ,
    151                  crash_completed_cb, crash);
    152     event_add(crash->crash_event, &timeout);
    153   }
    154 
    155   ++num_concurrent_dumps;
    156   return;
    157 
    158 fail:
    159   delete crash;
    160 }
    161 
    162 static void dequeue_requests() {
    163   while (!queued_requests.empty() && num_concurrent_dumps < kMaxConcurrentDumps) {
    164     Crash* next_crash = queued_requests.front();
    165     queued_requests.pop_front();
    166     perform_request(next_crash);
    167   }
    168 }
    169 
    170 static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int,
    171                             void*) {
    172   event_base* base = evconnlistener_get_base(listener);
    173   Crash* crash = new Crash();
    174 
    175   struct timeval timeout = { 1, 0 };
    176   event* crash_event = event_new(base, sockfd, EV_TIMEOUT | EV_READ, crash_request_cb, crash);
    177   crash->crash_fd.reset(sockfd);
    178   crash->crash_event = crash_event;
    179   event_add(crash_event, &timeout);
    180 }
    181 
    182 static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg) {
    183   ssize_t rc;
    184   Crash* crash = static_cast<Crash*>(arg);
    185   TombstonedCrashPacket request = {};
    186 
    187   if ((ev & EV_TIMEOUT) != 0) {
    188     LOG(WARNING) << "crash request timed out";
    189     goto fail;
    190   } else if ((ev & EV_READ) == 0) {
    191     LOG(WARNING) << "tombstoned received unexpected event from crash socket";
    192     goto fail;
    193   }
    194 
    195   rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
    196   if (rc == -1) {
    197     PLOG(WARNING) << "failed to read from crash socket";
    198     goto fail;
    199   } else if (rc != sizeof(request)) {
    200     LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
    201                  << sizeof(request) << ")";
    202     goto fail;
    203   }
    204 
    205   if (request.packet_type != CrashPacketType::kDumpRequest) {
    206     LOG(WARNING) << "unexpected crash packet type, expected kDumpRequest, received  "
    207                  << StringPrintf("%#2hhX", request.packet_type);
    208     goto fail;
    209   }
    210 
    211   crash->crash_pid = request.packet.dump_request.pid;
    212   LOG(INFO) << "received crash request for pid " << crash->crash_pid;
    213 
    214   if (num_concurrent_dumps == kMaxConcurrentDumps) {
    215     LOG(INFO) << "enqueueing crash request for pid " << crash->crash_pid;
    216     queued_requests.push_back(crash);
    217   } else {
    218     perform_request(crash);
    219   }
    220 
    221   return;
    222 
    223 fail:
    224   delete crash;
    225 }
    226 
    227 static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg) {
    228   ssize_t rc;
    229   Crash* crash = static_cast<Crash*>(arg);
    230   TombstonedCrashPacket request = {};
    231 
    232   --num_concurrent_dumps;
    233 
    234   if ((ev & EV_READ) == 0) {
    235     goto fail;
    236   }
    237 
    238   rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
    239   if (rc == -1) {
    240     PLOG(WARNING) << "failed to read from crash socket";
    241     goto fail;
    242   } else if (rc != sizeof(request)) {
    243     LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
    244                  << sizeof(request) << ")";
    245     goto fail;
    246   }
    247 
    248   if (request.packet_type != CrashPacketType::kCompletedDump) {
    249     LOG(WARNING) << "unexpected crash packet type, expected kCompletedDump, received "
    250                  << uint32_t(request.packet_type);
    251     goto fail;
    252   }
    253 
    254   if (!crash->crash_path.empty()) {
    255     LOG(ERROR) << "Tombstone written to: " << crash->crash_path;
    256   }
    257 
    258 fail:
    259   delete crash;
    260 
    261   // If there's something queued up, let them proceed.
    262   dequeue_requests();
    263 }
    264 
    265 int main(int, char* []) {
    266   umask(0137);
    267 
    268   // Don't try to connect to ourselves if we crash.
    269   struct sigaction action = {};
    270   action.sa_handler = [](int signal) {
    271     LOG(ERROR) << "received fatal signal " << signal;
    272     _exit(1);
    273   };
    274   debuggerd_register_handlers(&action);
    275 
    276   tombstone_directory_fd = open(kTombstoneDirectory, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
    277   if (tombstone_directory_fd == -1) {
    278     PLOG(FATAL) << "failed to open tombstone directory";
    279   }
    280 
    281   find_oldest_tombstone();
    282 
    283   int intercept_socket = android_get_control_socket(kTombstonedInterceptSocketName);
    284   int crash_socket = android_get_control_socket(kTombstonedCrashSocketName);
    285 
    286   if (intercept_socket == -1 || crash_socket == -1) {
    287     PLOG(FATAL) << "failed to get socket from init";
    288   }
    289 
    290   evutil_make_socket_nonblocking(intercept_socket);
    291   evutil_make_socket_nonblocking(crash_socket);
    292 
    293   event_base* base = event_base_new();
    294   if (!base) {
    295     LOG(FATAL) << "failed to create event_base";
    296   }
    297 
    298   intercept_manager = new InterceptManager(base, intercept_socket);
    299 
    300   evconnlistener* listener =
    301     evconnlistener_new(base, crash_accept_cb, nullptr, -1, LEV_OPT_CLOSE_ON_FREE, crash_socket);
    302   if (!listener) {
    303     LOG(FATAL) << "failed to create evconnlistener";
    304   }
    305 
    306   LOG(INFO) << "tombstoned successfully initialized";
    307   event_base_dispatch(base);
    308 }
    309