Home | History | Annotate | Download | only in runtime
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "fault_handler.h"
     18 
     19 #include <setjmp.h>
     20 #include <sys/mman.h>
     21 #include <sys/ucontext.h>
     22 
     23 #include "art_method-inl.h"
     24 #include "base/stl_util.h"
     25 #include "mirror/class.h"
     26 #include "oat_quick_method_header.h"
     27 #include "sigchain.h"
     28 #include "thread-inl.h"
     29 #include "verify_object-inl.h"
     30 
     31 // Note on nested signal support
     32 // -----------------------------
     33 //
     34 // Typically a signal handler should not need to deal with signals that occur within it.
     35 // However, when a SIGSEGV occurs that is in generated code and is not one of the
     36 // handled signals (implicit checks), we call a function to try to dump the stack
     37 // to the log.  This enhances the debugging experience but may have the side effect
     38 // that it may not work.  If the cause of the original SIGSEGV is a corrupted stack or other
     39 // memory region, the stack backtrace code may run into trouble and may either crash
     40 // or fail with an abort (SIGABRT).  In either case we don't want that (new) signal to
     41 // mask the original signal and thus prevent useful debug output from being presented.
     42 //
     43 // In order to handle this situation, before we call the stack tracer we do the following:
     44 //
     45 // 1. shutdown the fault manager so that we are talking to the real signal management
     46 //    functions rather than those in sigchain.
     47 // 2. use pthread_sigmask to allow SIGSEGV and SIGABRT signals to be delivered to the
     48 //    thread running the signal handler.
     49 // 3. set the handler for SIGSEGV and SIGABRT to a secondary signal handler.
     50 // 4. save the thread's state to the TLS of the current thread using 'setjmp'
     51 //
     52 // We then call the stack tracer and one of two things may happen:
     53 // a. it completes successfully
     54 // b. it crashes and a signal is raised.
     55 //
     56 // In the former case, we fall through and everything is fine.  In the latter case
     57 // our secondary signal handler gets called in a signal context.  This results in
// a call to FaultManager::HandleNestedSignal(), an architecture-specific function
     59 // whose purpose is to call 'longjmp' on the jmp_buf saved in the TLS of the current
     60 // thread.  This results in a return with a non-zero value from 'setjmp'.  We detect this
     61 // and write something to the log to tell the user that it happened.
     62 //
     63 // Regardless of how we got there, we reach the code after the stack tracer and we
     64 // restore the signal states to their original values, reinstate the fault manager (thus
     65 // reestablishing the signal chain) and continue.
     66 
     67 // This is difficult to test with a runtime test.  To invoke the nested signal code
     68 // on any signal, uncomment the following line and run something that throws a
     69 // NullPointerException.
     70 // #define TEST_NESTED_SIGNAL
     71 
     72 namespace art {
// Static fault manager object accessed by the signal handlers defined in this file
// (art_fault_handler and art_nested_signal_handler).
FaultManager fault_manager;
     75 
     76 extern "C" __attribute__((visibility("default"))) void art_sigsegv_fault() {
     77   // Set a breakpoint here to be informed when a SIGSEGV is unhandled by ART.
     78   VLOG(signals)<< "Caught unknown SIGSEGV in ART fault handler - chaining to next handler.";
     79 }
     80 
     81 // Signal handler called on SIGSEGV.
     82 static void art_fault_handler(int sig, siginfo_t* info, void* context) {
     83   fault_manager.HandleFault(sig, info, context);
     84 }
     85 
     86 // Signal handler for dealing with a nested signal.
     87 static void art_nested_signal_handler(int sig, siginfo_t* info, void* context) {
     88   fault_manager.HandleNestedSignal(sig, info, context);
     89 }
     90 
     91 FaultManager::FaultManager() : initialized_(false) {
     92   sigaction(SIGSEGV, nullptr, &oldaction_);
     93 }
     94 
     95 FaultManager::~FaultManager() {
     96 }
     97 
     98 static void SetUpArtAction(struct sigaction* action) {
     99   action->sa_sigaction = art_fault_handler;
    100   sigemptyset(&action->sa_mask);
    101   action->sa_flags = SA_SIGINFO | SA_ONSTACK;
    102 #if !defined(__APPLE__) && !defined(__mips__)
    103   action->sa_restorer = nullptr;
    104 #endif
    105 }
    106 
    107 void FaultManager::EnsureArtActionInFrontOfSignalChain() {
    108   if (initialized_) {
    109     struct sigaction action;
    110     SetUpArtAction(&action);
    111     EnsureFrontOfChain(SIGSEGV, &action);
    112   } else {
    113     LOG(WARNING) << "Can't call " << __FUNCTION__ << " due to unitialized fault manager";
    114   }
    115 }
    116 
    117 void FaultManager::Init() {
    118   CHECK(!initialized_);
    119   struct sigaction action;
    120   SetUpArtAction(&action);
    121 
    122   // Set our signal handler now.
    123   int e = sigaction(SIGSEGV, &action, &oldaction_);
    124   if (e != 0) {
    125     VLOG(signals) << "Failed to claim SEGV: " << strerror(errno);
    126   }
    127   // Make sure our signal handler is called before any user handlers.
    128   ClaimSignalChain(SIGSEGV, &oldaction_);
    129   initialized_ = true;
    130 }
    131 
    132 void FaultManager::Release() {
    133   if (initialized_) {
    134     UnclaimSignalChain(SIGSEGV);
    135     initialized_ = false;
    136   }
    137 }
    138 
    139 void FaultManager::Shutdown() {
    140   if (initialized_) {
    141     Release();
    142 
    143     // Free all handlers.
    144     STLDeleteElements(&generated_code_handlers_);
    145     STLDeleteElements(&other_handlers_);
    146   }
    147 }
    148 
    149 bool FaultManager::HandleFaultByOtherHandlers(int sig, siginfo_t* info, void* context) {
    150   if (other_handlers_.empty()) {
    151     return false;
    152   }
    153 
    154   Thread* self = Thread::Current();
    155 
    156   DCHECK(self != nullptr);
    157   DCHECK(Runtime::Current() != nullptr);
    158   DCHECK(Runtime::Current()->IsStarted());
    159 
    160   // Now set up the nested signal handler.
    161 
    162   // TODO: add SIGSEGV back to the nested signals when we can handle running out stack gracefully.
    163   static const int handled_nested_signals[] = {SIGABRT};
    164   constexpr size_t num_handled_nested_signals = arraysize(handled_nested_signals);
    165 
    166   // Release the fault manager so that it will remove the signal chain for
    167   // SIGSEGV and we call the real sigaction.
    168   fault_manager.Release();
    169 
    170   // The action for SIGSEGV should be the default handler now.
    171 
    172   // Unblock the signals we allow so that they can be delivered in the signal handler.
    173   sigset_t sigset;
    174   sigemptyset(&sigset);
    175   for (int signal : handled_nested_signals) {
    176     sigaddset(&sigset, signal);
    177   }
    178   pthread_sigmask(SIG_UNBLOCK, &sigset, nullptr);
    179 
    180   // If we get a signal in this code we want to invoke our nested signal
    181   // handler.
    182   struct sigaction action;
    183   struct sigaction oldactions[num_handled_nested_signals];
    184   action.sa_sigaction = art_nested_signal_handler;
    185 
    186   // Explicitly mask out SIGSEGV and SIGABRT from the nested signal handler.  This
    187   // should be the default but we definitely don't want these happening in our
    188   // nested signal handler.
    189   sigemptyset(&action.sa_mask);
    190   for (int signal : handled_nested_signals) {
    191     sigaddset(&action.sa_mask, signal);
    192   }
    193 
    194   action.sa_flags = SA_SIGINFO | SA_ONSTACK;
    195 #if !defined(__APPLE__) && !defined(__mips__)
    196   action.sa_restorer = nullptr;
    197 #endif
    198 
    199   // Catch handled signals to invoke our nested handler.
    200   bool success = true;
    201   for (size_t i = 0; i < num_handled_nested_signals; ++i) {
    202     success = sigaction(handled_nested_signals[i], &action, &oldactions[i]) == 0;
    203     if (!success) {
    204       PLOG(ERROR) << "Unable to set up nested signal handler";
    205       break;
    206     }
    207   }
    208 
    209   if (success) {
    210     // Save the current state and call the handlers.  If anything causes a signal
    211     // our nested signal handler will be invoked and this will longjmp to the saved
    212     // state.
    213     if (setjmp(*self->GetNestedSignalState()) == 0) {
    214       for (const auto& handler : other_handlers_) {
    215         if (handler->Action(sig, info, context)) {
    216           // Restore the signal handlers, reinit the fault manager and return.  Signal was
    217           // handled.
    218           for (size_t i = 0; i < num_handled_nested_signals; ++i) {
    219             success = sigaction(handled_nested_signals[i], &oldactions[i], nullptr) == 0;
    220             if (!success) {
    221               PLOG(ERROR) << "Unable to restore signal handler";
    222             }
    223           }
    224           fault_manager.Init();
    225           return true;
    226         }
    227       }
    228     } else {
    229       LOG(ERROR) << "Nested signal detected - original signal being reported";
    230     }
    231 
    232     // Restore the signal handlers.
    233     for (size_t i = 0; i < num_handled_nested_signals; ++i) {
    234       success = sigaction(handled_nested_signals[i], &oldactions[i], nullptr) == 0;
    235       if (!success) {
    236         PLOG(ERROR) << "Unable to restore signal handler";
    237       }
    238     }
    239   }
    240 
    241   // Now put the fault manager back in place.
    242   fault_manager.Init();
    243   return false;
    244 }
    245 
    246 void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
    247   // BE CAREFUL ALLOCATING HERE INCLUDING USING LOG(...)
    248   //
    249   // If malloc calls abort, it will be holding its lock.
    250   // If the handler tries to call malloc, it will deadlock.
    251   VLOG(signals) << "Handling fault";
    252   if (IsInGeneratedCode(info, context, true)) {
    253     VLOG(signals) << "in generated code, looking for handler";
    254     for (const auto& handler : generated_code_handlers_) {
    255       VLOG(signals) << "invoking Action on handler " << handler;
    256       if (handler->Action(sig, info, context)) {
    257 #ifdef TEST_NESTED_SIGNAL
    258         // In test mode we want to fall through to stack trace handler
    259         // on every signal (in reality this will cause a crash on the first
    260         // signal).
    261         break;
    262 #else
    263         // We have handled a signal so it's time to return from the
    264         // signal handler to the appropriate place.
    265         return;
    266 #endif
    267       }
    268     }
    269 
    270     // We hit a signal we didn't handle.  This might be something for which
    271     // we can give more information about so call all registered handlers to see
    272     // if it is.
    273     if (HandleFaultByOtherHandlers(sig, info, context)) {
    274         return;
    275     }
    276   }
    277 
    278   // Set a breakpoint in this function to catch unhandled signals.
    279   art_sigsegv_fault();
    280 
    281   // Pass this on to the next handler in the chain, or the default if none.
    282   InvokeUserSignalHandler(sig, info, context);
    283 }
    284 
    285 void FaultManager::AddHandler(FaultHandler* handler, bool generated_code) {
    286   DCHECK(initialized_);
    287   if (generated_code) {
    288     generated_code_handlers_.push_back(handler);
    289   } else {
    290     other_handlers_.push_back(handler);
    291   }
    292 }
    293 
    294 void FaultManager::RemoveHandler(FaultHandler* handler) {
    295   auto it = std::find(generated_code_handlers_.begin(), generated_code_handlers_.end(), handler);
    296   if (it != generated_code_handlers_.end()) {
    297     generated_code_handlers_.erase(it);
    298     return;
    299   }
    300   auto it2 = std::find(other_handlers_.begin(), other_handlers_.end(), handler);
    301   if (it2 != other_handlers_.end()) {
    302     other_handlers_.erase(it);
    303     return;
    304   }
    305   LOG(FATAL) << "Attempted to remove non existent handler " << handler;
    306 }
    307 
    308 // This function is called within the signal handler.  It checks that
    309 // the mutator_lock is held (shared).  No annotalysis is done.
    310 bool FaultManager::IsInGeneratedCode(siginfo_t* siginfo, void* context, bool check_dex_pc) {
    311   // We can only be running Java code in the current thread if it
    312   // is in Runnable state.
    313   VLOG(signals) << "Checking for generated code";
    314   Thread* thread = Thread::Current();
    315   if (thread == nullptr) {
    316     VLOG(signals) << "no current thread";
    317     return false;
    318   }
    319 
    320   ThreadState state = thread->GetState();
    321   if (state != kRunnable) {
    322     VLOG(signals) << "not runnable";
    323     return false;
    324   }
    325 
    326   // Current thread is runnable.
    327   // Make sure it has the mutator lock.
    328   if (!Locks::mutator_lock_->IsSharedHeld(thread)) {
    329     VLOG(signals) << "no lock";
    330     return false;
    331   }
    332 
    333   ArtMethod* method_obj = nullptr;
    334   uintptr_t return_pc = 0;
    335   uintptr_t sp = 0;
    336 
    337   // Get the architecture specific method address and return address.  These
    338   // are in architecture specific files in arch/<arch>/fault_handler_<arch>.
    339   GetMethodAndReturnPcAndSp(siginfo, context, &method_obj, &return_pc, &sp);
    340 
    341   // If we don't have a potential method, we're outta here.
    342   VLOG(signals) << "potential method: " << method_obj;
    343   // TODO: Check linear alloc and image.
    344   DCHECK_ALIGNED(ArtMethod::Size(sizeof(void*)), sizeof(void*))
    345       << "ArtMethod is not pointer aligned";
    346   if (method_obj == nullptr || !IsAligned<sizeof(void*)>(method_obj)) {
    347     VLOG(signals) << "no method";
    348     return false;
    349   }
    350 
    351   // Verify that the potential method is indeed a method.
    352   // TODO: check the GC maps to make sure it's an object.
    353   // Check that the class pointer inside the object is not null and is aligned.
    354   // TODO: Method might be not a heap address, and GetClass could fault.
    355   // No read barrier because method_obj may not be a real object.
    356   mirror::Class* cls = method_obj->GetDeclaringClassUnchecked<kWithoutReadBarrier>();
    357   if (cls == nullptr) {
    358     VLOG(signals) << "not a class";
    359     return false;
    360   }
    361   if (!IsAligned<kObjectAlignment>(cls)) {
    362     VLOG(signals) << "not aligned";
    363     return false;
    364   }
    365 
    366 
    367   if (!VerifyClassClass(cls)) {
    368     VLOG(signals) << "not a class class";
    369     return false;
    370   }
    371 
    372   const OatQuickMethodHeader* method_header = method_obj->GetOatQuickMethodHeader(return_pc);
    373 
    374   // We can be certain that this is a method now.  Check if we have a GC map
    375   // at the return PC address.
    376   if (true || kIsDebugBuild) {
    377     VLOG(signals) << "looking for dex pc for return pc " << std::hex << return_pc;
    378     uint32_t sought_offset = return_pc -
    379         reinterpret_cast<uintptr_t>(method_header->GetEntryPoint());
    380     VLOG(signals) << "pc offset: " << std::hex << sought_offset;
    381   }
    382   uint32_t dexpc = method_header->ToDexPc(method_obj, return_pc, false);
    383   VLOG(signals) << "dexpc: " << dexpc;
    384   return !check_dex_pc || dexpc != DexFile::kDexNoIndex;
    385 }
    386 
    387 FaultHandler::FaultHandler(FaultManager* manager) : manager_(manager) {
    388 }
    389 
    390 //
    391 // Null pointer fault handler
    392 //
    393 NullPointerHandler::NullPointerHandler(FaultManager* manager) : FaultHandler(manager) {
    394   manager_->AddHandler(this, true);
    395 }
    396 
    397 //
    398 // Suspension fault handler
    399 //
    400 SuspensionHandler::SuspensionHandler(FaultManager* manager) : FaultHandler(manager) {
    401   manager_->AddHandler(this, true);
    402 }
    403 
    404 //
    405 // Stack overflow fault handler
    406 //
    407 StackOverflowHandler::StackOverflowHandler(FaultManager* manager) : FaultHandler(manager) {
    408   manager_->AddHandler(this, true);
    409 }
    410 
    411 //
    412 // Stack trace handler, used to help get a stack trace from SIGSEGV inside of compiled code.
    413 //
    414 JavaStackTraceHandler::JavaStackTraceHandler(FaultManager* manager) : FaultHandler(manager) {
    415   manager_->AddHandler(this, false);
    416 }
    417 
    418 bool JavaStackTraceHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* siginfo, void* context) {
    419   // Make sure that we are in the generated code, but we may not have a dex pc.
    420 #ifdef TEST_NESTED_SIGNAL
    421   bool in_generated_code = true;
    422 #else
    423   bool in_generated_code = manager_->IsInGeneratedCode(siginfo, context, false);
    424 #endif
    425   if (in_generated_code) {
    426     LOG(ERROR) << "Dumping java stack trace for crash in generated code";
    427     ArtMethod* method = nullptr;
    428     uintptr_t return_pc = 0;
    429     uintptr_t sp = 0;
    430     Thread* self = Thread::Current();
    431 
    432     manager_->GetMethodAndReturnPcAndSp(siginfo, context, &method, &return_pc, &sp);
    433     // Inside of generated code, sp[0] is the method, so sp is the frame.
    434     self->SetTopOfStack(reinterpret_cast<ArtMethod**>(sp));
    435 #ifdef TEST_NESTED_SIGNAL
    436     // To test the nested signal handler we raise a signal here.  This will cause the
    437     // nested signal handler to be called and perform a longjmp back to the setjmp
    438     // above.
    439     abort();
    440 #endif
    441     self->DumpJavaStack(LOG(ERROR));
    442   }
    443 
    444   return false;  // Return false since we want to propagate the fault to the main signal handler.
    445 }
    446 
    447 }   // namespace art
    448