1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "fault_handler.h" 18 19 #include <setjmp.h> 20 #include <sys/mman.h> 21 #include <sys/ucontext.h> 22 23 #include "art_method-inl.h" 24 #include "base/stl_util.h" 25 #include "mirror/class.h" 26 #include "oat_quick_method_header.h" 27 #include "sigchain.h" 28 #include "thread-inl.h" 29 #include "verify_object-inl.h" 30 31 // Note on nested signal support 32 // ----------------------------- 33 // 34 // Typically a signal handler should not need to deal with signals that occur within it. 35 // However, when a SIGSEGV occurs that is in generated code and is not one of the 36 // handled signals (implicit checks), we call a function to try to dump the stack 37 // to the log. This enhances the debugging experience but may have the side effect 38 // that it may not work. If the cause of the original SIGSEGV is a corrupted stack or other 39 // memory region, the stack backtrace code may run into trouble and may either crash 40 // or fail with an abort (SIGABRT). In either case we don't want that (new) signal to 41 // mask the original signal and thus prevent useful debug output from being presented. 42 // 43 // In order to handle this situation, before we call the stack tracer we do the following: 44 // 45 // 1. shutdown the fault manager so that we are talking to the real signal management 46 // functions rather than those in sigchain. 47 // 2. use pthread_sigmask to allow SIGSEGV and SIGABRT signals to be delivered to the 48 // thread running the signal handler. 49 // 3. set the handler for SIGSEGV and SIGABRT to a secondary signal handler. 50 // 4. save the thread's state to the TLS of the current thread using 'setjmp' 51 // 52 // We then call the stack tracer and one of two things may happen: 53 // a. it completes successfully 54 // b. it crashes and a signal is raised. 55 // 56 // In the former case, we fall through and everything is fine. In the latter case 57 // our secondary signal handler gets called in a signal context. This results in 58 // a call to FaultManager::HandledNestedSignal(), an archirecture specific function 59 // whose purpose is to call 'longjmp' on the jmp_buf saved in the TLS of the current 60 // thread. This results in a return with a non-zero value from 'setjmp'. We detect this 61 // and write something to the log to tell the user that it happened. 62 // 63 // Regardless of how we got there, we reach the code after the stack tracer and we 64 // restore the signal states to their original values, reinstate the fault manager (thus 65 // reestablishing the signal chain) and continue. 66 67 // This is difficult to test with a runtime test. To invoke the nested signal code 68 // on any signal, uncomment the following line and run something that throws a 69 // NullPointerException. 70 // #define TEST_NESTED_SIGNAL 71 72 namespace art { 73 // Static fault manger object accessed by signal handler. 74 FaultManager fault_manager; 75 76 extern "C" __attribute__((visibility("default"))) void art_sigsegv_fault() { 77 // Set a breakpoint here to be informed when a SIGSEGV is unhandled by ART. 78 VLOG(signals)<< "Caught unknown SIGSEGV in ART fault handler - chaining to next handler."; 79 } 80 81 // Signal handler called on SIGSEGV. 82 static void art_fault_handler(int sig, siginfo_t* info, void* context) { 83 fault_manager.HandleFault(sig, info, context); 84 } 85 86 // Signal handler for dealing with a nested signal. 87 static void art_nested_signal_handler(int sig, siginfo_t* info, void* context) { 88 fault_manager.HandleNestedSignal(sig, info, context); 89 } 90 91 FaultManager::FaultManager() : initialized_(false) { 92 sigaction(SIGSEGV, nullptr, &oldaction_); 93 } 94 95 FaultManager::~FaultManager() { 96 } 97 98 static void SetUpArtAction(struct sigaction* action) { 99 action->sa_sigaction = art_fault_handler; 100 sigemptyset(&action->sa_mask); 101 action->sa_flags = SA_SIGINFO | SA_ONSTACK; 102 #if !defined(__APPLE__) && !defined(__mips__) 103 action->sa_restorer = nullptr; 104 #endif 105 } 106 107 void FaultManager::EnsureArtActionInFrontOfSignalChain() { 108 if (initialized_) { 109 struct sigaction action; 110 SetUpArtAction(&action); 111 EnsureFrontOfChain(SIGSEGV, &action); 112 } else { 113 LOG(WARNING) << "Can't call " << __FUNCTION__ << " due to unitialized fault manager"; 114 } 115 } 116 117 void FaultManager::Init() { 118 CHECK(!initialized_); 119 struct sigaction action; 120 SetUpArtAction(&action); 121 122 // Set our signal handler now. 123 int e = sigaction(SIGSEGV, &action, &oldaction_); 124 if (e != 0) { 125 VLOG(signals) << "Failed to claim SEGV: " << strerror(errno); 126 } 127 // Make sure our signal handler is called before any user handlers. 128 ClaimSignalChain(SIGSEGV, &oldaction_); 129 initialized_ = true; 130 } 131 132 void FaultManager::Release() { 133 if (initialized_) { 134 UnclaimSignalChain(SIGSEGV); 135 initialized_ = false; 136 } 137 } 138 139 void FaultManager::Shutdown() { 140 if (initialized_) { 141 Release(); 142 143 // Free all handlers. 144 STLDeleteElements(&generated_code_handlers_); 145 STLDeleteElements(&other_handlers_); 146 } 147 } 148 149 bool FaultManager::HandleFaultByOtherHandlers(int sig, siginfo_t* info, void* context) { 150 if (other_handlers_.empty()) { 151 return false; 152 } 153 154 Thread* self = Thread::Current(); 155 156 DCHECK(self != nullptr); 157 DCHECK(Runtime::Current() != nullptr); 158 DCHECK(Runtime::Current()->IsStarted()); 159 160 // Now set up the nested signal handler. 161 162 // TODO: add SIGSEGV back to the nested signals when we can handle running out stack gracefully. 163 static const int handled_nested_signals[] = {SIGABRT}; 164 constexpr size_t num_handled_nested_signals = arraysize(handled_nested_signals); 165 166 // Release the fault manager so that it will remove the signal chain for 167 // SIGSEGV and we call the real sigaction. 168 fault_manager.Release(); 169 170 // The action for SIGSEGV should be the default handler now. 171 172 // Unblock the signals we allow so that they can be delivered in the signal handler. 173 sigset_t sigset; 174 sigemptyset(&sigset); 175 for (int signal : handled_nested_signals) { 176 sigaddset(&sigset, signal); 177 } 178 pthread_sigmask(SIG_UNBLOCK, &sigset, nullptr); 179 180 // If we get a signal in this code we want to invoke our nested signal 181 // handler. 182 struct sigaction action; 183 struct sigaction oldactions[num_handled_nested_signals]; 184 action.sa_sigaction = art_nested_signal_handler; 185 186 // Explicitly mask out SIGSEGV and SIGABRT from the nested signal handler. This 187 // should be the default but we definitely don't want these happening in our 188 // nested signal handler. 189 sigemptyset(&action.sa_mask); 190 for (int signal : handled_nested_signals) { 191 sigaddset(&action.sa_mask, signal); 192 } 193 194 action.sa_flags = SA_SIGINFO | SA_ONSTACK; 195 #if !defined(__APPLE__) && !defined(__mips__) 196 action.sa_restorer = nullptr; 197 #endif 198 199 // Catch handled signals to invoke our nested handler. 200 bool success = true; 201 for (size_t i = 0; i < num_handled_nested_signals; ++i) { 202 success = sigaction(handled_nested_signals[i], &action, &oldactions[i]) == 0; 203 if (!success) { 204 PLOG(ERROR) << "Unable to set up nested signal handler"; 205 break; 206 } 207 } 208 209 if (success) { 210 // Save the current state and call the handlers. If anything causes a signal 211 // our nested signal handler will be invoked and this will longjmp to the saved 212 // state. 213 if (setjmp(*self->GetNestedSignalState()) == 0) { 214 for (const auto& handler : other_handlers_) { 215 if (handler->Action(sig, info, context)) { 216 // Restore the signal handlers, reinit the fault manager and return. Signal was 217 // handled. 218 for (size_t i = 0; i < num_handled_nested_signals; ++i) { 219 success = sigaction(handled_nested_signals[i], &oldactions[i], nullptr) == 0; 220 if (!success) { 221 PLOG(ERROR) << "Unable to restore signal handler"; 222 } 223 } 224 fault_manager.Init(); 225 return true; 226 } 227 } 228 } else { 229 LOG(ERROR) << "Nested signal detected - original signal being reported"; 230 } 231 232 // Restore the signal handlers. 233 for (size_t i = 0; i < num_handled_nested_signals; ++i) { 234 success = sigaction(handled_nested_signals[i], &oldactions[i], nullptr) == 0; 235 if (!success) { 236 PLOG(ERROR) << "Unable to restore signal handler"; 237 } 238 } 239 } 240 241 // Now put the fault manager back in place. 242 fault_manager.Init(); 243 return false; 244 } 245 246 void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) { 247 // BE CAREFUL ALLOCATING HERE INCLUDING USING LOG(...) 248 // 249 // If malloc calls abort, it will be holding its lock. 250 // If the handler tries to call malloc, it will deadlock. 251 VLOG(signals) << "Handling fault"; 252 if (IsInGeneratedCode(info, context, true)) { 253 VLOG(signals) << "in generated code, looking for handler"; 254 for (const auto& handler : generated_code_handlers_) { 255 VLOG(signals) << "invoking Action on handler " << handler; 256 if (handler->Action(sig, info, context)) { 257 #ifdef TEST_NESTED_SIGNAL 258 // In test mode we want to fall through to stack trace handler 259 // on every signal (in reality this will cause a crash on the first 260 // signal). 261 break; 262 #else 263 // We have handled a signal so it's time to return from the 264 // signal handler to the appropriate place. 265 return; 266 #endif 267 } 268 } 269 270 // We hit a signal we didn't handle. This might be something for which 271 // we can give more information about so call all registered handlers to see 272 // if it is. 273 if (HandleFaultByOtherHandlers(sig, info, context)) { 274 return; 275 } 276 } 277 278 // Set a breakpoint in this function to catch unhandled signals. 279 art_sigsegv_fault(); 280 281 // Pass this on to the next handler in the chain, or the default if none. 282 InvokeUserSignalHandler(sig, info, context); 283 } 284 285 void FaultManager::AddHandler(FaultHandler* handler, bool generated_code) { 286 DCHECK(initialized_); 287 if (generated_code) { 288 generated_code_handlers_.push_back(handler); 289 } else { 290 other_handlers_.push_back(handler); 291 } 292 } 293 294 void FaultManager::RemoveHandler(FaultHandler* handler) { 295 auto it = std::find(generated_code_handlers_.begin(), generated_code_handlers_.end(), handler); 296 if (it != generated_code_handlers_.end()) { 297 generated_code_handlers_.erase(it); 298 return; 299 } 300 auto it2 = std::find(other_handlers_.begin(), other_handlers_.end(), handler); 301 if (it2 != other_handlers_.end()) { 302 other_handlers_.erase(it); 303 return; 304 } 305 LOG(FATAL) << "Attempted to remove non existent handler " << handler; 306 } 307 308 // This function is called within the signal handler. It checks that 309 // the mutator_lock is held (shared). No annotalysis is done. 310 bool FaultManager::IsInGeneratedCode(siginfo_t* siginfo, void* context, bool check_dex_pc) { 311 // We can only be running Java code in the current thread if it 312 // is in Runnable state. 313 VLOG(signals) << "Checking for generated code"; 314 Thread* thread = Thread::Current(); 315 if (thread == nullptr) { 316 VLOG(signals) << "no current thread"; 317 return false; 318 } 319 320 ThreadState state = thread->GetState(); 321 if (state != kRunnable) { 322 VLOG(signals) << "not runnable"; 323 return false; 324 } 325 326 // Current thread is runnable. 327 // Make sure it has the mutator lock. 328 if (!Locks::mutator_lock_->IsSharedHeld(thread)) { 329 VLOG(signals) << "no lock"; 330 return false; 331 } 332 333 ArtMethod* method_obj = nullptr; 334 uintptr_t return_pc = 0; 335 uintptr_t sp = 0; 336 337 // Get the architecture specific method address and return address. These 338 // are in architecture specific files in arch/<arch>/fault_handler_<arch>. 339 GetMethodAndReturnPcAndSp(siginfo, context, &method_obj, &return_pc, &sp); 340 341 // If we don't have a potential method, we're outta here. 342 VLOG(signals) << "potential method: " << method_obj; 343 // TODO: Check linear alloc and image. 344 DCHECK_ALIGNED(ArtMethod::Size(sizeof(void*)), sizeof(void*)) 345 << "ArtMethod is not pointer aligned"; 346 if (method_obj == nullptr || !IsAligned<sizeof(void*)>(method_obj)) { 347 VLOG(signals) << "no method"; 348 return false; 349 } 350 351 // Verify that the potential method is indeed a method. 352 // TODO: check the GC maps to make sure it's an object. 353 // Check that the class pointer inside the object is not null and is aligned. 354 // TODO: Method might be not a heap address, and GetClass could fault. 355 // No read barrier because method_obj may not be a real object. 356 mirror::Class* cls = method_obj->GetDeclaringClassUnchecked<kWithoutReadBarrier>(); 357 if (cls == nullptr) { 358 VLOG(signals) << "not a class"; 359 return false; 360 } 361 if (!IsAligned<kObjectAlignment>(cls)) { 362 VLOG(signals) << "not aligned"; 363 return false; 364 } 365 366 367 if (!VerifyClassClass(cls)) { 368 VLOG(signals) << "not a class class"; 369 return false; 370 } 371 372 const OatQuickMethodHeader* method_header = method_obj->GetOatQuickMethodHeader(return_pc); 373 374 // We can be certain that this is a method now. Check if we have a GC map 375 // at the return PC address. 376 if (true || kIsDebugBuild) { 377 VLOG(signals) << "looking for dex pc for return pc " << std::hex << return_pc; 378 uint32_t sought_offset = return_pc - 379 reinterpret_cast<uintptr_t>(method_header->GetEntryPoint()); 380 VLOG(signals) << "pc offset: " << std::hex << sought_offset; 381 } 382 uint32_t dexpc = method_header->ToDexPc(method_obj, return_pc, false); 383 VLOG(signals) << "dexpc: " << dexpc; 384 return !check_dex_pc || dexpc != DexFile::kDexNoIndex; 385 } 386 387 FaultHandler::FaultHandler(FaultManager* manager) : manager_(manager) { 388 } 389 390 // 391 // Null pointer fault handler 392 // 393 NullPointerHandler::NullPointerHandler(FaultManager* manager) : FaultHandler(manager) { 394 manager_->AddHandler(this, true); 395 } 396 397 // 398 // Suspension fault handler 399 // 400 SuspensionHandler::SuspensionHandler(FaultManager* manager) : FaultHandler(manager) { 401 manager_->AddHandler(this, true); 402 } 403 404 // 405 // Stack overflow fault handler 406 // 407 StackOverflowHandler::StackOverflowHandler(FaultManager* manager) : FaultHandler(manager) { 408 manager_->AddHandler(this, true); 409 } 410 411 // 412 // Stack trace handler, used to help get a stack trace from SIGSEGV inside of compiled code. 413 // 414 JavaStackTraceHandler::JavaStackTraceHandler(FaultManager* manager) : FaultHandler(manager) { 415 manager_->AddHandler(this, false); 416 } 417 418 bool JavaStackTraceHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* siginfo, void* context) { 419 // Make sure that we are in the generated code, but we may not have a dex pc. 420 #ifdef TEST_NESTED_SIGNAL 421 bool in_generated_code = true; 422 #else 423 bool in_generated_code = manager_->IsInGeneratedCode(siginfo, context, false); 424 #endif 425 if (in_generated_code) { 426 LOG(ERROR) << "Dumping java stack trace for crash in generated code"; 427 ArtMethod* method = nullptr; 428 uintptr_t return_pc = 0; 429 uintptr_t sp = 0; 430 Thread* self = Thread::Current(); 431 432 manager_->GetMethodAndReturnPcAndSp(siginfo, context, &method, &return_pc, &sp); 433 // Inside of generated code, sp[0] is the method, so sp is the frame. 434 self->SetTopOfStack(reinterpret_cast<ArtMethod**>(sp)); 435 #ifdef TEST_NESTED_SIGNAL 436 // To test the nested signal handler we raise a signal here. This will cause the 437 // nested signal handler to be called and perform a longjmp back to the setjmp 438 // above. 439 abort(); 440 #endif 441 self->DumpJavaStack(LOG(ERROR)); 442 } 443 444 return false; // Return false since we want to propagate the fault to the main signal handler. 445 } 446 447 } // namespace art 448