/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "thread_list.h"

#include <backtrace/BacktraceMap.h>
#include <dirent.h>
#include <ScopedLocalRef.h>
#include <ScopedUtfChars.h>
#include <sys/types.h>
#include <unistd.h>

#include <sstream>

#include "android-base/stringprintf.h"

#include "base/histogram-inl.h"
#include "base/mutex-inl.h"
#include "base/systrace.h"
#include "base/time_utils.h"
#include "base/timing_logger.h"
#include "debugger.h"
#include "gc/collector/concurrent_copying.h"
#include "gc/reference_processor.h"
#include "jni_internal.h"
#include "lock_word.h"
#include "monitor.h"
#include "native_stack_dump.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "trace.h"
#include "well_known_classes.h"

#if ART_USE_FUTEXES
#include "linux/futex.h"
#include "sys/syscall.h"
#ifndef SYS_futex
#define SYS_futex __NR_futex
#endif
#endif  // ART_USE_FUTEXES

namespace art {

using android::base::StringPrintf;

static constexpr uint64_t kLongThreadSuspendThreshold = MsToNs(5);
// Use 0 since we want to yield to prevent blocking for an unpredictable amount of time.
static constexpr useconds_t kThreadSuspendInitialSleepUs = 0;
static constexpr useconds_t kThreadSuspendMaxYieldUs = 3000;
static constexpr useconds_t kThreadSuspendMaxSleepUs = 5000;

// Whether we should try to dump the native stack of unattached threads. See commit ed8b723 for
// some history.
// Turned off again. b/29248079
static constexpr bool kDumpUnattachedThreadNativeStackForSigQuit = false;

ThreadList::ThreadList(uint64_t thread_suspend_timeout_ns)
    : suspend_all_count_(0),
      debug_suspend_all_count_(0),
      unregistering_count_(0),
      suspend_all_historam_("suspend all histogram", 16, 64),
      long_suspend_(false),
      thread_suspend_timeout_ns_(thread_suspend_timeout_ns),
      empty_checkpoint_barrier_(new Barrier(0)) {
  CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1, 0U)));
}

ThreadList::~ThreadList() {
  ScopedTrace trace(__PRETTY_FUNCTION__);
  // Detach the current thread if necessary. If we failed to start, there might not be any threads.
  // We need to detach the current thread here in case there's another thread waiting to join with
  // us.
  bool contains = false;
  Thread* self = Thread::Current();
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    contains = Contains(self);
  }
  if (contains) {
    Runtime::Current()->DetachCurrentThread();
  }
  WaitForOtherNonDaemonThreadsToExit();
  // Disable GC and wait for GC to complete in case there are still daemon threads doing
  // allocations.
  gc::Heap* const heap = Runtime::Current()->GetHeap();
  heap->DisableGCForShutdown();
  // In case a GC is in progress, wait for it to finish.
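  // Note: DisableGCForShutdown() only blocks new collections from starting; an in-flight cycle
  // still has to be drained here before the daemons are suspended.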
  heap->WaitForGcToComplete(gc::kGcCauseBackground, Thread::Current());
  // TODO: there's an unaddressed race here where a thread may attach during shutdown, see
  //       Thread::Init.
  SuspendAllDaemonThreadsForShutdown();
}

bool ThreadList::Contains(Thread* thread) {
  return find(list_.begin(), list_.end(), thread) != list_.end();
}

bool ThreadList::Contains(pid_t tid) {
  for (const auto& thread : list_) {
    if (thread->GetTid() == tid) {
      return true;
    }
  }
  return false;
}

pid_t ThreadList::GetLockOwner() {
  return Locks::thread_list_lock_->GetExclusiveOwnerTid();
}

void ThreadList::DumpNativeStacks(std::ostream& os) {
  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
  std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid()));
  for (const auto& thread : list_) {
    os << "DUMPING THREAD " << thread->GetTid() << "\n";
    DumpNativeStack(os, thread->GetTid(), map.get(), "\t");
    os << "\n";
  }
}

void ThreadList::DumpForSigQuit(std::ostream& os) {
  {
    ScopedObjectAccess soa(Thread::Current());
    // Only print if we have samples.
    if (suspend_all_historam_.SampleSize() > 0) {
      Histogram<uint64_t>::CumulativeData data;
      suspend_all_historam_.CreateHistogram(&data);
      suspend_all_historam_.PrintConfidenceIntervals(os, 0.99, data);  // Dump time to suspend.
    }
  }
  bool dump_native_stack = Runtime::Current()->GetDumpNativeStackOnSigQuit();
  Dump(os, dump_native_stack);
  DumpUnattachedThreads(os, dump_native_stack && kDumpUnattachedThreadNativeStackForSigQuit);
}

static void DumpUnattachedThread(std::ostream& os, pid_t tid, bool dump_native_stack)
    NO_THREAD_SAFETY_ANALYSIS {
  // TODO: No thread safety analysis as DumpState with a null thread won't access fields, should
  // refactor DumpState to avoid skipping analysis.
  Thread::DumpState(os, nullptr, tid);
  DumpKernelStack(os, tid, "  kernel: ", false);
  if (dump_native_stack) {
    DumpNativeStack(os, tid, nullptr, "  native: ");
  }
  os << "\n";
}

void ThreadList::DumpUnattachedThreads(std::ostream& os, bool dump_native_stack) {
  DIR* d = opendir("/proc/self/task");
  if (!d) {
    return;
  }

  Thread* self = Thread::Current();
  dirent* e;
  while ((e = readdir(d)) != nullptr) {
    char* end;
    pid_t tid = strtol(e->d_name, &end, 10);
    if (!*end) {
      bool contains;
      {
        MutexLock mu(self, *Locks::thread_list_lock_);
        contains = Contains(tid);
      }
      if (!contains) {
        DumpUnattachedThread(os, tid, dump_native_stack);
      }
    }
  }
  closedir(d);
}

// Dump checkpoint timeout in milliseconds. Larger amount on the target, since the device could be
// overloaded with ANR dumps.
static constexpr uint32_t kDumpWaitTimeout = kIsTargetBuild ? 100000 : 20000;

// A closure used by ThreadList::Dump.
class DumpCheckpoint FINAL : public Closure {
 public:
  DumpCheckpoint(std::ostream* os, bool dump_native_stack)
      : os_(os),
        barrier_(0),
        backtrace_map_(dump_native_stack ? BacktraceMap::Create(getpid()) : nullptr),
        dump_native_stack_(dump_native_stack) {}

  void Run(Thread* thread) OVERRIDE {
    // Note thread and self may not be equal if thread was already suspended at the point of the
    // request.
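    // A runnable target thread executes this closure itself when it polls its checkpoint request;
    // for a target that was already suspended, the requesting thread runs it on its behalf.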
    Thread* self = Thread::Current();
    CHECK(self != nullptr);
    std::ostringstream local_os;
    {
      ScopedObjectAccess soa(self);
      thread->Dump(local_os, dump_native_stack_, backtrace_map_.get());
    }
    local_os << "\n";
    {
      // Use the logging lock to ensure serialization when writing to the common ostream.
      MutexLock mu(self, *Locks::logging_lock_);
      *os_ << local_os.str();
    }
    barrier_.Pass(self);
  }

  void WaitForThreadsToRunThroughCheckpoint(size_t threads_running_checkpoint) {
    Thread* self = Thread::Current();
    ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
    bool timed_out = barrier_.Increment(self, threads_running_checkpoint, kDumpWaitTimeout);
    if (timed_out) {
      // Avoid a recursive abort.
      LOG((kIsDebugBuild && (gAborting == 0)) ? ::android::base::FATAL : ::android::base::ERROR)
          << "Unexpected time out during dump checkpoint.";
    }
  }

 private:
  // The common stream that will accumulate all the dumps.
  std::ostream* const os_;
  // The barrier to be passed through and for the requestor to wait upon.
  Barrier barrier_;
  // A backtrace map, so that all threads use shared info and don't reacquire/parse separately.
  std::unique_ptr<BacktraceMap> backtrace_map_;
  // Whether we should dump the native stack.
  const bool dump_native_stack_;
};

void ThreadList::Dump(std::ostream& os, bool dump_native_stack) {
  Thread* self = Thread::Current();
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    os << "DALVIK THREADS (" << list_.size() << "):\n";
  }
  if (self != nullptr) {
    DumpCheckpoint checkpoint(&os, dump_native_stack);
    size_t threads_running_checkpoint;
    {
      // Use SOA to prevent deadlocks if multiple threads are calling Dump() at the same time.
      ScopedObjectAccess soa(self);
      threads_running_checkpoint = RunCheckpoint(&checkpoint);
    }
    if (threads_running_checkpoint != 0) {
      checkpoint.WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint);
    }
  } else {
    DumpUnattachedThreads(os, dump_native_stack);
  }
}

void ThreadList::AssertThreadsAreSuspended(Thread* self, Thread* ignore1, Thread* ignore2) {
  MutexLock mu(self, *Locks::thread_list_lock_);
  MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
  for (const auto& thread : list_) {
    if (thread != ignore1 && thread != ignore2) {
      CHECK(thread->IsSuspended())
          << "\nUnsuspended thread: <<" << *thread << "\n"
          << "self: <<" << *Thread::Current();
    }
  }
}

#if HAVE_TIMED_RWLOCK
// Attempt to rectify locks so that we dump thread list with required locks before exiting.
NO_RETURN static void UnsafeLogFatalForThreadSuspendAllTimeout() {
  Runtime* runtime = Runtime::Current();
  std::ostringstream ss;
  ss << "Thread suspend timeout\n";
  Locks::mutator_lock_->Dump(ss);
  ss << "\n";
  runtime->GetThreadList()->Dump(ss);
  LOG(FATAL) << ss.str();
  exit(0);
}
#endif

// Unlike suspending all threads where we can wait to acquire the mutator_lock_, suspending an
// individual thread requires polling. delay_us is the requested sleep time; if delay_us is 0 then
// we use sched_yield instead of calling usleep.
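// Callers start with delay_us == 0 (pure yields) and switch to exponentially growing sleeps once
// the yield phase has gone on for too long; see SuspendThreadByPeer/SuspendThreadByThreadId.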
static void ThreadSuspendSleep(useconds_t delay_us) {
  if (delay_us == 0) {
    sched_yield();
  } else {
    usleep(delay_us);
  }
}

size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback) {
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);

  std::vector<Thread*> suspended_count_modified_threads;
  size_t count = 0;
  {
    // Call a checkpoint function for each thread; threads which are suspended get their
    // checkpoint manually called.
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    count = list_.size();
    for (const auto& thread : list_) {
      if (thread != self) {
        while (true) {
          if (thread->RequestCheckpoint(checkpoint_function)) {
            // This thread will run its checkpoint some time in the near future.
            break;
          } else {
            // We are probably suspended, try to make sure that we stay suspended.
            if (thread->GetState() == kRunnable) {
              // The thread switched back to runnable - a spurious failure, try again.
              continue;
            }
            bool updated = thread->ModifySuspendCount(self, +1, nullptr, false);
            DCHECK(updated);
            suspended_count_modified_threads.push_back(thread);
            break;
          }
        }
      }
    }
    // Run the callback to be called inside this critical section.
    if (callback != nullptr) {
      callback->Run(self);
    }
  }

  // Run the checkpoint on ourself while we wait for threads to suspend.
  checkpoint_function->Run(self);

  // Run the checkpoint on the suspended threads.
  for (const auto& thread : suspended_count_modified_threads) {
    if (!thread->IsSuspended()) {
      if (ATRACE_ENABLED()) {
        std::ostringstream oss;
        thread->ShortDump(oss);
        ATRACE_BEGIN((std::string("Waiting for suspension of thread ") + oss.str()).c_str());
      }
      // Busy wait until the thread is suspended.
      const uint64_t start_time = NanoTime();
      do {
        ThreadSuspendSleep(kThreadSuspendInitialSleepUs);
      } while (!thread->IsSuspended());
      const uint64_t total_delay = NanoTime() - start_time;
      // Shouldn't need to wait for longer than 1000 microseconds.
      constexpr uint64_t kLongWaitThreshold = MsToNs(1);
      ATRACE_END();
      if (UNLIKELY(total_delay > kLongWaitThreshold)) {
        LOG(WARNING) << "Long wait of " << PrettyDuration(total_delay) << " for "
                     << *thread << " suspension!";
      }
    }
    // We know for sure that the thread is suspended at this point.
    checkpoint_function->Run(thread);
    {
      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
      bool updated = thread->ModifySuspendCount(self, -1, nullptr, false);
      DCHECK(updated);
    }
  }

  {
    // Imitate ResumeAll; threads may be waiting on Thread::resume_cond_ since we raised their
    // suspend count. Now the suspend count is lowered, so we must do the broadcast.
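    // Without this broadcast, a thread whose suspend count we lowered above could stay parked on
    // resume_cond_ indefinitely.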
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    Thread::resume_cond_->Broadcast(self);
  }

  return count;
}

void ThreadList::RunEmptyCheckpoint() {
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
  std::vector<uint32_t> runnable_thread_ids;
  size_t count = 0;
  Barrier* barrier = empty_checkpoint_barrier_.get();
  barrier->Init(self, 0);
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (Thread* thread : list_) {
      if (thread != self) {
        while (true) {
          if (thread->RequestEmptyCheckpoint()) {
            // This thread will run an empty checkpoint (decrement the empty checkpoint barrier)
            // some time in the near future.
            ++count;
            if (kIsDebugBuild) {
              runnable_thread_ids.push_back(thread->GetThreadId());
            }
            break;
          }
          if (thread->GetState() != kRunnable) {
            // It's seen suspended, we are done because it must not be in the middle of a mutator
            // heap access.
            break;
          }
        }
      }
    }
  }

  // Wake up the threads blocked on weak ref access so that they will respond to the empty
  // checkpoint request. Otherwise we will hang, as they are blocking in the kRunnable state.
  Runtime::Current()->GetHeap()->GetReferenceProcessor()->BroadcastForSlowPath(self);
  Runtime::Current()->BroadcastForNewSystemWeaks(/*broadcast_for_checkpoint*/true);
  {
    ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
    uint64_t total_wait_time = 0;
    bool first_iter = true;
    while (true) {
      // Wake up the runnable threads blocked on the mutexes that another thread, which is blocked
      // on a weak ref access, holds (indirectly blocked on weak ref access through another thread
      // and a mutex). This needs to be done periodically because the thread may be preempted
      // between the CheckEmptyCheckpointFromMutex call and the subsequent futex wait in
      // Mutex::ExclusiveLock, etc. when the wakeup via WakeupToRespondToEmptyCheckpoint
      // arrives. This could cause a *very rare* deadlock, if not repeated. Most of the cases are
      // handled in the first iteration.
      for (BaseMutex* mutex : Locks::expected_mutexes_on_weak_ref_access_) {
        mutex->WakeupToRespondToEmptyCheckpoint();
      }
      static constexpr uint64_t kEmptyCheckpointPeriodicTimeoutMs = 100;  // 100ms
      static constexpr uint64_t kEmptyCheckpointTotalTimeoutMs = 600 * 1000;  // 10 minutes.
      size_t barrier_count = first_iter ? count : 0;
      first_iter = false;  // Don't add to the barrier count from the second iteration on.
      bool timed_out = barrier->Increment(self, barrier_count, kEmptyCheckpointPeriodicTimeoutMs);
      if (!timed_out) {
        break;  // Success.
      }
      // This is a very rare case.
      total_wait_time += kEmptyCheckpointPeriodicTimeoutMs;
      if (kIsDebugBuild && total_wait_time > kEmptyCheckpointTotalTimeoutMs) {
        std::ostringstream ss;
        ss << "Empty checkpoint timeout\n";
        ss << "Barrier count " << barrier->GetCount(self) << "\n";
        ss << "Runnable thread IDs";
        for (uint32_t tid : runnable_thread_ids) {
          ss << " " << tid;
        }
        ss << "\n";
        Locks::mutator_lock_->Dump(ss);
        ss << "\n";
        LOG(FATAL_WITHOUT_ABORT) << ss.str();
        // Some threads in 'runnable_thread_ids' are probably stuck. Try to dump their stacks.
        // Avoid using ThreadList::Dump() initially because it is likely to get stuck as well.
        {
          ScopedObjectAccess soa(self);
          MutexLock mu1(self, *Locks::thread_list_lock_);
          for (Thread* thread : GetList()) {
            uint32_t tid = thread->GetThreadId();
            bool is_in_runnable_thread_ids =
                std::find(runnable_thread_ids.begin(), runnable_thread_ids.end(), tid) !=
                runnable_thread_ids.end();
            if (is_in_runnable_thread_ids &&
                thread->ReadFlag(kEmptyCheckpointRequest)) {
              // Found a runnable thread that hasn't responded to the empty checkpoint request.
              // Assume it's stuck and safe to dump its stack.
              thread->Dump(LOG_STREAM(FATAL_WITHOUT_ABORT),
                           /*dump_native_stack*/ true,
                           /*backtrace_map*/ nullptr,
                           /*force_dump_stack*/ true);
            }
          }
        }
        LOG(FATAL_WITHOUT_ABORT)
            << "Dumped runnable threads that haven't responded to empty checkpoint.";
        // Now use ThreadList::Dump() to dump more threads, noting it may get stuck.
        Dump(LOG_STREAM(FATAL_WITHOUT_ABORT));
        LOG(FATAL) << "Dumped all threads.";
      }
    }
  }
}

// Request that a checkpoint function be run on all active (non-suspended)
// threads. Returns the number of successful requests.
size_t ThreadList::RunCheckpointOnRunnableThreads(Closure* checkpoint_function) {
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
  CHECK_NE(self->GetState(), kRunnable);

  size_t count = 0;
  {
    // Call a checkpoint function for each non-suspended thread.
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (const auto& thread : list_) {
      if (thread != self) {
        if (thread->RequestCheckpoint(checkpoint_function)) {
          // This thread will run its checkpoint some time in the near future.
          count++;
        }
      }
    }
  }

  // Return the number of threads that will run the checkpoint function.
  return count;
}

// A checkpoint/suspend-all hybrid to switch thread roots from
// from-space to to-space refs. Used to synchronize threads at a point
// to mark the initiation of marking while maintaining the to-space
// invariant.
size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
                                   Closure* flip_callback,
                                   gc::collector::GarbageCollector* collector) {
  TimingLogger::ScopedTiming split("ThreadListFlip", collector->GetTimings());
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
  CHECK_NE(self->GetState(), kRunnable);

  collector->GetHeap()->ThreadFlipBegin(self);  // Sync with JNI critical calls.

  // ThreadFlipBegin happens before we suspend all the threads, so it does not count towards the
  // pause.
  const uint64_t suspend_start_time = NanoTime();
  SuspendAllInternal(self, self, nullptr);

  // Run the flip callback for the collector.
  Locks::mutator_lock_->ExclusiveLock(self);
  suspend_all_historam_.AdjustAndAddValue(NanoTime() - suspend_start_time);
  flip_callback->Run(self);
  Locks::mutator_lock_->ExclusiveUnlock(self);
  collector->RegisterPause(NanoTime() - suspend_start_time);

  // Resume runnable threads.
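  // Threads suspended only for this flip get resumed immediately below; the rest are collected
  // into other_threads so the flip function can be run on their behalf before they resume.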
  size_t runnable_thread_count = 0;
  std::vector<Thread*> other_threads;
  {
    TimingLogger::ScopedTiming split2("ResumeRunnableThreads", collector->GetTimings());
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    --suspend_all_count_;
    for (const auto& thread : list_) {
      // Set the flip function for all threads because Thread::DumpState/DumpJavaStack() (invoked
      // by a checkpoint) may cause the flip function to be run for a runnable/suspended thread
      // before a runnable thread runs it for itself or we run it for a suspended thread below.
      thread->SetFlipFunction(thread_flip_visitor);
      if (thread == self) {
        continue;
      }
      // Resume early the threads that were runnable but are suspended just for this thread flip,
      // that are about to transition from non-runnable (e.g. kNative at the SOA entry in a JNI
      // function) to runnable (both cases waiting inside Thread::TransitionFromSuspendedToRunnable),
      // or that are waiting for the thread flip to end at the JNI critical section entry
      // (kWaitingForGcThreadFlip).
      ThreadState state = thread->GetState();
      if ((state == kWaitingForGcThreadFlip || thread->IsTransitioningToRunnable()) &&
          thread->GetSuspendCount() == 1) {
        // The thread will resume right after the broadcast.
        bool updated = thread->ModifySuspendCount(self, -1, nullptr, false);
        DCHECK(updated);
        ++runnable_thread_count;
      } else {
        other_threads.push_back(thread);
      }
    }
    Thread::resume_cond_->Broadcast(self);
  }

  collector->GetHeap()->ThreadFlipEnd(self);

  // Run the closure on the other threads and let them resume.
  {
    TimingLogger::ScopedTiming split3("FlipOtherThreads", collector->GetTimings());
    ReaderMutexLock mu(self, *Locks::mutator_lock_);
    for (const auto& thread : other_threads) {
      Closure* flip_func = thread->GetFlipFunction();
      if (flip_func != nullptr) {
        flip_func->Run(thread);
      }
    }
    // Run it for self.
    Closure* flip_func = self->GetFlipFunction();
    if (flip_func != nullptr) {
      flip_func->Run(self);
    }
  }

  // Resume other threads.
  {
    TimingLogger::ScopedTiming split4("ResumeOtherThreads", collector->GetTimings());
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (const auto& thread : other_threads) {
      bool updated = thread->ModifySuspendCount(self, -1, nullptr, false);
      DCHECK(updated);
    }
    Thread::resume_cond_->Broadcast(self);
  }

  return runnable_thread_count + other_threads.size() + 1;  // +1 for self.
}

void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
  Thread* self = Thread::Current();

  if (self != nullptr) {
    VLOG(threads) << *self << " SuspendAll for " << cause << " starting...";
  } else {
    VLOG(threads) << "Thread[null] SuspendAll for " << cause << " starting...";
  }
  {
    ScopedTrace trace("Suspending mutator threads");
    const uint64_t start_time = NanoTime();

    SuspendAllInternal(self, self);
    // All threads are known to have suspended (but a thread may still own the mutator lock).
    // Make sure this thread grabs exclusive access to the mutator lock and its protected data.
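    // With timed rwlocks, retry with a timeout so that a stuck shared holder produces a fatal
    // diagnostic dump rather than a silent hang.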
#if HAVE_TIMED_RWLOCK
    while (true) {
      if (Locks::mutator_lock_->ExclusiveLockWithTimeout(self,
                                                         NsToMs(thread_suspend_timeout_ns_),
                                                         0)) {
        break;
      } else if (!long_suspend_) {
        // Reading long_suspend_ without the mutator lock is slightly racy; in some rare cases this
        // could result in a thread suspend timeout.
        // Timeout if we wait more than thread_suspend_timeout_ns_ nanoseconds.
        UnsafeLogFatalForThreadSuspendAllTimeout();
      }
    }
#else
    Locks::mutator_lock_->ExclusiveLock(self);
#endif

    long_suspend_ = long_suspend;

    const uint64_t end_time = NanoTime();
    const uint64_t suspend_time = end_time - start_time;
    suspend_all_historam_.AdjustAndAddValue(suspend_time);
    if (suspend_time > kLongThreadSuspendThreshold) {
      LOG(WARNING) << "Suspending all threads took: " << PrettyDuration(suspend_time);
    }

    if (kDebugLocking) {
      // Debug check that all threads are suspended.
      AssertThreadsAreSuspended(self, self);
    }
  }
  ATRACE_BEGIN((std::string("Mutator threads suspended for ") + cause).c_str());

  if (self != nullptr) {
    VLOG(threads) << *self << " SuspendAll complete";
  } else {
    VLOG(threads) << "Thread[null] SuspendAll complete";
  }
}

// Ensures all threads running Java suspend and that those not running Java don't start.
// The debugger thread might be set to kRunnable for a short period of time after
// SuspendAllInternal. This is safe because it will be set back to a suspended state before
// SuspendAll returns.
void ThreadList::SuspendAllInternal(Thread* self,
                                    Thread* ignore1,
                                    Thread* ignore2,
                                    bool debug_suspend) {
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
  if (kDebugLocking && self != nullptr) {
    CHECK_NE(self->GetState(), kRunnable);
  }

  // First request that all threads suspend, then wait for them to suspend before
  // returning. This suspension scheme also relies on other behaviour:
  // 1. Threads cannot be deleted while they are suspended or have a suspend-
  //    request flag set - (see Unregister() below).
  // 2. When threads are created, they are created in a suspended state (actually
  //    kNative) and will never begin executing Java code without first checking
  //    the suspend-request flag.

  // The atomic counter for number of threads that need to pass the barrier.
  AtomicInteger pending_threads;
  uint32_t num_ignored = 0;
  if (ignore1 != nullptr) {
    ++num_ignored;
  }
  if (ignore2 != nullptr && ignore1 != ignore2) {
    ++num_ignored;
  }
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    // Update global suspend all state for attaching threads.
    ++suspend_all_count_;
    if (debug_suspend) {
      ++debug_suspend_all_count_;
    }
    pending_threads.StoreRelaxed(list_.size() - num_ignored);
    // Increment everybody's suspend count (except those that should be ignored).
    for (const auto& thread : list_) {
      if (thread == ignore1 || thread == ignore2) {
        continue;
      }
      VLOG(threads) << "requesting thread suspend: " << *thread;
      bool updated = thread->ModifySuspendCount(self, +1, &pending_threads, debug_suspend);
      DCHECK(updated);

      // Must install the pending_threads counter first, then check thread->IsSuspended() and
      // clear the counter. Otherwise there's a race with
      // Thread::TransitionFromRunnableToSuspended() that can lead a thread to miss a call to
      // PassActiveSuspendBarriers().
      if (thread->IsSuspended()) {
        // Only clear the counter for the current thread.
        thread->ClearSuspendBarrier(&pending_threads);
        pending_threads.FetchAndSubSequentiallyConsistent(1);
      }
    }
  }

  // Wait for the barrier to be passed by all runnable threads. This wait
  // is done with a timeout so that we can detect problems.
#if ART_USE_FUTEXES
  timespec wait_timeout;
  InitTimeSpec(false, CLOCK_MONOTONIC, NsToMs(thread_suspend_timeout_ns_), 0, &wait_timeout);
#endif
  const uint64_t start_time = NanoTime();
  while (true) {
    int32_t cur_val = pending_threads.LoadRelaxed();
    if (LIKELY(cur_val > 0)) {
#if ART_USE_FUTEXES
      if (futex(pending_threads.Address(), FUTEX_WAIT, cur_val, &wait_timeout, nullptr, 0) != 0) {
        // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
        if ((errno != EAGAIN) && (errno != EINTR)) {
          if (errno == ETIMEDOUT) {
            LOG(kIsDebugBuild ? ::android::base::FATAL : ::android::base::ERROR)
                << "Timed out waiting for threads to suspend, waited for "
                << PrettyDuration(NanoTime() - start_time);
          } else {
            PLOG(FATAL) << "futex wait failed for SuspendAllInternal()";
          }
        }
      }  // else re-check pending_threads in the next iteration (this may be a spurious wake-up).
#else
      // Spin wait. This is likely to be slow, but on most architectures ART_USE_FUTEXES is set.
      UNUSED(start_time);
#endif
    } else {
      CHECK_EQ(cur_val, 0);
      break;
    }
  }
}

void ThreadList::ResumeAll() {
  Thread* self = Thread::Current();

  if (self != nullptr) {
    VLOG(threads) << *self << " ResumeAll starting";
  } else {
    VLOG(threads) << "Thread[null] ResumeAll starting";
  }

  ATRACE_END();

  ScopedTrace trace("Resuming mutator threads");

  if (kDebugLocking) {
    // Debug check that all threads are suspended.
    AssertThreadsAreSuspended(self, self);
  }

  long_suspend_ = false;

  Locks::mutator_lock_->ExclusiveUnlock(self);
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    // Update global suspend all state for attaching threads.
    --suspend_all_count_;
    // Decrement the suspend counts for all threads.
    for (const auto& thread : list_) {
      if (thread == self) {
        continue;
      }
      bool updated = thread->ModifySuspendCount(self, -1, nullptr, false);
      DCHECK(updated);
    }

    // Broadcast a notification to all suspended threads, some or all of
    // which may choose to wake up. No need to wait for them.
    if (self != nullptr) {
      VLOG(threads) << *self << " ResumeAll waking others";
    } else {
      VLOG(threads) << "Thread[null] ResumeAll waking others";
    }
    Thread::resume_cond_->Broadcast(self);
  }

  if (self != nullptr) {
    VLOG(threads) << *self << " ResumeAll complete";
  } else {
    VLOG(threads) << "Thread[null] ResumeAll complete";
  }
}

void ThreadList::Resume(Thread* thread, bool for_debugger) {
  // This assumes there was an ATRACE_BEGIN when we suspended the thread.
  ATRACE_END();

  Thread* self = Thread::Current();
  DCHECK_NE(thread, self);
  VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") starting..."
                << (for_debugger ? " (debugger)" : "");

  {
    // To check Contains.
    MutexLock mu(self, *Locks::thread_list_lock_);
    // To check IsSuspended.
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    DCHECK(thread->IsSuspended());
    if (!Contains(thread)) {
      // We only expect threads within the thread list to have been suspended; otherwise we can't
      // stop such threads from deleting themselves.
      LOG(ERROR) << "Resume(" << reinterpret_cast<void*>(thread)
                 << ") thread not within thread list";
      return;
    }
    bool updated = thread->ModifySuspendCount(self, -1, nullptr, for_debugger);
    DCHECK(updated);
  }

  {
    VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") waking others";
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    Thread::resume_cond_->Broadcast(self);
  }

  VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") complete";
}

static void ThreadSuspendByPeerWarning(Thread* self,
                                       LogSeverity severity,
                                       const char* message,
                                       jobject peer) {
  JNIEnvExt* env = self->GetJniEnv();
  ScopedLocalRef<jstring>
      scoped_name_string(env, static_cast<jstring>(env->GetObjectField(
          peer, WellKnownClasses::java_lang_Thread_name)));
  ScopedUtfChars scoped_name_chars(env, scoped_name_string.get());
  if (scoped_name_chars.c_str() == nullptr) {
    LOG(severity) << message << ": " << peer;
    env->ExceptionClear();
  } else {
    LOG(severity) << message << ": " << peer << ":" << scoped_name_chars.c_str();
  }
}

Thread* ThreadList::SuspendThreadByPeer(jobject peer,
                                        bool request_suspension,
                                        bool debug_suspension,
                                        bool* timed_out) {
  const uint64_t start_time = NanoTime();
  useconds_t sleep_us = kThreadSuspendInitialSleepUs;
  *timed_out = false;
  Thread* const self = Thread::Current();
  Thread* suspended_thread = nullptr;
  VLOG(threads) << "SuspendThreadByPeer starting";
  while (true) {
    Thread* thread;
    {
      // Note: this will transition to runnable and potentially suspend. We ensure only one thread
      // is requesting another suspend, to avoid deadlock, by requiring this function be called
      // holding Locks::thread_list_suspend_thread_lock_. It's important this thread suspend rather
      // than request thread suspension, to avoid potential cycles in threads requesting each other
      // suspend.
      ScopedObjectAccess soa(self);
      MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
      thread = Thread::FromManagedThread(soa, peer);
      if (thread == nullptr) {
        if (suspended_thread != nullptr) {
          MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
          // If we incremented the suspend count but the thread reset its peer, we need to
          // re-decrement it since it is shutting down and may deadlock the runtime in
          // ThreadList::WaitForOtherNonDaemonThreadsToExit.
          bool updated = suspended_thread->ModifySuspendCount(soa.Self(),
                                                              -1,
                                                              nullptr,
                                                              debug_suspension);
          DCHECK(updated);
        }
        ThreadSuspendByPeerWarning(self,
                                   ::android::base::WARNING,
                                   "No such thread for suspend",
                                   peer);
        return nullptr;
      }
      if (!Contains(thread)) {
        CHECK(suspended_thread == nullptr);
        VLOG(threads) << "SuspendThreadByPeer failed for unattached thread: "
                      << reinterpret_cast<void*>(thread);
        return nullptr;
      }
      VLOG(threads) << "SuspendThreadByPeer found thread: " << *thread;
      {
        MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
        if (request_suspension) {
          if (self->GetSuspendCount() > 0) {
            // We hold the suspend count lock but another thread is trying to suspend us. It's not
            // safe to try to suspend another thread in case we get a cycle. Start the loop again,
            // which will allow this thread to be suspended.
            continue;
          }
          CHECK(suspended_thread == nullptr);
          suspended_thread = thread;
          bool updated = suspended_thread->ModifySuspendCount(self, +1, nullptr, debug_suspension);
          DCHECK(updated);
          request_suspension = false;
        } else {
          // If the caller isn't requesting suspension, a suspension should have already occurred.
          CHECK_GT(thread->GetSuspendCount(), 0);
        }
        // IsSuspended on the current thread will fail as the current thread is changed into
        // Runnable above. As the suspend count is now raised, if this is the current thread
        // it will self-suspend on transition to Runnable, making it hard to work with. It's
        // simpler to just explicitly handle the current thread in the callers to this code.
        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
        // count, or else we've waited and it has self-suspended) or is the current thread, we're
        // done.
        if (thread->IsSuspended()) {
          VLOG(threads) << "SuspendThreadByPeer thread suspended: " << *thread;
          if (ATRACE_ENABLED()) {
            std::string name;
            thread->GetThreadName(name);
            ATRACE_BEGIN(StringPrintf("SuspendThreadByPeer suspended %s for peer=%p", name.c_str(),
                                      peer).c_str());
          }
          return thread;
        }
        const uint64_t total_delay = NanoTime() - start_time;
        if (total_delay >= thread_suspend_timeout_ns_) {
          ThreadSuspendByPeerWarning(self,
                                     ::android::base::FATAL,
                                     "Thread suspension timed out",
                                     peer);
          if (suspended_thread != nullptr) {
            CHECK_EQ(suspended_thread, thread);
            bool updated = suspended_thread->ModifySuspendCount(soa.Self(),
                                                                -1,
                                                                nullptr,
                                                                debug_suspension);
            DCHECK(updated);
          }
          *timed_out = true;
          return nullptr;
        } else if (sleep_us == 0 &&
            total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
          // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent
          // excessive CPU usage.
          sleep_us = kThreadSuspendMaxYieldUs / 2;
        }
      }
      // Release locks and come out of runnable state.
    }
    VLOG(threads) << "SuspendThreadByPeer waiting to allow thread chance to suspend";
    ThreadSuspendSleep(sleep_us);
    // This may stay at 0 if sleep_us == 0, but this is WAI since we want to avoid using usleep
    // at all if possible. This shouldn't be an issue since time to suspend should always be small.
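    // Exponential back-off, capped at kThreadSuspendMaxSleepUs so the target is still polled
    // every few milliseconds.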
    sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs);
  }
}

static void ThreadSuspendByThreadIdWarning(LogSeverity severity,
                                           const char* message,
                                           uint32_t thread_id) {
  LOG(severity) << StringPrintf("%s: %d", message, thread_id);
}

Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id,
                                            bool debug_suspension,
                                            bool* timed_out) {
  const uint64_t start_time = NanoTime();
  useconds_t sleep_us = kThreadSuspendInitialSleepUs;
  *timed_out = false;
  Thread* suspended_thread = nullptr;
  Thread* const self = Thread::Current();
  CHECK_NE(thread_id, kInvalidThreadId);
  VLOG(threads) << "SuspendThreadByThreadId starting";
  while (true) {
    {
      // Note: this will transition to runnable and potentially suspend. We ensure only one thread
      // is requesting another suspend, to avoid deadlock, by requiring this function be called
      // holding Locks::thread_list_suspend_thread_lock_. It's important this thread suspend rather
      // than request thread suspension, to avoid potential cycles in threads requesting each other
      // suspend.
      ScopedObjectAccess soa(self);
      MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
      Thread* thread = nullptr;
      for (const auto& it : list_) {
        if (it->GetThreadId() == thread_id) {
          thread = it;
          break;
        }
      }
      if (thread == nullptr) {
        CHECK(suspended_thread == nullptr) << "Suspended thread " << suspended_thread
                                           << " no longer in thread list";
        // There's a race in inflating a lock and the owner giving up ownership and then dying.
        ThreadSuspendByThreadIdWarning(::android::base::WARNING,
                                       "No such thread id for suspend",
                                       thread_id);
        return nullptr;
      }
      VLOG(threads) << "SuspendThreadByThreadId found thread: " << *thread;
      DCHECK(Contains(thread));
      {
        MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
        if (suspended_thread == nullptr) {
          if (self->GetSuspendCount() > 0) {
            // We hold the suspend count lock but another thread is trying to suspend us. It's not
            // safe to try to suspend another thread in case we get a cycle. Start the loop again,
            // which will allow this thread to be suspended.
            continue;
          }
          bool updated = thread->ModifySuspendCount(self, +1, nullptr, debug_suspension);
          DCHECK(updated);
          suspended_thread = thread;
        } else {
          CHECK_EQ(suspended_thread, thread);
          // If the caller isn't requesting suspension, a suspension should have already occurred.
          CHECK_GT(thread->GetSuspendCount(), 0);
        }
        // IsSuspended on the current thread will fail as the current thread is changed into
        // Runnable above. As the suspend count is now raised, if this is the current thread
        // it will self-suspend on transition to Runnable, making it hard to work with. It's
        // simpler to just explicitly handle the current thread in the callers to this code.
        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
        // count, or else we've waited and it has self-suspended) or is the current thread, we're
        // done.
        if (thread->IsSuspended()) {
          if (ATRACE_ENABLED()) {
            std::string name;
            thread->GetThreadName(name);
            ATRACE_BEGIN(StringPrintf("SuspendThreadByThreadId suspended %s id=%d",
                                      name.c_str(), thread_id).c_str());
          }
          VLOG(threads) << "SuspendThreadByThreadId thread suspended: " << *thread;
          return thread;
        }
        const uint64_t total_delay = NanoTime() - start_time;
        if (total_delay >= thread_suspend_timeout_ns_) {
          ThreadSuspendByThreadIdWarning(::android::base::WARNING,
                                         "Thread suspension timed out",
                                         thread_id);
          if (suspended_thread != nullptr) {
            bool updated = thread->ModifySuspendCount(soa.Self(), -1, nullptr, debug_suspension);
            DCHECK(updated);
          }
          *timed_out = true;
          return nullptr;
        } else if (sleep_us == 0 &&
            total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
          // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent
          // excessive CPU usage.
          sleep_us = kThreadSuspendMaxYieldUs / 2;
        }
      }
      // Release locks and come out of runnable state.
    }
    VLOG(threads) << "SuspendThreadByThreadId waiting to allow thread chance to suspend";
    ThreadSuspendSleep(sleep_us);
    sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs);
  }
}

Thread* ThreadList::FindThreadByThreadId(uint32_t thread_id) {
  for (const auto& thread : list_) {
    if (thread->GetThreadId() == thread_id) {
      return thread;
    }
  }
  return nullptr;
}

void ThreadList::SuspendAllForDebugger() {
  Thread* self = Thread::Current();
  Thread* debug_thread = Dbg::GetDebugThread();

  VLOG(threads) << *self << " SuspendAllForDebugger starting...";

  SuspendAllInternal(self, self, debug_thread, true);
  // Block on the mutator lock until all Runnable threads release their share of access then
  // immediately unlock again.
#if HAVE_TIMED_RWLOCK
  // Timeout if we wait more than 30 seconds.
  if (!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, 30 * 1000, 0)) {
    UnsafeLogFatalForThreadSuspendAllTimeout();
  } else {
    Locks::mutator_lock_->ExclusiveUnlock(self);
  }
#else
  Locks::mutator_lock_->ExclusiveLock(self);
  Locks::mutator_lock_->ExclusiveUnlock(self);
#endif
  // Disabled for the following race condition:
  // Thread 1 calls SuspendAllForDebugger, gets preempted after pulsing the mutator lock.
  // Thread 2 calls SuspendAll and SetStateUnsafe (perhaps from Dbg::Disconnected).
  // Thread 1 fails assertion that all threads are suspended due to thread 2 being in a runnable
  // state (from SetStateUnsafe).
  // AssertThreadsAreSuspended(self, self, debug_thread);

  VLOG(threads) << *self << " SuspendAllForDebugger complete";
}

void ThreadList::SuspendSelfForDebugger() {
  Thread* const self = Thread::Current();
  self->SetReadyForDebugInvoke(true);

  // The debugger thread must not suspend itself due to debugger activity!
  Thread* debug_thread = Dbg::GetDebugThread();
  CHECK(self != debug_thread);
  CHECK_NE(self->GetState(), kRunnable);
  Locks::mutator_lock_->AssertNotHeld(self);

  // The debugger may have detached while we were executing an invoke request. In that case, we
  // must not suspend ourselves.
  DebugInvokeReq* pReq = self->GetInvokeReq();
  const bool skip_thread_suspension = (pReq != nullptr && !Dbg::IsDebuggerActive());
  if (!skip_thread_suspension) {
    // Collisions with other suspends aren't really interesting. We want
    // to ensure that we're the only one fiddling with the suspend count
    // though.
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    bool updated = self->ModifySuspendCount(self, +1, nullptr, true);
    DCHECK(updated);
    CHECK_GT(self->GetSuspendCount(), 0);

    VLOG(threads) << *self << " self-suspending (debugger)";
  } else {
    // We must no longer be subject to debugger suspension.
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    CHECK_EQ(self->GetDebugSuspendCount(), 0) << "Debugger detached without resuming us";

    VLOG(threads) << *self << " not self-suspending because debugger detached during invoke";
  }

  // If the debugger requested an invoke, we need to send the reply and clear the request.
  if (pReq != nullptr) {
    Dbg::FinishInvokeMethod(pReq);
    self->ClearDebugInvokeReq();
    pReq = nullptr;  // object has been deleted, clear it for safety.
  }

  // Tell JDWP that we've completed suspension. The JDWP thread can't
  // tell us to resume before we're fully asleep because we hold the
  // suspend count lock.
  Dbg::ClearWaitForEventThread();

  {
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    while (self->GetSuspendCount() != 0) {
      Thread::resume_cond_->Wait(self);
      if (self->GetSuspendCount() != 0) {
        // The condition was signaled but we're still suspended. This
        // can happen when we suspend then resume all threads to
        // update instrumentation or compute monitor info. This can
        // also happen if the debugger lets go while a SIGQUIT thread
        // dump event is pending (assuming SignalCatcher was resumed for
        // just long enough to try to grab the thread-suspend lock).
        VLOG(jdwp) << *self << " still suspended after undo "
                   << "(suspend count=" << self->GetSuspendCount() << ", "
                   << "debug suspend count=" << self->GetDebugSuspendCount() << ")";
      }
    }
    CHECK_EQ(self->GetSuspendCount(), 0);
  }

  self->SetReadyForDebugInvoke(false);
  VLOG(threads) << *self << " self-reviving (debugger)";
}

void ThreadList::ResumeAllForDebugger() {
  Thread* self = Thread::Current();
  Thread* debug_thread = Dbg::GetDebugThread();

  VLOG(threads) << *self << " ResumeAllForDebugger starting...";

  // Threads can't resume if we exclusively hold the mutator lock.
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);

  {
    MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
    {
      MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
      // Update global suspend all state for attaching threads.
      DCHECK_GE(suspend_all_count_, debug_suspend_all_count_);
      if (debug_suspend_all_count_ > 0) {
        --suspend_all_count_;
        --debug_suspend_all_count_;
      } else {
        // We've been asked to resume all threads without being asked to
        // suspend them all before. That may happen if a debugger tries
        // to resume some suspended threads (with suspend count == 1)
        // at once with a VirtualMachine.Resume command. Let's print a
        // warning.
        LOG(WARNING) << "Debugger attempted to resume all threads without "
                     << "having suspended them all before.";
      }
      // Decrement everybody's suspend count (except our own).
      for (const auto& thread : list_) {
        if (thread == self || thread == debug_thread) {
          continue;
        }
        if (thread->GetDebugSuspendCount() == 0) {
          // This thread may have been individually resumed with ThreadReference.Resume.
          continue;
        }
        VLOG(threads) << "requesting thread resume: " << *thread;
        bool updated = thread->ModifySuspendCount(self, -1, nullptr, true);
        DCHECK(updated);
      }
    }
  }

  {
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    Thread::resume_cond_->Broadcast(self);
  }

  VLOG(threads) << *self << " ResumeAllForDebugger complete";
}

void ThreadList::UndoDebuggerSuspensions() {
  Thread* self = Thread::Current();

  VLOG(threads) << *self << " UndoDebuggerSuspensions starting";

  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    // Update global suspend all state for attaching threads.
    suspend_all_count_ -= debug_suspend_all_count_;
    debug_suspend_all_count_ = 0;
    // Update running threads.
    for (const auto& thread : list_) {
      if (thread == self || thread->GetDebugSuspendCount() == 0) {
        continue;
      }
      bool suspended = thread->ModifySuspendCount(self,
                                                  -thread->GetDebugSuspendCount(),
                                                  nullptr,
                                                  true);
      DCHECK(suspended);
    }
  }

  {
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    Thread::resume_cond_->Broadcast(self);
  }

  VLOG(threads) << "UndoDebuggerSuspensions(" << *self << ") complete";
}

void ThreadList::WaitForOtherNonDaemonThreadsToExit() {
  ScopedTrace trace(__PRETTY_FUNCTION__);
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotHeld(self);
  while (true) {
    {
      // No more threads can be born after we start to shut down.
      MutexLock mu(self, *Locks::runtime_shutdown_lock_);
      CHECK(Runtime::Current()->IsShuttingDownLocked());
      CHECK_EQ(Runtime::Current()->NumberOfThreadsBeingBorn(), 0U);
    }
    MutexLock mu(self, *Locks::thread_list_lock_);
    // Also wait for any threads that are unregistering to finish. This is required so that no
    // threads access the thread list after it is deleted. TODO: This may not work for user daemon
    // threads since they could unregister at the wrong time.
    bool done = unregistering_count_ == 0;
    if (done) {
      for (const auto& thread : list_) {
        if (thread != self && !thread->IsDaemon()) {
          done = false;
          break;
        }
      }
    }
    if (done) {
      break;
    }
    // Wait for another thread to exit before re-checking.
    Locks::thread_exit_cond_->Wait(self);
  }
}

void ThreadList::SuspendAllDaemonThreadsForShutdown() {
  ScopedTrace trace(__PRETTY_FUNCTION__);
  Thread* self = Thread::Current();
  size_t daemons_left = 0;
  {
    // Tell all the daemons it's time to suspend.
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (const auto& thread : list_) {
      // This is only run after all non-daemon threads have exited, so the remainder should all be
      // daemons.
      CHECK(thread->IsDaemon()) << *thread;
      if (thread != self) {
        bool updated = thread->ModifySuspendCount(self, +1, nullptr, false);
        DCHECK(updated);
        ++daemons_left;
      }
      // We are shutting down the runtime; set the JNI functions of all the JNIEnvs to be
      // the sleep-forever ones.
      thread->GetJniEnv()->SetFunctionsToRuntimeShutdownFunctions();
    }
  }
  // If we have any daemons left, wait 200ms to ensure they are not stuck in a place where they
  // are about to access runtime state and are not in a runnable state. Examples: Monitor code
  // or waking up from a condition variable. TODO: Try and see if there is a better way to wait
  // for daemon threads to be in a blocked state.
  if (daemons_left > 0) {
    static constexpr size_t kDaemonSleepTime = 200 * 1000;
    usleep(kDaemonSleepTime);
  }
  // Give the threads a chance to suspend, complaining if they're slow.
  bool have_complained = false;
  static constexpr size_t kTimeoutMicroseconds = 2000 * 1000;
  static constexpr size_t kSleepMicroseconds = 1000;
  for (size_t i = 0; i < kTimeoutMicroseconds / kSleepMicroseconds; ++i) {
    bool all_suspended = true;
    {
      MutexLock mu(self, *Locks::thread_list_lock_);
      for (const auto& thread : list_) {
        if (thread != self && thread->GetState() == kRunnable) {
          if (!have_complained) {
            LOG(WARNING) << "daemon thread not yet suspended: " << *thread;
            have_complained = true;
          }
          all_suspended = false;
        }
      }
    }
    if (all_suspended) {
      return;
    }
    usleep(kSleepMicroseconds);
  }
  LOG(WARNING) << "timed out suspending all daemon threads";
}

void ThreadList::Register(Thread* self) {
  DCHECK_EQ(self, Thread::Current());

  if (VLOG_IS_ON(threads)) {
    std::ostringstream oss;
    self->ShortDump(oss);  // We don't hold the mutator_lock_ yet and so cannot call Dump.
    LOG(INFO) << "ThreadList::Register() " << *self << "\n" << oss.str();
  }

  // Atomically add self to the thread list and make its thread_suspend_count_ reflect ongoing
  // SuspendAll requests.
  MutexLock mu(self, *Locks::thread_list_lock_);
  MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
  CHECK_GE(suspend_all_count_, debug_suspend_all_count_);
  // Modify suspend count in increments of 1 to maintain invariants in ModifySuspendCount. While
  // this isn't particularly efficient, the suspend counts are most commonly 0 or 1.
  for (int delta = debug_suspend_all_count_; delta > 0; delta--) {
    bool updated = self->ModifySuspendCount(self, +1, nullptr, true);
    DCHECK(updated);
  }
  for (int delta = suspend_all_count_ - debug_suspend_all_count_; delta > 0; delta--) {
    bool updated = self->ModifySuspendCount(self, +1, nullptr, false);
    DCHECK(updated);
  }
  CHECK(!Contains(self));
  list_.push_back(self);
  if (kUseReadBarrier) {
    // Initialize according to the state of the CC collector.
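    // A thread attaching mid-GC must observe the collector's current marking phase so that its
    // read barrier entrypoints agree with those of the threads flipped at the pause.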
    bool is_gc_marking =
        Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsMarking();
    self->SetIsGcMarkingAndUpdateEntrypoints(is_gc_marking);
    bool weak_ref_access_enabled =
        Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsWeakRefAccessEnabled();
    self->SetWeakRefAccessEnabled(weak_ref_access_enabled);
  }
}

void ThreadList::Unregister(Thread* self) {
  DCHECK_EQ(self, Thread::Current());
  CHECK_NE(self->GetState(), kRunnable);
  Locks::mutator_lock_->AssertNotHeld(self);

  VLOG(threads) << "ThreadList::Unregister() " << *self;

  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    ++unregistering_count_;
  }

  // Any time-consuming destruction, plus anything that can call back into managed code or
  // suspend and so on, must happen at this point, and not in ~Thread. The self->Destroy is what
  // causes the threads to join. It is important to do this after incrementing unregistering_count_
  // since we want the runtime to wait for the daemon threads to exit before deleting the thread
  // list.
  self->Destroy();

  // If tracing, remember thread id and name before thread exits.
  Trace::StoreExitingThreadInfo(self);

  uint32_t thin_lock_id = self->GetThreadId();
  while (true) {
    // Remove and delete the Thread* while holding the thread_list_lock_ and
    // thread_suspend_count_lock_ so that the unregistering thread cannot be suspended.
    // Note: deliberately not using MutexLock that could hold a stale self pointer.
    MutexLock mu(self, *Locks::thread_list_lock_);
    if (!Contains(self)) {
      std::string thread_name;
      self->GetThreadName(thread_name);
      std::ostringstream os;
      DumpNativeStack(os, GetTid(), nullptr, "  native: ", nullptr);
      LOG(ERROR) << "Request to unregister unattached thread " << thread_name << "\n" << os.str();
      break;
    } else {
      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
      if (!self->IsSuspended()) {
        list_.remove(self);
        break;
      }
    }
    // We failed to remove the thread due to a suspend request, loop and try again.
  }
  delete self;

  // Release the thread ID after the thread is finished and deleted to avoid cases where we can
  // temporarily have multiple threads with the same thread id. When this occurs, it causes
  // problems in FindThreadByThreadId / SuspendThreadByThreadId.
  ReleaseThreadId(nullptr, thin_lock_id);

  // Clear the TLS data, so that the underlying native thread is recognizably detached.
  // (It may wish to reattach later.)
#ifdef ART_TARGET_ANDROID
  __get_tls()[TLS_SLOT_ART_THREAD_SELF] = nullptr;
#else
  CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, nullptr), "detach self");
#endif

  // Signal that a thread just detached.
  MutexLock mu(nullptr, *Locks::thread_list_lock_);
  --unregistering_count_;
  Locks::thread_exit_cond_->Broadcast(nullptr);
}

void ThreadList::ForEach(void (*callback)(Thread*, void*), void* context) {
  for (const auto& thread : list_) {
    callback(thread, context);
  }
}

void ThreadList::VisitRootsForSuspendedThreads(RootVisitor* visitor) {
  Thread* const self = Thread::Current();
  std::vector<Thread*> threads_to_visit;

  // Tell threads to suspend and copy them into list.
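  // Only threads observed to be already suspended (or self) get visited; a still-runnable thread
  // has its suspend count dropped again and is skipped rather than waited for.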
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (Thread* thread : list_) {
      bool suspended = thread->ModifySuspendCount(self, +1, nullptr, false);
      DCHECK(suspended);
      if (thread == self || thread->IsSuspended()) {
        threads_to_visit.push_back(thread);
      } else {
        bool resumed = thread->ModifySuspendCount(self, -1, nullptr, false);
        DCHECK(resumed);
      }
    }
  }

  // Visit roots without holding thread_list_lock_ and thread_suspend_count_lock_ to prevent lock
  // order violations.
  for (Thread* thread : threads_to_visit) {
    thread->VisitRoots(visitor);
  }

  // Restore suspend counts.
  {
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (Thread* thread : threads_to_visit) {
      bool updated = thread->ModifySuspendCount(self, -1, nullptr, false);
      DCHECK(updated);
    }
  }
}

void ThreadList::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) const {
  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
  for (const auto& thread : list_) {
    thread->VisitRoots(visitor, flags);
  }
}

uint32_t ThreadList::AllocThreadId(Thread* self) {
  MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
  for (size_t i = 0; i < allocated_ids_.size(); ++i) {
    if (!allocated_ids_[i]) {
      allocated_ids_.set(i);
      return i + 1;  // Zero is reserved to mean "invalid".
    }
  }
  LOG(FATAL) << "Out of internal thread ids";
  return 0;
}

void ThreadList::ReleaseThreadId(Thread* self, uint32_t id) {
  MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
  --id;  // Zero is reserved to mean "invalid".
  DCHECK(allocated_ids_[id]) << id;
  allocated_ids_.reset(id);
}

ScopedSuspendAll::ScopedSuspendAll(const char* cause, bool long_suspend) {
  Runtime::Current()->GetThreadList()->SuspendAll(cause, long_suspend);
}

ScopedSuspendAll::~ScopedSuspendAll() {
  Runtime::Current()->GetThreadList()->ResumeAll();
}

}  // namespace art