/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "thread_list.h"

#include <dirent.h>
#include <sys/types.h>
#include <unistd.h>

#include <sstream>
#include <vector>

#include "android-base/stringprintf.h"
#include "backtrace/BacktraceMap.h"
#include "nativehelper/scoped_local_ref.h"
#include "nativehelper/scoped_utf_chars.h"

#include "base/aborting.h"
#include "base/histogram-inl.h"
#include "base/mutex-inl.h"
#include "base/systrace.h"
#include "base/time_utils.h"
#include "base/timing_logger.h"
#include "debugger.h"
#include "gc/collector/concurrent_copying.h"
#include "gc/gc_pause_listener.h"
#include "gc/heap.h"
#include "gc/reference_processor.h"
#include "gc_root.h"
#include "jni_internal.h"
#include "lock_word.h"
#include "monitor.h"
#include "native_stack_dump.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "trace.h"
#include "well_known_classes.h"

#if ART_USE_FUTEXES
#include "linux/futex.h"
#include "sys/syscall.h"
#ifndef SYS_futex
#define SYS_futex __NR_futex
#endif
#endif  // ART_USE_FUTEXES

namespace art {

using android::base::StringPrintf;

static constexpr uint64_t kLongThreadSuspendThreshold = MsToNs(5);
// Use 0 since we want to yield to prevent blocking for an unpredictable amount of time.
static constexpr useconds_t kThreadSuspendInitialSleepUs = 0;
static constexpr useconds_t kThreadSuspendMaxYieldUs = 3000;
static constexpr useconds_t kThreadSuspendMaxSleepUs = 5000;

// Whether we should try to dump the native stack of unattached threads. See commit ed8b723 for
// some history.
static constexpr bool kDumpUnattachedThreadNativeStackForSigQuit = true;

ThreadList::ThreadList(uint64_t thread_suspend_timeout_ns)
    : suspend_all_count_(0),
      debug_suspend_all_count_(0),
      unregistering_count_(0),
      suspend_all_historam_("suspend all histogram", 16, 64),
      long_suspend_(false),
      shut_down_(false),
      thread_suspend_timeout_ns_(thread_suspend_timeout_ns),
      empty_checkpoint_barrier_(new Barrier(0)) {
  CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1, 0U)));
}

ThreadList::~ThreadList() {
  CHECK(shut_down_);
}

void ThreadList::ShutDown() {
  ScopedTrace trace(__PRETTY_FUNCTION__);
  // Detach the current thread if necessary. If we failed to start, there might not be any threads.
  // We need to detach the current thread here in case there's another thread waiting to join with
  // us.
  bool contains = false;
  Thread* self = Thread::Current();
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    contains = Contains(self);
  }
  if (contains) {
    Runtime::Current()->DetachCurrentThread();
  }
  WaitForOtherNonDaemonThreadsToExit();
  // Disable GC and wait for GC to complete in case there are still daemon threads doing
  // allocations.
  gc::Heap* const heap = Runtime::Current()->GetHeap();
  heap->DisableGCForShutdown();
  // In case a GC is in progress, wait for it to finish.
  heap->WaitForGcToComplete(gc::kGcCauseBackground, Thread::Current());
  // TODO: there's an unaddressed race here where a thread may attach during shutdown, see
  //       Thread::Init.
  SuspendAllDaemonThreadsForShutdown();

  shut_down_ = true;
}

bool ThreadList::Contains(Thread* thread) {
  return find(list_.begin(), list_.end(), thread) != list_.end();
}

bool ThreadList::Contains(pid_t tid) {
  for (const auto& thread : list_) {
    if (thread->GetTid() == tid) {
      return true;
    }
  }
  return false;
}

pid_t ThreadList::GetLockOwner() {
  return Locks::thread_list_lock_->GetExclusiveOwnerTid();
}

void ThreadList::DumpNativeStacks(std::ostream& os) {
  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
  std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid()));
  for (const auto& thread : list_) {
    os << "DUMPING THREAD " << thread->GetTid() << "\n";
    DumpNativeStack(os, thread->GetTid(), map.get(), "\t");
    os << "\n";
  }
}

void ThreadList::DumpForSigQuit(std::ostream& os) {
  {
    ScopedObjectAccess soa(Thread::Current());
    // Only print if we have samples.
    if (suspend_all_historam_.SampleSize() > 0) {
      Histogram<uint64_t>::CumulativeData data;
      suspend_all_historam_.CreateHistogram(&data);
      suspend_all_historam_.PrintConfidenceIntervals(os, 0.99, data);  // Dump time to suspend.
    }
  }
  bool dump_native_stack = Runtime::Current()->GetDumpNativeStackOnSigQuit();
  Dump(os, dump_native_stack);
  DumpUnattachedThreads(os, dump_native_stack && kDumpUnattachedThreadNativeStackForSigQuit);
}

static void DumpUnattachedThread(std::ostream& os, pid_t tid, bool dump_native_stack)
    NO_THREAD_SAFETY_ANALYSIS {
  // TODO: No thread safety analysis as DumpState with a null thread won't access fields, should
  // refactor DumpState to avoid skipping analysis.
  Thread::DumpState(os, nullptr, tid);
  DumpKernelStack(os, tid, " kernel: ", false);
  if (dump_native_stack) {
    DumpNativeStack(os, tid, nullptr, " native: ");
  }
  os << std::endl;
}

void ThreadList::DumpUnattachedThreads(std::ostream& os, bool dump_native_stack) {
  DIR* d = opendir("/proc/self/task");
  if (!d) {
    return;
  }

  Thread* self = Thread::Current();
  dirent* e;
  while ((e = readdir(d)) != nullptr) {
    char* end;
    pid_t tid = strtol(e->d_name, &end, 10);
    if (!*end) {
      bool contains;
      {
        MutexLock mu(self, *Locks::thread_list_lock_);
        contains = Contains(tid);
      }
      if (!contains) {
        DumpUnattachedThread(os, tid, dump_native_stack);
      }
    }
  }
  closedir(d);
}

// Dump checkpoint timeout in milliseconds. Larger amount on the target, since the device could be
// overloaded with ANR dumps.
static constexpr uint32_t kDumpWaitTimeout = kIsTargetBuild ? 100000 : 20000;

// A closure used by Thread::Dump.
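// Each thread dumps itself into a thread-local buffer and then appends that buffer to the shared
// stream under the logging lock; the requesting thread waits on the barrier below until every
// requested checkpoint has run (or the wait times out).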
class DumpCheckpoint FINAL : public Closure {
 public:
  DumpCheckpoint(std::ostream* os, bool dump_native_stack)
      : os_(os),
        barrier_(0),
        backtrace_map_(dump_native_stack ? BacktraceMap::Create(getpid()) : nullptr),
        dump_native_stack_(dump_native_stack) {
    if (backtrace_map_ != nullptr) {
      backtrace_map_->SetSuffixesToIgnore(std::vector<std::string> { "oat", "odex" });
    }
  }

  void Run(Thread* thread) OVERRIDE {
    // Note thread and self may not be equal if thread was already suspended at the point of the
    // request.
    Thread* self = Thread::Current();
    CHECK(self != nullptr);
    std::ostringstream local_os;
    {
      ScopedObjectAccess soa(self);
      thread->Dump(local_os, dump_native_stack_, backtrace_map_.get());
    }
    {
      // Use the logging lock to ensure serialization when writing to the common ostream.
      MutexLock mu(self, *Locks::logging_lock_);
      *os_ << local_os.str() << std::endl;
    }
    barrier_.Pass(self);
  }

  void WaitForThreadsToRunThroughCheckpoint(size_t threads_running_checkpoint) {
    Thread* self = Thread::Current();
    ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
    bool timed_out = barrier_.Increment(self, threads_running_checkpoint, kDumpWaitTimeout);
    if (timed_out) {
      // Avoid a recursive abort.
      LOG((kIsDebugBuild && (gAborting == 0)) ? ::android::base::FATAL : ::android::base::ERROR)
          << "Unexpected time out during dump checkpoint.";
    }
  }

 private:
  // The common stream that will accumulate all the dumps.
  std::ostream* const os_;
  // The barrier to be passed through and for the requestor to wait upon.
  Barrier barrier_;
  // A backtrace map, so that all threads use shared map info and don't reacquire/parse it
  // separately.
  std::unique_ptr<BacktraceMap> backtrace_map_;
  // Whether we should dump the native stack.
  const bool dump_native_stack_;
};

void ThreadList::Dump(std::ostream& os, bool dump_native_stack) {
  Thread* self = Thread::Current();
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    os << "DALVIK THREADS (" << list_.size() << "):\n";
  }
  if (self != nullptr) {
    DumpCheckpoint checkpoint(&os, dump_native_stack);
    size_t threads_running_checkpoint;
    {
      // Use SOA to prevent deadlocks if multiple threads are calling Dump() at the same time.
      ScopedObjectAccess soa(self);
      threads_running_checkpoint = RunCheckpoint(&checkpoint);
    }
    if (threads_running_checkpoint != 0) {
      checkpoint.WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint);
    }
  } else {
    DumpUnattachedThreads(os, dump_native_stack);
  }
}

void ThreadList::AssertThreadsAreSuspended(Thread* self, Thread* ignore1, Thread* ignore2) {
  MutexLock mu(self, *Locks::thread_list_lock_);
  MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
  for (const auto& thread : list_) {
    if (thread != ignore1 && thread != ignore2) {
      CHECK(thread->IsSuspended())
          << "\nUnsuspended thread: <<" << *thread << "\n"
          << "self: <<" << *Thread::Current();
    }
  }
}

#if HAVE_TIMED_RWLOCK
// Attempt to rectify locks so that we dump thread list with required locks before exiting.
NO_RETURN static void UnsafeLogFatalForThreadSuspendAllTimeout() {
  // Increment gAborting before doing the thread list dump since we don't want any failures from
  // AssertThreadSuspensionIsAllowable in cases where thread suspension is not allowed.
  // See b/69044468.
  ++gAborting;
  Runtime* runtime = Runtime::Current();
  std::ostringstream ss;
  ss << "Thread suspend timeout\n";
  Locks::mutator_lock_->Dump(ss);
  ss << "\n";
  runtime->GetThreadList()->Dump(ss);
  --gAborting;
  LOG(FATAL) << ss.str();
  exit(0);
}
#endif

// Unlike suspending all threads where we can wait to acquire the mutator_lock_, suspending an
// individual thread requires polling. delay_us is the requested sleep wait. If delay_us is 0 then
// we use sched_yield instead of calling usleep.
// Although there is the possibility, here and elsewhere, that usleep could return -1 and
// errno = EINTR, there should be no problem if interrupted, so we do not check.
static void ThreadSuspendSleep(useconds_t delay_us) {
  if (delay_us == 0) {
    sched_yield();
  } else {
    usleep(delay_us);
  }
}

size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback) {
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);

  std::vector<Thread*> suspended_count_modified_threads;
  size_t count = 0;
  {
    // Call a checkpoint function for each thread; threads which are suspended get their checkpoint
    // manually called.
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    count = list_.size();
    for (const auto& thread : list_) {
      if (thread != self) {
        while (true) {
          if (thread->RequestCheckpoint(checkpoint_function)) {
            // This thread will run its checkpoint some time in the near future.
            break;
          } else {
            // We are probably suspended, try to make sure that we stay suspended.
            // The thread switched back to runnable.
            if (thread->GetState() == kRunnable) {
              // Spurious fail, try again.
              continue;
            }
            bool updated = thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
            DCHECK(updated);
            suspended_count_modified_threads.push_back(thread);
            break;
          }
        }
      }
    }
    // Run the callback to be called inside this critical section.
    if (callback != nullptr) {
      callback->Run(self);
    }
  }

  // Run the checkpoint on ourself while we wait for threads to suspend.
  checkpoint_function->Run(self);

  // Run the checkpoint on the suspended threads.
  for (const auto& thread : suspended_count_modified_threads) {
    if (!thread->IsSuspended()) {
      ScopedTrace trace([&]() {
        std::ostringstream oss;
        thread->ShortDump(oss);
        return std::string("Waiting for suspension of thread ") + oss.str();
      });
      // Busy wait until the thread is suspended.
      const uint64_t start_time = NanoTime();
      do {
        ThreadSuspendSleep(kThreadSuspendInitialSleepUs);
      } while (!thread->IsSuspended());
      const uint64_t total_delay = NanoTime() - start_time;
      // Shouldn't need to wait for longer than 1000 microseconds.
      constexpr uint64_t kLongWaitThreshold = MsToNs(1);
      if (UNLIKELY(total_delay > kLongWaitThreshold)) {
        LOG(WARNING) << "Long wait of " << PrettyDuration(total_delay) << " for "
                     << *thread << " suspension!";
      }
    }
    // We know for sure that the thread is suspended at this point.
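    // Run the checkpoint on the suspended thread's behalf; the temporary suspend count increment
    // from above is removed again immediately below.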
    checkpoint_function->Run(thread);
    {
      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
      bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
      DCHECK(updated);
    }
  }

  {
    // Imitate ResumeAll, threads may be waiting on Thread::resume_cond_ since we raised their
    // suspend count. Now the suspend_count_ is lowered so we must do the broadcast.
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    Thread::resume_cond_->Broadcast(self);
  }

  return count;
}

void ThreadList::RunEmptyCheckpoint() {
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
  std::vector<uint32_t> runnable_thread_ids;
  size_t count = 0;
  Barrier* barrier = empty_checkpoint_barrier_.get();
  barrier->Init(self, 0);
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (Thread* thread : list_) {
      if (thread != self) {
        while (true) {
          if (thread->RequestEmptyCheckpoint()) {
            // This thread will run an empty checkpoint (decrement the empty checkpoint barrier)
            // some time in the near future.
            ++count;
            if (kIsDebugBuild) {
              runnable_thread_ids.push_back(thread->GetThreadId());
            }
            break;
          }
          if (thread->GetState() != kRunnable) {
            // It's seen suspended, we are done because it must not be in the middle of a mutator
            // heap access.
            break;
          }
        }
      }
    }
  }

  // Wake up the threads blocking for weak ref access so that they will respond to the empty
  // checkpoint request. Otherwise we will hang as they are blocking in the kRunnable state.
  Runtime::Current()->GetHeap()->GetReferenceProcessor()->BroadcastForSlowPath(self);
  Runtime::Current()->BroadcastForNewSystemWeaks(/*broadcast_for_checkpoint*/true);
  {
    ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
    uint64_t total_wait_time = 0;
    bool first_iter = true;
    while (true) {
      // Wake up the runnable threads blocked on the mutexes that another thread, which is blocked
      // on a weak ref access, holds (indirectly blocking for weak ref access through another
      // thread and a mutex.) This needs to be done periodically because the thread may be
      // preempted between the CheckEmptyCheckpointFromMutex call and the subsequent futex wait in
      // Mutex::ExclusiveLock, etc. when the wakeup via WakeupToRespondToEmptyCheckpoint
      // arrives. This could cause a *very rare* deadlock, if not repeated. Most of the cases are
      // handled in the first iteration.
      for (BaseMutex* mutex : Locks::expected_mutexes_on_weak_ref_access_) {
        mutex->WakeupToRespondToEmptyCheckpoint();
      }
      static constexpr uint64_t kEmptyCheckpointPeriodicTimeoutMs = 100;  // 100ms
      static constexpr uint64_t kEmptyCheckpointTotalTimeoutMs = 600 * 1000;  // 10 minutes.
      size_t barrier_count = first_iter ? count : 0;
      first_iter = false;  // Don't add to the barrier count from the second iteration on.
      bool timed_out = barrier->Increment(self, barrier_count, kEmptyCheckpointPeriodicTimeoutMs);
      if (!timed_out) {
        break;  // Success
      }
      // This is a very rare case.
      total_wait_time += kEmptyCheckpointPeriodicTimeoutMs;
      if (kIsDebugBuild && total_wait_time > kEmptyCheckpointTotalTimeoutMs) {
        std::ostringstream ss;
        ss << "Empty checkpoint timeout\n";
        ss << "Barrier count " << barrier->GetCount(self) << "\n";
        ss << "Runnable thread IDs";
        for (uint32_t tid : runnable_thread_ids) {
          ss << " " << tid;
        }
        ss << "\n";
        Locks::mutator_lock_->Dump(ss);
        ss << "\n";
        LOG(FATAL_WITHOUT_ABORT) << ss.str();
        // Some threads in 'runnable_thread_ids' are probably stuck. Try to dump their stacks.
        // Avoid using ThreadList::Dump() initially because it is likely to get stuck as well.
        {
          ScopedObjectAccess soa(self);
          MutexLock mu1(self, *Locks::thread_list_lock_);
          for (Thread* thread : GetList()) {
            uint32_t tid = thread->GetThreadId();
            bool is_in_runnable_thread_ids =
                std::find(runnable_thread_ids.begin(), runnable_thread_ids.end(), tid) !=
                runnable_thread_ids.end();
            if (is_in_runnable_thread_ids &&
                thread->ReadFlag(kEmptyCheckpointRequest)) {
              // Found a runnable thread that hasn't responded to the empty checkpoint request.
              // Assume it's stuck and safe to dump its stack.
              thread->Dump(LOG_STREAM(FATAL_WITHOUT_ABORT),
                           /*dump_native_stack*/ true,
                           /*backtrace_map*/ nullptr,
                           /*force_dump_stack*/ true);
            }
          }
        }
        LOG(FATAL_WITHOUT_ABORT)
            << "Dumped runnable threads that haven't responded to empty checkpoint.";
        // Now use ThreadList::Dump() to dump more threads, noting it may get stuck.
        Dump(LOG_STREAM(FATAL_WITHOUT_ABORT));
        LOG(FATAL) << "Dumped all threads.";
      }
    }
  }
}

// Request that a checkpoint function be run on all active (non-suspended)
// threads. Returns the number of successful requests.
size_t ThreadList::RunCheckpointOnRunnableThreads(Closure* checkpoint_function) {
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
  CHECK_NE(self->GetState(), kRunnable);

  size_t count = 0;
  {
    // Call a checkpoint function for each non-suspended thread.
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (const auto& thread : list_) {
      if (thread != self) {
        if (thread->RequestCheckpoint(checkpoint_function)) {
          // This thread will run its checkpoint some time in the near future.
          count++;
        }
      }
    }
  }

  // Return the number of threads that will run the checkpoint function.
  return count;
}

// A checkpoint/suspend-all hybrid to switch thread roots from
// from-space to to-space refs. Used to synchronize threads at a point
// to mark the initiation of marking while maintaining the to-space
// invariant.
size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
                                   Closure* flip_callback,
                                   gc::collector::GarbageCollector* collector,
                                   gc::GcPauseListener* pause_listener) {
  TimingLogger::ScopedTiming split("ThreadListFlip", collector->GetTimings());
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
  CHECK_NE(self->GetState(), kRunnable);

  collector->GetHeap()->ThreadFlipBegin(self);  // Sync with JNI critical calls.
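
  // Overall flow: suspend everything and run the flip callback under an exclusive mutator lock,
  // resume the threads that only need the flip function installed, then run the flip function on
  // behalf of the remaining suspended threads before resuming them.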

  // ThreadFlipBegin happens before we suspend all the threads, so it does not count towards the
  // pause.
  const uint64_t suspend_start_time = NanoTime();
  SuspendAllInternal(self, self, nullptr);
  if (pause_listener != nullptr) {
    pause_listener->StartPause();
  }

  // Run the flip callback for the collector.
  Locks::mutator_lock_->ExclusiveLock(self);
  suspend_all_historam_.AdjustAndAddValue(NanoTime() - suspend_start_time);
  flip_callback->Run(self);
  Locks::mutator_lock_->ExclusiveUnlock(self);
  collector->RegisterPause(NanoTime() - suspend_start_time);
  if (pause_listener != nullptr) {
    pause_listener->EndPause();
  }

  // Resume runnable threads.
  size_t runnable_thread_count = 0;
  std::vector<Thread*> other_threads;
  {
    TimingLogger::ScopedTiming split2("ResumeRunnableThreads", collector->GetTimings());
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    --suspend_all_count_;
    for (const auto& thread : list_) {
      // Set the flip function for all threads because Thread::DumpState/DumpJavaStack() (invoked
      // by a checkpoint) may cause the flip function to be run for a runnable/suspended thread
      // before a runnable thread runs it for itself or we run it for a suspended thread below.
      thread->SetFlipFunction(thread_flip_visitor);
      if (thread == self) {
        continue;
      }
      // Resume early the threads that were runnable but are suspended just for this thread flip or
      // about to transition from non-runnable (e.g. kNative at the SOA entry in a JNI function) to
      // runnable (both cases waiting inside Thread::TransitionFromSuspendedToRunnable), or waiting
      // for the thread flip to end at the JNI critical section entry (kWaitingForGcThreadFlip).
      ThreadState state = thread->GetState();
      if ((state == kWaitingForGcThreadFlip || thread->IsTransitioningToRunnable()) &&
          thread->GetSuspendCount() == 1) {
        // The thread will resume right after the broadcast.
        bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
        DCHECK(updated);
        ++runnable_thread_count;
      } else {
        other_threads.push_back(thread);
      }
    }
    Thread::resume_cond_->Broadcast(self);
  }

  collector->GetHeap()->ThreadFlipEnd(self);

  // Run the closure on the other threads and let them resume.
  {
    TimingLogger::ScopedTiming split3("FlipOtherThreads", collector->GetTimings());
    ReaderMutexLock mu(self, *Locks::mutator_lock_);
    for (const auto& thread : other_threads) {
      Closure* flip_func = thread->GetFlipFunction();
      if (flip_func != nullptr) {
        flip_func->Run(thread);
      }
    }
    // Run it for self.
    Closure* flip_func = self->GetFlipFunction();
    if (flip_func != nullptr) {
      flip_func->Run(self);
    }
  }

  // Resume other threads.
  {
    TimingLogger::ScopedTiming split4("ResumeOtherThreads", collector->GetTimings());
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (const auto& thread : other_threads) {
      bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
      DCHECK(updated);
    }
    Thread::resume_cond_->Broadcast(self);
  }

  return runnable_thread_count + other_threads.size() + 1;  // +1 for self.
}

void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
  Thread* self = Thread::Current();

  if (self != nullptr) {
    VLOG(threads) << *self << " SuspendAll for " << cause << " starting...";
  } else {
    VLOG(threads) << "Thread[null] SuspendAll for " << cause << " starting...";
  }
  {
    ScopedTrace trace("Suspending mutator threads");
    const uint64_t start_time = NanoTime();

    SuspendAllInternal(self, self);
    // All threads are known to have suspended (but a thread may still own the mutator lock).
    // Make sure this thread grabs exclusive access to the mutator lock and its protected data.
#if HAVE_TIMED_RWLOCK
    while (true) {
      if (Locks::mutator_lock_->ExclusiveLockWithTimeout(self,
                                                         NsToMs(thread_suspend_timeout_ns_),
                                                         0)) {
        break;
      } else if (!long_suspend_) {
        // Reading long_suspend without the mutator lock is slightly racy; in some rare cases this
        // could result in a thread suspend timeout.
        // Timeout if we wait more than thread_suspend_timeout_ns_ nanoseconds.
        UnsafeLogFatalForThreadSuspendAllTimeout();
      }
    }
#else
    Locks::mutator_lock_->ExclusiveLock(self);
#endif

    long_suspend_ = long_suspend;

    const uint64_t end_time = NanoTime();
    const uint64_t suspend_time = end_time - start_time;
    suspend_all_historam_.AdjustAndAddValue(suspend_time);
    if (suspend_time > kLongThreadSuspendThreshold) {
      LOG(WARNING) << "Suspending all threads took: " << PrettyDuration(suspend_time);
    }

    if (kDebugLocking) {
      // Debug check that all threads are suspended.
      AssertThreadsAreSuspended(self, self);
    }
  }
  ATRACE_BEGIN((std::string("Mutator threads suspended for ") + cause).c_str());

  if (self != nullptr) {
    VLOG(threads) << *self << " SuspendAll complete";
  } else {
    VLOG(threads) << "Thread[null] SuspendAll complete";
  }
}

// Ensures all threads running Java suspend and that those not running Java don't start.
// The debugger thread might be set to kRunnable for a short period of time after
// SuspendAllInternal. This is safe because it will be set back to a suspended state before
// SuspendAll returns.
void ThreadList::SuspendAllInternal(Thread* self,
                                    Thread* ignore1,
                                    Thread* ignore2,
                                    SuspendReason reason) {
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
  if (kDebugLocking && self != nullptr) {
    CHECK_NE(self->GetState(), kRunnable);
  }

  // First request that all threads suspend, then wait for them to suspend before
  // returning. This suspension scheme also relies on other behaviour:
  // 1. Threads cannot be deleted while they are suspended or have a suspend-
  //    request flag set - (see Unregister() below).
  // 2. When threads are created, they are created in a suspended state (actually
  //    kNative) and will never begin executing Java code without first checking
  //    the suspend-request flag.

  // The atomic counter for number of threads that need to pass the barrier.
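  // Each target thread decrements this counter once it has passed its suspend barrier (or we
  // decrement it here on its behalf if it is already suspended); the wait loop below blocks,
  // using a futex where available, until the counter drops to zero.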
  AtomicInteger pending_threads;
  uint32_t num_ignored = 0;
  if (ignore1 != nullptr) {
    ++num_ignored;
  }
  if (ignore2 != nullptr && ignore1 != ignore2) {
    ++num_ignored;
  }
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    // Update global suspend all state for attaching threads.
    ++suspend_all_count_;
    if (reason == SuspendReason::kForDebugger) {
      ++debug_suspend_all_count_;
    }
    pending_threads.StoreRelaxed(list_.size() - num_ignored);
    // Increment everybody's suspend count (except those that should be ignored).
    for (const auto& thread : list_) {
      if (thread == ignore1 || thread == ignore2) {
        continue;
      }
      VLOG(threads) << "requesting thread suspend: " << *thread;
      bool updated = thread->ModifySuspendCount(self, +1, &pending_threads, reason);
      DCHECK(updated);

      // Must install the pending_threads counter first, then check thread->IsSuspended() and
      // clear the counter. Otherwise there's a race with
      // Thread::TransitionFromRunnableToSuspended() that can lead a thread to miss a call to
      // PassActiveSuspendBarriers().
      if (thread->IsSuspended()) {
        // Only clear the counter for the current thread.
        thread->ClearSuspendBarrier(&pending_threads);
        pending_threads.FetchAndSubSequentiallyConsistent(1);
      }
    }
  }

  // Wait for the barrier to be passed by all runnable threads. This wait
  // is done with a timeout so that we can detect problems.
#if ART_USE_FUTEXES
  timespec wait_timeout;
  InitTimeSpec(false, CLOCK_MONOTONIC, NsToMs(thread_suspend_timeout_ns_), 0, &wait_timeout);
#endif
  const uint64_t start_time = NanoTime();
  while (true) {
    int32_t cur_val = pending_threads.LoadRelaxed();
    if (LIKELY(cur_val > 0)) {
#if ART_USE_FUTEXES
      if (futex(pending_threads.Address(), FUTEX_WAIT, cur_val, &wait_timeout, nullptr, 0) != 0) {
        // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
        if ((errno != EAGAIN) && (errno != EINTR)) {
          if (errno == ETIMEDOUT) {
            LOG(kIsDebugBuild ? ::android::base::FATAL : ::android::base::ERROR)
                << "Timed out waiting for threads to suspend, waited for "
                << PrettyDuration(NanoTime() - start_time);
          } else {
            PLOG(FATAL) << "futex wait failed for SuspendAllInternal()";
          }
        }
      }  // else re-check pending_threads in the next iteration (this may be a spurious wake-up).
#else
      // Spin wait. This is likely to be slow, but on most architectures ART_USE_FUTEXES is set.
      UNUSED(start_time);
#endif
    } else {
      CHECK_EQ(cur_val, 0);
      break;
    }
  }
}

void ThreadList::ResumeAll() {
  Thread* self = Thread::Current();

  if (self != nullptr) {
    VLOG(threads) << *self << " ResumeAll starting";
  } else {
    VLOG(threads) << "Thread[null] ResumeAll starting";
  }

  ATRACE_END();

  ScopedTrace trace("Resuming mutator threads");

  if (kDebugLocking) {
    // Debug check that all threads are suspended.
    AssertThreadsAreSuspended(self, self);
  }

  long_suspend_ = false;

  Locks::mutator_lock_->ExclusiveUnlock(self);
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    // Update global suspend all state for attaching threads.
    --suspend_all_count_;
    // Decrement the suspend counts for all threads.
    for (const auto& thread : list_) {
      if (thread == self) {
        continue;
      }
      bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
      DCHECK(updated);
    }

    // Broadcast a notification to all suspended threads, some or all of
    // which may choose to wake up. No need to wait for them.
    if (self != nullptr) {
      VLOG(threads) << *self << " ResumeAll waking others";
    } else {
      VLOG(threads) << "Thread[null] ResumeAll waking others";
    }
    Thread::resume_cond_->Broadcast(self);
  }

  if (self != nullptr) {
    VLOG(threads) << *self << " ResumeAll complete";
  } else {
    VLOG(threads) << "Thread[null] ResumeAll complete";
  }
}

bool ThreadList::Resume(Thread* thread, SuspendReason reason) {
  // This assumes there was an ATRACE_BEGIN when we suspended the thread.
  ATRACE_END();

  Thread* self = Thread::Current();
  DCHECK_NE(thread, self);
  VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") starting..." << reason;

  {
    // To check Contains.
    MutexLock mu(self, *Locks::thread_list_lock_);
    // To check IsSuspended.
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    if (UNLIKELY(!thread->IsSuspended())) {
      LOG(ERROR) << "Resume(" << reinterpret_cast<void*>(thread)
                 << ") thread not suspended";
      return false;
    }
    if (!Contains(thread)) {
      // We only expect threads within the thread-list to have been suspended; otherwise we can't
      // stop such threads from deleting themselves.
      LOG(ERROR) << "Resume(" << reinterpret_cast<void*>(thread)
                 << ") thread not within thread list";
      return false;
    }
    if (UNLIKELY(!thread->ModifySuspendCount(self, -1, nullptr, reason))) {
      LOG(ERROR) << "Resume(" << reinterpret_cast<void*>(thread)
                 << ") could not modify suspend count.";
      return false;
    }
  }

  {
    VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") waking others";
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    Thread::resume_cond_->Broadcast(self);
  }

  VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") complete";
  return true;
}

static void ThreadSuspendByPeerWarning(Thread* self,
                                       LogSeverity severity,
                                       const char* message,
                                       jobject peer) {
  JNIEnvExt* env = self->GetJniEnv();
  ScopedLocalRef<jstring>
      scoped_name_string(env, static_cast<jstring>(env->GetObjectField(
          peer, WellKnownClasses::java_lang_Thread_name)));
  ScopedUtfChars scoped_name_chars(env, scoped_name_string.get());
  if (scoped_name_chars.c_str() == nullptr) {
    LOG(severity) << message << ": " << peer;
    env->ExceptionClear();
  } else {
    LOG(severity) << message << ": " << peer << ":" << scoped_name_chars.c_str();
  }
}

Thread* ThreadList::SuspendThreadByPeer(jobject peer,
                                        bool request_suspension,
                                        SuspendReason reason,
                                        bool* timed_out) {
  const uint64_t start_time = NanoTime();
  useconds_t sleep_us = kThreadSuspendInitialSleepUs;
  *timed_out = false;
  Thread* const self = Thread::Current();
  Thread* suspended_thread = nullptr;
  VLOG(threads) << "SuspendThreadByPeer starting";
  while (true) {
    Thread* thread;
    {
      // Note: this will transition to runnable and potentially suspend. We ensure only one thread
      // is requesting another suspend, to avoid deadlock, by requiring this function be called
      // holding Locks::thread_list_suspend_thread_lock_.
      // It's important this thread suspend rather than request thread suspension, to avoid
      // potential cycles in threads requesting each other suspend.
      ScopedObjectAccess soa(self);
      MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
      thread = Thread::FromManagedThread(soa, peer);
      if (thread == nullptr) {
        if (suspended_thread != nullptr) {
          MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
          // If we incremented the suspend count but the thread reset its peer, we need to
          // re-decrement it since it is shutting down and may deadlock the runtime in
          // ThreadList::WaitForOtherNonDaemonThreadsToExit.
          bool updated = suspended_thread->ModifySuspendCount(soa.Self(),
                                                              -1,
                                                              nullptr,
                                                              reason);
          DCHECK(updated);
        }
        ThreadSuspendByPeerWarning(self,
                                   ::android::base::WARNING,
                                   "No such thread for suspend",
                                   peer);
        return nullptr;
      }
      if (!Contains(thread)) {
        CHECK(suspended_thread == nullptr);
        VLOG(threads) << "SuspendThreadByPeer failed for unattached thread: "
                      << reinterpret_cast<void*>(thread);
        return nullptr;
      }
      VLOG(threads) << "SuspendThreadByPeer found thread: " << *thread;
      {
        MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
        if (request_suspension) {
          if (self->GetSuspendCount() > 0) {
            // We hold the suspend count lock but another thread is trying to suspend us. It's not
            // safe to try to suspend another thread in case we get a cycle. Start the loop again,
            // which will allow this thread to be suspended.
            continue;
          }
          CHECK(suspended_thread == nullptr);
          suspended_thread = thread;
          bool updated = suspended_thread->ModifySuspendCount(self, +1, nullptr, reason);
          DCHECK(updated);
          request_suspension = false;
        } else {
          // If the caller isn't requesting suspension, a suspension should have already occurred.
          CHECK_GT(thread->GetSuspendCount(), 0);
        }
        // IsSuspended on the current thread will fail as the current thread is changed into
        // Runnable above. As the suspend count is now raised, if this is the current thread
        // it will self suspend on transition to Runnable, making it hard to work with. It's
        // simpler to just explicitly handle the current thread in the callers to this code.
        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
        // count, or else we've waited and it has self suspended) or is the current thread, we're
        // done.
        if (thread->IsSuspended()) {
          VLOG(threads) << "SuspendThreadByPeer thread suspended: " << *thread;
          if (ATRACE_ENABLED()) {
            std::string name;
            thread->GetThreadName(name);
            ATRACE_BEGIN(StringPrintf("SuspendThreadByPeer suspended %s for peer=%p", name.c_str(),
                                      peer).c_str());
          }
          return thread;
        }
        const uint64_t total_delay = NanoTime() - start_time;
        if (total_delay >= thread_suspend_timeout_ns_) {
          ThreadSuspendByPeerWarning(self,
                                     ::android::base::FATAL,
                                     "Thread suspension timed out",
                                     peer);
          if (suspended_thread != nullptr) {
            CHECK_EQ(suspended_thread, thread);
            bool updated = suspended_thread->ModifySuspendCount(soa.Self(),
                                                                -1,
                                                                nullptr,
                                                                reason);
            DCHECK(updated);
          }
          *timed_out = true;
          return nullptr;
        } else if (sleep_us == 0 &&
            total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
          // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent
          // excessive CPU usage.
          sleep_us = kThreadSuspendMaxYieldUs / 2;
        }
      }
      // Release locks and come out of runnable state.
    }
    VLOG(threads) << "SuspendThreadByPeer waiting to allow thread chance to suspend";
    ThreadSuspendSleep(sleep_us);
    // This may stay at 0 if sleep_us == 0, but this is WAI since we want to avoid using usleep at
    // all if possible. This shouldn't be an issue since time to suspend should always be small.
    sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs);
  }
}

static void ThreadSuspendByThreadIdWarning(LogSeverity severity,
                                           const char* message,
                                           uint32_t thread_id) {
  LOG(severity) << StringPrintf("%s: %d", message, thread_id);
}

Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id,
                                            SuspendReason reason,
                                            bool* timed_out) {
  const uint64_t start_time = NanoTime();
  useconds_t sleep_us = kThreadSuspendInitialSleepUs;
  *timed_out = false;
  Thread* suspended_thread = nullptr;
  Thread* const self = Thread::Current();
  CHECK_NE(thread_id, kInvalidThreadId);
  VLOG(threads) << "SuspendThreadByThreadId starting";
  while (true) {
    {
      // Note: this will transition to runnable and potentially suspend. We ensure only one thread
      // is requesting another suspend, to avoid deadlock, by requiring this function be called
      // holding Locks::thread_list_suspend_thread_lock_. It's important this thread suspend
      // rather than request thread suspension, to avoid potential cycles in threads requesting
      // each other suspend.
      ScopedObjectAccess soa(self);
      MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
      Thread* thread = nullptr;
      for (const auto& it : list_) {
        if (it->GetThreadId() == thread_id) {
          thread = it;
          break;
        }
      }
      if (thread == nullptr) {
        CHECK(suspended_thread == nullptr) << "Suspended thread " << suspended_thread
            << " no longer in thread list";
        // There's a race in inflating a lock and the owner giving up ownership and then dying.
        ThreadSuspendByThreadIdWarning(::android::base::WARNING,
                                       "No such thread id for suspend",
                                       thread_id);
        return nullptr;
      }
      VLOG(threads) << "SuspendThreadByThreadId found thread: " << *thread;
      DCHECK(Contains(thread));
      {
        MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
        if (suspended_thread == nullptr) {
          if (self->GetSuspendCount() > 0) {
            // We hold the suspend count lock but another thread is trying to suspend us. It's not
            // safe to try to suspend another thread in case we get a cycle. Start the loop again,
            // which will allow this thread to be suspended.
            continue;
          }
          bool updated = thread->ModifySuspendCount(self, +1, nullptr, reason);
          DCHECK(updated);
          suspended_thread = thread;
        } else {
          CHECK_EQ(suspended_thread, thread);
          // If the caller isn't requesting suspension, a suspension should have already occurred.
          CHECK_GT(thread->GetSuspendCount(), 0);
        }
        // IsSuspended on the current thread will fail as the current thread is changed into
        // Runnable above. As the suspend count is now raised, if this is the current thread
        // it will self suspend on transition to Runnable, making it hard to work with. It's
        // simpler to just explicitly handle the current thread in the callers to this code.
        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
        // count, or else we've waited and it has self suspended) or is the current thread, we're
        // done.
        if (thread->IsSuspended()) {
          if (ATRACE_ENABLED()) {
            std::string name;
            thread->GetThreadName(name);
            ATRACE_BEGIN(StringPrintf("SuspendThreadByThreadId suspended %s id=%d",
                                      name.c_str(), thread_id).c_str());
          }
          VLOG(threads) << "SuspendThreadByThreadId thread suspended: " << *thread;
          return thread;
        }
        const uint64_t total_delay = NanoTime() - start_time;
        if (total_delay >= thread_suspend_timeout_ns_) {
          ThreadSuspendByThreadIdWarning(::android::base::WARNING,
                                         "Thread suspension timed out",
                                         thread_id);
          if (suspended_thread != nullptr) {
            bool updated = thread->ModifySuspendCount(soa.Self(), -1, nullptr, reason);
            DCHECK(updated);
          }
          *timed_out = true;
          return nullptr;
        } else if (sleep_us == 0 &&
            total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
          // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent
          // excessive CPU usage.
          sleep_us = kThreadSuspendMaxYieldUs / 2;
        }
      }
      // Release locks and come out of runnable state.
    }
    VLOG(threads) << "SuspendThreadByThreadId waiting to allow thread chance to suspend";
    ThreadSuspendSleep(sleep_us);
    sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs);
  }
}

Thread* ThreadList::FindThreadByThreadId(uint32_t thread_id) {
  for (const auto& thread : list_) {
    if (thread->GetThreadId() == thread_id) {
      return thread;
    }
  }
  return nullptr;
}

void ThreadList::SuspendAllForDebugger() {
  Thread* self = Thread::Current();
  Thread* debug_thread = Dbg::GetDebugThread();

  VLOG(threads) << *self << " SuspendAllForDebugger starting...";

  SuspendAllInternal(self, self, debug_thread, SuspendReason::kForDebugger);
  // Block on the mutator lock until all Runnable threads release their share of access then
  // immediately unlock again.
#if HAVE_TIMED_RWLOCK
  // Timeout if we wait more than 30 seconds.
  if (!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, 30 * 1000, 0)) {
    UnsafeLogFatalForThreadSuspendAllTimeout();
  } else {
    Locks::mutator_lock_->ExclusiveUnlock(self);
  }
#else
  Locks::mutator_lock_->ExclusiveLock(self);
  Locks::mutator_lock_->ExclusiveUnlock(self);
#endif
  // Disabled for the following race condition:
  // Thread 1 calls SuspendAllForDebugger, gets preempted after pulsing the mutator lock.
  // Thread 2 calls SuspendAll and SetStateUnsafe (perhaps from Dbg::Disconnected).
  // Thread 1 fails assertion that all threads are suspended due to thread 2 being in a runnable
  // state (from SetStateUnsafe).
  // AssertThreadsAreSuspended(self, self, debug_thread);

  VLOG(threads) << *self << " SuspendAllForDebugger complete";
}

void ThreadList::SuspendSelfForDebugger() {
  Thread* const self = Thread::Current();
  self->SetReadyForDebugInvoke(true);

  // The debugger thread must not suspend itself due to debugger activity!
  Thread* debug_thread = Dbg::GetDebugThread();
  CHECK(self != debug_thread);
  CHECK_NE(self->GetState(), kRunnable);
  Locks::mutator_lock_->AssertNotHeld(self);

  // The debugger may have detached while we were executing an invoke request. In that case, we
  // must not suspend ourself.
  DebugInvokeReq* pReq = self->GetInvokeReq();
  const bool skip_thread_suspension = (pReq != nullptr && !Dbg::IsDebuggerActive());
  if (!skip_thread_suspension) {
    // Collisions with other suspends aren't really interesting. We want
    // to ensure that we're the only one fiddling with the suspend count
    // though.
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    bool updated = self->ModifySuspendCount(self, +1, nullptr, SuspendReason::kForDebugger);
    DCHECK(updated);
    CHECK_GT(self->GetSuspendCount(), 0);

    VLOG(threads) << *self << " self-suspending (debugger)";
  } else {
    // We must no longer be subject to debugger suspension.
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    CHECK_EQ(self->GetDebugSuspendCount(), 0) << "Debugger detached without resuming us";

    VLOG(threads) << *self << " not self-suspending because debugger detached during invoke";
  }

  // If the debugger requested an invoke, we need to send the reply and clear the request.
  if (pReq != nullptr) {
    Dbg::FinishInvokeMethod(pReq);
    self->ClearDebugInvokeReq();
    pReq = nullptr;  // object has been deleted, clear it for safety.
  }

  // Tell JDWP that we've completed suspension. The JDWP thread can't
  // tell us to resume before we're fully asleep because we hold the
  // suspend count lock.
  Dbg::ClearWaitForEventThread();

  {
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    while (self->GetSuspendCount() != 0) {
      Thread::resume_cond_->Wait(self);
      if (self->GetSuspendCount() != 0) {
        // The condition was signaled but we're still suspended. This
        // can happen when we suspend then resume all threads to
        // update instrumentation or compute monitor info. This can
        // also happen if the debugger lets go while a SIGQUIT thread
        // dump event is pending (assuming SignalCatcher was resumed for
        // just long enough to try to grab the thread-suspend lock).
        VLOG(jdwp) << *self << " still suspended after undo "
                   << "(suspend count=" << self->GetSuspendCount() << ", "
                   << "debug suspend count=" << self->GetDebugSuspendCount() << ")";
      }
    }
    CHECK_EQ(self->GetSuspendCount(), 0);
  }

  self->SetReadyForDebugInvoke(false);
  VLOG(threads) << *self << " self-reviving (debugger)";
}

void ThreadList::ResumeAllForDebugger() {
  Thread* self = Thread::Current();
  Thread* debug_thread = Dbg::GetDebugThread();

  VLOG(threads) << *self << " ResumeAllForDebugger starting...";

  // Threads can't resume if we exclusively hold the mutator lock.
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);

  {
    MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
    {
      MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
      // Update global suspend all state for attaching threads.
      DCHECK_GE(suspend_all_count_, debug_suspend_all_count_);
      if (debug_suspend_all_count_ > 0) {
        --suspend_all_count_;
        --debug_suspend_all_count_;
      } else {
        // We've been asked to resume all threads without being asked to
        // suspend them all before. That may happen if a debugger tries
        // to resume some suspended threads (with suspend count == 1)
        // at once with a VirtualMachine.Resume command. Let's print a
        // warning.
        LOG(WARNING) << "Debugger attempted to resume all threads without "
                     << "having suspended them all before.";
      }
      // Decrement everybody's suspend count (except our own).
      for (const auto& thread : list_) {
        if (thread == self || thread == debug_thread) {
          continue;
        }
        if (thread->GetDebugSuspendCount() == 0) {
          // This thread may have been individually resumed with ThreadReference.Resume.
          continue;
        }
        VLOG(threads) << "requesting thread resume: " << *thread;
        bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kForDebugger);
        DCHECK(updated);
      }
    }
  }

  {
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    Thread::resume_cond_->Broadcast(self);
  }

  VLOG(threads) << *self << " ResumeAllForDebugger complete";
}

void ThreadList::UndoDebuggerSuspensions() {
  Thread* self = Thread::Current();

  VLOG(threads) << *self << " UndoDebuggerSuspensions starting";

  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    // Update global suspend all state for attaching threads.
    suspend_all_count_ -= debug_suspend_all_count_;
    debug_suspend_all_count_ = 0;
    // Update running threads.
    for (const auto& thread : list_) {
      if (thread == self || thread->GetDebugSuspendCount() == 0) {
        continue;
      }
      bool suspended = thread->ModifySuspendCount(self,
                                                  -thread->GetDebugSuspendCount(),
                                                  nullptr,
                                                  SuspendReason::kForDebugger);
      DCHECK(suspended);
    }
  }

  {
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    Thread::resume_cond_->Broadcast(self);
  }

  VLOG(threads) << "UndoDebuggerSuspensions(" << *self << ") complete";
}

void ThreadList::WaitForOtherNonDaemonThreadsToExit() {
  ScopedTrace trace(__PRETTY_FUNCTION__);
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotHeld(self);
  while (true) {
    {
      // No more threads can be born after we start to shut down.
      MutexLock mu(self, *Locks::runtime_shutdown_lock_);
      CHECK(Runtime::Current()->IsShuttingDownLocked());
      CHECK_EQ(Runtime::Current()->NumberOfThreadsBeingBorn(), 0U);
    }
    MutexLock mu(self, *Locks::thread_list_lock_);
    // Also wait for any threads that are unregistering to finish. This is required so that no
    // threads access the thread list after it is deleted. TODO: This may not work for user daemon
    // threads since they could unregister at the wrong time.
    bool done = unregistering_count_ == 0;
    if (done) {
      for (const auto& thread : list_) {
        if (thread != self && !thread->IsDaemon()) {
          done = false;
          break;
        }
      }
    }
    if (done) {
      break;
    }
    // Wait for another thread to exit before re-checking.
    Locks::thread_exit_cond_->Wait(self);
  }
}

void ThreadList::SuspendAllDaemonThreadsForShutdown() {
  ScopedTrace trace(__PRETTY_FUNCTION__);
  Thread* self = Thread::Current();
  size_t daemons_left = 0;
  {
    // Tell all the daemons it's time to suspend.
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (const auto& thread : list_) {
      // This is only run after all non-daemon threads have exited, so the remainder should all be
      // daemons.
      CHECK(thread->IsDaemon()) << *thread;
      if (thread != self) {
        bool updated = thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
        DCHECK(updated);
        ++daemons_left;
      }
      // We are shutting down the runtime, set the JNI functions of all the JNIEnvs to be
      // the sleep forever one.
      thread->GetJniEnv()->SetFunctionsToRuntimeShutdownFunctions();
    }
  }
  // If we have any daemons left, wait 200ms to ensure they are not stuck in a place where they
  // are about to access runtime state and are not in a runnable state. Examples: Monitor code
  // or waking up from a condition variable. TODO: Try and see if there is a better way to wait
  // for daemon threads to be in a blocked state.
  if (daemons_left > 0) {
    static constexpr size_t kDaemonSleepTime = 200 * 1000;
    usleep(kDaemonSleepTime);
  }
  // Give the threads a chance to suspend, complaining if they're slow.
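  // Poll the thread list for up to kTimeoutMicroseconds, sleeping kSleepMicroseconds between
  // passes, and log a warning the first time a daemon thread is seen still runnable.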
  bool have_complained = false;
  static constexpr size_t kTimeoutMicroseconds = 2000 * 1000;
  static constexpr size_t kSleepMicroseconds = 1000;
  for (size_t i = 0; i < kTimeoutMicroseconds / kSleepMicroseconds; ++i) {
    bool all_suspended = true;
    {
      MutexLock mu(self, *Locks::thread_list_lock_);
      for (const auto& thread : list_) {
        if (thread != self && thread->GetState() == kRunnable) {
          if (!have_complained) {
            LOG(WARNING) << "daemon thread not yet suspended: " << *thread;
            have_complained = true;
          }
          all_suspended = false;
        }
      }
    }
    if (all_suspended) {
      return;
    }
    usleep(kSleepMicroseconds);
  }
  LOG(WARNING) << "timed out suspending all daemon threads";
}

void ThreadList::Register(Thread* self) {
  DCHECK_EQ(self, Thread::Current());
  CHECK(!shut_down_);

  if (VLOG_IS_ON(threads)) {
    std::ostringstream oss;
    self->ShortDump(oss);  // We don't hold the mutator_lock_ yet and so cannot call Dump.
    LOG(INFO) << "ThreadList::Register() " << *self << "\n" << oss.str();
  }

  // Atomically add self to the thread list and make its thread_suspend_count_ reflect ongoing
  // SuspendAll requests.
  MutexLock mu(self, *Locks::thread_list_lock_);
  MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
  CHECK_GE(suspend_all_count_, debug_suspend_all_count_);
  // Modify suspend count in increments of 1 to maintain invariants in ModifySuspendCount. While
  // this isn't particularly efficient, the suspend counts are most commonly 0 or 1.
  for (int delta = debug_suspend_all_count_; delta > 0; delta--) {
    bool updated = self->ModifySuspendCount(self, +1, nullptr, SuspendReason::kForDebugger);
    DCHECK(updated);
  }
  for (int delta = suspend_all_count_ - debug_suspend_all_count_; delta > 0; delta--) {
    bool updated = self->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
    DCHECK(updated);
  }
  CHECK(!Contains(self));
  list_.push_back(self);
  if (kUseReadBarrier) {
    gc::collector::ConcurrentCopying* const cc =
        Runtime::Current()->GetHeap()->ConcurrentCopyingCollector();
    // Initialize according to the state of the CC collector.
    self->SetIsGcMarkingAndUpdateEntrypoints(cc->IsMarking());
    if (cc->IsUsingReadBarrierEntrypoints()) {
      self->SetReadBarrierEntrypoints();
    }
    self->SetWeakRefAccessEnabled(cc->IsWeakRefAccessEnabled());
  }
}

void ThreadList::Unregister(Thread* self) {
  DCHECK_EQ(self, Thread::Current());
  CHECK_NE(self->GetState(), kRunnable);
  Locks::mutator_lock_->AssertNotHeld(self);

  VLOG(threads) << "ThreadList::Unregister() " << *self;

  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    ++unregistering_count_;
  }

  // Any time-consuming destruction, plus anything that can call back into managed code or
  // suspend and so on, must happen at this point, and not in ~Thread. The self->Destroy is what
  // causes the threads to join. It is important to do this after incrementing unregistering_count_
  // since we want the runtime to wait for the daemon threads to exit before deleting the thread
  // list.
  self->Destroy();

  // If tracing, remember thread id and name before thread exits.
  Trace::StoreExitingThreadInfo(self);

  uint32_t thin_lock_id = self->GetThreadId();
  while (true) {
    // Remove and delete the Thread* while holding the thread_list_lock_ and
    // thread_suspend_count_lock_ so that the unregistering thread cannot be suspended.
    // Note: deliberately not using MutexLock that could hold a stale self pointer.
    MutexLock mu(self, *Locks::thread_list_lock_);
    if (!Contains(self)) {
      std::string thread_name;
      self->GetThreadName(thread_name);
      std::ostringstream os;
      DumpNativeStack(os, GetTid(), nullptr, " native: ", nullptr);
      LOG(ERROR) << "Request to unregister unattached thread " << thread_name << "\n" << os.str();
      break;
    } else {
      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
      if (!self->IsSuspended()) {
        list_.remove(self);
        break;
      }
    }
    // We failed to remove the thread due to a suspend request, loop and try again.
  }
  delete self;

  // Release the thread ID after the thread is finished and deleted to avoid cases where we can
  // temporarily have multiple threads with the same thread id. When this occurs, it causes
  // problems in FindThreadByThreadId / SuspendThreadByThreadId.
  ReleaseThreadId(nullptr, thin_lock_id);

  // Clear the TLS data, so that the underlying native thread is recognizably detached.
  // (It may wish to reattach later.)
#ifdef ART_TARGET_ANDROID
  __get_tls()[TLS_SLOT_ART_THREAD_SELF] = nullptr;
#else
  CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, nullptr), "detach self");
#endif

  // Signal that a thread just detached.
  MutexLock mu(nullptr, *Locks::thread_list_lock_);
  --unregistering_count_;
  Locks::thread_exit_cond_->Broadcast(nullptr);
}

void ThreadList::ForEach(void (*callback)(Thread*, void*), void* context) {
  for (const auto& thread : list_) {
    callback(thread, context);
  }
}

void ThreadList::VisitRootsForSuspendedThreads(RootVisitor* visitor) {
  Thread* const self = Thread::Current();
  std::vector<Thread*> threads_to_visit;

  // Tell threads to suspend and copy them into list.
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (Thread* thread : list_) {
      bool suspended = thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
      DCHECK(suspended);
      if (thread == self || thread->IsSuspended()) {
        threads_to_visit.push_back(thread);
      } else {
        bool resumed = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
        DCHECK(resumed);
      }
    }
  }

  // Visit roots without holding thread_list_lock_ and thread_suspend_count_lock_ to prevent lock
  // order violations.
  for (Thread* thread : threads_to_visit) {
    thread->VisitRoots(visitor, kVisitRootFlagAllRoots);
  }

  // Restore suspend counts.
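  // Undo the increments applied when building threads_to_visit; threads that were not suspended
  // were already decremented again in the copy phase above.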
  {
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (Thread* thread : threads_to_visit) {
      bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
      DCHECK(updated);
    }
  }
}

void ThreadList::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) const {
  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
  for (const auto& thread : list_) {
    thread->VisitRoots(visitor, flags);
  }
}

uint32_t ThreadList::AllocThreadId(Thread* self) {
  MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
  for (size_t i = 0; i < allocated_ids_.size(); ++i) {
    if (!allocated_ids_[i]) {
      allocated_ids_.set(i);
      return i + 1;  // Zero is reserved to mean "invalid".
    }
  }
  LOG(FATAL) << "Out of internal thread ids";
  return 0;
}

void ThreadList::ReleaseThreadId(Thread* self, uint32_t id) {
  MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
  --id;  // Zero is reserved to mean "invalid".
  DCHECK(allocated_ids_[id]) << id;
  allocated_ids_.reset(id);
}

ScopedSuspendAll::ScopedSuspendAll(const char* cause, bool long_suspend) {
  Runtime::Current()->GetThreadList()->SuspendAll(cause, long_suspend);
}

ScopedSuspendAll::~ScopedSuspendAll() {
  Runtime::Current()->GetThreadList()->ResumeAll();
}

}  // namespace art