/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "thread_list.h"

#include <dirent.h>
#include <sys/types.h>
#include <unistd.h>

#include <sstream>
#include <vector>

#include "android-base/stringprintf.h"
#include "backtrace/BacktraceMap.h"
#include "nativehelper/scoped_local_ref.h"
#include "nativehelper/scoped_utf_chars.h"

#include "base/aborting.h"
#include "base/histogram-inl.h"
#include "base/mutex-inl.h"
#include "base/systrace.h"
#include "base/time_utils.h"
#include "base/timing_logger.h"
#include "debugger.h"
#include "gc/collector/concurrent_copying.h"
#include "gc/gc_pause_listener.h"
#include "gc/heap.h"
#include "gc/reference_processor.h"
#include "gc_root.h"
#include "jni_internal.h"
#include "lock_word.h"
#include "monitor.h"
#include "native_stack_dump.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "trace.h"
#include "well_known_classes.h"

#if ART_USE_FUTEXES
#include "linux/futex.h"
#include "sys/syscall.h"
#ifndef SYS_futex
#define SYS_futex __NR_futex
#endif
#endif  // ART_USE_FUTEXES

namespace art {

using android::base::StringPrintf;

static constexpr uint64_t kLongThreadSuspendThreshold = MsToNs(5);
// Use 0 since we want to yield to prevent blocking for an unpredictable amount of time.
static constexpr useconds_t kThreadSuspendInitialSleepUs = 0;
static constexpr useconds_t kThreadSuspendMaxYieldUs = 3000;
static constexpr useconds_t kThreadSuspendMaxSleepUs = 5000;

// Whether we should try to dump the native stack of unattached threads. See commit ed8b723 for
// some history.
static constexpr bool kDumpUnattachedThreadNativeStackForSigQuit = true;

ThreadList::ThreadList(uint64_t thread_suspend_timeout_ns)
    : suspend_all_count_(0),
      debug_suspend_all_count_(0),
      unregistering_count_(0),
      suspend_all_historam_("suspend all histogram", 16, 64),
      long_suspend_(false),
      shut_down_(false),
      thread_suspend_timeout_ns_(thread_suspend_timeout_ns),
      empty_checkpoint_barrier_(new Barrier(0)) {
  CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1, 0U)));
}

ThreadList::~ThreadList() {
  CHECK(shut_down_);
}

void ThreadList::ShutDown() {
  ScopedTrace trace(__PRETTY_FUNCTION__);
  // Detach the current thread if necessary. If we failed to start, there might not be any threads.
  // We need to detach the current thread here in case there's another thread waiting to join with
  // us.
  bool contains = false;
  Thread* self = Thread::Current();
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    contains = Contains(self);
  }
  if (contains) {
    Runtime::Current()->DetachCurrentThread();
  }
  WaitForOtherNonDaemonThreadsToExit();
  // Disable GC and wait for GC to complete in case there are still daemon threads doing
  // allocations.
  gc::Heap* const heap = Runtime::Current()->GetHeap();
  heap->DisableGCForShutdown();
  // In case a GC is in progress, wait for it to finish.
  heap->WaitForGcToComplete(gc::kGcCauseBackground, Thread::Current());
  // TODO: there's an unaddressed race here where a thread may attach during shutdown, see
  //       Thread::Init.
  SuspendAllDaemonThreadsForShutdown();

  shut_down_ = true;
}

bool ThreadList::Contains(Thread* thread) {
  return find(list_.begin(), list_.end(), thread) != list_.end();
}

bool ThreadList::Contains(pid_t tid) {
  for (const auto& thread : list_) {
    if (thread->GetTid() == tid) {
      return true;
    }
  }
  return false;
}

pid_t ThreadList::GetLockOwner() {
  return Locks::thread_list_lock_->GetExclusiveOwnerTid();
}

void ThreadList::DumpNativeStacks(std::ostream& os) {
  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
  std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid()));
  for (const auto& thread : list_) {
    os << "DUMPING THREAD " << thread->GetTid() << "\n";
    DumpNativeStack(os, thread->GetTid(), map.get(), "\t");
    os << "\n";
  }
}

void ThreadList::DumpForSigQuit(std::ostream& os) {
  {
    ScopedObjectAccess soa(Thread::Current());
    // Only print if we have samples.
    if (suspend_all_historam_.SampleSize() > 0) {
      Histogram<uint64_t>::CumulativeData data;
      suspend_all_historam_.CreateHistogram(&data);
      suspend_all_historam_.PrintConfidenceIntervals(os, 0.99, data);  // Dump time to suspend.
    }
  }
  bool dump_native_stack = Runtime::Current()->GetDumpNativeStackOnSigQuit();
  Dump(os, dump_native_stack);
  DumpUnattachedThreads(os, dump_native_stack && kDumpUnattachedThreadNativeStackForSigQuit);
}

static void DumpUnattachedThread(std::ostream& os, pid_t tid, bool dump_native_stack)
    NO_THREAD_SAFETY_ANALYSIS {
  // TODO: No thread safety analysis as DumpState with a null thread won't access fields, should
  // refactor DumpState to avoid skipping analysis.
  Thread::DumpState(os, nullptr, tid);
  DumpKernelStack(os, tid, "  kernel: ", false);
  if (dump_native_stack) {
    DumpNativeStack(os, tid, nullptr, "  native: ");
  }
  os << std::endl;
}

void ThreadList::DumpUnattachedThreads(std::ostream& os, bool dump_native_stack) {
  DIR* d = opendir("/proc/self/task");
  if (!d) {
    return;
  }

  Thread* self = Thread::Current();
  dirent* e;
  while ((e = readdir(d)) != nullptr) {
    char* end;
    pid_t tid = strtol(e->d_name, &end, 10);
    if (!*end) {
      bool contains;
      {
        MutexLock mu(self, *Locks::thread_list_lock_);
        contains = Contains(tid);
      }
      if (!contains) {
        DumpUnattachedThread(os, tid, dump_native_stack);
      }
    }
  }
  closedir(d);
}

// Dump checkpoint timeout in milliseconds. Use a larger timeout on the target, since the device
// could be overloaded with ANR dumps.
static constexpr uint32_t kDumpWaitTimeout = kIsTargetBuild ? 100000 : 20000;

// A closure used by Thread::Dump.
class DumpCheckpoint FINAL : public Closure {
 public:
  DumpCheckpoint(std::ostream* os, bool dump_native_stack)
      : os_(os),
        barrier_(0),
        backtrace_map_(dump_native_stack ? BacktraceMap::Create(getpid()) : nullptr),
        dump_native_stack_(dump_native_stack) {
    if (backtrace_map_ != nullptr) {
      backtrace_map_->SetSuffixesToIgnore(std::vector<std::string> { "oat", "odex" });
    }
  }

  void Run(Thread* thread) OVERRIDE {
    // Note thread and self may not be equal if thread was already suspended at the point of the
    // request.
    Thread* self = Thread::Current();
    CHECK(self != nullptr);
    std::ostringstream local_os;
    {
      ScopedObjectAccess soa(self);
      thread->Dump(local_os, dump_native_stack_, backtrace_map_.get());
    }
    {
      // Use the logging lock to ensure serialization when writing to the common ostream.
      MutexLock mu(self, *Locks::logging_lock_);
      *os_ << local_os.str() << std::endl;
    }
    barrier_.Pass(self);
  }

  void WaitForThreadsToRunThroughCheckpoint(size_t threads_running_checkpoint) {
    Thread* self = Thread::Current();
    ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
    bool timed_out = barrier_.Increment(self, threads_running_checkpoint, kDumpWaitTimeout);
    if (timed_out) {
      // Avoid a recursive abort.
      LOG((kIsDebugBuild && (gAborting == 0)) ? ::android::base::FATAL : ::android::base::ERROR)
          << "Unexpected time out during dump checkpoint.";
    }
  }

 private:
  // The common stream that will accumulate all the dumps.
  std::ostream* const os_;
  // The barrier to be passed through and for the requestor to wait upon.
  Barrier barrier_;
  // A backtrace map, so that all threads use a shared info and don't reacquire/parse separately.
  std::unique_ptr<BacktraceMap> backtrace_map_;
  // Whether we should dump the native stack.
  const bool dump_native_stack_;
};

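// Dumps every attached thread by running a DumpCheckpoint on each of them and waiting (with a
// timeout) for all of them to pass the barrier. If no current thread is attached (e.g. very
// early in startup or late in shutdown), falls back to dumping the unattached threads via
// /proc/self/task.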
void ThreadList::Dump(std::ostream& os, bool dump_native_stack) {
  Thread* self = Thread::Current();
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    os << "DALVIK THREADS (" << list_.size() << "):\n";
  }
  if (self != nullptr) {
    DumpCheckpoint checkpoint(&os, dump_native_stack);
    size_t threads_running_checkpoint;
    {
      // Use SOA to prevent deadlocks if multiple threads are calling Dump() at the same time.
      ScopedObjectAccess soa(self);
      threads_running_checkpoint = RunCheckpoint(&checkpoint);
    }
    if (threads_running_checkpoint != 0) {
      checkpoint.WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint);
    }
  } else {
    DumpUnattachedThreads(os, dump_native_stack);
  }
}

void ThreadList::AssertThreadsAreSuspended(Thread* self, Thread* ignore1, Thread* ignore2) {
  MutexLock mu(self, *Locks::thread_list_lock_);
  MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
  for (const auto& thread : list_) {
    if (thread != ignore1 && thread != ignore2) {
      CHECK(thread->IsSuspended())
            << "\nUnsuspended thread: <<" << *thread << "\n"
            << "self: <<" << *Thread::Current();
    }
  }
}

#if HAVE_TIMED_RWLOCK
// Attempt to rectify locks so that we can dump the thread list with the required locks held
// before exiting.
NO_RETURN static void UnsafeLogFatalForThreadSuspendAllTimeout() {
  // Increment gAborting before doing the thread list dump since we don't want any failures from
  // AssertThreadSuspensionIsAllowable in cases where thread suspension is not allowed.
  // See b/69044468.
  ++gAborting;
  Runtime* runtime = Runtime::Current();
  std::ostringstream ss;
  ss << "Thread suspend timeout\n";
  Locks::mutator_lock_->Dump(ss);
  ss << "\n";
  runtime->GetThreadList()->Dump(ss);
  --gAborting;
  LOG(FATAL) << ss.str();
  exit(0);
}
#endif

// Unlike suspending all threads, where we can wait to acquire the mutator_lock_, suspending an
// individual thread requires polling. delay_us is the requested sleep wait. If delay_us is 0
// then we use sched_yield instead of calling usleep.
// Although usleep could return -1 with errno == EINTR here (and elsewhere), being interrupted
// is harmless, so we do not check for it.
static void ThreadSuspendSleep(useconds_t delay_us) {
  if (delay_us == 0) {
    sched_yield();
  } else {
    usleep(delay_us);
  }
}

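// Runs |checkpoint_function| on behalf of every thread in the list and returns the number of
// threads covered. Runnable threads are asked to run the checkpoint themselves; threads that
// are suspended have their suspend count raised so they stay suspended while this thread runs
// the checkpoint for them, after which the count is lowered again. |callback|, if non-null, is
// run once while thread_list_lock_ and thread_suspend_count_lock_ are both held.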
size_t ThreadList::RunCheckpoint(Closure* checkpoint_function, Closure* callback) {
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);

  std::vector<Thread*> suspended_count_modified_threads;
  size_t count = 0;
  {
    // Request the checkpoint for each thread; threads that are suspended get their checkpoint
    // run manually on their behalf below.
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    count = list_.size();
    for (const auto& thread : list_) {
      if (thread != self) {
        while (true) {
          if (thread->RequestCheckpoint(checkpoint_function)) {
            // This thread will run its checkpoint some time in the near future.
            break;
          } else {
            // The request failed: the thread is probably suspended. Try to make sure it stays
            // suspended so we can run the checkpoint on its behalf.
            if (thread->GetState() == kRunnable) {
              // The thread switched back to runnable, so the failure was spurious; try again.
              continue;
            }
            bool updated = thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
            DCHECK(updated);
            suspended_count_modified_threads.push_back(thread);
            break;
          }
        }
      }
    }
    // Run the callback to be called inside this critical section.
    if (callback != nullptr) {
      callback->Run(self);
    }
  }

  // Run the checkpoint on ourselves while we wait for threads to suspend.
  checkpoint_function->Run(self);

  // Run the checkpoint on the suspended threads.
  for (const auto& thread : suspended_count_modified_threads) {
    if (!thread->IsSuspended()) {
      ScopedTrace trace([&]() {
        std::ostringstream oss;
        thread->ShortDump(oss);
        return std::string("Waiting for suspension of thread ") + oss.str();
      });
      // Busy wait until the thread is suspended.
      const uint64_t start_time = NanoTime();
      do {
        ThreadSuspendSleep(kThreadSuspendInitialSleepUs);
      } while (!thread->IsSuspended());
      const uint64_t total_delay = NanoTime() - start_time;
      // Shouldn't need to wait for longer than 1000 microseconds.
      constexpr uint64_t kLongWaitThreshold = MsToNs(1);
      if (UNLIKELY(total_delay > kLongWaitThreshold)) {
        LOG(WARNING) << "Long wait of " << PrettyDuration(total_delay) << " for "
            << *thread << " suspension!";
      }
    }
    // We know for sure that the thread is suspended at this point.
    checkpoint_function->Run(thread);
    {
      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
      bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
      DCHECK(updated);
    }
  }

  {
    // Imitate ResumeAll: threads may be waiting on Thread::resume_cond_ since we raised their
    // suspend counts. Now that the counts are lowered again, we must do the broadcast.
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    Thread::resume_cond_->Broadcast(self);
  }

  return count;
}

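// Runs an "empty" checkpoint on all threads: no closure is run; we only require each thread to
// either respond to the request or be observed in a suspended state. This acts as a barrier
// guaranteeing that no thread is still in the middle of a mutator heap access; it is used, for
// example, when draining weak reference access during concurrent collection.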
void ThreadList::RunEmptyCheckpoint() {
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
  std::vector<uint32_t> runnable_thread_ids;
  size_t count = 0;
  Barrier* barrier = empty_checkpoint_barrier_.get();
  barrier->Init(self, 0);
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (Thread* thread : list_) {
      if (thread != self) {
        while (true) {
          if (thread->RequestEmptyCheckpoint()) {
            // This thread will run an empty checkpoint (decrement the empty checkpoint barrier)
            // some time in the near future.
            ++count;
            if (kIsDebugBuild) {
              runnable_thread_ids.push_back(thread->GetThreadId());
            }
            break;
          }
          if (thread->GetState() != kRunnable) {
            // The thread has been seen suspended; we are done, because a suspended thread cannot
            // be in the middle of a mutator heap access.
            break;
          }
        }
      }
    }
  }

  // Wake up the threads blocked on weak ref access so that they will respond to the empty
  // checkpoint request. Otherwise we will hang, as they are blocked in the kRunnable state.
  Runtime::Current()->GetHeap()->GetReferenceProcessor()->BroadcastForSlowPath(self);
  Runtime::Current()->BroadcastForNewSystemWeaks(/*broadcast_for_checkpoint*/true);
  {
    ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
    uint64_t total_wait_time = 0;
    bool first_iter = true;
    while (true) {
      // Wake up the runnable threads blocked on the mutexes that another thread, which is blocked
      // on a weak ref access, holds (indirectly blocking for weak ref access through another
      // thread and a mutex). This needs to be done periodically because the thread may be
      // preempted between the CheckEmptyCheckpointFromMutex call and the subsequent futex wait in
      // Mutex::ExclusiveLock, etc. when the wakeup via WakeupToRespondToEmptyCheckpoint
      // arrives. This could cause a *very rare* deadlock if not repeated. Most of the cases are
      // handled in the first iteration.
      for (BaseMutex* mutex : Locks::expected_mutexes_on_weak_ref_access_) {
        mutex->WakeupToRespondToEmptyCheckpoint();
      }
      static constexpr uint64_t kEmptyCheckpointPeriodicTimeoutMs = 100;  // 100ms
      static constexpr uint64_t kEmptyCheckpointTotalTimeoutMs = 600 * 1000;  // 10 minutes.
      size_t barrier_count = first_iter ? count : 0;
      first_iter = false;  // Don't add to the barrier count from the second iteration on.
      bool timed_out = barrier->Increment(self, barrier_count, kEmptyCheckpointPeriodicTimeoutMs);
      if (!timed_out) {
        break;  // Success.
      }
      // This is a very rare case.
      total_wait_time += kEmptyCheckpointPeriodicTimeoutMs;
      if (kIsDebugBuild && total_wait_time > kEmptyCheckpointTotalTimeoutMs) {
        std::ostringstream ss;
        ss << "Empty checkpoint timeout\n";
        ss << "Barrier count " << barrier->GetCount(self) << "\n";
        ss << "Runnable thread IDs";
        for (uint32_t tid : runnable_thread_ids) {
          ss << " " << tid;
        }
        ss << "\n";
        Locks::mutator_lock_->Dump(ss);
        ss << "\n";
        LOG(FATAL_WITHOUT_ABORT) << ss.str();
        // Some threads in 'runnable_thread_ids' are probably stuck. Try to dump their stacks.
        // Avoid using ThreadList::Dump() initially because it is likely to get stuck as well.
        {
          ScopedObjectAccess soa(self);
          MutexLock mu1(self, *Locks::thread_list_lock_);
          for (Thread* thread : GetList()) {
            uint32_t tid = thread->GetThreadId();
            bool is_in_runnable_thread_ids =
                std::find(runnable_thread_ids.begin(), runnable_thread_ids.end(), tid) !=
                runnable_thread_ids.end();
            if (is_in_runnable_thread_ids &&
                thread->ReadFlag(kEmptyCheckpointRequest)) {
              // Found a runnable thread that hasn't responded to the empty checkpoint request.
              // Assume it's stuck and safe to dump its stack.
              thread->Dump(LOG_STREAM(FATAL_WITHOUT_ABORT),
                           /*dump_native_stack*/ true,
                           /*backtrace_map*/ nullptr,
                           /*force_dump_stack*/ true);
            }
          }
        }
        LOG(FATAL_WITHOUT_ABORT)
            << "Dumped runnable threads that haven't responded to empty checkpoint.";
        // Now use ThreadList::Dump() to dump more threads, noting it may get stuck.
        Dump(LOG_STREAM(FATAL_WITHOUT_ABORT));
        LOG(FATAL) << "Dumped all threads.";
      }
    }
  }
}

// Request that a checkpoint function be run on all active (non-suspended)
// threads.  Returns the number of successful requests.
size_t ThreadList::RunCheckpointOnRunnableThreads(Closure* checkpoint_function) {
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
  CHECK_NE(self->GetState(), kRunnable);

  size_t count = 0;
  {
    // Call a checkpoint function for each non-suspended thread.
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (const auto& thread : list_) {
      if (thread != self) {
        if (thread->RequestCheckpoint(checkpoint_function)) {
          // This thread will run its checkpoint some time in the near future.
          count++;
        }
      }
    }
  }

  // Return the number of threads that will run the checkpoint function.
  return count;
}

// A checkpoint/suspend-all hybrid to switch thread roots from
// from-space to to-space refs. Used to synchronize threads at a point
// to mark the initiation of marking while maintaining the to-space
// invariant.
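// In outline, the flip proceeds in three phases:
//   1. Suspend all threads and run |flip_callback| with the mutator lock held exclusively
//      (this is the pause).
//   2. Install |thread_flip_visitor| as the flip function on every thread, then resume the
//      threads that were runnable; each of those runs the flip function itself on its way back
//      to runnable.
//   3. Run the flip function on behalf of the remaining suspended threads, then resume them.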
size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
                                   Closure* flip_callback,
                                   gc::collector::GarbageCollector* collector,
                                   gc::GcPauseListener* pause_listener) {
  TimingLogger::ScopedTiming split("ThreadListFlip", collector->GetTimings());
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertNotHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
  CHECK_NE(self->GetState(), kRunnable);

  collector->GetHeap()->ThreadFlipBegin(self);  // Sync with JNI critical calls.

  // ThreadFlipBegin happens before we suspend all the threads, so it does not count towards the
  // pause.
  const uint64_t suspend_start_time = NanoTime();
  SuspendAllInternal(self, self, nullptr);
  if (pause_listener != nullptr) {
    pause_listener->StartPause();
  }

  // Run the flip callback for the collector.
  Locks::mutator_lock_->ExclusiveLock(self);
  suspend_all_historam_.AdjustAndAddValue(NanoTime() - suspend_start_time);
  flip_callback->Run(self);
  Locks::mutator_lock_->ExclusiveUnlock(self);
  collector->RegisterPause(NanoTime() - suspend_start_time);
  if (pause_listener != nullptr) {
    pause_listener->EndPause();
  }

  // Resume runnable threads.
  size_t runnable_thread_count = 0;
  std::vector<Thread*> other_threads;
  {
    TimingLogger::ScopedTiming split2("ResumeRunnableThreads", collector->GetTimings());
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    --suspend_all_count_;
    for (const auto& thread : list_) {
      // Set the flip function for all threads because Thread::DumpState/DumpJavaStack() (invoked
      // by a checkpoint) may cause the flip function to be run for a runnable/suspended thread
      // before a runnable thread runs it for itself or we run it for a suspended thread below.
      thread->SetFlipFunction(thread_flip_visitor);
      if (thread == self) {
        continue;
      }
      // Resume early the threads that were runnable but are suspended just for this thread flip,
      // or about to transition from non-runnable (e.g. kNative at the SOA entry in a JNI function)
      // to runnable (both cases waiting inside Thread::TransitionFromSuspendedToRunnable), or
      // waiting for the thread flip to end at the JNI critical section entry
      // (kWaitingForGcThreadFlip).
      ThreadState state = thread->GetState();
      if ((state == kWaitingForGcThreadFlip || thread->IsTransitioningToRunnable()) &&
          thread->GetSuspendCount() == 1) {
        // The thread will resume right after the broadcast.
        bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
        DCHECK(updated);
        ++runnable_thread_count;
      } else {
        other_threads.push_back(thread);
      }
    }
    Thread::resume_cond_->Broadcast(self);
  }

  collector->GetHeap()->ThreadFlipEnd(self);

  // Run the closure on the other threads and let them resume.
  {
    TimingLogger::ScopedTiming split3("FlipOtherThreads", collector->GetTimings());
    ReaderMutexLock mu(self, *Locks::mutator_lock_);
    for (const auto& thread : other_threads) {
      Closure* flip_func = thread->GetFlipFunction();
      if (flip_func != nullptr) {
        flip_func->Run(thread);
      }
    }
    // Run it for self.
    Closure* flip_func = self->GetFlipFunction();
    if (flip_func != nullptr) {
      flip_func->Run(self);
    }
  }

  // Resume other threads.
  {
    TimingLogger::ScopedTiming split4("ResumeOtherThreads", collector->GetTimings());
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (const auto& thread : other_threads) {
      bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
      DCHECK(updated);
    }
    Thread::resume_cond_->Broadcast(self);
  }

  return runnable_thread_count + other_threads.size() + 1;  // +1 for self.
}

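// A minimal usage sketch: ScopedSuspendAll (declared in thread_list.h) is the usual RAII
// wrapper around SuspendAll()/ResumeAll(), so callers rarely invoke them directly:
//
//   {
//     ScopedSuspendAll ssa(__FUNCTION__);
//     // All mutator threads are suspended here; it is safe to inspect or modify state that
//     // is normally protected by ownership of the mutator lock.
//   }  // ResumeAll() runs as ssa goes out of scope.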
void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
  Thread* self = Thread::Current();

  if (self != nullptr) {
    VLOG(threads) << *self << " SuspendAll for " << cause << " starting...";
  } else {
    VLOG(threads) << "Thread[null] SuspendAll for " << cause << " starting...";
  }
  {
    ScopedTrace trace("Suspending mutator threads");
    const uint64_t start_time = NanoTime();

    SuspendAllInternal(self, self);
    // All threads are known to have suspended (but a thread may still own the mutator lock).
    // Make sure this thread grabs exclusive access to the mutator lock and its protected data.
#if HAVE_TIMED_RWLOCK
    while (true) {
      if (Locks::mutator_lock_->ExclusiveLockWithTimeout(self,
                                                         NsToMs(thread_suspend_timeout_ns_),
                                                         0)) {
        break;
      } else if (!long_suspend_) {
        // Reading long_suspend_ without the mutator lock is slightly racy; in some rare cases,
        // this could result in a thread suspend timeout.
        // Timeout if we wait more than thread_suspend_timeout_ns_ nanoseconds.
        UnsafeLogFatalForThreadSuspendAllTimeout();
      }
    }
#else
    Locks::mutator_lock_->ExclusiveLock(self);
#endif

    long_suspend_ = long_suspend;

    const uint64_t end_time = NanoTime();
    const uint64_t suspend_time = end_time - start_time;
    suspend_all_historam_.AdjustAndAddValue(suspend_time);
    if (suspend_time > kLongThreadSuspendThreshold) {
      LOG(WARNING) << "Suspending all threads took: " << PrettyDuration(suspend_time);
    }

    if (kDebugLocking) {
      // Debug check that all threads are suspended.
      AssertThreadsAreSuspended(self, self);
    }
  }
  ATRACE_BEGIN((std::string("Mutator threads suspended for ") + cause).c_str());

  if (self != nullptr) {
    VLOG(threads) << *self << " SuspendAll complete";
  } else {
    VLOG(threads) << "Thread[null] SuspendAll complete";
  }
}

// Ensures all threads running Java suspend and that those not running Java don't start.
// The debugger thread might be set to kRunnable for a short period of time after
// SuspendAllInternal returns. This is safe because it will be set back to a suspended state
// before SuspendAll returns.
void ThreadList::SuspendAllInternal(Thread* self,
                                    Thread* ignore1,
                                    Thread* ignore2,
                                    SuspendReason reason) {
  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
  Locks::thread_list_lock_->AssertNotHeld(self);
  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
  if (kDebugLocking && self != nullptr) {
    CHECK_NE(self->GetState(), kRunnable);
  }

  // First request that all threads suspend, then wait for them to suspend before
  // returning. This suspension scheme also relies on other behaviour:
  // 1. Threads cannot be deleted while they are suspended or have a suspend-
  //    request flag set (see Unregister() below).
  // 2. When threads are created, they are created in a suspended state (actually
  //    kNative) and will never begin executing Java code without first checking
  //    the suspend-request flag.
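  //
  // In outline, the protocol implemented below is:
  //   1. Under thread_list_lock_ and thread_suspend_count_lock_, raise every target thread's
  //      suspend count and install |pending_threads| as its suspend barrier.
  //   2. Each target thread decrements |pending_threads| as it suspends; for threads that are
  //      already suspended, we clear the barrier and decrement the counter here instead.
  //   3. Wait, with a timeout, for |pending_threads| to reach zero, using a futex where
  //      available (ART_USE_FUTEXES) and spinning otherwise.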

  // The atomic counter for number of threads that need to pass the barrier.
  AtomicInteger pending_threads;
  uint32_t num_ignored = 0;
  if (ignore1 != nullptr) {
    ++num_ignored;
  }
  if (ignore2 != nullptr && ignore1 != ignore2) {
    ++num_ignored;
  }
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    // Update global suspend all state for attaching threads.
    ++suspend_all_count_;
    if (reason == SuspendReason::kForDebugger) {
      ++debug_suspend_all_count_;
    }
    pending_threads.StoreRelaxed(list_.size() - num_ignored);
    // Increment everybody's suspend count (except those that should be ignored).
    for (const auto& thread : list_) {
      if (thread == ignore1 || thread == ignore2) {
        continue;
      }
      VLOG(threads) << "requesting thread suspend: " << *thread;
      bool updated = thread->ModifySuspendCount(self, +1, &pending_threads, reason);
      DCHECK(updated);

      // Must install the pending_threads counter first, then check thread->IsSuspended() and
      // clear the counter. Otherwise there's a race with Thread::TransitionFromRunnableToSuspended()
      // that can lead a thread to miss a call to PassActiveSuspendBarriers().
      if (thread->IsSuspended()) {
        // Only clear the counter for the current thread.
        thread->ClearSuspendBarrier(&pending_threads);
        pending_threads.FetchAndSubSequentiallyConsistent(1);
      }
    }
  }

  // Wait for the barrier to be passed by all runnable threads. This wait
  // is done with a timeout so that we can detect problems.
#if ART_USE_FUTEXES
  timespec wait_timeout;
  InitTimeSpec(false, CLOCK_MONOTONIC, NsToMs(thread_suspend_timeout_ns_), 0, &wait_timeout);
#endif
  const uint64_t start_time = NanoTime();
  while (true) {
    int32_t cur_val = pending_threads.LoadRelaxed();
    if (LIKELY(cur_val > 0)) {
#if ART_USE_FUTEXES
      if (futex(pending_threads.Address(), FUTEX_WAIT, cur_val, &wait_timeout, nullptr, 0) != 0) {
        // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
        if ((errno != EAGAIN) && (errno != EINTR)) {
          if (errno == ETIMEDOUT) {
            LOG(kIsDebugBuild ? ::android::base::FATAL : ::android::base::ERROR)
                << "Timed out waiting for threads to suspend, waited for "
                << PrettyDuration(NanoTime() - start_time);
          } else {
            PLOG(FATAL) << "futex wait failed for SuspendAllInternal()";
          }
        }
      }  // else re-check pending_threads in the next iteration (this may be a spurious wake-up).
#else
      // Spin wait. This is likely to be slow, but ART_USE_FUTEXES is set on most architectures.
      UNUSED(start_time);
#endif
    } else {
      CHECK_EQ(cur_val, 0);
      break;
    }
  }
}

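// Undoes SuspendAll(): drops exclusive ownership of the mutator lock, lowers every other
// thread's suspend count, and broadcasts on Thread::resume_cond_ so that suspended threads
// wake up.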
void ThreadList::ResumeAll() {
  Thread* self = Thread::Current();

  if (self != nullptr) {
    VLOG(threads) << *self << " ResumeAll starting";
  } else {
    VLOG(threads) << "Thread[null] ResumeAll starting";
  }

  ATRACE_END();

  ScopedTrace trace("Resuming mutator threads");

  if (kDebugLocking) {
    // Debug check that all threads are suspended.
    AssertThreadsAreSuspended(self, self);
  }

  long_suspend_ = false;

  Locks::mutator_lock_->ExclusiveUnlock(self);
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    // Update global suspend all state for attaching threads.
    --suspend_all_count_;
    // Decrement the suspend counts for all threads.
    for (const auto& thread : list_) {
      if (thread == self) {
        continue;
      }
      bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
      DCHECK(updated);
    }

    // Broadcast a notification to all suspended threads, some or all of
    // which may choose to wake up.  No need to wait for them.
    if (self != nullptr) {
      VLOG(threads) << *self << " ResumeAll waking others";
    } else {
      VLOG(threads) << "Thread[null] ResumeAll waking others";
    }
    Thread::resume_cond_->Broadcast(self);
  }

  if (self != nullptr) {
    VLOG(threads) << *self << " ResumeAll complete";
  } else {
    VLOG(threads) << "Thread[null] ResumeAll complete";
  }
}

bool ThreadList::Resume(Thread* thread, SuspendReason reason) {
  // This assumes there was an ATRACE_BEGIN when we suspended the thread.
  ATRACE_END();

  Thread* self = Thread::Current();
  DCHECK_NE(thread, self);
  VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") starting..." << reason;

  {
    // To check Contains.
    MutexLock mu(self, *Locks::thread_list_lock_);
    // To check IsSuspended.
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    if (UNLIKELY(!thread->IsSuspended())) {
      LOG(ERROR) << "Resume(" << reinterpret_cast<void*>(thread)
          << ") thread not suspended";
      return false;
    }
    if (!Contains(thread)) {
      // We only expect threads within the thread list to have been suspended; otherwise we can't
      // stop such threads from deleting themselves.
      LOG(ERROR) << "Resume(" << reinterpret_cast<void*>(thread)
          << ") thread not within thread list";
      return false;
    }
    if (UNLIKELY(!thread->ModifySuspendCount(self, -1, nullptr, reason))) {
      LOG(ERROR) << "Resume(" << reinterpret_cast<void*>(thread)
                 << ") could not modify suspend count.";
      return false;
    }
  }

  {
    VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") waking others";
    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
    Thread::resume_cond_->Broadcast(self);
  }

  VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") complete";
  return true;
}

static void ThreadSuspendByPeerWarning(Thread* self,
                                       LogSeverity severity,
                                       const char* message,
                                       jobject peer) {
  JNIEnvExt* env = self->GetJniEnv();
  ScopedLocalRef<jstring>
      scoped_name_string(env, static_cast<jstring>(env->GetObjectField(
          peer, WellKnownClasses::java_lang_Thread_name)));
  ScopedUtfChars scoped_name_chars(env, scoped_name_string.get());
  if (scoped_name_chars.c_str() == nullptr) {
    LOG(severity) << message << ": " << peer;
    env->ExceptionClear();
  } else {
    LOG(severity) << message << ": " << peer << ":" << scoped_name_chars.c_str();
  }
}

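// Suspends the thread whose java.lang.Thread peer is |peer|, polling with exponential backoff
// (see ThreadSuspendSleep above) until the thread is observed suspended or
// thread_suspend_timeout_ns_ elapses. Returns the suspended thread, or null on failure with
// *timed_out set if the timeout was hit.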
Thread* ThreadList::SuspendThreadByPeer(jobject peer,
                                        bool request_suspension,
                                        SuspendReason reason,
                                        bool* timed_out) {
  const uint64_t start_time = NanoTime();
  useconds_t sleep_us = kThreadSuspendInitialSleepUs;
  *timed_out = false;
  Thread* const self = Thread::Current();
  Thread* suspended_thread = nullptr;
  VLOG(threads) << "SuspendThreadByPeer starting";
  while (true) {
    Thread* thread;
    {
      // Note: this will transition to runnable and potentially suspend. We ensure only one thread
      // is requesting another suspend, to avoid deadlock, by requiring this function be called
      // holding Locks::thread_list_suspend_thread_lock_. It's important this thread suspend
      // rather than request thread suspension, to avoid potential cycles in threads requesting
      // each other's suspension.
      ScopedObjectAccess soa(self);
      MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
      thread = Thread::FromManagedThread(soa, peer);
      if (thread == nullptr) {
        if (suspended_thread != nullptr) {
          MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
          // If we incremented the suspend count but the thread reset its peer, we need to
          // re-decrement it since it is shutting down and may deadlock the runtime in
          // ThreadList::WaitForOtherNonDaemonThreadsToExit.
          bool updated = suspended_thread->ModifySuspendCount(soa.Self(),
                                                              -1,
                                                              nullptr,
                                                              reason);
          DCHECK(updated);
        }
        ThreadSuspendByPeerWarning(self,
                                   ::android::base::WARNING,
                                   "No such thread for suspend",
                                   peer);
        return nullptr;
      }
      if (!Contains(thread)) {
        CHECK(suspended_thread == nullptr);
        VLOG(threads) << "SuspendThreadByPeer failed for unattached thread: "
            << reinterpret_cast<void*>(thread);
        return nullptr;
      }
      VLOG(threads) << "SuspendThreadByPeer found thread: " << *thread;
      {
        MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
        if (request_suspension) {
          if (self->GetSuspendCount() > 0) {
            // We hold the suspend count lock but another thread is trying to suspend us. It's not
            // safe to try to suspend another thread in case we get a cycle. Start the loop again,
            // which will allow this thread to be suspended.
            continue;
          }
          CHECK(suspended_thread == nullptr);
          suspended_thread = thread;
          bool updated = suspended_thread->ModifySuspendCount(self, +1, nullptr, reason);
          DCHECK(updated);
          request_suspension = false;
        } else {
          // If the caller isn't requesting suspension, a suspension should have already occurred.
          CHECK_GT(thread->GetSuspendCount(), 0);
        }
        // IsSuspended on the current thread will fail as the current thread is changed into
        // Runnable above. As the suspend count is now raised, if this is the current thread
        // it will self-suspend on transition to Runnable, making it hard to work with. It's
        // simpler to just explicitly handle the current thread in the callers to this code.
        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
        // count, or else we've waited and it has self-suspended) or is the current thread, we're
        // done.
        if (thread->IsSuspended()) {
          VLOG(threads) << "SuspendThreadByPeer thread suspended: " << *thread;
          if (ATRACE_ENABLED()) {
            std::string name;
            thread->GetThreadName(name);
            ATRACE_BEGIN(StringPrintf("SuspendThreadByPeer suspended %s for peer=%p", name.c_str(),
                                      peer).c_str());
          }
          return thread;
        }
        const uint64_t total_delay = NanoTime() - start_time;
        if (total_delay >= thread_suspend_timeout_ns_) {
          ThreadSuspendByPeerWarning(self,
                                     ::android::base::FATAL,
                                     "Thread suspension timed out",
                                     peer);
          if (suspended_thread != nullptr) {
            CHECK_EQ(suspended_thread, thread);
            bool updated = suspended_thread->ModifySuspendCount(soa.Self(),
                                                                -1,
                                                                nullptr,
                                                                reason);
            DCHECK(updated);
          }
          *timed_out = true;
          return nullptr;
        } else if (sleep_us == 0 &&
            total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
          // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent
          // excessive CPU usage.
          sleep_us = kThreadSuspendMaxYieldUs / 2;
        }
      }
      // Release locks and come out of runnable state.
    }
    VLOG(threads) << "SuspendThreadByPeer waiting to allow thread chance to suspend";
    ThreadSuspendSleep(sleep_us);
    // This may stay at 0 if sleep_us == 0, but this is working as intended: we want to avoid
    // using usleep at all if possible. This shouldn't be an issue since time to suspend should
    // always be small.
    sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs);
  }
}

static void ThreadSuspendByThreadIdWarning(LogSeverity severity,
                                           const char* message,
                                           uint32_t thread_id) {
  LOG(severity) << StringPrintf("%s: %d", message, thread_id);
}

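// Like SuspendThreadByPeer above, but identifies the target by its thin-lock thread id. This is
// used, for example, when inflating a monitor owned by another thread, where only the owner's
// thread id is known.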
Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id,
                                            SuspendReason reason,
                                            bool* timed_out) {
  const uint64_t start_time = NanoTime();
  useconds_t sleep_us = kThreadSuspendInitialSleepUs;
  *timed_out = false;
  Thread* suspended_thread = nullptr;
  Thread* const self = Thread::Current();
  CHECK_NE(thread_id, kInvalidThreadId);
  VLOG(threads) << "SuspendThreadByThreadId starting";
  while (true) {
    {
      // Note: this will transition to runnable and potentially suspend. We ensure only one thread
      // is requesting another suspend, to avoid deadlock, by requiring this function be called
      // holding Locks::thread_list_suspend_thread_lock_. It's important this thread suspend
      // rather than request thread suspension, to avoid potential cycles in threads requesting
      // each other's suspension.
      ScopedObjectAccess soa(self);
      MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
      Thread* thread = nullptr;
      for (const auto& it : list_) {
        if (it->GetThreadId() == thread_id) {
          thread = it;
          break;
        }
      }
      if (thread == nullptr) {
        CHECK(suspended_thread == nullptr) << "Suspended thread " << suspended_thread
            << " no longer in thread list";
        // There's a race in inflating a lock and the owner giving up ownership and then dying.
        ThreadSuspendByThreadIdWarning(::android::base::WARNING,
                                       "No such thread id for suspend",
                                       thread_id);
        return nullptr;
      }
      VLOG(threads) << "SuspendThreadByThreadId found thread: " << *thread;
      DCHECK(Contains(thread));
      {
        MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
        if (suspended_thread == nullptr) {
          if (self->GetSuspendCount() > 0) {
            // We hold the suspend count lock but another thread is trying to suspend us. It's not
            // safe to try to suspend another thread in case we get a cycle. Start the loop again,
            // which will allow this thread to be suspended.
            continue;
          }
          bool updated = thread->ModifySuspendCount(self, +1, nullptr, reason);
          DCHECK(updated);
          suspended_thread = thread;
        } else {
          CHECK_EQ(suspended_thread, thread);
          // If the caller isn't requesting suspension, a suspension should have already occurred.
          CHECK_GT(thread->GetSuspendCount(), 0);
        }
        // IsSuspended on the current thread will fail as the current thread is changed into
        // Runnable above. As the suspend count is now raised, if this is the current thread
        // it will self-suspend on transition to Runnable, making it hard to work with. It's
        // simpler to just explicitly handle the current thread in the callers to this code.
        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
        // count, or else we've waited and it has self-suspended) or is the current thread, we're
        // done.
        if (thread->IsSuspended()) {
          if (ATRACE_ENABLED()) {
            std::string name;
            thread->GetThreadName(name);
            ATRACE_BEGIN(StringPrintf("SuspendThreadByThreadId suspended %s id=%d",
                                      name.c_str(), thread_id).c_str());
          }
          VLOG(threads) << "SuspendThreadByThreadId thread suspended: " << *thread;
          return thread;
        }
        const uint64_t total_delay = NanoTime() - start_time;
        if (total_delay >= thread_suspend_timeout_ns_) {
          ThreadSuspendByThreadIdWarning(::android::base::WARNING,
                                         "Thread suspension timed out",
                                         thread_id);
          if (suspended_thread != nullptr) {
            bool updated = thread->ModifySuspendCount(soa.Self(), -1, nullptr, reason);
            DCHECK(updated);
          }
          *timed_out = true;
          return nullptr;
        } else if (sleep_us == 0 &&
            total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
          // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent
          // excessive CPU usage.
          sleep_us = kThreadSuspendMaxYieldUs / 2;
        }
      }
      // Release locks and come out of runnable state.
    }
    VLOG(threads) << "SuspendThreadByThreadId waiting to allow thread chance to suspend";
    ThreadSuspendSleep(sleep_us);
    sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs);
  }
}

Thread* ThreadList::FindThreadByThreadId(uint32_t thread_id) {
  for (const auto& thread : list_) {
    if (thread->GetThreadId() == thread_id) {
      return thread;
    }
  }
  return nullptr;
}

void ThreadList::SuspendAllForDebugger() {
  Thread* self = Thread::Current();
  Thread* debug_thread = Dbg::GetDebugThread();

  VLOG(threads) << *self << " SuspendAllForDebugger starting...";

  SuspendAllInternal(self, self, debug_thread, SuspendReason::kForDebugger);
  // Block on the mutator lock until all Runnable threads release their share of access then
  // immediately unlock again.
#if HAVE_TIMED_RWLOCK
  // Timeout if we wait more than 30 seconds.
  if (!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, 30 * 1000, 0)) {
    UnsafeLogFatalForThreadSuspendAllTimeout();
  } else {
    Locks::mutator_lock_->ExclusiveUnlock(self);
  }
#else
  Locks::mutator_lock_->ExclusiveLock(self);
  Locks::mutator_lock_->ExclusiveUnlock(self);
#endif
  // Disabled for the following race condition:
  // Thread 1 calls SuspendAllForDebugger, gets preempted after pulsing the mutator lock.
  // Thread 2 calls SuspendAll and SetStateUnsafe (perhaps from Dbg::Disconnected).
  // Thread 1 fails assertion that all threads are suspended due to thread 2 being in a runnable
  // state (from SetStateUnsafe).
  // AssertThreadsAreSuspended(self, self, debug_thread);

  VLOG(threads) << *self << " SuspendAllForDebugger complete";
}

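// Called on a thread that has been asked to suspend for the debugger. Raises the thread's own
// suspend count, notifies JDWP that the suspension is complete, and then waits on
// Thread::resume_cond_ until the suspend count drops back to zero.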
   1159 void ThreadList::SuspendSelfForDebugger() {
   1160   Thread* const self = Thread::Current();
   1161   self->SetReadyForDebugInvoke(true);
   1162 
   1163   // The debugger thread must not suspend itself due to debugger activity!
   1164   Thread* debug_thread = Dbg::GetDebugThread();
   1165   CHECK(self != debug_thread);
   1166   CHECK_NE(self->GetState(), kRunnable);
   1167   Locks::mutator_lock_->AssertNotHeld(self);
   1168 
   1169   // The debugger may have detached while we were executing an invoke request. In that case, we
   1170   // must not suspend ourself.
   1171   DebugInvokeReq* pReq = self->GetInvokeReq();
   1172   const bool skip_thread_suspension = (pReq != nullptr && !Dbg::IsDebuggerActive());
   1173   if (!skip_thread_suspension) {
   1174     // Collisions with other suspends aren't really interesting. We want
   1175     // to ensure that we're the only one fiddling with the suspend count
   1176     // though.
   1177     MutexLock mu(self, *Locks::thread_suspend_count_lock_);
   1178     bool updated = self->ModifySuspendCount(self, +1, nullptr, SuspendReason::kForDebugger);
   1179     DCHECK(updated);
   1180     CHECK_GT(self->GetSuspendCount(), 0);
   1181 
   1182     VLOG(threads) << *self << " self-suspending (debugger)";
   1183   } else {
   1184     // We must no longer be subject to debugger suspension.
   1185     MutexLock mu(self, *Locks::thread_suspend_count_lock_);
   1186     CHECK_EQ(self->GetDebugSuspendCount(), 0) << "Debugger detached without resuming us";
   1187 
   1188     VLOG(threads) << *self << " not self-suspending because debugger detached during invoke";
   1189   }
   1190 
   1191   // If the debugger requested an invoke, we need to send the reply and clear the request.
   1192   if (pReq != nullptr) {
   1193     Dbg::FinishInvokeMethod(pReq);
   1194     self->ClearDebugInvokeReq();
    1195     pReq = nullptr;  // The object has been deleted; clear the pointer for safety.
   1196   }
   1197 
   1198   // Tell JDWP that we've completed suspension. The JDWP thread can't
   1199   // tell us to resume before we're fully asleep because we hold the
   1200   // suspend count lock.
   1201   Dbg::ClearWaitForEventThread();
   1202 
   1203   {
   1204     MutexLock mu(self, *Locks::thread_suspend_count_lock_);
   1205     while (self->GetSuspendCount() != 0) {
   1206       Thread::resume_cond_->Wait(self);
   1207       if (self->GetSuspendCount() != 0) {
   1208         // The condition was signaled but we're still suspended. This
   1209         // can happen when we suspend then resume all threads to
   1210         // update instrumentation or compute monitor info. This can
   1211         // also happen if the debugger lets go while a SIGQUIT thread
   1212         // dump event is pending (assuming SignalCatcher was resumed for
   1213         // just long enough to try to grab the thread-suspend lock).
   1214         VLOG(jdwp) << *self << " still suspended after undo "
   1215                    << "(suspend count=" << self->GetSuspendCount() << ", "
   1216                    << "debug suspend count=" << self->GetDebugSuspendCount() << ")";
   1217       }
   1218     }
   1219     CHECK_EQ(self->GetSuspendCount(), 0);
   1220   }
   1221 
   1222   self->SetReadyForDebugInvoke(false);
   1223   VLOG(threads) << *self << " self-reviving (debugger)";
   1224 }
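
         // The wait loop above is the standard guarded-wait idiom: the predicate
         // (GetSuspendCount() == 0) is re-checked after every Wait(), because resume_cond_ is
         // also broadcast for unrelated suspend/resume cycles. Stripped of logging:
         //
         //   MutexLock mu(self, *Locks::thread_suspend_count_lock_);
         //   while (self->GetSuspendCount() != 0) {
         //     Thread::resume_cond_->Wait(self);  // May wake before the count reaches zero.
         //   }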
   1225 
   1226 void ThreadList::ResumeAllForDebugger() {
   1227   Thread* self = Thread::Current();
   1228   Thread* debug_thread = Dbg::GetDebugThread();
   1229 
   1230   VLOG(threads) << *self << " ResumeAllForDebugger starting...";
   1231 
   1232   // Threads can't resume if we exclusively hold the mutator lock.
   1233   Locks::mutator_lock_->AssertNotExclusiveHeld(self);
   1234 
   1235   {
   1236     MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
   1237     {
   1238       MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
   1239       // Update global suspend all state for attaching threads.
   1240       DCHECK_GE(suspend_all_count_, debug_suspend_all_count_);
   1241       if (debug_suspend_all_count_ > 0) {
   1242         --suspend_all_count_;
   1243         --debug_suspend_all_count_;
   1244       } else {
    1245         // We've been asked to resume all threads without having been
    1246         // asked to suspend them all first. That may happen if a debugger
    1247         // tries to resume several suspended threads (each with suspend
    1248         // count == 1) all at once with a VirtualMachine.Resume command.
    1249         // Print a warning.
   1250         LOG(WARNING) << "Debugger attempted to resume all threads without "
   1251                      << "having suspended them all before.";
   1252       }
   1253       // Decrement everybody's suspend count (except our own).
   1254       for (const auto& thread : list_) {
   1255         if (thread == self || thread == debug_thread) {
   1256           continue;
   1257         }
   1258         if (thread->GetDebugSuspendCount() == 0) {
   1259           // This thread may have been individually resumed with ThreadReference.Resume.
   1260           continue;
   1261         }
   1262         VLOG(threads) << "requesting thread resume: " << *thread;
   1263         bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kForDebugger);
   1264         DCHECK(updated);
   1265       }
   1266     }
   1267   }
   1268 
   1269   {
   1270     MutexLock mu(self, *Locks::thread_suspend_count_lock_);
   1271     Thread::resume_cond_->Broadcast(self);
   1272   }
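           // Note: the Broadcast runs while thread_suspend_count_lock_ is held, pairing with the
           // locked wait in SuspendSelfForDebugger so a resume cannot slip in between a thread
           // observing a non-zero suspend count and its call to Wait().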
   1273 
   1274   VLOG(threads) << *self << " ResumeAllForDebugger complete";
   1275 }
   1276 
   1277 void ThreadList::UndoDebuggerSuspensions() {
   1278   Thread* self = Thread::Current();
   1279 
   1280   VLOG(threads) << *self << " UndoDebuggerSuspensions starting";
   1281 
   1282   {
   1283     MutexLock mu(self, *Locks::thread_list_lock_);
   1284     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
   1285     // Update global suspend all state for attaching threads.
   1286     suspend_all_count_ -= debug_suspend_all_count_;
   1287     debug_suspend_all_count_ = 0;
   1288     // Update running threads.
   1289     for (const auto& thread : list_) {
   1290       if (thread == self || thread->GetDebugSuspendCount() == 0) {
   1291         continue;
   1292       }
   1293       bool suspended = thread->ModifySuspendCount(self,
   1294                                                   -thread->GetDebugSuspendCount(),
   1295                                                   nullptr,
   1296                                                   SuspendReason::kForDebugger);
   1297       DCHECK(suspended);
   1298     }
   1299   }
   1300 
   1301   {
   1302     MutexLock mu(self, *Locks::thread_suspend_count_lock_);
   1303     Thread::resume_cond_->Broadcast(self);
   1304   }
   1305 
   1306   VLOG(threads) << "UndoDebuggerSuspensions(" << *self << ") complete";
   1307 }
   1308 
   1309 void ThreadList::WaitForOtherNonDaemonThreadsToExit() {
   1310   ScopedTrace trace(__PRETTY_FUNCTION__);
   1311   Thread* self = Thread::Current();
   1312   Locks::mutator_lock_->AssertNotHeld(self);
   1313   while (true) {
   1314     {
    1315       // No more threads can be born after we start to shut down.
   1316       MutexLock mu(self, *Locks::runtime_shutdown_lock_);
   1317       CHECK(Runtime::Current()->IsShuttingDownLocked());
   1318       CHECK_EQ(Runtime::Current()->NumberOfThreadsBeingBorn(), 0U);
   1319     }
   1320     MutexLock mu(self, *Locks::thread_list_lock_);
   1321     // Also wait for any threads that are unregistering to finish. This is required so that no
   1322     // threads access the thread list after it is deleted. TODO: This may not work for user daemon
   1323     // threads since they could unregister at the wrong time.
   1324     bool done = unregistering_count_ == 0;
   1325     if (done) {
   1326       for (const auto& thread : list_) {
   1327         if (thread != self && !thread->IsDaemon()) {
   1328           done = false;
   1329           break;
   1330         }
   1331       }
   1332     }
   1333     if (done) {
   1334       break;
   1335     }
   1336     // Wait for another thread to exit before re-checking.
   1337     Locks::thread_exit_cond_->Wait(self);
   1338   }
   1339 }
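
         // Shutdown ordering sketch: the runtime first waits here for all non-daemon threads
         // (and any in-flight Unregister() calls) to finish, and only afterwards suspends the
         // remaining daemons via SuspendAllDaemonThreadsForShutdown() below.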
   1340 
   1341 void ThreadList::SuspendAllDaemonThreadsForShutdown() {
   1342   ScopedTrace trace(__PRETTY_FUNCTION__);
   1343   Thread* self = Thread::Current();
   1344   size_t daemons_left = 0;
   1345   {
   1346     // Tell all the daemons it's time to suspend.
   1347     MutexLock mu(self, *Locks::thread_list_lock_);
   1348     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
   1349     for (const auto& thread : list_) {
   1350       // This is only run after all non-daemon threads have exited, so the remainder should all be
   1351       // daemons.
   1352       CHECK(thread->IsDaemon()) << *thread;
   1353       if (thread != self) {
   1354         bool updated = thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
   1355         DCHECK(updated);
   1356         ++daemons_left;
   1357       }
    1358       // We are shutting down the runtime, so set the JNI functions of all the JNIEnvs to
    1359       // the sleep-forever versions.
   1360       thread->GetJniEnv()->SetFunctionsToRuntimeShutdownFunctions();
   1361     }
   1362   }
    1363   // If we have any daemons left, wait 200ms so that daemons that are about to access runtime
    1364   // state while not yet in a runnable state (for example, in monitor code, or waking up from
    1365   // a condition variable) have a chance to get past that point. TODO: Try to find a better
    1366   // way to wait for daemon threads to reach a blocked state.
   1367   if (daemons_left > 0) {
   1368     static constexpr size_t kDaemonSleepTime = 200 * 1000;
   1369     usleep(kDaemonSleepTime);
   1370   }
   1371   // Give the threads a chance to suspend, complaining if they're slow.
   1372   bool have_complained = false;
   1373   static constexpr size_t kTimeoutMicroseconds = 2000 * 1000;
   1374   static constexpr size_t kSleepMicroseconds = 1000;
   1375   for (size_t i = 0; i < kTimeoutMicroseconds / kSleepMicroseconds; ++i) {
   1376     bool all_suspended = true;
   1377     {
   1378       MutexLock mu(self, *Locks::thread_list_lock_);
   1379       for (const auto& thread : list_) {
   1380         if (thread != self && thread->GetState() == kRunnable) {
   1381           if (!have_complained) {
   1382             LOG(WARNING) << "daemon thread not yet suspended: " << *thread;
   1383             have_complained = true;
   1384           }
   1385           all_suspended = false;
   1386         }
   1387       }
   1388     }
   1389     if (all_suspended) {
   1390       return;
   1391     }
   1392     usleep(kSleepMicroseconds);
   1393   }
   1394   LOG(WARNING) << "timed out suspending all daemon threads";
   1395 }
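
         // Worked out, the budget above is: a 200ms grace sleep when daemons remain, then up to
         // kTimeoutMicroseconds / kSleepMicroseconds = 2,000,000 / 1,000 = 2000 polls at 1ms
         // intervals (roughly two more seconds) before the "timed out" warning is logged.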
   1396 
   1397 void ThreadList::Register(Thread* self) {
   1398   DCHECK_EQ(self, Thread::Current());
   1399   CHECK(!shut_down_);
   1400 
   1401   if (VLOG_IS_ON(threads)) {
   1402     std::ostringstream oss;
   1403     self->ShortDump(oss);  // We don't hold the mutator_lock_ yet and so cannot call Dump.
    1404     LOG(INFO) << "ThreadList::Register() " << *self << "\n" << oss.str();
   1405   }
   1406 
   1407   // Atomically add self to the thread list and make its thread_suspend_count_ reflect ongoing
   1408   // SuspendAll requests.
   1409   MutexLock mu(self, *Locks::thread_list_lock_);
   1410   MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
   1411   CHECK_GE(suspend_all_count_, debug_suspend_all_count_);
    1412   // Modify suspend count in increments of 1 to maintain invariants in ModifySuspendCount. While
    1413   // this isn't particularly efficient, the suspend counts are most commonly 0 or 1.
   1414   for (int delta = debug_suspend_all_count_; delta > 0; delta--) {
   1415     bool updated = self->ModifySuspendCount(self, +1, nullptr, SuspendReason::kForDebugger);
   1416     DCHECK(updated);
   1417   }
   1418   for (int delta = suspend_all_count_ - debug_suspend_all_count_; delta > 0; delta--) {
   1419     bool updated = self->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
   1420     DCHECK(updated);
   1421   }
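           // Worked example: with suspend_all_count_ == 2 and debug_suspend_all_count_ == 1, an
           // attaching thread replays one +1 with SuspendReason::kForDebugger and one +1 with
           // SuspendReason::kInternal, so it starts out with a suspend count of 2.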
   1422   CHECK(!Contains(self));
   1423   list_.push_back(self);
   1424   if (kUseReadBarrier) {
   1425     gc::collector::ConcurrentCopying* const cc =
   1426         Runtime::Current()->GetHeap()->ConcurrentCopyingCollector();
   1427     // Initialize according to the state of the CC collector.
   1428     self->SetIsGcMarkingAndUpdateEntrypoints(cc->IsMarking());
   1429     if (cc->IsUsingReadBarrierEntrypoints()) {
   1430       self->SetReadBarrierEntrypoints();
   1431     }
   1432     self->SetWeakRefAccessEnabled(cc->IsWeakRefAccessEnabled());
   1433   }
   1434 }
   1435 
   1436 void ThreadList::Unregister(Thread* self) {
   1437   DCHECK_EQ(self, Thread::Current());
   1438   CHECK_NE(self->GetState(), kRunnable);
   1439   Locks::mutator_lock_->AssertNotHeld(self);
   1440 
   1441   VLOG(threads) << "ThreadList::Unregister() " << *self;
   1442 
   1443   {
   1444     MutexLock mu(self, *Locks::thread_list_lock_);
   1445     ++unregistering_count_;
   1446   }
   1447 
    1448   // Any time-consuming destruction, plus anything that can call back into managed code or
    1449   // suspend and so on, must happen at this point, and not in ~Thread. The self->Destroy() call
    1450   // is what causes the threads to join. It is important to do this after incrementing
    1451   // unregistering_count_, since we want the runtime to wait for the daemon threads to exit
    1452   // before deleting the thread list.
   1453   self->Destroy();
   1454 
   1455   // If tracing, remember thread id and name before thread exits.
   1456   Trace::StoreExitingThreadInfo(self);
   1457 
   1458   uint32_t thin_lock_id = self->GetThreadId();
   1459   while (true) {
   1460     // Remove and delete the Thread* while holding the thread_list_lock_ and
   1461     // thread_suspend_count_lock_ so that the unregistering thread cannot be suspended.
    1462     // Note: deliberately not using a MutexLock, which could hold a stale self pointer.
   1463     MutexLock mu(self, *Locks::thread_list_lock_);
   1464     if (!Contains(self)) {
   1465       std::string thread_name;
   1466       self->GetThreadName(thread_name);
   1467       std::ostringstream os;
   1468       DumpNativeStack(os, GetTid(), nullptr, "  native: ", nullptr);
   1469       LOG(ERROR) << "Request to unregister unattached thread " << thread_name << "\n" << os.str();
   1470       break;
   1471     } else {
   1472       MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
   1473       if (!self->IsSuspended()) {
   1474         list_.remove(self);
   1475         break;
   1476       }
   1477     }
   1478     // We failed to remove the thread due to a suspend request, loop and try again.
   1479   }
   1480   delete self;
   1481 
   1482   // Release the thread ID after the thread is finished and deleted to avoid cases where we can
   1483   // temporarily have multiple threads with the same thread id. When this occurs, it causes
   1484   // problems in FindThreadByThreadId / SuspendThreadByThreadId.
   1485   ReleaseThreadId(nullptr, thin_lock_id);
   1486 
   1487   // Clear the TLS data, so that the underlying native thread is recognizably detached.
   1488   // (It may wish to reattach later.)
   1489 #ifdef ART_TARGET_ANDROID
   1490   __get_tls()[TLS_SLOT_ART_THREAD_SELF] = nullptr;
   1491 #else
   1492   CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, nullptr), "detach self");
   1493 #endif
   1494 
   1495   // Signal that a thread just detached.
   1496   MutexLock mu(nullptr, *Locks::thread_list_lock_);
   1497   --unregistering_count_;
   1498   Locks::thread_exit_cond_->Broadcast(nullptr);
   1499 }
   1500 
   1501 void ThreadList::ForEach(void (*callback)(Thread*, void*), void* context) {
   1502   for (const auto& thread : list_) {
   1503     callback(thread, context);
   1504   }
   1505 }
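
         // Illustrative usage (the callback is hypothetical; ForEach takes no lock, so callers
         // are presumably expected to hold Locks::thread_list_lock_):
         //
         //   static void CountThread(Thread* /* thread */, void* arg) {
         //     ++*static_cast<size_t*>(arg);
         //   }
         //   ...
         //   size_t count = 0;
         //   Runtime::Current()->GetThreadList()->ForEach(CountThread, &count);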
   1506 
   1507 void ThreadList::VisitRootsForSuspendedThreads(RootVisitor* visitor) {
   1508   Thread* const self = Thread::Current();
   1509   std::vector<Thread*> threads_to_visit;
   1510 
    1511   // Tell threads to suspend and copy them into the list.
   1512   {
   1513     MutexLock mu(self, *Locks::thread_list_lock_);
   1514     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
   1515     for (Thread* thread : list_) {
   1516       bool suspended = thread->ModifySuspendCount(self, +1, nullptr, SuspendReason::kInternal);
   1517       DCHECK(suspended);
   1518       if (thread == self || thread->IsSuspended()) {
   1519         threads_to_visit.push_back(thread);
   1520       } else {
   1521         bool resumed = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
   1522         DCHECK(resumed);
   1523       }
   1524     }
   1525   }
   1526 
   1527   // Visit roots without holding thread_list_lock_ and thread_suspend_count_lock_ to prevent lock
   1528   // order violations.
   1529   for (Thread* thread : threads_to_visit) {
   1530     thread->VisitRoots(visitor, kVisitRootFlagAllRoots);
   1531   }
   1532 
   1533   // Restore suspend counts.
   1534   {
   1535     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
   1536     for (Thread* thread : threads_to_visit) {
   1537       bool updated = thread->ModifySuspendCount(self, -1, nullptr, SuspendReason::kInternal);
   1538       DCHECK(updated);
   1539     }
   1540   }
   1541 }
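
         // Net effect: roots are visited only for the caller and for threads that were already
         // suspended when checked; a still-running thread has its transient +1 undone
         // immediately and is skipped rather than waited for.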
   1542 
   1543 void ThreadList::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) const {
   1544   MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
   1545   for (const auto& thread : list_) {
   1546     thread->VisitRoots(visitor, flags);
   1547   }
   1548 }
   1549 
   1550 uint32_t ThreadList::AllocThreadId(Thread* self) {
   1551   MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
   1552   for (size_t i = 0; i < allocated_ids_.size(); ++i) {
   1553     if (!allocated_ids_[i]) {
   1554       allocated_ids_.set(i);
   1555       return i + 1;  // Zero is reserved to mean "invalid".
   1556     }
   1557   }
   1558   LOG(FATAL) << "Out of internal thread ids";
   1559   return 0;
   1560 }
   1561 
   1562 void ThreadList::ReleaseThreadId(Thread* self, uint32_t id) {
   1563   MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
   1564   --id;  // Zero is reserved to mean "invalid".
   1565   DCHECK(allocated_ids_[id]) << id;
   1566   allocated_ids_.reset(id);
   1567 }
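
         // Example: the first AllocThreadId() call sets bit 0 and returns id 1; a matching
         // ReleaseThreadId(self, 1) clears bit 0 so the id can be reused. Id 0 is never handed
         // out, consistent with its use as the "invalid" sentinel above.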
   1568 
   1569 ScopedSuspendAll::ScopedSuspendAll(const char* cause, bool long_suspend) {
   1570   Runtime::Current()->GetThreadList()->SuspendAll(cause, long_suspend);
   1571 }
   1572 
   1573 ScopedSuspendAll::~ScopedSuspendAll() {
   1574   Runtime::Current()->GetThreadList()->ResumeAll();
   1575 }
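
         // Illustrative usage sketch (the cause string is hypothetical): suspension lasts for
         // the enclosing scope, and the destructor resumes all threads on any exit path:
         //
         //   {
         //     ScopedSuspendAll ssa("example: inspect all thread stacks", /* long_suspend= */ false);
         //     // All mutator threads are suspended here.
         //   }  // ~ScopedSuspendAll resumes them.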
   1576 
   1577 }  // namespace art
   1578