Home | History | Annotate | Download | only in rtl
      1 //===-- tsan_clock.cc -----------------------------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file is a part of ThreadSanitizer (TSan), a race detector.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 #include "tsan_clock.h"
     14 #include "tsan_rtl.h"
     15 
     16 // SyncClock and ThreadClock implement vector clocks for sync variables
     17 // (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
     18 // ThreadClock contains fixed-size vector clock for maximum number of threads.
     19 // SyncClock contains growable vector clock for currently necessary number of
     20 // threads.
     21 // Together they implement very simple model of operations, namely:
     22 //
     23 //   void ThreadClock::acquire(const SyncClock *src) {
     24 //     for (int i = 0; i < kMaxThreads; i++)
     25 //       clock[i] = max(clock[i], src->clock[i]);
     26 //   }
     27 //
     28 //   void ThreadClock::release(SyncClock *dst) const {
     29 //     for (int i = 0; i < kMaxThreads; i++)
     30 //       dst->clock[i] = max(dst->clock[i], clock[i]);
     31 //   }
     32 //
     33 //   void ThreadClock::ReleaseStore(SyncClock *dst) const {
     34 //     for (int i = 0; i < kMaxThreads; i++)
     35 //       dst->clock[i] = clock[i];
     36 //   }
     37 //
     38 //   void ThreadClock::acq_rel(SyncClock *dst) {
     39 //     acquire(dst);
     40 //     release(dst);
     41 //   }
     42 //
     43 // Conformance to this model is extensively verified in tsan_clock_test.cc.
     44 // However, the implementation is significantly more complex. The complexity
     45 // allows to implement important classes of use cases in O(1) instead of O(N).
     46 //
     47 // The use cases are:
     48 // 1. Singleton/once atomic that has a single release-store operation followed
     49 //    by zillions of acquire-loads (the acquire-load is O(1)).
     50 // 2. Thread-local mutex (both lock and unlock can be O(1)).
     51 // 3. Leaf mutex (unlock is O(1)).
     52 // 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
     53 // 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to workload. So if an atomic is in
     55 // read-only phase, these reads will be O(1); if it later switches to read/write
     56 // phase, the implementation will correctly handle that by switching to O(N).
     57 //
     58 // Thread-safety note: all const operations on SyncClock's are conducted under
     59 // a shared lock; all non-const operations on SyncClock's are conducted under
     60 // an exclusive lock; ThreadClock's are private to respective threads and so
     61 // do not need any protection.
     62 //
     63 // Description of ThreadClock state:
     64 // clk_ - fixed size vector clock.
     65 // nclk_ - effective size of the vector clock (the rest is zeros).
// tid_ - index of the thread associated with the clock ("current thread").
     67 // last_acquire_ - current thread time when it acquired something from
     68 //   other threads.
     69 //
     70 // Description of SyncClock state:
     71 // clk_ - variable size vector clock, low kClkBits hold timestamp,
     72 //   the remaining bits hold "acquired" flag (the actual value is thread's
     73 //   reused counter);
//   if acquired == thr->reused_, then the respective thread has already
     75 //   acquired this clock (except possibly dirty_tids_).
// dirty_tids_ - holds up to two indices in the vector clock that other threads
     77 //   need to acquire regardless of "acquired" flag value;
     78 // release_store_tid_ - denotes that the clock state is a result of
     79 //   release-store operation by the thread with release_store_tid_ index.
     80 // release_store_reused_ - reuse count of release_store_tid_.
     81 
     82 // We don't have ThreadState in these methods, so this is an ugly hack that
     83 // works only in C++.
     84 #ifndef TSAN_GO
     85 # define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
     86 #else
     87 # define CPP_STAT_INC(typ) (void)0
     88 #endif
     89 
     90 namespace __tsan {
     91 
// Sentinel thread index: marks unused dirty_tids_ slots and an unknown
// release_store_tid_.
const unsigned kInvalidTid = (unsigned)-1;
     93 
     94 ThreadClock::ThreadClock(unsigned tid, unsigned reused)
     95     : tid_(tid)
     96     , reused_(reused + 1) {  // 0 has special meaning
     97   CHECK_LT(tid, kMaxTidInClock);
     98   CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
     99   nclk_ = tid_ + 1;
    100   last_acquire_ = 0;
    101   internal_memset(clk_, 0, sizeof(clk_));
    102   clk_[tid_].reused = reused_;
    103 }
    104 
// Merges src into this thread's vector clock:
//   clk_[i] = max(clk_[i], src->clk_[i]) for all i.
// Fast path: if this thread (in its current reuse incarnation) has already
// acquired src since src's last release, only src's dirty_tids_ entries can
// hold fresh values, so at most kDirtyTids elements are merged (O(1)).
// Slow path: full element-wise max over src (O(N)).
void ThreadClock::acquire(const SyncClock *src) {
  DCHECK(nclk_ <= kMaxTid);
  DCHECK(src->clk_.Size() <= kMaxTid);
  CPP_STAT_INC(StatClockAcquire);

  // Check if it's empty -> no need to do anything.
  const uptr nclk = src->clk_.Size();
  if (nclk == 0) {
    CPP_STAT_INC(StatClockAcquireEmpty);
    return;
  }

  // Check if we've already acquired src after the last release operation on src
  bool acquired = false;
  if (nclk > tid_) {
    CPP_STAT_INC(StatClockAcquireLarge);
    if (src->clk_[tid_].reused == reused_) {
      // Fast path: only the dirty entries may hold something new for us.
      CPP_STAT_INC(StatClockAcquireRepeat);
      for (unsigned i = 0; i < kDirtyTids; i++) {
        unsigned tid = src->dirty_tids_[i];
        if (tid != kInvalidTid) {
          u64 epoch = src->clk_[tid].epoch;
          if (clk_[tid].epoch < epoch) {
            clk_[tid].epoch = epoch;
            acquired = true;
          }
        }
      }
      if (acquired) {
        CPP_STAT_INC(StatClockAcquiredSomething);
        // Record our own time at the moment of acquisition; the release
        // fast paths are valid only while nothing newer was acquired.
        last_acquire_ = clk_[tid_].epoch;
      }
      return;
    }
  }

  // O(N) acquire.
  CPP_STAT_INC(StatClockAcquireFull);
  nclk_ = max(nclk_, nclk);
  for (uptr i = 0; i < nclk; i++) {
    u64 epoch = src->clk_[i].epoch;
    if (clk_[i].epoch < epoch) {
      clk_[i].epoch = epoch;
      acquired = true;
    }
  }

  // Remember that this thread has acquired this clock.
  // NOTE(review): this writes through a const pointer, so clk_ is
  // presumably declared mutable in SyncClock -- confirm in tsan_clock.h.
  if (nclk > tid_)
    src->clk_[tid_].reused = reused_;

  if (acquired) {
    CPP_STAT_INC(StatClockAcquiredSomething);
    last_acquire_ = clk_[tid_].epoch;
  }
}
    161 
// Merges this thread's vector clock into dst:
//   dst->clk_[i] = max(dst->clk_[i], clk_[i]) for all i.
// Fast path: if this thread has not acquired anything from other threads
// since its last release into dst, only dst's element for this thread can
// be stale, so UpdateCurrentThread() fixes up that single element (O(1)).
// Slow path: full element-wise max into dst (O(N)).
void ThreadClock::release(SyncClock *dst) const {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->clk_.Size(), kMaxTid);

  if (dst->clk_.Size() == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(dst);
    return;
  }

  CPP_STAT_INC(StatClockRelease);
  // Check if we need to resize dst.
  if (dst->clk_.Size() < nclk_) {
    CPP_STAT_INC(StatClockReleaseResize);
    dst->clk_.Resize(nclk_);
  }

  // Check if we had not acquired anything from other threads
  // since the last release on dst. If so, we need to update
  // only dst->clk_[tid_].
  if (dst->clk_[tid_].epoch > last_acquire_) {
    UpdateCurrentThread(dst);
    // dst is no longer the result of a pure release-store unless the
    // previous store was made by us with the same reuse count.
    if (dst->release_store_tid_ != tid_ ||
        dst->release_store_reused_ != reused_)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  CPP_STAT_INC(StatClockReleaseFull);
  // First, remember whether we've acquired dst.
  bool acquired = IsAlreadyAcquired(dst);
  if (acquired)
    CPP_STAT_INC(StatClockReleaseAcquired);
  // Update dst->clk_.
  for (uptr i = 0; i < nclk_; i++) {
    dst->clk_[i].epoch = max(dst->clk_[i].epoch, clk_[i].epoch);
    dst->clk_[i].reused = 0;
  }
  // Clear 'acquired' flag in the remaining elements.
  if (nclk_ < dst->clk_.Size())
    CPP_STAT_INC(StatClockReleaseClearTail);
  for (uptr i = nclk_; i < dst->clk_.Size(); i++)
    dst->clk_[i].reused = 0;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;
  // If we've acquired dst, remember this fact,
  // so that we don't need to acquire it on next acquire.
  if (acquired)
    dst->clk_[tid_].reused = reused_;
}
    216 
// Overwrites dst with this thread's vector clock:
//   dst->clk_[i] = clk_[i] for all i.
// Fast path: if the previous store into dst was made by the same
// thread/reuse incarnation and nothing was acquired since, only the
// current thread's element can have advanced, so a single-element
// update suffices (O(1)).  Slow path: full copy (O(N)).
void ThreadClock::ReleaseStore(SyncClock *dst) const {
  DCHECK(nclk_ <= kMaxTid);
  DCHECK(dst->clk_.Size() <= kMaxTid);
  CPP_STAT_INC(StatClockStore);

  // Check if we need to resize dst.
  if (dst->clk_.Size() < nclk_) {
    CPP_STAT_INC(StatClockStoreResize);
    dst->clk_.Resize(nclk_);
  }

  if (dst->release_store_tid_ == tid_ &&
      dst->release_store_reused_ == reused_ &&
      dst->clk_[tid_].epoch > last_acquire_) {
    CPP_STAT_INC(StatClockStoreFast);
    UpdateCurrentThread(dst);
    return;
  }

  // O(N) release-store.
  CPP_STAT_INC(StatClockStoreFull);
  for (uptr i = 0; i < nclk_; i++) {
    dst->clk_[i].epoch = clk_[i].epoch;
    dst->clk_[i].reused = 0;  // nobody has acquired the new state yet
  }
  // Clear the tail of dst->clk_.
  if (nclk_ < dst->clk_.Size()) {
    internal_memset(&dst->clk_[nclk_], 0,
        (dst->clk_.Size() - nclk_) * sizeof(dst->clk_[0]));
    CPP_STAT_INC(StatClockStoreTail);
  }
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;
  // Remember that we don't need to acquire it in future.
  dst->clk_[tid_].reused = reused_;
}
    255 
// Combined acquire + release on dst (see the model at the top of the file).
void ThreadClock::acq_rel(SyncClock *dst) {
  CPP_STAT_INC(StatClockAcquireRelease);
  acquire(dst);
  // After acquire() this clock dominates dst element-wise, so a plain
  // store produces the same result as an element-wise max (release).
  ReleaseStore(dst);
}
    261 
// Updates only single element related to the current thread in dst->clk_.
// The thread's index is then recorded in dst->dirty_tids_ so that other
// threads whose 'acquired' flag still matches pick up the new value.
// If both dirty slots are occupied by other tids, falls back to clearing
// all 'acquired' flags, which forces a full O(N) acquire for everybody.
void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
  // Update the threads time, but preserve 'acquired' flag.
  dst->clk_[tid_].epoch = clk_[tid_].epoch;

  for (unsigned i = 0; i < kDirtyTids; i++) {
    if (dst->dirty_tids_[i] == tid_) {
      // Our tid is already marked dirty; nothing more to do.
      CPP_STAT_INC(StatClockReleaseFast1);
      return;
    }
    if (dst->dirty_tids_[i] == kInvalidTid) {
      // Found a free dirty slot; claim it for our tid.
      CPP_STAT_INC(StatClockReleaseFast2);
      dst->dirty_tids_[i] = tid_;
      return;
    }
  }
  // Reset all 'acquired' flags, O(N).
  CPP_STAT_INC(StatClockReleaseSlow);
  for (uptr i = 0; i < dst->clk_.Size(); i++) {
    dst->clk_[i].reused = 0;
  }
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
}
    286 
    287 // Checks whether the current threads has already acquired src.
    288 bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
    289   if (src->clk_[tid_].reused != reused_)
    290     return false;
    291   for (unsigned i = 0; i < kDirtyTids; i++) {
    292     unsigned tid = src->dirty_tids_[i];
    293     if (tid != kInvalidTid) {
    294       if (clk_[tid].epoch < src->clk_[tid].epoch)
    295         return false;
    296     }
    297   }
    298   return true;
    299 }
    300 
    301 // Sets a single element in the vector clock.
    302 // This function is called only from weird places like AcquireGlobal.
    303 void ThreadClock::set(unsigned tid, u64 v) {
    304   DCHECK_LT(tid, kMaxTid);
    305   DCHECK_GE(v, clk_[tid].epoch);
    306   clk_[tid].epoch = v;
    307   if (nclk_ <= tid)
    308     nclk_ = tid + 1;
    309   last_acquire_ = clk_[tid_].epoch;
    310 }
    311 
    312 void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
    313   printf("clock=[");
    314   for (uptr i = 0; i < nclk_; i++)
    315     printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
    316   printf("] reused=[");
    317   for (uptr i = 0; i < nclk_; i++)
    318     printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
    319   printf("] tid=%u/%u last_acq=%llu",
    320       tid_, reused_, last_acquire_);
    321 }
    322 
    323 SyncClock::SyncClock()
    324     : clk_(MBlockClock) {
    325   release_store_tid_ = kInvalidTid;
    326   release_store_reused_ = 0;
    327   for (uptr i = 0; i < kDirtyTids; i++)
    328     dirty_tids_[i] = kInvalidTid;
    329 }
    330 
void SyncClock::Reset() {
  // Release the vector clock's backing storage ...
  clk_.Reset();
  // ... and reinitialize all remaining state to the empty-clock values.
  Zero();
}
    335 
    336 void SyncClock::Zero() {
    337   clk_.Resize(0);
    338   release_store_tid_ = kInvalidTid;
    339   release_store_reused_ = 0;
    340   for (uptr i = 0; i < kDirtyTids; i++)
    341     dirty_tids_[i] = kInvalidTid;
    342 }
    343 
    344 void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
    345   printf("clock=[");
    346   for (uptr i = 0; i < clk_.Size(); i++)
    347     printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
    348   printf("] reused=[");
    349   for (uptr i = 0; i < clk_.Size(); i++)
    350     printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
    351   printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
    352       release_store_tid_, release_store_reused_,
    353       dirty_tids_[0], dirty_tids_[1]);
    354 }
    355 }  // namespace __tsan
    356