//===-- tsan_clock.cc -----------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "tsan_clock.h"
#include "tsan_rtl.h"
#include "sanitizer_common/sanitizer_placement_new.h"

// SyncClock and ThreadClock implement vector clocks for sync variables
// (mutexes, atomic variables, file descriptors, etc.) and threads,
// respectively. ThreadClock contains a fixed-size vector clock for the
// maximum number of threads. SyncClock contains a growable vector clock for
// the currently necessary number of threads.
// Together they implement a very simple model of operations, namely:
//
//   void ThreadClock::acquire(const SyncClock *src) {
//     for (int i = 0; i < kMaxThreads; i++)
//       clock[i] = max(clock[i], src->clock[i]);
//   }
//
//   void ThreadClock::release(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = max(dst->clock[i], clock[i]);
//   }
//
//   void ThreadClock::ReleaseStore(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = clock[i];
//   }
//
//   void ThreadClock::acq_rel(SyncClock *dst) {
//     acquire(dst);
//     release(dst);
//   }
//
// Conformance to this model is extensively verified in tsan_clock_test.cc.
// However, the implementation is significantly more complex. The complexity
// makes it possible to implement important classes of use cases in O(1)
// instead of O(N).
//
// The use cases are:
// 1. Singleton/once atomic that has a single release-store operation followed
//    by zillions of acquire-loads (the acquire-load is O(1)).
// 2. Thread-local mutex (both lock and unlock can be O(1)).
// 3. Leaf mutex (unlock is O(1)).
// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
// 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to the workload. So if an atomic is
// in a read-only phase, these reads will be O(1); if it later switches to a
// read/write phase, the implementation will correctly handle that by
// switching to O(N).
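//
// For example (an illustrative scenario for use case 1): the writer performs
// one release-store into the atomic's SyncClock; a reader's first
// acquire-load does the O(N) merge once and stamps the reader's reuse count
// into the clock, after which every further acquire-load by that reader only
// checks the stamp and the (at most kDirtyTids) dirty entries, i.e. O(1)
// work.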
//
// Thread-safety note: all const operations on SyncClock's are conducted under
// a shared lock; all non-const operations on SyncClock's are conducted under
// an exclusive lock; ThreadClock's are private to respective threads and so
// do not need any protection.
//
// Description of ThreadClock state:
// clk_ - fixed size vector clock.
// nclk_ - effective size of the vector clock (the rest is zeros).
// tid_ - index of the thread associated with the clock ("current thread").
// last_acquire_ - current thread time when it acquired something from
//   other threads.
//
// Description of SyncClock state:
// clk_ - variable size vector clock, low kClkBits hold timestamp,
//   the remaining bits hold "acquired" flag (the actual value is the thread's
//   reused counter);
//   if acquired == thr->reused_, then the respective thread has already
//   acquired this clock (except possibly for dirty_tids_).
// dirty_tids_ - holds up to two indices in the vector clock that other
//   threads need to acquire regardless of the "acquired" flag value;
// release_store_tid_ - denotes that the clock state is a result of a
//   release-store operation by the thread with release_store_tid_ index.
// release_store_reused_ - reuse count of release_store_tid_.
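//
// For example (illustrative): if thread 3 (with reused_ == 5) performs a full
// acquire of a SyncClock S, S.elem(3).reused is set to 5. As long as no other
// thread does a full release into S, a repeated acquire by thread 3 finds the
// matching reuse count and only merges the entries listed in S.dirty_tids_,
// instead of scanning the whole clock.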

// We don't have ThreadState in these methods, so this is an ugly hack that
// works only in C++.
#ifndef SANITIZER_GO
# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
#else
# define CPP_STAT_INC(typ) (void)0
#endif

namespace __tsan {

ThreadClock::ThreadClock(unsigned tid, unsigned reused)
    : tid_(tid)
    , reused_(reused + 1) {  // 0 has special meaning
  CHECK_LT(tid, kMaxTidInClock);
  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  nclk_ = tid_ + 1;
  last_acquire_ = 0;
  internal_memset(clk_, 0, sizeof(clk_));
  clk_[tid_].reused = reused_;
}

void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(src->size_, kMaxTid);
  CPP_STAT_INC(StatClockAcquire);

  // Check if it's empty -> no need to do anything.
  const uptr nclk = src->size_;
  if (nclk == 0) {
    CPP_STAT_INC(StatClockAcquireEmpty);
    return;
  }

  // Check if we've already acquired src after the last release operation
  // on src.
  bool acquired = false;
  if (nclk > tid_) {
    CPP_STAT_INC(StatClockAcquireLarge);
    if (src->elem(tid_).reused == reused_) {
      CPP_STAT_INC(StatClockAcquireRepeat);
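      // Fast path: our reuse count is still stamped in src, so every release
      // into src since we last acquired it took the O(1) path (a full release
      // would have reset the stamp) and recorded its update in dirty_tids_.
      // Merging the dirty entries is therefore sufficient.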
      for (unsigned i = 0; i < kDirtyTids; i++) {
        unsigned tid = src->dirty_tids_[i];
        if (tid != kInvalidTid) {
          u64 epoch = src->elem(tid).epoch;
          if (clk_[tid].epoch < epoch) {
            clk_[tid].epoch = epoch;
            acquired = true;
          }
        }
      }
      if (acquired) {
        CPP_STAT_INC(StatClockAcquiredSomething);
        last_acquire_ = clk_[tid_].epoch;
      }
      return;
    }
  }

  // O(N) acquire.
  CPP_STAT_INC(StatClockAcquireFull);
  nclk_ = max(nclk_, nclk);
  for (uptr i = 0; i < nclk; i++) {
    u64 epoch = src->elem(i).epoch;
    if (clk_[i].epoch < epoch) {
      clk_[i].epoch = epoch;
      acquired = true;
    }
  }

  // Remember that this thread has acquired this clock.
  if (nclk > tid_)
    src->elem(tid_).reused = reused_;

  if (acquired) {
    CPP_STAT_INC(StatClockAcquiredSomething);
    last_acquire_ = clk_[tid_].epoch;
  }
}

void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);

  if (dst->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, dst);
    return;
  }

  CPP_STAT_INC(StatClockRelease);
  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  // Check if we have not acquired anything from other threads
  // since the last release on dst. If so, we need to update
  // only dst->elem(tid_).
  if (dst->elem(tid_).epoch > last_acquire_) {
    UpdateCurrentThread(dst);
    if (dst->release_store_tid_ != tid_ ||
        dst->release_store_reused_ != reused_)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  CPP_STAT_INC(StatClockReleaseFull);
  // First, remember whether we've acquired dst.
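  // (If we had already acquired dst, then after merging our clock into it we
  // still dominate it, so the 'acquired' stamp set below remains valid.)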
  bool acquired = IsAlreadyAcquired(dst);
  if (acquired)
    CPP_STAT_INC(StatClockReleaseAcquired);
  // Update dst->clk_.
  for (uptr i = 0; i < nclk_; i++) {
    ClockElem &ce = dst->elem(i);
    ce.epoch = max(ce.epoch, clk_[i].epoch);
    ce.reused = 0;
  }
  // Clear 'acquired' flag in the remaining elements.
  if (nclk_ < dst->size_)
    CPP_STAT_INC(StatClockReleaseClearTail);
  for (uptr i = nclk_; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;
  // If we've acquired dst, remember this fact,
  // so that we don't need to acquire it on the next acquire.
  if (acquired)
    dst->elem(tid_).reused = reused_;
}

void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);
  CPP_STAT_INC(StatClockStore);

  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  if (dst->release_store_tid_ == tid_ &&
      dst->release_store_reused_ == reused_ &&
      dst->elem(tid_).epoch > last_acquire_) {
    CPP_STAT_INC(StatClockStoreFast);
    UpdateCurrentThread(dst);
    return;
  }

  // O(N) release-store.
  CPP_STAT_INC(StatClockStoreFull);
  for (uptr i = 0; i < nclk_; i++) {
    ClockElem &ce = dst->elem(i);
    ce.epoch = clk_[i].epoch;
    ce.reused = 0;
  }
  // Clear the tail of dst->clk_.
  if (nclk_ < dst->size_) {
    for (uptr i = nclk_; i < dst->size_; i++) {
      ClockElem &ce = dst->elem(i);
      ce.epoch = 0;
      ce.reused = 0;
    }
    CPP_STAT_INC(StatClockStoreTail);
  }
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;
  // Remember that we don't need to acquire it in the future.
  dst->elem(tid_).reused = reused_;
}

void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
  CPP_STAT_INC(StatClockAcquireRelease);
  acquire(c, dst);
  ReleaseStore(c, dst);
}

// Updates only the single element related to the current thread in dst->clk_.
void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
  // Update the thread's time, but preserve the 'acquired' flag.
  dst->elem(tid_).epoch = clk_[tid_].epoch;

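  // Other threads may have their 'acquired' flag set in dst and would skip
  // dst on their next acquire; record tid_ in dirty_tids_ so that they still
  // merge this thread's new epoch.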
  for (unsigned i = 0; i < kDirtyTids; i++) {
    if (dst->dirty_tids_[i] == tid_) {
      CPP_STAT_INC(StatClockReleaseFast1);
      return;
    }
    if (dst->dirty_tids_[i] == kInvalidTid) {
      CPP_STAT_INC(StatClockReleaseFast2);
      dst->dirty_tids_[i] = tid_;
      return;
    }
  }
  // Reset all 'acquired' flags, O(N).
  CPP_STAT_INC(StatClockReleaseSlow);
  for (uptr i = 0; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
}

// Checks whether the current thread has already acquired src.
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
  if (src->elem(tid_).reused != reused_)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    unsigned tid = src->dirty_tids_[i];
    if (tid != kInvalidTid) {
      if (clk_[tid].epoch < src->elem(tid).epoch)
        return false;
    }
  }
  return true;
}

void SyncClock::Resize(ClockCache *c, uptr nclk) {
  CPP_STAT_INC(StatClockReleaseResize);
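  // The clock is stored in a single ClockBlock while
  // size_ <= ClockBlock::kClockCount (one-level table); larger clocks use a
  // two-level table, where tab_->table[] holds indices of second-level
  // ClockBlock's (see SyncClock::elem()).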
  if (RoundUpTo(nclk, ClockBlock::kClockCount) <=
      RoundUpTo(size_, ClockBlock::kClockCount)) {
    // Growing within the same block.
    // Memory is already allocated, just increase the size.
    size_ = nclk;
    return;
  }
  if (nclk <= ClockBlock::kClockCount) {
    // Grow from 0 to a one-level table.
    CHECK_EQ(size_, 0);
    CHECK_EQ(tab_, 0);
    CHECK_EQ(tab_idx_, 0);
    size_ = nclk;
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    return;
  }
  // Growing a two-level table.
  if (size_ == 0) {
    // Allocate the first-level table.
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
  } else if (size_ <= ClockBlock::kClockCount) {
    // Transform the one-level table into a two-level table.
    u32 old = tab_idx_;
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    tab_->table[0] = old;
  }
  // At this point we have the first-level table allocated.
  // Add second-level tables as necessary.
  for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount);
      i < nclk; i += ClockBlock::kClockCount) {
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    internal_memset(cb, 0, sizeof(*cb));
    CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0);
    tab_->table[i/ClockBlock::kClockCount] = idx;
  }
  size_ = nclk;
}

// Sets a single element in the vector clock.
// This function is called only from weird places like AcquireGlobal.
void ThreadClock::set(unsigned tid, u64 v) {
  DCHECK_LT(tid, kMaxTid);
  DCHECK_GE(v, clk_[tid].epoch);
  clk_[tid].epoch = v;
  if (nclk_ <= tid)
    nclk_ = tid + 1;
  last_acquire_ = clk_[tid_].epoch;
}

void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
  printf("] reused=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
  printf("] tid=%u/%u last_acq=%llu",
      tid_, reused_, last_acquire_);
}

SyncClock::SyncClock()
    : release_store_tid_(kInvalidTid)
    , release_store_reused_()
    , tab_()
    , tab_idx_()
    , size_() {
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_tids_[i] = kInvalidTid;
}

SyncClock::~SyncClock() {
  // Reset must be called before the dtor.
  CHECK_EQ(size_, 0);
  CHECK_EQ(tab_, 0);
  CHECK_EQ(tab_idx_, 0);
}

void SyncClock::Reset(ClockCache *c) {
  if (size_ == 0) {
    // nothing
  } else if (size_ <= ClockBlock::kClockCount) {
    // One-level table.
    ctx->clock_alloc.Free(c, tab_idx_);
  } else {
    // Two-level table.
    for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)
      ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);
    ctx->clock_alloc.Free(c, tab_idx_);
  }
  tab_ = 0;
  tab_idx_ = 0;
  size_ = 0;
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_tids_[i] = kInvalidTid;
}

ClockElem &SyncClock::elem(unsigned tid) const {
  DCHECK_LT(tid, size_);
  if (size_ <= ClockBlock::kClockCount)
    return tab_->clock[tid];  // One-level table: elements stored directly.
  // Two-level table: the first level holds indices of second-level blocks.
  u32 idx = tab_->table[tid / ClockBlock::kClockCount];
  ClockBlock *cb = ctx->clock_alloc.Map(idx);
  return cb->clock[tid % ClockBlock::kClockCount];
}

void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
  printf("] reused=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
  printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
      release_store_tid_, release_store_reused_,
      dirty_tids_[0], dirty_tids_[1]);
}
}  // namespace __tsan