//===-- tsan_clock.cc -----------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "tsan_clock.h"
#include "tsan_rtl.h"

// SyncClock and ThreadClock implement vector clocks for sync variables
// (mutexes, atomic variables, file descriptors, etc.) and threads,
// respectively.
// ThreadClock contains a fixed-size vector clock for the maximum number of
// threads.
// SyncClock contains a growable vector clock for the currently necessary
// number of threads.
// Together they implement a very simple model of operations, namely:
//
// void ThreadClock::acquire(const SyncClock *src) {
//   for (int i = 0; i < kMaxThreads; i++)
//     clock[i] = max(clock[i], src->clock[i]);
// }
//
// void ThreadClock::release(SyncClock *dst) const {
//   for (int i = 0; i < kMaxThreads; i++)
//     dst->clock[i] = max(dst->clock[i], clock[i]);
// }
//
// void ThreadClock::ReleaseStore(SyncClock *dst) const {
//   for (int i = 0; i < kMaxThreads; i++)
//     dst->clock[i] = clock[i];
// }
//
// void ThreadClock::acq_rel(SyncClock *dst) {
//   acquire(dst);
//   release(dst);
// }
//
// Conformance to this model is extensively verified in tsan_clock_test.cc.
// However, the implementation is significantly more complex. The complexity
// makes it possible to implement important classes of use cases in O(1)
// instead of O(N).
//
// The use cases are:
// 1. Singleton/once atomic that has a single release-store operation followed
//    by zillions of acquire-loads (the acquire-load is O(1)).
// 2. Thread-local mutex (both lock and unlock can be O(1)).
// 3. Leaf mutex (unlock is O(1)).
// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
// 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to the workload. So if an atomic is
// in a read-only phase, these reads will be O(1); if it later switches to a
// read/write phase, the implementation will correctly handle that by
// switching to O(N).
//
// Thread-safety note: all const operations on SyncClock's are conducted under
// a shared lock; all non-const operations on SyncClock's are conducted under
// an exclusive lock; ThreadClock's are private to respective threads and so
// do not need any protection.
//
// Description of ThreadClock state:
// clk_ - fixed-size vector clock.
// nclk_ - effective size of the vector clock (the rest is zeros).
// tid_ - index of the thread associated with the clock ("current thread").
// last_acquire_ - the current thread's time when it last acquired something
//   from other threads.
//
// Description of SyncClock state:
// clk_ - variable-size vector clock; the low kClkBits hold the timestamp,
//   the remaining bits hold the "acquired" flag (the actual value is the
//   thread's reused counter);
//   if acquired == thr->reused_, then the respective thread has already
//   acquired this clock (except possibly for dirty_tids_).
// dirty_tids_ - holds up to two indices in the vector clock that other
//   threads need to acquire regardless of the "acquired" flag value;
// release_store_tid_ - denotes that the clock state is a result of a
//   release-store operation by the thread with release_store_tid_ index.
// release_store_reused_ - reuse count of release_store_tid_.

// We don't have ThreadState in these methods, so this is an ugly hack that
// works only in C++.
#ifndef TSAN_GO
# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
#else
# define CPP_STAT_INC(typ) (void)0
#endif

namespace __tsan {

const unsigned kInvalidTid = (unsigned)-1;

ThreadClock::ThreadClock(unsigned tid, unsigned reused)
    : tid_(tid)
    , reused_(reused + 1) {  // 0 has special meaning
  CHECK_LT(tid, kMaxTidInClock);
  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  nclk_ = tid_ + 1;
  last_acquire_ = 0;
  internal_memset(clk_, 0, sizeof(clk_));
  clk_[tid_].reused = reused_;
}

void ThreadClock::acquire(const SyncClock *src) {
  DCHECK(nclk_ <= kMaxTid);
  DCHECK(src->clk_.Size() <= kMaxTid);
  CPP_STAT_INC(StatClockAcquire);

  // Check if it's empty -> no need to do anything.
  const uptr nclk = src->clk_.Size();
  if (nclk == 0) {
    CPP_STAT_INC(StatClockAcquireEmpty);
    return;
  }

  // Check if we've already acquired src after the last release operation on
  // src.
  bool acquired = false;
  if (nclk > tid_) {
    CPP_STAT_INC(StatClockAcquireLarge);
    if (src->clk_[tid_].reused == reused_) {
      CPP_STAT_INC(StatClockAcquireRepeat);
      for (unsigned i = 0; i < kDirtyTids; i++) {
        unsigned tid = src->dirty_tids_[i];
        if (tid != kInvalidTid) {
          u64 epoch = src->clk_[tid].epoch;
          if (clk_[tid].epoch < epoch) {
            clk_[tid].epoch = epoch;
            acquired = true;
          }
        }
      }
      if (acquired) {
        CPP_STAT_INC(StatClockAcquiredSomething);
        last_acquire_ = clk_[tid_].epoch;
      }
      return;
    }
  }

  // O(N) acquire.
  CPP_STAT_INC(StatClockAcquireFull);
  nclk_ = max(nclk_, nclk);
  for (uptr i = 0; i < nclk; i++) {
    u64 epoch = src->clk_[i].epoch;
    if (clk_[i].epoch < epoch) {
      clk_[i].epoch = epoch;
      acquired = true;
    }
  }

  // Remember that this thread has acquired this clock.
  if (nclk > tid_)
    src->clk_[tid_].reused = reused_;

  if (acquired) {
    CPP_STAT_INC(StatClockAcquiredSomething);
    last_acquire_ = clk_[tid_].epoch;
  }
}

void ThreadClock::release(SyncClock *dst) const {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->clk_.Size(), kMaxTid);

  if (dst->clk_.Size() == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(dst);
    return;
  }

  CPP_STAT_INC(StatClockRelease);
  // Check if we need to resize dst.
  if (dst->clk_.Size() < nclk_) {
    CPP_STAT_INC(StatClockReleaseResize);
    dst->clk_.Resize(nclk_);
  }

  // Check if we have not acquired anything from other threads
  // since the last release on dst. If so, we need to update
  // only dst->clk_[tid_].
  if (dst->clk_[tid_].epoch > last_acquire_) {
    UpdateCurrentThread(dst);
    if (dst->release_store_tid_ != tid_ ||
        dst->release_store_reused_ != reused_)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  CPP_STAT_INC(StatClockReleaseFull);
  // First, remember whether we've acquired dst.
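  // The loop below overwrites the 'reused' (acquired) marks for every element
  // of dst, so record whether the current thread had already acquired dst;
  // if it had, its mark is restored at the end of this function so that a
  // subsequent acquire can stay on the O(1) path.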
  bool acquired = IsAlreadyAcquired(dst);
  if (acquired)
    CPP_STAT_INC(StatClockReleaseAcquired);
  // Update dst->clk_.
  for (uptr i = 0; i < nclk_; i++) {
    dst->clk_[i].epoch = max(dst->clk_[i].epoch, clk_[i].epoch);
    dst->clk_[i].reused = 0;
  }
  // Clear 'acquired' flag in the remaining elements.
  if (nclk_ < dst->clk_.Size())
    CPP_STAT_INC(StatClockReleaseClearTail);
  for (uptr i = nclk_; i < dst->clk_.Size(); i++)
    dst->clk_[i].reused = 0;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;
  // If we've acquired dst, remember this fact,
  // so that we don't need to acquire it on next acquire.
  if (acquired)
    dst->clk_[tid_].reused = reused_;
}

void ThreadClock::ReleaseStore(SyncClock *dst) const {
  DCHECK(nclk_ <= kMaxTid);
  DCHECK(dst->clk_.Size() <= kMaxTid);
  CPP_STAT_INC(StatClockStore);

  // Check if we need to resize dst.
  if (dst->clk_.Size() < nclk_) {
    CPP_STAT_INC(StatClockStoreResize);
    dst->clk_.Resize(nclk_);
  }

  if (dst->release_store_tid_ == tid_ &&
      dst->release_store_reused_ == reused_ &&
      dst->clk_[tid_].epoch > last_acquire_) {
    CPP_STAT_INC(StatClockStoreFast);
    UpdateCurrentThread(dst);
    return;
  }

  // O(N) release-store.
  CPP_STAT_INC(StatClockStoreFull);
  for (uptr i = 0; i < nclk_; i++) {
    dst->clk_[i].epoch = clk_[i].epoch;
    dst->clk_[i].reused = 0;
  }
  // Clear the tail of dst->clk_.
  if (nclk_ < dst->clk_.Size()) {
    internal_memset(&dst->clk_[nclk_], 0,
        (dst->clk_.Size() - nclk_) * sizeof(dst->clk_[0]));
    CPP_STAT_INC(StatClockStoreTail);
  }
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;
  // Remember that we don't need to acquire it in the future.
  dst->clk_[tid_].reused = reused_;
}

void ThreadClock::acq_rel(SyncClock *dst) {
  CPP_STAT_INC(StatClockAcquireRelease);
  acquire(dst);
  ReleaseStore(dst);
}

// Updates only a single element related to the current thread in dst->clk_.
void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
  // Update the thread's time, but preserve 'acquired' flag.
  dst->clk_[tid_].epoch = clk_[tid_].epoch;

  for (unsigned i = 0; i < kDirtyTids; i++) {
    if (dst->dirty_tids_[i] == tid_) {
      CPP_STAT_INC(StatClockReleaseFast1);
      return;
    }
    if (dst->dirty_tids_[i] == kInvalidTid) {
      CPP_STAT_INC(StatClockReleaseFast2);
      dst->dirty_tids_[i] = tid_;
      return;
    }
  }
  // Reset all 'acquired' flags, O(N).
  CPP_STAT_INC(StatClockReleaseSlow);
  for (uptr i = 0; i < dst->clk_.Size(); i++) {
    dst->clk_[i].reused = 0;
  }
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
}

// Checks whether the current thread has already acquired src.
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
  if (src->clk_[tid_].reused != reused_)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    unsigned tid = src->dirty_tids_[i];
    if (tid != kInvalidTid) {
      if (clk_[tid].epoch < src->clk_[tid].epoch)
        return false;
    }
  }
  return true;
}

// Sets a single element in the vector clock.
// This function is called only from weird places like AcquireGlobal.
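// The new value must not be smaller than the element's current epoch (see the
// DCHECK_GE below). Note that last_acquire_ is bumped afterwards, which
// presumably keeps a later release() off the single-element fast path so that
// the externally set element gets propagated to SyncClocks.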
void ThreadClock::set(unsigned tid, u64 v) {
  DCHECK_LT(tid, kMaxTid);
  DCHECK_GE(v, clk_[tid].epoch);
  clk_[tid].epoch = v;
  if (nclk_ <= tid)
    nclk_ = tid + 1;
  last_acquire_ = clk_[tid_].epoch;
}

void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
  printf("] reused=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
  printf("] tid=%u/%u last_acq=%llu",
      tid_, reused_, last_acquire_);
}

SyncClock::SyncClock()
    : clk_(MBlockClock) {
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_tids_[i] = kInvalidTid;
}

void SyncClock::Reset() {
  clk_.Reset();
  Zero();
}

void SyncClock::Zero() {
  clk_.Resize(0);
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_tids_[i] = kInvalidTid;
}

void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < clk_.Size(); i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
  printf("] reused=[");
  for (uptr i = 0; i < clk_.Size(); i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
  printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
      release_store_tid_, release_store_reused_,
      dirty_tids_[0], dirty_tids_[1]);
}
}  // namespace __tsan