//===-- tsan_clock.cc -----------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "tsan_clock.h"
#include "tsan_rtl.h"
#include "sanitizer_common/sanitizer_placement_new.h"

// SyncClock and ThreadClock implement vector clocks for sync variables
// (mutexes, atomic variables, file descriptors, etc.) and threads,
// respectively.
// ThreadClock contains a fixed-size vector clock for the maximum number of
// threads.
// SyncClock contains a growable vector clock for the currently necessary
// number of threads.
// Together they implement a very simple model of operations, namely:
//
// void ThreadClock::acquire(const SyncClock *src) {
//   for (int i = 0; i < kMaxThreads; i++)
//     clock[i] = max(clock[i], src->clock[i]);
// }
//
// void ThreadClock::release(SyncClock *dst) const {
//   for (int i = 0; i < kMaxThreads; i++)
//     dst->clock[i] = max(dst->clock[i], clock[i]);
// }
//
// void ThreadClock::ReleaseStore(SyncClock *dst) const {
//   for (int i = 0; i < kMaxThreads; i++)
//     dst->clock[i] = clock[i];
// }
//
// void ThreadClock::acq_rel(SyncClock *dst) {
//   acquire(dst);
//   release(dst);
// }
//
// Conformance to this model is extensively verified in tsan_clock_test.cc.
// However, the implementation is significantly more complex. The complexity
// allows implementing important classes of use cases in O(1) instead of O(N).
//
// The use cases are:
// 1. Singleton/once atomic that has a single release-store operation followed
//    by zillions of acquire-loads (the acquire-load is O(1)).
// 2. Thread-local mutex (both lock and unlock can be O(1)).
// 3. Leaf mutex (unlock is O(1)).
// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
// 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to the workload. So if an atomic is
// in a read-only phase, these reads will be O(1); if it later switches to a
// read/write phase, the implementation will correctly handle that by
// switching to O(N).
//
// Thread-safety note: all const operations on SyncClock's are conducted under
// a shared lock; all non-const operations on SyncClock's are conducted under
// an exclusive lock; ThreadClock's are private to respective threads and so
// do not need any protection.
//
// Description of ThreadClock state:
// clk_ - fixed size vector clock.
// nclk_ - effective size of the vector clock (the rest is zeros).
// tid_ - index of the thread associated with the clock ("current thread").
// last_acquire_ - current thread time when it acquired something from
//   other threads.
//
// Description of SyncClock state:
// clk_ - variable size vector clock, low kClkBits hold the timestamp,
//   the remaining bits hold the "acquired" flag (the actual value is the
//   thread's reused counter);
//   if acquired == thr->reused_, then the respective thread has already
//   acquired this clock (except possibly for dirty_tids_).
// dirty_tids_ - holds up to two indices in the vector clock that other threads
//   need to acquire regardless of the "acquired" flag value;
// release_store_tid_ - denotes that the clock state is a result of a
//   release-store operation by the thread with release_store_tid_ index.
// release_store_reused_ - reuse count of release_store_tid_.
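//
// To make the layout above concrete, each clock element packs both fields
// into a single u64 (a rough sketch of the declaration in tsan_clock.h; the
// exact form may differ):
//
//   struct ClockElem {
//     u64 epoch  : kClkBits;       // thread's time ("timestamp" above)
//     u64 reused : 64 - kClkBits;  // "acquired" flag: releaser's reuse count
//   };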

// We don't have ThreadState in these methods, so this is an ugly hack that
// works only in C++.
#ifndef SANITIZER_GO
# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
#else
# define CPP_STAT_INC(typ) (void)0
#endif

namespace __tsan {

ThreadClock::ThreadClock(unsigned tid, unsigned reused)
    : tid_(tid)
    , reused_(reused + 1) {  // 0 has special meaning
  CHECK_LT(tid, kMaxTidInClock);
  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  nclk_ = tid_ + 1;
  last_acquire_ = 0;
  internal_memset(clk_, 0, sizeof(clk_));
  clk_[tid_].reused = reused_;
}

void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(src->size_, kMaxTid);
  CPP_STAT_INC(StatClockAcquire);

  // Check if it's empty -> no need to do anything.
  const uptr nclk = src->size_;
  if (nclk == 0) {
    CPP_STAT_INC(StatClockAcquireEmpty);
    return;
  }

  // Check if we've already acquired src after the last release operation
  // on src.
  bool acquired = false;
  if (nclk > tid_) {
    CPP_STAT_INC(StatClockAcquireLarge);
    if (src->elem(tid_).reused == reused_) {
      CPP_STAT_INC(StatClockAcquireRepeat);
      for (unsigned i = 0; i < kDirtyTids; i++) {
        unsigned tid = src->dirty_tids_[i];
        if (tid != kInvalidTid) {
          u64 epoch = src->elem(tid).epoch;
          if (clk_[tid].epoch < epoch) {
            clk_[tid].epoch = epoch;
            acquired = true;
          }
        }
      }
      if (acquired) {
        CPP_STAT_INC(StatClockAcquiredSomething);
        last_acquire_ = clk_[tid_].epoch;
      }
      return;
    }
  }

  // O(N) acquire.
  CPP_STAT_INC(StatClockAcquireFull);
  nclk_ = max(nclk_, nclk);
  for (uptr i = 0; i < nclk; i++) {
    u64 epoch = src->elem(i).epoch;
    if (clk_[i].epoch < epoch) {
      clk_[i].epoch = epoch;
      acquired = true;
    }
  }

  // Remember that this thread has acquired this clock.
  if (nclk > tid_)
    src->elem(tid_).reused = reused_;

  if (acquired) {
    CPP_STAT_INC(StatClockAcquiredSomething);
    last_acquire_ = clk_[tid_].epoch;
  }
}
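
// Usage sketch in the spirit of tsan_clock_test.cc (model code only; the
// ClockCache setup is elided and assumed, tick() advances the thread's own
// epoch):
//
//   ClockCache cache = ...;            // assumed per-thread cache setup
//   ThreadClock thr(/*tid=*/0);
//   SyncClock sync;
//   thr.tick();
//   thr.ReleaseStore(&cache, &sync);   // O(N): publishes thr's clock
//   thr.acquire(&cache, &sync);        // O(1): the 'acquired' flag matches
//   sync.Reset(&cache);                // required before sync is destroyed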

void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);

  if (dst->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, dst);
    return;
  }

  CPP_STAT_INC(StatClockRelease);
  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  // If we have not acquired anything from other threads since the last
  // release on dst, we only need to update dst->elem(tid_).
  if (dst->elem(tid_).epoch > last_acquire_) {
    UpdateCurrentThread(dst);
    if (dst->release_store_tid_ != tid_ ||
        dst->release_store_reused_ != reused_)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  CPP_STAT_INC(StatClockReleaseFull);
  // First, remember whether we've acquired dst.
  bool acquired = IsAlreadyAcquired(dst);
  if (acquired)
    CPP_STAT_INC(StatClockReleaseAcquired);
  // Update dst->clk_.
  for (uptr i = 0; i < nclk_; i++) {
    ClockElem &ce = dst->elem(i);
    ce.epoch = max(ce.epoch, clk_[i].epoch);
    ce.reused = 0;
  }
  // Clear the 'acquired' flag in the remaining elements.
  if (nclk_ < dst->size_)
    CPP_STAT_INC(StatClockReleaseClearTail);
  for (uptr i = nclk_; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;
  // If we've acquired dst, remember this fact,
  // so that we don't need to acquire it on the next acquire.
  if (acquired)
    dst->elem(tid_).reused = reused_;
}

void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);
  CPP_STAT_INC(StatClockStore);

  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  if (dst->release_store_tid_ == tid_ &&
      dst->release_store_reused_ == reused_ &&
      dst->elem(tid_).epoch > last_acquire_) {
    CPP_STAT_INC(StatClockStoreFast);
    UpdateCurrentThread(dst);
    return;
  }

  // O(N) release-store.
  CPP_STAT_INC(StatClockStoreFull);
  for (uptr i = 0; i < nclk_; i++) {
    ClockElem &ce = dst->elem(i);
    ce.epoch = clk_[i].epoch;
    ce.reused = 0;
  }
  // Clear the tail of dst->clk_.
  if (nclk_ < dst->size_) {
    for (uptr i = nclk_; i < dst->size_; i++) {
      ClockElem &ce = dst->elem(i);
      ce.epoch = 0;
      ce.reused = 0;
    }
    CPP_STAT_INC(StatClockStoreTail);
  }
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;
  // Remember that we don't need to acquire it in the future.
  dst->elem(tid_).reused = reused_;
}

void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
  CPP_STAT_INC(StatClockAcquireRelease);
  acquire(c, dst);
  ReleaseStore(c, dst);
}

// Updates only the single element related to the current thread in dst->clk_.
void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
  // Update the thread's time, but preserve the 'acquired' flag.
  dst->elem(tid_).epoch = clk_[tid_].epoch;

  for (unsigned i = 0; i < kDirtyTids; i++) {
    if (dst->dirty_tids_[i] == tid_) {
      CPP_STAT_INC(StatClockReleaseFast1);
      return;
    }
    if (dst->dirty_tids_[i] == kInvalidTid) {
      CPP_STAT_INC(StatClockReleaseFast2);
      dst->dirty_tids_[i] = tid_;
      return;
    }
  }
  // Reset all 'acquired' flags, O(N).
  CPP_STAT_INC(StatClockReleaseSlow);
  for (uptr i = 0; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
}
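
// An informal interleaving showing why the dirty slots are needed (a model
// sketch; T0/T1 are threads, S is a SyncClock):
//
//   T1: release(S)   // O(N) store; S.release_store_tid_ = 1
//   T0: acquire(S)   // O(N); sets S.elem(0).reused = T0.reused_
//   T1: release(S)   // fast path: updates only S.elem(1); dirty_tids_ = {1}
//   T0: acquire(S)   // fast path: T0's 'acquired' flag is still set, but
//                    // dirty_tids_ forces re-reading S.elem(1), so T1's new
//                    // epoch is not missed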

// Checks whether the current thread has already acquired src.
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
  if (src->elem(tid_).reused != reused_)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    unsigned tid = src->dirty_tids_[i];
    if (tid != kInvalidTid) {
      if (clk_[tid].epoch < src->elem(tid).epoch)
        return false;
    }
  }
  return true;
}

void SyncClock::Resize(ClockCache *c, uptr nclk) {
  CPP_STAT_INC(StatClockReleaseResize);
  if (RoundUpTo(nclk, ClockBlock::kClockCount) <=
      RoundUpTo(size_, ClockBlock::kClockCount)) {
    // Growing within the same block.
    // Memory is already allocated, just increase the size.
    size_ = nclk;
    return;
  }
  if (nclk <= ClockBlock::kClockCount) {
    // Grow from 0 to one-level table.
    CHECK_EQ(size_, 0);
    CHECK_EQ(tab_, 0);
    CHECK_EQ(tab_idx_, 0);
    size_ = nclk;
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    return;
  }
  // Growing two-level table.
  if (size_ == 0) {
    // Allocate first level table.
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
  } else if (size_ <= ClockBlock::kClockCount) {
    // Transform one-level table to two-level table.
    u32 old = tab_idx_;
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    tab_->table[0] = old;
  }
  // At this point we have the first level table allocated.
  // Add second level tables as necessary.
  for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount);
      i < nclk; i += ClockBlock::kClockCount) {
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    internal_memset(cb, 0, sizeof(*cb));
    CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0);
    tab_->table[i/ClockBlock::kClockCount] = idx;
  }
  size_ = nclk;
}
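
// The resulting storage layout, informally (ClockBlock::kClockCount elements
// fit in one block; illustration only):
//
//   size_ <= kClockCount:  tab_ is a single ClockBlock and element tid
//                          lives at tab_->clock[tid];
//   size_ >  kClockCount:  tab_->table[] holds indices of second-level
//                          blocks and element tid lives at
//                          Map(tab_->table[tid / kClockCount])
//                              ->clock[tid % kClockCount].
//
// SyncClock::elem() below implements exactly this lookup.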

// Sets a single element in the vector clock.
// This function is called only from weird places like AcquireGlobal.
void ThreadClock::set(unsigned tid, u64 v) {
  DCHECK_LT(tid, kMaxTid);
  DCHECK_GE(v, clk_[tid].epoch);
  clk_[tid].epoch = v;
  if (nclk_ <= tid)
    nclk_ = tid + 1;
  last_acquire_ = clk_[tid_].epoch;
}

void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
  printf("] reused=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
  printf("] tid=%u/%u last_acq=%llu",
      tid_, reused_, last_acquire_);
}

SyncClock::SyncClock()
    : release_store_tid_(kInvalidTid)
    , release_store_reused_()
    , tab_()
    , tab_idx_()
    , size_() {
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_tids_[i] = kInvalidTid;
}

SyncClock::~SyncClock() {
  // Reset must be called before dtor.
  CHECK_EQ(size_, 0);
  CHECK_EQ(tab_, 0);
  CHECK_EQ(tab_idx_, 0);
}

void SyncClock::Reset(ClockCache *c) {
  if (size_ == 0) {
    // nothing
  } else if (size_ <= ClockBlock::kClockCount) {
    // One-level table.
    ctx->clock_alloc.Free(c, tab_idx_);
  } else {
    // Two-level table.
    for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)
      ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);
    ctx->clock_alloc.Free(c, tab_idx_);
  }
  tab_ = 0;
  tab_idx_ = 0;
  size_ = 0;
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_tids_[i] = kInvalidTid;
}

ClockElem &SyncClock::elem(unsigned tid) const {
  DCHECK_LT(tid, size_);
  if (size_ <= ClockBlock::kClockCount)
    return tab_->clock[tid];
  u32 idx = tab_->table[tid / ClockBlock::kClockCount];
  ClockBlock *cb = ctx->clock_alloc.Map(idx);
  return cb->clock[tid % ClockBlock::kClockCount];
}

void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
  printf("] reused=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
  printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
      release_store_tid_, release_store_reused_,
      dirty_tids_[0], dirty_tids_[1]);
}
}  // namespace __tsan