//===-- tsan_rtl.cc -------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Main file (entry points) for the TSan run-time.
//===----------------------------------------------------------------------===//

#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_libc.h"
#include "sanitizer_common/sanitizer_stackdepot.h"
#include "sanitizer_common/sanitizer_placement_new.h"
#include "sanitizer_common/sanitizer_symbolizer.h"
#include "tsan_defs.h"
#include "tsan_platform.h"
#include "tsan_rtl.h"
#include "tsan_mman.h"
#include "tsan_suppressions.h"
#include "tsan_symbolize.h"

#ifdef __SSE3__
// <emmintrin.h> transitively includes <stdlib.h>,
// and it's prohibited to include std headers into tsan runtime.
// So we do this dirty trick.
#define _MM_MALLOC_H_INCLUDED
#define __MM_MALLOC_H
#include <emmintrin.h>
typedef __m128i m128;
#endif

volatile int __tsan_resumed = 0;

extern "C" void __tsan_resume() {
  __tsan_resumed = 1;
}

namespace __tsan {

#ifndef TSAN_GO
THREADLOCAL char cur_thread_placeholder[sizeof(ThreadState)] ALIGNED(64);
#endif
static char ctx_placeholder[sizeof(Context)] ALIGNED(64);
Context *ctx;

// Can be overridden by a front-end.
#ifdef TSAN_EXTERNAL_HOOKS
bool OnFinalize(bool failed);
void OnInitialize();
#else
SANITIZER_INTERFACE_ATTRIBUTE
bool WEAK OnFinalize(bool failed) {
  return failed;
}
SANITIZER_INTERFACE_ATTRIBUTE
void WEAK OnInitialize() {}
#endif

static char thread_registry_placeholder[sizeof(ThreadRegistry)];

static ThreadContextBase *CreateThreadContext(u32 tid) {
  // Map thread trace when context is created.
  MapThreadTrace(GetThreadTrace(tid), TraceSize() * sizeof(Event));
  MapThreadTrace(GetThreadTraceHeader(tid), sizeof(Trace));
  new(ThreadTrace(tid)) Trace();
  void *mem = internal_alloc(MBlockThreadContex, sizeof(ThreadContext));
  return new(mem) ThreadContext(tid);
}

#ifndef TSAN_GO
static const u32 kThreadQuarantineSize = 16;
#else
static const u32 kThreadQuarantineSize = 64;
#endif

Context::Context()
  : initialized()
  , report_mtx(MutexTypeReport, StatMtxReport)
  , nreported()
  , nmissed_expected()
  , thread_registry(new(thread_registry_placeholder) ThreadRegistry(
      CreateThreadContext, kMaxTid, kThreadQuarantineSize, kMaxTidReuse))
  , racy_stacks(MBlockRacyStacks)
  , racy_addresses(MBlockRacyAddresses)
  , fired_suppressions(8) {
}

// The objects are allocated in TLS, so one may rely on zero-initialization.
ThreadState::ThreadState(Context *ctx, int tid, int unique_id, u64 epoch,
                         unsigned reuse_count,
                         uptr stk_addr, uptr stk_size,
                         uptr tls_addr, uptr tls_size)
  : fast_state(tid, epoch)
  // Do not touch these, rely on zero initialization,
  // they may be accessed before the ctor.
  // , ignore_reads_and_writes()
  // , ignore_interceptors()
  , clock(tid, reuse_count)
#ifndef TSAN_GO
  , jmp_bufs(MBlockJmpBuf)
#endif
  , tid(tid)
  , unique_id(unique_id)
  , stk_addr(stk_addr)
  , stk_size(stk_size)
  , tls_addr(tls_addr)
  , tls_size(tls_size)
#ifndef TSAN_GO
  , last_sleep_clock(tid)
#endif
{
}

static void MemoryProfiler(Context *ctx, fd_t fd, int i) {
  uptr n_threads;
  uptr n_running_threads;
  ctx->thread_registry->GetNumberOfThreads(&n_threads, &n_running_threads);
  InternalScopedBuffer<char> buf(4096);
  WriteMemoryProfile(buf.data(), buf.size(), n_threads, n_running_threads);
  internal_write(fd, buf.data(), internal_strlen(buf.data()));
}

static void BackgroundThread(void *arg) {
#ifndef TSAN_GO
  // This is a non-initialized non-user thread, nothing to see here.
  // We don't use ScopedIgnoreInterceptors, because we want ignores to be
  // enabled even when the thread function exits (e.g. during pthread thread
  // shutdown code).
  cur_thread()->ignore_interceptors++;
#endif
  const u64 kMs2Ns = 1000 * 1000;

  fd_t mprof_fd = kInvalidFd;
  if (flags()->profile_memory && flags()->profile_memory[0]) {
    if (internal_strcmp(flags()->profile_memory, "stdout") == 0) {
      mprof_fd = 1;
    } else if (internal_strcmp(flags()->profile_memory, "stderr") == 0) {
      mprof_fd = 2;
    } else {
      InternalScopedBuffer<char> filename(4096);
      internal_snprintf(filename.data(), filename.size(), "%s.%d",
          flags()->profile_memory, (int)internal_getpid());
      uptr openrv = OpenFile(filename.data(), true);
      if (internal_iserror(openrv)) {
        Printf("ThreadSanitizer: failed to open memory profile file '%s'\n",
            &filename[0]);
      } else {
        mprof_fd = openrv;
      }
    }
  }

  u64 last_flush = NanoTime();
  u64 last_rss_check = NanoTime();
  uptr last_rss = 0;
  for (int i = 0;
      atomic_load(&ctx->stop_background_thread, memory_order_relaxed) == 0;
      i++) {
    SleepForMillis(100);
    u64 now = NanoTime();

    // Flush memory if requested.
    if (flags()->flush_memory_ms > 0) {
      if (last_flush + flags()->flush_memory_ms * kMs2Ns < now) {
        if (flags()->verbosity > 0)
          Printf("ThreadSanitizer: periodic memory flush\n");
        FlushShadowMemory();
        last_flush = NanoTime();
      }
    }
    // GetRSS can be expensive on huge programs, so don't do it every 100ms.
    if (flags()->memory_limit_mb > 0 && last_rss_check + 1000 * kMs2Ns < now) {
      last_rss_check = now;
      uptr rss = GetRSS();
      uptr limit = uptr(flags()->memory_limit_mb) << 20;
      if (flags()->verbosity > 0) {
        Printf("ThreadSanitizer: memory flush check"
               " RSS=%llu LAST=%llu LIMIT=%llu\n",
               (u64)rss>>20, (u64)last_rss>>20, (u64)limit>>20);
      }
      // Flush when RSS has grown past the midpoint between the last observed
      // RSS and the limit: 2*rss > limit + last_rss is equivalent to
      // rss - last_rss > limit - rss.
      if (2 * rss > limit + last_rss) {
        if (flags()->verbosity > 0)
          Printf("ThreadSanitizer: flushing memory due to RSS\n");
        FlushShadowMemory();
        rss = GetRSS();
        if (flags()->verbosity > 0)
          Printf("ThreadSanitizer: memory flushed RSS=%llu\n", (u64)rss>>20);
      }
      last_rss = rss;
    }

    // Write memory profile if requested.
    if (mprof_fd != kInvalidFd)
      MemoryProfiler(ctx, mprof_fd, i);

#ifndef TSAN_GO
    // Flush symbolizer cache if requested.
    if (flags()->flush_symbolizer_ms > 0) {
      u64 last = atomic_load(&ctx->last_symbolize_time_ns,
                             memory_order_relaxed);
      if (last != 0 && last + flags()->flush_symbolizer_ms * kMs2Ns < now) {
        Lock l(&ctx->report_mtx);
        SpinMutexLock l2(&CommonSanitizerReportMutex);
        SymbolizeFlush();
        atomic_store(&ctx->last_symbolize_time_ns, 0, memory_order_relaxed);
      }
    }
#endif
  }
}

static void StartBackgroundThread() {
  ctx->background_thread = internal_start_thread(&BackgroundThread, 0);
}

#ifndef TSAN_GO
static void StopBackgroundThread() {
  atomic_store(&ctx->stop_background_thread, 1, memory_order_relaxed);
  internal_join_thread(ctx->background_thread);
  ctx->background_thread = 0;
}
#endif

void DontNeedShadowFor(uptr addr, uptr size) {
  uptr shadow_beg = MemToShadow(addr);
  uptr shadow_end = MemToShadow(addr + size);
  FlushUnneededShadowMemory(shadow_beg, shadow_end - shadow_beg);
}

void MapShadow(uptr addr, uptr size) {
  // Global data is not 64K aligned, but there are no adjacent mappings,
  // so we can get away with unaligned mapping.
  // CHECK_EQ(addr, addr & ~((64 << 10) - 1));  // windows wants 64K alignment
  MmapFixedNoReserve(MemToShadow(addr), size * kShadowMultiplier);

  // Meta shadow is 2:1, so tread carefully.
  static bool data_mapped = false;
  static uptr mapped_meta_end = 0;
  uptr meta_begin = (uptr)MemToMeta(addr);
  uptr meta_end = (uptr)MemToMeta(addr + size);
  meta_begin = RoundDownTo(meta_begin, 64 << 10);
  meta_end = RoundUpTo(meta_end, 64 << 10);
  if (!data_mapped) {
    // First call maps data+bss.
    data_mapped = true;
    MmapFixedNoReserve(meta_begin, meta_end - meta_begin);
  } else {
    // Mapping continuous heap.
    // Windows wants 64K alignment.
    meta_begin = RoundDownTo(meta_begin, 64 << 10);
    meta_end = RoundUpTo(meta_end, 64 << 10);
    if (meta_end <= mapped_meta_end)
      return;
    if (meta_begin < mapped_meta_end)
      meta_begin = mapped_meta_end;
    MmapFixedNoReserve(meta_begin, meta_end - meta_begin);
    mapped_meta_end = meta_end;
  }
  VPrintf(2, "mapped meta shadow for (%p-%p) at (%p-%p)\n",
      addr, addr+size, meta_begin, meta_end);
}

void MapThreadTrace(uptr addr, uptr size) {
  DPrintf("#0: Mapping trace at %p-%p(0x%zx)\n", addr, addr + size, size);
  CHECK_GE(addr, kTraceMemBegin);
  CHECK_LE(addr + size, kTraceMemBegin + kTraceMemSize);
  CHECK_EQ(addr, addr & ~((64 << 10) - 1));  // windows wants 64K alignment
  uptr addr1 = (uptr)MmapFixedNoReserve(addr, size);
  if (addr1 != addr) {
    Printf("FATAL: ThreadSanitizer can not mmap thread trace (%p/%p->%p)\n",
        addr, size, addr1);
    Die();
  }
}

void Initialize(ThreadState *thr) {
  // Thread safe because done before all threads exist.
  static bool is_initialized = false;
  if (is_initialized)
    return;
  is_initialized = true;
  // We are not ready to handle interceptors yet.
  ScopedIgnoreInterceptors ignore;
  SanitizerToolName = "ThreadSanitizer";
  // Install tool-specific callbacks in sanitizer_common.
  SetCheckFailedCallback(TsanCheckFailed);

#ifndef TSAN_GO
  InitializeAllocator();
#endif
  InitializeInterceptors();
  const char *env = InitializePlatform();
  InitializeMutex();
  InitializeDynamicAnnotations();
  ctx = new(ctx_placeholder) Context;
#ifndef TSAN_GO
  InitializeShadowMemory();
#endif
  InitializeFlags(&ctx->flags, env);
  // Setup correct file descriptor for error reports.
  __sanitizer_set_report_path(flags()->log_path);
  InitializeSuppressions();
#ifndef TSAN_GO
  InitializeLibIgnore();
  Symbolizer::Init(common_flags()->external_symbolizer_path);
  Symbolizer::Get()->AddHooks(EnterSymbolizer, ExitSymbolizer);
#endif
  StartBackgroundThread();
#ifndef TSAN_GO
  SetSandboxingCallback(StopBackgroundThread);
#endif
  if (flags()->detect_deadlocks)
    ctx->dd = DDetector::Create(flags());

  if (ctx->flags.verbosity)
    Printf("***** Running under ThreadSanitizer v2 (pid %d) *****\n",
           (int)internal_getpid());

  // Initialize thread 0.
  int tid = ThreadCreate(thr, 0, 0, true);
  CHECK_EQ(tid, 0);
  ThreadStart(thr, tid, internal_getpid());
  ctx->initialized = true;

  if (flags()->stop_on_start) {
    Printf("ThreadSanitizer is suspended at startup (pid %d)."
           " Call __tsan_resume().\n",
           (int)internal_getpid());
    while (__tsan_resumed == 0) {}
  }

  OnInitialize();
}

int Finalize(ThreadState *thr) {
  Context *ctx = __tsan::ctx;
  bool failed = false;

  if (flags()->atexit_sleep_ms > 0 && ThreadCount(thr) > 1)
    SleepForMillis(flags()->atexit_sleep_ms);

  // Wait for pending reports.
  ctx->report_mtx.Lock();
  CommonSanitizerReportMutex.Lock();
  CommonSanitizerReportMutex.Unlock();
  ctx->report_mtx.Unlock();

#ifndef TSAN_GO
  if (ctx->flags.verbosity)
    AllocatorPrintStats();
#endif

  ThreadFinalize(thr);

  if (ctx->nreported) {
    failed = true;
#ifndef TSAN_GO
    Printf("ThreadSanitizer: reported %d warnings\n", ctx->nreported);
#else
    Printf("Found %d data race(s)\n", ctx->nreported);
#endif
  }

  if (ctx->nmissed_expected) {
    failed = true;
    Printf("ThreadSanitizer: missed %d expected races\n",
        ctx->nmissed_expected);
  }

  if (flags()->print_suppressions)
    PrintMatchedSuppressions();
#ifndef TSAN_GO
  if (flags()->print_benign)
    PrintMatchedBenignRaces();
#endif

  failed = OnFinalize(failed);

  StatAggregate(ctx->stat, thr->stat);
  StatOutput(ctx->stat);
  return failed ? flags()->exitcode : 0;
}

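// ForkBefore/ForkParentAfter/ForkChildAfter are called around fork()
// (presumably from the fork interceptor): the thread registry and report
// mutexes are acquired before the fork and released afterwards in both the
// parent and the child, so the child never inherits them in a locked state
// from an unrelated thread.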
#ifndef TSAN_GO
void ForkBefore(ThreadState *thr, uptr pc) {
  ctx->thread_registry->Lock();
  ctx->report_mtx.Lock();
}

void ForkParentAfter(ThreadState *thr, uptr pc) {
  ctx->report_mtx.Unlock();
  ctx->thread_registry->Unlock();
}

void ForkChildAfter(ThreadState *thr, uptr pc) {
  ctx->report_mtx.Unlock();
  ctx->thread_registry->Unlock();

  uptr nthread = 0;
  ctx->thread_registry->GetNumberOfThreads(0, 0, &nthread /* alive threads */);
  VPrintf(1, "ThreadSanitizer: forked new process with pid %d,"
      " parent had %d threads\n", (int)internal_getpid(), (int)nthread);
  if (nthread == 1) {
    internal_start_thread(&BackgroundThread, 0);
  } else {
    // We've just forked a multi-threaded process. We cannot reasonably
    // function after that (some mutexes may be locked before fork). So just
    // enable ignores for everything in the hope that we will exec soon.
    ctx->after_multithreaded_fork = true;
    thr->ignore_interceptors++;
    ThreadIgnoreBegin(thr, pc);
    ThreadIgnoreSyncBegin(thr, pc);
  }
}
#endif

#ifdef TSAN_GO
NOINLINE
void GrowShadowStack(ThreadState *thr) {
  const int sz = thr->shadow_stack_end - thr->shadow_stack;
  const int newsz = 2 * sz;
  uptr *newstack = (uptr*)internal_alloc(MBlockShadowStack,
      newsz * sizeof(uptr));
  internal_memcpy(newstack, thr->shadow_stack, sz * sizeof(uptr));
  internal_free(thr->shadow_stack);
  thr->shadow_stack = newstack;
  thr->shadow_stack_pos = newstack + sz;
  thr->shadow_stack_end = newstack + newsz;
}
#endif

u32 CurrentStackId(ThreadState *thr, uptr pc) {
  if (thr->shadow_stack_pos == 0)  // May happen during bootstrap.
    return 0;
  if (pc != 0) {
#ifndef TSAN_GO
    DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
#else
    if (thr->shadow_stack_pos == thr->shadow_stack_end)
      GrowShadowStack(thr);
#endif
    thr->shadow_stack_pos[0] = pc;
    thr->shadow_stack_pos++;
  }
  u32 id = StackDepotPut(thr->shadow_stack,
                         thr->shadow_stack_pos - thr->shadow_stack);
  if (pc != 0)
    thr->shadow_stack_pos--;
  return id;
}

void TraceSwitch(ThreadState *thr) {
  thr->nomalloc++;
  Trace *thr_trace = ThreadTrace(thr->tid);
  Lock l(&thr_trace->mtx);
  unsigned trace = (thr->fast_state.epoch() / kTracePartSize) % TraceParts();
  TraceHeader *hdr = &thr_trace->headers[trace];
  hdr->epoch0 = thr->fast_state.epoch();
  hdr->stack0.ObtainCurrent(thr, 0);
  hdr->mset0 = thr->mset;
  thr->nomalloc--;
}

Trace *ThreadTrace(int tid) {
  return (Trace*)GetThreadTraceHeader(tid);
}

uptr TraceTopPC(ThreadState *thr) {
  Event *events = (Event*)GetThreadTrace(thr->tid);
  uptr pc = events[thr->fast_state.GetTracePos()];
  return pc;
}

uptr TraceSize() {
  return (uptr)(1ull << (kTracePartSizeBits + flags()->history_size + 1));
}

uptr TraceParts() {
  return TraceSize() / kTracePartSize;
}

#ifndef TSAN_GO
extern "C" void __tsan_trace_switch() {
  TraceSwitch(cur_thread());
}

extern "C" void __tsan_report_race() {
  ReportRace(cur_thread());
}
#endif

ALWAYS_INLINE
Shadow LoadShadow(u64 *p) {
  u64 raw = atomic_load((atomic_uint64_t*)p, memory_order_relaxed);
  return Shadow(raw);
}

ALWAYS_INLINE
void StoreShadow(u64 *sp, u64 s) {
  atomic_store((atomic_uint64_t*)sp, s, memory_order_relaxed);
}

ALWAYS_INLINE
void StoreIfNotYetStored(u64 *sp, u64 *s) {
  StoreShadow(sp, *s);
  *s = 0;
}

ALWAYS_INLINE
void HandleRace(ThreadState *thr, u64 *shadow_mem,
                Shadow cur, Shadow old) {
  thr->racy_state[0] = cur.raw();
  thr->racy_state[1] = old.raw();
  thr->racy_shadow_addr = shadow_mem;
#ifndef TSAN_GO
  HACKY_CALL(__tsan_report_race);
#else
  ReportRace(thr);
#endif
}

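// HappensBefore() answers: is the access recorded in the shadow value 'old'
// ordered before the current point in this thread? It is, iff the calling
// thread's vector clock entry for old's thread id has reached old's epoch,
// i.e. this thread has already synchronized with that thread at or after the
// moment the old access was made.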
static inline bool HappensBefore(Shadow old, ThreadState *thr) {
  return thr->clock.get(old.TidWithIgnore()) >= old.epoch();
}

ALWAYS_INLINE
void MemoryAccessImpl1(ThreadState *thr, uptr addr,
    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
    u64 *shadow_mem, Shadow cur) {
  StatInc(thr, StatMop);
  StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
  StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));

  // This potentially can live in an MMX/SSE scratch register.
  // The required intrinsics are:
  // __m128i _mm_move_epi64(__m128i*);
  // _mm_storel_epi64(u64*, __m128i);
  u64 store_word = cur.raw();

  // scan all the shadow values and dispatch to 4 categories:
  // same, replace, candidate and race (see comments below).
  // we consider only 3 cases regarding access sizes:
  // equal, intersect and not intersect. initially I considered
  // larger and smaller as well, it allowed replacing some
  // 'candidates' with 'same' or 'replace', but I think
  // it's just not worth it (performance- and complexity-wise).

  Shadow old(0);
  if (kShadowCnt == 1) {
    int idx = 0;
#include "tsan_update_shadow_word_inl.h"
  } else if (kShadowCnt == 2) {
    int idx = 0;
#include "tsan_update_shadow_word_inl.h"
    idx = 1;
#include "tsan_update_shadow_word_inl.h"
  } else if (kShadowCnt == 4) {
    int idx = 0;
#include "tsan_update_shadow_word_inl.h"
    idx = 1;
#include "tsan_update_shadow_word_inl.h"
    idx = 2;
#include "tsan_update_shadow_word_inl.h"
    idx = 3;
#include "tsan_update_shadow_word_inl.h"
  } else if (kShadowCnt == 8) {
    int idx = 0;
#include "tsan_update_shadow_word_inl.h"
    idx = 1;
#include "tsan_update_shadow_word_inl.h"
    idx = 2;
#include "tsan_update_shadow_word_inl.h"
    idx = 3;
#include "tsan_update_shadow_word_inl.h"
    idx = 4;
#include "tsan_update_shadow_word_inl.h"
    idx = 5;
#include "tsan_update_shadow_word_inl.h"
    idx = 6;
#include "tsan_update_shadow_word_inl.h"
    idx = 7;
#include "tsan_update_shadow_word_inl.h"
  } else {
    CHECK(false);
  }

  // we did not find any races and had already stored
  // the current access info, so we are done
  if (LIKELY(store_word == 0))
    return;
  // choose a random candidate slot and replace it
  StoreShadow(shadow_mem + (cur.epoch() % kShadowCnt), store_word);
  StatInc(thr, StatShadowReplace);
  return;
 RACE:
  HandleRace(thr, shadow_mem, cur, old);
  return;
}

void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr,
    int size, bool kAccessIsWrite, bool kIsAtomic) {
  while (size) {
    // Carve off the largest chunk that does not cross an 8-byte shadow cell
    // boundary, e.g. a 6-byte access starting at addr%8 == 3 is processed
    // as 4 + 1 + 1 byte accesses.
    int size1 = 1;
    int kAccessSizeLog = kSizeLog1;
    if (size >= 8 && (addr & ~7) == ((addr + 7) & ~7)) {
      size1 = 8;
      kAccessSizeLog = kSizeLog8;
    } else if (size >= 4 && (addr & ~7) == ((addr + 3) & ~7)) {
      size1 = 4;
      kAccessSizeLog = kSizeLog4;
    } else if (size >= 2 && (addr & ~7) == ((addr + 1) & ~7)) {
      size1 = 2;
      kAccessSizeLog = kSizeLog2;
    }
    MemoryAccess(thr, pc, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic);
    addr += size1;
    size -= size1;
  }
}

ALWAYS_INLINE
bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
  Shadow cur(a);
  for (uptr i = 0; i < kShadowCnt; i++) {
    Shadow old(LoadShadow(&s[i]));
    if (Shadow::Addr0AndSizeAreEqual(cur, old) &&
        old.TidWithIgnore() == cur.TidWithIgnore() &&
        old.epoch() > sync_epoch &&
        old.IsAtomic() == cur.IsAtomic() &&
        old.IsRead() <= cur.IsRead())
      return true;
  }
  return false;
}

#if defined(__SSE3__) && TSAN_SHADOW_COUNT == 4
#define SHUF(v0, v1, i0, i1, i2, i3) _mm_castps_si128(_mm_shuffle_ps( \
    _mm_castsi128_ps(v0), _mm_castsi128_ps(v1), \
    (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))

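// ContainsSameAccessFast is the SSE counterpart of ContainsSameAccessSlow.
// As implied by the shuffles below, it assumes a shadow word layout where
// the fields compared by the slow path other than the epoch (addr0/size,
// tid, is_atomic, is_read) live in the high 32 bits and the epoch bits used
// for the freshness check live in the low 32 bits:
//  - equality of the high halves stands in for the Addr0AndSizeAreEqual,
//    TidWithIgnore and IsAtomic comparisons;
//  - when the current access is a read, the 1<<15 read bit is ORed into the
//    loaded shadow halves, which reproduces old.IsRead() <= cur.IsRead();
//  - the signed 32-bit compare-greater of the low halves reproduces
//    old.epoch() > sync_epoch for epochs that fit in those lanes.
// The DCHECK in ContainsSameAccess() cross-checks the two implementations.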
ALWAYS_INLINE
bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
  // This is an optimized version of ContainsSameAccessSlow.
  // load current access into access[0:63]
  const m128 access = _mm_cvtsi64_si128(a);
  // duplicate high part of access in addr0:
  // addr0[0:31] = access[32:63]
  // addr0[32:63] = access[32:63]
  // addr0[64:95] = access[32:63]
  // addr0[96:127] = access[32:63]
  const m128 addr0 = SHUF(access, access, 1, 1, 1, 1);
  // load 4 shadow slots
  const m128 shadow0 = _mm_load_si128((__m128i*)s);
  const m128 shadow1 = _mm_load_si128((__m128i*)s + 1);
  // load high parts of 4 shadow slots into addr_vect:
  // addr_vect[0:31] = shadow0[32:63]
  // addr_vect[32:63] = shadow0[96:127]
  // addr_vect[64:95] = shadow1[32:63]
  // addr_vect[96:127] = shadow1[96:127]
  m128 addr_vect = SHUF(shadow0, shadow1, 1, 3, 1, 3);
  if (!is_write) {
    // set IsRead bit in addr_vect
    const m128 rw_mask1 = _mm_cvtsi64_si128(1<<15);
    const m128 rw_mask = SHUF(rw_mask1, rw_mask1, 0, 0, 0, 0);
    addr_vect = _mm_or_si128(addr_vect, rw_mask);
  }
  // addr0 == addr_vect?
  const m128 addr_res = _mm_cmpeq_epi32(addr0, addr_vect);
  // epoch1[0:63] = sync_epoch
  const m128 epoch1 = _mm_cvtsi64_si128(sync_epoch);
  // epoch[0:31] = sync_epoch[0:31]
  // epoch[32:63] = sync_epoch[0:31]
  // epoch[64:95] = sync_epoch[0:31]
  // epoch[96:127] = sync_epoch[0:31]
  const m128 epoch = SHUF(epoch1, epoch1, 0, 0, 0, 0);
  // load low parts of shadow cell epochs into epoch_vect:
  // epoch_vect[0:31] = shadow0[0:31]
  // epoch_vect[32:63] = shadow0[64:95]
  // epoch_vect[64:95] = shadow1[0:31]
  // epoch_vect[96:127] = shadow1[64:95]
  const m128 epoch_vect = SHUF(shadow0, shadow1, 0, 2, 0, 2);
  // epoch_vect > sync_epoch?
  const m128 epoch_res = _mm_cmpgt_epi32(epoch_vect, epoch);
  // addr_res & epoch_res
  const m128 res = _mm_and_si128(addr_res, epoch_res);
  // mask[0] = res[7]
  // mask[1] = res[15]
  // ...
  // mask[15] = res[127]
  const int mask = _mm_movemask_epi8(res);
  return mask != 0;
}
#endif

ALWAYS_INLINE
bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
#if defined(__SSE3__) && TSAN_SHADOW_COUNT == 4
  bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write);
  DCHECK_EQ(res, ContainsSameAccessSlow(s, a, sync_epoch, is_write));
  return res;
#else
  return ContainsSameAccessSlow(s, a, sync_epoch, is_write);
#endif
}

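// MemoryAccess() is the entry point for instrumented loads and stores (see
// tsan_interface_inl.h included at the end of this file). It takes several
// early exits before doing the full shadow update in MemoryAccessImpl1():
// a constant-shadow check for .rodata, the per-thread ignore bit, and the
// ContainsSameAccess() filter that recognizes a repeat of an access already
// recorded since the last synchronization.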
ALWAYS_INLINE USED
void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic) {
  u64 *shadow_mem = (u64*)MemToShadow(addr);
  DPrintf2("#%d: MemoryAccess: @%p %p size=%d"
      " is_write=%d shadow_mem=%p {%zx, %zx, %zx, %zx}\n",
      (int)thr->fast_state.tid(), (void*)pc, (void*)addr,
      (int)(1 << kAccessSizeLog), kAccessIsWrite, shadow_mem,
      (uptr)shadow_mem[0], (uptr)shadow_mem[1],
      (uptr)shadow_mem[2], (uptr)shadow_mem[3]);
#if TSAN_DEBUG
  if (!IsAppMem(addr)) {
    Printf("Access to non app mem %zx\n", addr);
    DCHECK(IsAppMem(addr));
  }
  if (!IsShadowMem((uptr)shadow_mem)) {
    Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
    DCHECK(IsShadowMem((uptr)shadow_mem));
  }
#endif

  if (kCppMode && *shadow_mem == kShadowRodata) {
    // Access to .rodata section, no races here.
    // Measurements show that it can be 10-20% of all memory accesses.
    StatInc(thr, StatMop);
    StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
    StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
    StatInc(thr, StatMopRodata);
    return;
  }

  FastState fast_state = thr->fast_state;
  if (fast_state.GetIgnoreBit()) {
    StatInc(thr, StatMop);
    StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
    StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
    StatInc(thr, StatMopIgnored);
    return;
  }

  Shadow cur(fast_state);
  cur.SetAddr0AndSizeLog(addr & 7, kAccessSizeLog);
  cur.SetWrite(kAccessIsWrite);
  cur.SetAtomic(kIsAtomic);

  if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
      thr->fast_synch_epoch, kAccessIsWrite))) {
    StatInc(thr, StatMop);
    StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
    StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
    StatInc(thr, StatMopSame);
    return;
  }

  if (kCollectHistory) {
    fast_state.IncrementEpoch();
    thr->fast_state = fast_state;
    TraceAddEvent(thr, fast_state, EventTypeMop, pc);
    cur.IncrementEpoch();
  }

  MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
      shadow_mem, cur);
}

// Called by MemoryAccessRange in tsan_rtl_thread.cc
ALWAYS_INLINE USED
void MemoryAccessImpl(ThreadState *thr, uptr addr,
    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
    u64 *shadow_mem, Shadow cur) {
  if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
      thr->fast_synch_epoch, kAccessIsWrite))) {
    StatInc(thr, StatMop);
    StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
    StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
    StatInc(thr, StatMopSame);
    return;
  }

  MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
      shadow_mem, cur);
}

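// MemoryRangeSet() fills the shadow for [addr, addr+size) with a fixed
// shadow value: each kShadowCell (8 application bytes) maps to kShadowCnt
// consecutive shadow words, and the loops write 'val' into the first word
// of every cell and zero the remaining words. For large regions the middle
// is released back to the OS and remapped on demand instead of being
// written explicitly.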
static void MemoryRangeSet(ThreadState *thr, uptr pc, uptr addr, uptr size,
                           u64 val) {
  (void)thr;
  (void)pc;
  if (size == 0)
    return;
  // FIXME: fix me.
  uptr offset = addr % kShadowCell;
  if (offset) {
    offset = kShadowCell - offset;
    if (size <= offset)
      return;
    addr += offset;
    size -= offset;
  }
  DCHECK_EQ(addr % 8, 0);
  // If a user passes some insane arguments (memset(0)),
  // let it just crash as usual.
  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
    return;
  // Don't want to touch lots of shadow memory.
  // If a program maps 10MB stack, there is no need to reset the whole range.
  size = (size + (kShadowCell - 1)) & ~(kShadowCell - 1);
  // UnmapOrDie/MmapFixedNoReserve does not work on Windows,
  // so we do it only for C/C++.
  if (kGoMode || size < common_flags()->clear_shadow_mmap_threshold) {
    u64 *p = (u64*)MemToShadow(addr);
    CHECK(IsShadowMem((uptr)p));
    CHECK(IsShadowMem((uptr)(p + size * kShadowCnt / kShadowCell - 1)));
    // FIXME: may overwrite a part outside the region
    for (uptr i = 0; i < size / kShadowCell * kShadowCnt;) {
      p[i++] = val;
      for (uptr j = 1; j < kShadowCnt; j++)
        p[i++] = 0;
    }
  } else {
    // The region is big, reset only beginning and end.
    const uptr kPageSize = 4096;
    u64 *begin = (u64*)MemToShadow(addr);
    u64 *end = begin + size / kShadowCell * kShadowCnt;
    u64 *p = begin;
    // Set at least first kPageSize/2 to page boundary.
    while ((p < begin + kPageSize / kShadowSize / 2) || ((uptr)p % kPageSize)) {
      *p++ = val;
      for (uptr j = 1; j < kShadowCnt; j++)
        *p++ = 0;
    }
    // Reset middle part.
    u64 *p1 = p;
    p = RoundDown(end, kPageSize);
    UnmapOrDie((void*)p1, (uptr)p - (uptr)p1);
    MmapFixedNoReserve((uptr)p1, (uptr)p - (uptr)p1);
    // Set the ending.
    while (p < end) {
      *p++ = val;
      for (uptr j = 1; j < kShadowCnt; j++)
        *p++ = 0;
    }
  }
}

void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size) {
  MemoryRangeSet(thr, pc, addr, size, 0);
}

void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size) {
  // Processing more than 1k (4k of shadow) is expensive,
  // can cause excessive memory consumption (user does not necessarily touch
  // the whole range) and most likely unnecessary.
  if (size > 1024)
    size = 1024;
  CHECK_EQ(thr->is_freeing, false);
  thr->is_freeing = true;
  MemoryAccessRange(thr, pc, addr, size, true);
  thr->is_freeing = false;
  if (kCollectHistory) {
    thr->fast_state.IncrementEpoch();
    TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
  }
  Shadow s(thr->fast_state);
  s.ClearIgnoreBit();
  s.MarkAsFreed();
  s.SetWrite(true);
  s.SetAddr0AndSizeLog(0, 3);
  MemoryRangeSet(thr, pc, addr, size, s.raw());
}

void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size) {
  if (kCollectHistory) {
    thr->fast_state.IncrementEpoch();
    TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
  }
  Shadow s(thr->fast_state);
  s.ClearIgnoreBit();
  s.SetWrite(true);
  s.SetAddr0AndSizeLog(0, 3);
  MemoryRangeSet(thr, pc, addr, size, s.raw());
}

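// FuncEntry/FuncExit maintain the per-thread shadow call stack: every
// instrumented function entry pushes its PC and every exit pops it. The
// stack feeds CurrentStackId() and the trace headers (TraceSwitch() stores
// it in stack0), from which the stack of a previous access can later be
// restored for reports. In the Go build the stack grows on demand; in C++
// it has a fixed reserve guarded by the DCHECKs below.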
ALWAYS_INLINE USED
void FuncEntry(ThreadState *thr, uptr pc) {
  StatInc(thr, StatFuncEnter);
  DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.tid(), (void*)pc);
  if (kCollectHistory) {
    thr->fast_state.IncrementEpoch();
    TraceAddEvent(thr, thr->fast_state, EventTypeFuncEnter, pc);
  }

  // Shadow stack maintenance can be replaced with
  // stack unwinding during trace switch (which presumably must be faster).
  DCHECK_GE(thr->shadow_stack_pos, thr->shadow_stack);
#ifndef TSAN_GO
  DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
#else
  if (thr->shadow_stack_pos == thr->shadow_stack_end)
    GrowShadowStack(thr);
#endif
  thr->shadow_stack_pos[0] = pc;
  thr->shadow_stack_pos++;
}

ALWAYS_INLINE USED
void FuncExit(ThreadState *thr) {
  StatInc(thr, StatFuncExit);
  DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.tid());
  if (kCollectHistory) {
    thr->fast_state.IncrementEpoch();
    TraceAddEvent(thr, thr->fast_state, EventTypeFuncExit, 0);
  }

  DCHECK_GT(thr->shadow_stack_pos, thr->shadow_stack);
#ifndef TSAN_GO
  DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
#endif
  thr->shadow_stack_pos--;
}

void ThreadIgnoreBegin(ThreadState *thr, uptr pc) {
  DPrintf("#%d: ThreadIgnoreBegin\n", thr->tid);
  thr->ignore_reads_and_writes++;
  CHECK_GT(thr->ignore_reads_and_writes, 0);
  thr->fast_state.SetIgnoreBit();
#ifndef TSAN_GO
  if (!ctx->after_multithreaded_fork)
    thr->mop_ignore_set.Add(CurrentStackId(thr, pc));
#endif
}

void ThreadIgnoreEnd(ThreadState *thr, uptr pc) {
  DPrintf("#%d: ThreadIgnoreEnd\n", thr->tid);
  thr->ignore_reads_and_writes--;
  CHECK_GE(thr->ignore_reads_and_writes, 0);
  if (thr->ignore_reads_and_writes == 0) {
    thr->fast_state.ClearIgnoreBit();
#ifndef TSAN_GO
    thr->mop_ignore_set.Reset();
#endif
  }
}

void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc) {
  DPrintf("#%d: ThreadIgnoreSyncBegin\n", thr->tid);
  thr->ignore_sync++;
  CHECK_GT(thr->ignore_sync, 0);
#ifndef TSAN_GO
  if (!ctx->after_multithreaded_fork)
    thr->sync_ignore_set.Add(CurrentStackId(thr, pc));
#endif
}

void ThreadIgnoreSyncEnd(ThreadState *thr, uptr pc) {
  DPrintf("#%d: ThreadIgnoreSyncEnd\n", thr->tid);
  thr->ignore_sync--;
  CHECK_GE(thr->ignore_sync, 0);
#ifndef TSAN_GO
  if (thr->ignore_sync == 0)
    thr->sync_ignore_set.Reset();
#endif
}

bool MD5Hash::operator==(const MD5Hash &other) const {
  return hash[0] == other.hash[0] && hash[1] == other.hash[1];
}

#if TSAN_DEBUG
void build_consistency_debug() {}
#else
void build_consistency_release() {}
#endif

#if TSAN_COLLECT_STATS
void build_consistency_stats() {}
#else
void build_consistency_nostats() {}
#endif

#if TSAN_SHADOW_COUNT == 1
void build_consistency_shadow1() {}
#elif TSAN_SHADOW_COUNT == 2
void build_consistency_shadow2() {}
#elif TSAN_SHADOW_COUNT == 4
void build_consistency_shadow4() {}
#else
void build_consistency_shadow8() {}
#endif

}  // namespace __tsan

#ifndef TSAN_GO
// Must be included in this file to make sure everything is inlined.
#include "tsan_interface_inl.h"
#endif