//===-- tsan_rtl.cc -------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Main file (entry points) for the TSan run-time.
//===----------------------------------------------------------------------===//

#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_libc.h"
#include "sanitizer_common/sanitizer_stackdepot.h"
#include "sanitizer_common/sanitizer_placement_new.h"
#include "sanitizer_common/sanitizer_symbolizer.h"
#include "tsan_defs.h"
#include "tsan_platform.h"
#include "tsan_rtl.h"
#include "tsan_mman.h"
#include "tsan_suppressions.h"
#include "tsan_symbolize.h"
#include "ubsan/ubsan_init.h"

#ifdef __SSE3__
// <emmintrin.h> transitively includes <stdlib.h>,
// and it's prohibited to include std headers into tsan runtime.
// So we do this dirty trick.
// The two macros below are defined before the include; presumably they are
// the include guards of the malloc helper header, so that it is skipped.
#define _MM_MALLOC_H_INCLUDED
#define __MM_MALLOC_H
#include <emmintrin.h>
typedef __m128i m128;
#endif

// Resume flag: Initialize() busy-waits until this becomes non-zero when the
// stop_on_start flag is set. It is set by __tsan_resume() below.
volatile int __tsan_resumed = 0;

// Sets the resume flag. Declared extern "C" so it has an unmangled name.
extern "C" void __tsan_resume() {
  __tsan_resumed = 1;
}

namespace __tsan {

#if !defined(SANITIZER_GO) && !SANITIZER_MAC
// Per-thread raw storage sized for one ThreadState.
// NOTE(review): presumably this backs cur_thread(); the accessor is not
// defined in this file - confirm against tsan_rtl.h.
THREADLOCAL char cur_thread_placeholder[sizeof(ThreadState)] ALIGNED(64);
#endif
// Raw storage in which Initialize() placement-constructs the global Context.
static char ctx_placeholder[sizeof(Context)] ALIGNED(64);
// Global runtime context; assigned in Initialize().
Context *ctx;

// Can be overridden by a front-end.
54 #ifdef TSAN_EXTERNAL_HOOKS 55 bool OnFinalize(bool failed); 56 void OnInitialize(); 57 #else 58 SANITIZER_WEAK_CXX_DEFAULT_IMPL 59 bool OnFinalize(bool failed) { 60 return failed; 61 } 62 SANITIZER_WEAK_CXX_DEFAULT_IMPL 63 void OnInitialize() {} 64 #endif 65 66 static char thread_registry_placeholder[sizeof(ThreadRegistry)]; 67 68 static ThreadContextBase *CreateThreadContext(u32 tid) { 69 // Map thread trace when context is created. 70 char name[50]; 71 internal_snprintf(name, sizeof(name), "trace %u", tid); 72 MapThreadTrace(GetThreadTrace(tid), TraceSize() * sizeof(Event), name); 73 const uptr hdr = GetThreadTraceHeader(tid); 74 internal_snprintf(name, sizeof(name), "trace header %u", tid); 75 MapThreadTrace(hdr, sizeof(Trace), name); 76 new((void*)hdr) Trace(); 77 // We are going to use only a small part of the trace with the default 78 // value of history_size. However, the constructor writes to the whole trace. 79 // Unmap the unused part. 80 uptr hdr_end = hdr + sizeof(Trace); 81 hdr_end -= sizeof(TraceHeader) * (kTraceParts - TraceParts()); 82 hdr_end = RoundUp(hdr_end, GetPageSizeCached()); 83 if (hdr_end < hdr + sizeof(Trace)) 84 UnmapOrDie((void*)hdr_end, hdr + sizeof(Trace) - hdr_end); 85 void *mem = internal_alloc(MBlockThreadContex, sizeof(ThreadContext)); 86 return new(mem) ThreadContext(tid); 87 } 88 89 #ifndef SANITIZER_GO 90 static const u32 kThreadQuarantineSize = 16; 91 #else 92 static const u32 kThreadQuarantineSize = 64; 93 #endif 94 95 Context::Context() 96 : initialized() 97 , report_mtx(MutexTypeReport, StatMtxReport) 98 , nreported() 99 , nmissed_expected() 100 , thread_registry(new(thread_registry_placeholder) ThreadRegistry( 101 CreateThreadContext, kMaxTid, kThreadQuarantineSize, kMaxTidReuse)) 102 , racy_mtx(MutexTypeRacy, StatMtxRacy) 103 , racy_stacks(MBlockRacyStacks) 104 , racy_addresses(MBlockRacyAddresses) 105 , fired_suppressions_mtx(MutexTypeFired, StatMtxFired) 106 , fired_suppressions(8) { 107 } 108 109 // The objects are 
// allocated in TLS, so one may rely on zero-initialization.
ThreadState::ThreadState(Context *ctx, int tid, int unique_id, u64 epoch,
                         unsigned reuse_count,
                         uptr stk_addr, uptr stk_size,
                         uptr tls_addr, uptr tls_size)
  : fast_state(tid, epoch)
  // Do not touch these, rely on zero initialization,
  // they may be accessed before the ctor.
  // , ignore_reads_and_writes()
  // , ignore_interceptors()
  , clock(tid, reuse_count)
#ifndef SANITIZER_GO
  , jmp_bufs(MBlockJmpBuf)
#endif
  , tid(tid)
  , unique_id(unique_id)
  , stk_addr(stk_addr)
  , stk_size(stk_size)
  , tls_addr(tls_addr)
  , tls_size(tls_size)
#ifndef SANITIZER_GO
  , last_sleep_clock(tid)
#endif
{
}

#ifndef SANITIZER_GO
// Formats one memory profile record (via WriteMemoryProfile) into a 4096-byte
// scratch buffer and writes it to fd. The parameter i is not used here.
static void MemoryProfiler(Context *ctx, fd_t fd, int i) {
  uptr n_threads;
  uptr n_running_threads;
  ctx->thread_registry->GetNumberOfThreads(&n_threads, &n_running_threads);
  InternalScopedBuffer<char> buf(4096);
  WriteMemoryProfile(buf.data(), buf.size(), n_threads, n_running_threads);
  WriteToFile(fd, buf.data(), internal_strlen(buf.data()));
}

// Body of the runtime's background thread. Wakes up every 100 ms until
// ctx->stop_background_thread becomes non-zero and performs the
// flag-controlled maintenance below (shadow flushes, memory profile,
// symbolizer cache flush). The arg parameter is not used.
static void BackgroundThread(void *arg) {
  // This is a non-initialized non-user thread, nothing to see here.
  // We don't use ScopedIgnoreInterceptors, because we want ignores to be
  // enabled even when the thread function exits (e.g. during pthread thread
  // shutdown code).
  cur_thread()->ignore_interceptors++;
  const u64 kMs2Ns = 1000 * 1000;

  // Resolve the profile_memory flag into a file descriptor:
  // "stdout" -> 1, "stderr" -> 2, anything else -> "<value>.<pid>".
  fd_t mprof_fd = kInvalidFd;
  if (flags()->profile_memory && flags()->profile_memory[0]) {
    if (internal_strcmp(flags()->profile_memory, "stdout") == 0) {
      mprof_fd = 1;
    } else if (internal_strcmp(flags()->profile_memory, "stderr") == 0) {
      mprof_fd = 2;
    } else {
      InternalScopedString filename(kMaxPathLength);
      filename.append("%s.%d", flags()->profile_memory, (int)internal_getpid());
      fd_t fd = OpenFile(filename.data(), WrOnly);
      if (fd == kInvalidFd) {
        Printf("ThreadSanitizer: failed to open memory profile file '%s'\n",
            &filename[0]);
      } else {
        mprof_fd = fd;
      }
    }
  }

  u64 last_flush = NanoTime();
  uptr last_rss = 0;
  for (int i = 0;
      atomic_load(&ctx->stop_background_thread, memory_order_relaxed) == 0;
      i++) {
    SleepForMillis(100);
    u64 now = NanoTime();

    // Flush memory if requested.
    if (flags()->flush_memory_ms > 0) {
      if (last_flush + flags()->flush_memory_ms * kMs2Ns < now) {
        VPrintf(1, "ThreadSanitizer: periodic memory flush\n");
        FlushShadowMemory();
        last_flush = NanoTime();
      }
    }
    // GetRSS can be expensive on huge programs, so don't do it every 100ms.
    // NOTE(review): as written, GetRSS() is called on every iteration
    // whenever memory_limit_mb > 0; no additional throttling is visible here.
    if (flags()->memory_limit_mb > 0) {
      uptr rss = GetRSS();
      uptr limit = uptr(flags()->memory_limit_mb) << 20;
      VPrintf(1, "ThreadSanitizer: memory flush check"
                 " RSS=%llu LAST=%llu LIMIT=%llu\n",
              (u64)rss >> 20, (u64)last_rss >> 20, (u64)limit >> 20);
      // Flush when RSS exceeds the midpoint between the RSS recorded on the
      // previous iteration and the limit.
      if (2 * rss > limit + last_rss) {
        VPrintf(1, "ThreadSanitizer: flushing memory due to RSS\n");
        FlushShadowMemory();
        rss = GetRSS();
        VPrintf(1, "ThreadSanitizer: memory flushed RSS=%llu\n", (u64)rss>>20);
      }
      last_rss = rss;
    }

    // Write memory profile if requested.
    if (mprof_fd != kInvalidFd)
      MemoryProfiler(ctx, mprof_fd, i);

    // Flush symbolizer cache if requested.
    if (flags()->flush_symbolizer_ms > 0) {
      u64 last = atomic_load(&ctx->last_symbolize_time_ns,
                             memory_order_relaxed);
      if (last != 0 && last + flags()->flush_symbolizer_ms * kMs2Ns < now) {
        Lock l(&ctx->report_mtx);
        SpinMutexLock l2(&CommonSanitizerReportMutex);
        SymbolizeFlush();
        atomic_store(&ctx->last_symbolize_time_ns, 0, memory_order_relaxed);
      }
    }
  }
}

// Starts BackgroundThread and records its handle in the context.
static void StartBackgroundThread() {
  ctx->background_thread = internal_start_thread(&BackgroundThread, 0);
}

#ifndef __mips__
// Asks the background thread to stop, joins it and clears the handle.
// Registered as the sandboxing callback in Initialize().
static void StopBackgroundThread() {
  atomic_store(&ctx->stop_background_thread, 1, memory_order_relaxed);
  internal_join_thread(ctx->background_thread);
  ctx->background_thread = 0;
}
#endif
#endif

// Passes the shadow range that corresponds to the application range
// [addr, addr + size) to FlushUnneededShadowMemory.
void DontNeedShadowFor(uptr addr, uptr size) {
  uptr shadow_beg = MemToShadow(addr);
  uptr shadow_end = MemToShadow(addr + size);
  FlushUnneededShadowMemory(shadow_beg, shadow_end - shadow_beg);
}

// Maps shadow memory for the application range [addr, addr + size) and the
// corresponding meta shadow, the latter rounded out to 64K boundaries.
// Uses function-local static state to remember what has been mapped already.
void MapShadow(uptr addr, uptr size) {
  // Global data is not 64K aligned, but there are no adjacent mappings,
  // so we can get away with unaligned mapping.
  // CHECK_EQ(addr, addr & ~((64 << 10) - 1));  // windows wants 64K alignment
  MmapFixedNoReserve(MemToShadow(addr), size * kShadowMultiplier, "shadow");

  // Meta shadow is 2:1, so tread carefully.
  static bool data_mapped = false;
  static uptr mapped_meta_end = 0;
  uptr meta_begin = (uptr)MemToMeta(addr);
  uptr meta_end = (uptr)MemToMeta(addr + size);
  meta_begin = RoundDownTo(meta_begin, 64 << 10);
  meta_end = RoundUpTo(meta_end, 64 << 10);
  if (!data_mapped) {
    // First call maps data+bss.
    data_mapped = true;
    MmapFixedNoReserve(meta_begin, meta_end - meta_begin, "meta shadow");
  } else {
    // Mapping continuous heap.
    // Windows wants 64K alignment.
    // (Both values were already rounded above, so these two statements
    // leave them unchanged.)
    meta_begin = RoundDownTo(meta_begin, 64 << 10);
    meta_end = RoundUpTo(meta_end, 64 << 10);
    // Nothing to do if the requested range is already covered.
    if (meta_end <= mapped_meta_end)
      return;
    // Do not remap the part that has been mapped by a previous call.
    if (meta_begin < mapped_meta_end)
      meta_begin = mapped_meta_end;
    MmapFixedNoReserve(meta_begin, meta_end - meta_begin, "meta shadow");
    mapped_meta_end = meta_end;
  }
  VPrintf(2, "mapped meta shadow for (%p-%p) at (%p-%p)\n",
      addr, addr+size, meta_begin, meta_end);
}

// Maps [addr, addr + size) for thread trace data under the given mapping
// name. The range must lie within [TraceMemBeg(), TraceMemEnd()] and addr
// must be 64K-aligned. Dies if the mapping does not land at addr.
void MapThreadTrace(uptr addr, uptr size, const char *name) {
  DPrintf("#0: Mapping trace at %p-%p(0x%zx)\n", addr, addr + size, size);
  CHECK_GE(addr, TraceMemBeg());
  CHECK_LE(addr + size, TraceMemEnd());
  CHECK_EQ(addr, addr & ~((64 << 10) - 1));  // windows wants 64K alignment
  uptr addr1 = (uptr)MmapFixedNoReserve(addr, size, name);
  if (addr1 != addr) {
    Printf("FATAL: ThreadSanitizer can not mmap thread trace (%p/%p->%p)\n",
        addr, size, addr1);
    Die();
  }
}

// Sanity-checks the app -> shadow/meta address translation. For every region
// reported by GetUserRegion it samples points at quarter-region steps (each
// point together with its two immediate neighbours) and CHECKs that the
// translations land in the expected memory classes and that
// shadow -> app maps back to the start of the containing shadow cell.
static void CheckShadowMapping() {
  uptr beg, end;
  for (int i = 0; GetUserRegion(i, &beg, &end); i++) {
    VPrintf(3, "checking shadow region %p-%p\n", beg, end);
    for (uptr p0 = beg; p0 <= end; p0 += (end - beg) / 4) {
      for (int x = -1; x <= 1; x++) {
        const uptr p = p0 + x;
        // Skip probe addresses that fall outside of [beg, end).
        if (p < beg || p >= end)
          continue;
        const uptr s = MemToShadow(p);
        const uptr m = (uptr)MemToMeta(p);
        VPrintf(3, " checking pointer %p: shadow=%p meta=%p\n", p, s, m);
        CHECK(IsAppMem(p));
        CHECK(IsShadowMem(s));
        CHECK_EQ(p & ~(kShadowCell - 1), ShadowToMem(s));
        CHECK(IsMetaMem(m));
      }
    }
  }
}

// One-time runtime initialization. Subsequent calls return immediately.
// Constructs the global Context, parses flags, initializes the runtime
// subsystems in the order below, creates and starts thread 0 on thr, and
// finally calls the OnInitialize() hook.
void Initialize(ThreadState *thr) {
  // Thread safe because done before all threads exist.
  static bool is_initialized = false;
  if (is_initialized)
    return;
  is_initialized = true;
  // We are not ready to handle interceptors yet.
  ScopedIgnoreInterceptors ignore;
  SanitizerToolName = "ThreadSanitizer";
  // Install tool-specific callbacks in sanitizer_common.
  SetCheckFailedCallback(TsanCheckFailed);

  ctx = new(ctx_placeholder) Context;
  const char *options = GetEnv(kTsanOptionsEnv);
  CacheBinaryName();
  InitializeFlags(&ctx->flags, options);
  AvoidCVE_2016_2143();
  InitializePlatformEarly();
#ifndef SANITIZER_GO
  // Re-exec ourselves if we need to set additional env or command line args.
  MaybeReexec();

  InitializeAllocator();
  ReplaceSystemMalloc();
#endif
  if (common_flags()->detect_deadlocks)
    ctx->dd = DDetector::Create(flags());
  Processor *proc = ProcCreate();
  ProcWire(proc, thr);
  InitializeInterceptors();
  CheckShadowMapping();
  InitializePlatform();
  InitializeMutex();
  InitializeDynamicAnnotations();
#ifndef SANITIZER_GO
  InitializeShadowMemory();
  InitializeAllocatorLate();
#endif
  // Setup correct file descriptor for error reports.
  __sanitizer_set_report_path(common_flags()->log_path);
  InitializeSuppressions();
#ifndef SANITIZER_GO
  InitializeLibIgnore();
  Symbolizer::GetOrInit()->AddHooks(EnterSymbolizer, ExitSymbolizer);
  // On MIPS, TSan initialization is run before
  // __pthread_initialize_minimal_internal() is finished, so we can not spawn
  // new threads.
#ifndef __mips__
  StartBackgroundThread();
  SetSandboxingCallback(StopBackgroundThread);
#endif
#endif

  VPrintf(1, "***** Running under ThreadSanitizer v2 (pid %d) *****\n",
          (int)internal_getpid());

  // Initialize thread 0.
  int tid = ThreadCreate(thr, 0, 0, true);
  CHECK_EQ(tid, 0);
  ThreadStart(thr, tid, internal_getpid());
#if TSAN_CONTAINS_UBSAN
  __ubsan::InitAsPlugin();
#endif
  ctx->initialized = true;

#ifndef SANITIZER_GO
  Symbolizer::LateInitialize();
#endif

  if (flags()->stop_on_start) {
    Printf("ThreadSanitizer is suspended at startup (pid %d)."
           " Call __tsan_resume().\n",
           (int)internal_getpid());
    // Busy-wait until __tsan_resume() sets the flag.
    while (__tsan_resumed == 0) {}
  }

  OnInitialize();
}

// Runtime finalization. Prints summary information and returns
// common_flags()->exitcode if the run is considered failed (warnings were
// reported, expected races were missed, or the OnFinalize() hook says so),
// otherwise 0.
int Finalize(ThreadState *thr) {
  bool failed = false;

  if (flags()->atexit_sleep_ms > 0 && ThreadCount(thr) > 1)
    SleepForMillis(flags()->atexit_sleep_ms);

  // Wait for pending reports.
  ctx->report_mtx.Lock();
  CommonSanitizerReportMutex.Lock();
  CommonSanitizerReportMutex.Unlock();
  ctx->report_mtx.Unlock();

#ifndef SANITIZER_GO
  if (Verbosity()) AllocatorPrintStats();
#endif

  ThreadFinalize(thr);

  if (ctx->nreported) {
    failed = true;
#ifndef SANITIZER_GO
    Printf("ThreadSanitizer: reported %d warnings\n", ctx->nreported);
#else
    Printf("Found %d data race(s)\n", ctx->nreported);
#endif
  }

  if (ctx->nmissed_expected) {
    failed = true;
    Printf("ThreadSanitizer: missed %d expected races\n",
        ctx->nmissed_expected);
  }

  if (common_flags()->print_suppressions)
    PrintMatchedSuppressions();
#ifndef SANITIZER_GO
  if (flags()->print_benign)
    PrintMatchedBenignRaces();
#endif

  // Give the hook the final say on the failure status.
  failed = OnFinalize(failed);

#if TSAN_COLLECT_STATS
  StatAggregate(ctx->stat, thr->stat);
  StatOutput(ctx->stat);
#endif

  return failed ? common_flags()->exitcode : 0;
}

#ifndef SANITIZER_GO
// Fork hook, before the fork: takes the thread registry lock and then the
// report mutex. The two *After hooks below release them in reverse order.
void ForkBefore(ThreadState *thr, uptr pc) {
  ctx->thread_registry->Lock();
  ctx->report_mtx.Lock();
}

// Fork hook, in the parent after the fork: releases the locks taken by
// ForkBefore.
void ForkParentAfter(ThreadState *thr, uptr pc) {
  ctx->report_mtx.Unlock();
  ctx->thread_registry->Unlock();
}

// Fork hook, in the child after the fork: releases the locks taken by
// ForkBefore, then either restarts the background thread (if the registry
// reports exactly one alive thread) or switches the runtime into an
// ignore-everything mode.
void ForkChildAfter(ThreadState *thr, uptr pc) {
  ctx->report_mtx.Unlock();
  ctx->thread_registry->Unlock();

  uptr nthread = 0;
  ctx->thread_registry->GetNumberOfThreads(0, 0, &nthread /* alive threads */);
  VPrintf(1, "ThreadSanitizer: forked new process with pid %d,"
      " parent had %d threads\n", (int)internal_getpid(), (int)nthread);
  if (nthread == 1) {
    StartBackgroundThread();
  } else {
    // We've just forked a multi-threaded process. We cannot reasonably function
    // after that (some mutexes may be locked before fork). So just enable
    // ignores for everything in the hope that we will exec soon.
    ctx->after_multithreaded_fork = true;
    thr->ignore_interceptors++;
    ThreadIgnoreBegin(thr, pc);
    ThreadIgnoreSyncBegin(thr, pc);
  }
}
#endif

#ifdef SANITIZER_GO
// Doubles the capacity of thr's shadow stack: allocates a new array, copies
// the old contents, frees the old array and repoints the three stack
// pointers. The new position is set to the old capacity (newstack + sz).
NOINLINE
void GrowShadowStack(ThreadState *thr) {
  const int sz = thr->shadow_stack_end - thr->shadow_stack;
  const int newsz = 2 * sz;
  uptr *newstack = (uptr*)internal_alloc(MBlockShadowStack,
      newsz * sizeof(uptr));
  internal_memcpy(newstack, thr->shadow_stack, sz * sizeof(uptr));
  internal_free(thr->shadow_stack);
  thr->shadow_stack = newstack;
  thr->shadow_stack_pos = newstack + sz;
  thr->shadow_stack_end = newstack + newsz;
}
#endif

// Puts the thread's current shadow stack into the stack depot and returns
// the resulting id. If pc is non-zero it is pushed on the shadow stack for
// the duration of the call, so it becomes the top frame of the stored stack.
// Returns 0 if the thread is not initialized yet.
u32 CurrentStackId(ThreadState *thr, uptr pc) {
  if (!thr->is_inited)  // May happen during bootstrap.
    return 0;
  if (pc != 0) {
#ifndef SANITIZER_GO
    DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
#else
    if (thr->shadow_stack_pos == thr->shadow_stack_end)
      GrowShadowStack(thr);
#endif
    thr->shadow_stack_pos[0] = pc;
    thr->shadow_stack_pos++;
  }
  u32 id = StackDepotPut(
      StackTrace(thr->shadow_stack, thr->shadow_stack_pos - thr->shadow_stack));
  // Undo the temporary push.
  if (pc != 0)
    thr->shadow_stack_pos--;
  return id;
}

// Fills in the header of the trace part selected by the thread's current
// epoch: records the epoch, the current stack and the current mutex set.
// Runs under the trace mutex with thr->nomalloc raised.
void TraceSwitch(ThreadState *thr) {
  thr->nomalloc++;
  Trace *thr_trace = ThreadTrace(thr->tid);
  Lock l(&thr_trace->mtx);
  unsigned trace = (thr->fast_state.epoch() / kTracePartSize) % TraceParts();
  TraceHeader *hdr = &thr_trace->headers[trace];
  hdr->epoch0 = thr->fast_state.epoch();
  ObtainCurrentStack(thr, 0, &hdr->stack0);
  hdr->mset0 = thr->mset;
  thr->nomalloc--;
}

// Returns the Trace object located at the trace header address of tid.
Trace *ThreadTrace(int tid) {
  return (Trace*)GetThreadTraceHeader(tid);
}

// Returns the event stored at the thread's current trace position.
uptr TraceTopPC(ThreadState *thr) {
  Event *events = (Event*)GetThreadTrace(thr->tid);
  uptr pc = events[thr->fast_state.GetTracePos()];
  return pc;
}

// Trace size, computed as 2^(kTracePartSizeBits + history_size + 1).
uptr TraceSize() {
  return (uptr)(1ull << (kTracePartSizeBits + flags()->history_size + 1));
}

// Number of parts in the trace: TraceSize() / kTracePartSize.
uptr TraceParts() {
  return TraceSize() / kTracePartSize;
}

#ifndef SANITIZER_GO
// extern "C" entry point: TraceSwitch for the current thread.
extern "C" void __tsan_trace_switch() {
  TraceSwitch(cur_thread());
}

// extern "C" entry point: ReportRace for the current thread.
// Invoked from HandleRace() through HACKY_CALL.
extern "C" void __tsan_report_race() {
  ReportRace(cur_thread());
}
#endif

// Reads one shadow word with a relaxed atomic load.
ALWAYS_INLINE
Shadow LoadShadow(u64 *p) {
  u64 raw = atomic_load((atomic_uint64_t*)p, memory_order_relaxed);
  return Shadow(raw);
}

// Writes one shadow word with a relaxed atomic store.
ALWAYS_INLINE
void StoreShadow(u64 *sp, u64 s) {
  atomic_store((atomic_uint64_t*)sp, s, memory_order_relaxed);
}

// Stores *s into the shadow slot sp and then zeroes *s. A zero *s is how
// MemoryAccessImpl1 recognizes that the access has already been stored.
ALWAYS_INLINE
void StoreIfNotYetStored(u64 *sp, u64 *s) {
  StoreShadow(sp, *s);
  *s = 0;
}

// Records the two racing shadow values and the shadow address in the thread
// state and invokes race reporting.
ALWAYS_INLINE
void HandleRace(ThreadState *thr, u64 *shadow_mem,
                              Shadow cur, Shadow old) {
  thr->racy_state[0] = cur.raw();
  thr->racy_state[1] = old.raw();
  thr->racy_shadow_addr = shadow_mem;
#ifndef SANITIZER_GO
  HACKY_CALL(__tsan_report_race);
#else
  ReportRace(thr);
#endif
}

// Returns true if thr's clock entry for the thread that made the old access
// is at least the epoch of that access.
static inline bool HappensBefore(Shadow old, ThreadState *thr) {
  return thr->clock.get(old.TidWithIgnore()) >= old.epoch();
}

// Slow path of a memory access: updates statistics, runs the per-slot update
// code from tsan_update_shadow_word_inl.h over the 4 shadow slots, and, if
// the access has not been stored by then, overwrites a slot chosen by the
// current epoch. The RACE label at the end hands over to HandleRace.
// NOTE(review): the included header is not visible here; it presumably uses
// the locals of this function (idx, store_word, old, ...) and jumps to RACE.
ALWAYS_INLINE
void MemoryAccessImpl1(ThreadState *thr, uptr addr,
    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
    u64 *shadow_mem, Shadow cur) {
  StatInc(thr, StatMop);
  StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
  StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));

  // This potentially can live in an MMX/SSE scratch register.
  // The required intrinsics are:
  // __m128i _mm_move_epi64(__m128i*);
  // _mm_storel_epi64(u64*, __m128i);
  u64 store_word = cur.raw();

  // scan all the shadow values and dispatch to 4 categories:
  // same, replace, candidate and race (see comments below).
  // we consider only 3 cases regarding access sizes:
  // equal, intersect and not intersect. initially I considered
  // larger and smaller as well, it allowed to replace some
  // 'candidates' with 'same' or 'replace', but I think
  // it's just not worth it (performance- and complexity-wise).

  Shadow old(0);

  // In release mode we manually unroll the loop,
  // because empirically gcc generates better code this way.
  // However, we can't afford unrolling in debug mode, because the function
  // consumes almost 4K of stack. Gtest gives only 4K of stack to death test
  // threads, which is not enough for the unrolled loop.
#if SANITIZER_DEBUG
  for (int idx = 0; idx < 4; idx++) {
#include "tsan_update_shadow_word_inl.h"
  }
#else
  int idx = 0;
#include "tsan_update_shadow_word_inl.h"
  idx = 1;
#include "tsan_update_shadow_word_inl.h"
  idx = 2;
#include "tsan_update_shadow_word_inl.h"
  idx = 3;
#include "tsan_update_shadow_word_inl.h"
#endif

  // we did not find any races and had already stored
  // the current access info, so we are done
  if (LIKELY(store_word == 0))
    return;
  // choose a random candidate slot and replace it
  StoreShadow(shadow_mem + (cur.epoch() % kShadowCnt), store_word);
  StatInc(thr, StatShadowReplace);
  return;
 RACE:
  HandleRace(thr, shadow_mem, cur, old);
  return;
}

// Handles an access of arbitrary size and alignment by splitting it into
// 8/4/2/1-byte MemoryAccess calls. A piece of a given size is used only if
// enough bytes remain and the piece does not cross an 8-byte boundary.
void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr,
    int size, bool kAccessIsWrite, bool kIsAtomic) {
  while (size) {
    int size1 = 1;
    int kAccessSizeLog = kSizeLog1;
    if (size >= 8 && (addr & ~7) == ((addr + 7) & ~7)) {
      size1 = 8;
      kAccessSizeLog = kSizeLog8;
    } else if (size >= 4 && (addr & ~7) == ((addr + 3) & ~7)) {
      size1 = 4;
      kAccessSizeLog = kSizeLog4;
    } else if (size >= 2 && (addr & ~7) == ((addr + 1) & ~7)) {
      size1 = 2;
      kAccessSizeLog = kSizeLog2;
    }
    MemoryAccess(thr, pc, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic);
    addr += size1;
    size -= size1;
  }
}

// Portable check whether any of the kShadowCnt slots at s already describes
// the access a: same addr0/size, same tid, epoch greater than sync_epoch,
// same atomicity, and the stored access is not "more read" than the current
// one (old.IsRead() <= cur.IsRead()). The is_write parameter is not used by
// this body.
ALWAYS_INLINE
bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
  Shadow cur(a);
  for (uptr i = 0; i < kShadowCnt; i++) {
    Shadow old(LoadShadow(&s[i]));
    if (Shadow::Addr0AndSizeAreEqual(cur, old) &&
        old.TidWithIgnore() == cur.TidWithIgnore() &&
        old.epoch() > sync_epoch &&
        old.IsAtomic() == cur.IsAtomic() &&
        old.IsRead() <= cur.IsRead())
      return true;
  }
  return false;
}

#if defined(__SSE3__)
// Builds a 128-bit vector from 32-bit lanes i0, i1 of v0 (low half) and
// lanes i2, i3 of v1 (high half) using _mm_shuffle_ps.
#define SHUF(v0, v1, i0, i1, i2, i3) _mm_castps_si128(_mm_shuffle_ps( \
    _mm_castsi128_ps(v0), _mm_castsi128_ps(v1), \
    (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))
// SSE variant of the same-access check; examines all 4 shadow slots at once.
// Uses aligned 128-bit loads, so s must be 16-byte aligned.
ALWAYS_INLINE
bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
  // This is an optimized version of ContainsSameAccessSlow.
  // load current access into access[0:63]
  const m128 access     = _mm_cvtsi64_si128(a);
  // duplicate high part of access in addr0:
  // addr0[0:31]        = access[32:63]
  // addr0[32:63]       = access[32:63]
  // addr0[64:95]       = access[32:63]
  // addr0[96:127]      = access[32:63]
  const m128 addr0      = SHUF(access, access, 1, 1, 1, 1);
  // load 4 shadow slots
  const m128 shadow0    = _mm_load_si128((__m128i*)s);
  const m128 shadow1    = _mm_load_si128((__m128i*)s + 1);
  // load high parts of 4 shadow slots into addr_vect:
  // addr_vect[0:31]    = shadow0[32:63]
  // addr_vect[32:63]   = shadow0[96:127]
  // addr_vect[64:95]   = shadow1[32:63]
  // addr_vect[96:127]  = shadow1[96:127]
  m128 addr_vect        = SHUF(shadow0, shadow1, 1, 3, 1, 3);
  if (!is_write) {
    // set IsRead bit in addr_vect
    const m128 rw_mask1 = _mm_cvtsi64_si128(1<<15);
    const m128 rw_mask  = SHUF(rw_mask1, rw_mask1, 0, 0, 0, 0);
    addr_vect           = _mm_or_si128(addr_vect, rw_mask);
  }
  // addr0 == addr_vect?
  const m128 addr_res   = _mm_cmpeq_epi32(addr0, addr_vect);
  // epoch1[0:63]       = sync_epoch
  const m128 epoch1     = _mm_cvtsi64_si128(sync_epoch);
  // epoch[0:31]        = sync_epoch[0:31]
  // epoch[32:63]       = sync_epoch[0:31]
  // epoch[64:95]       = sync_epoch[0:31]
  // epoch[96:127]      = sync_epoch[0:31]
  const m128 epoch      = SHUF(epoch1, epoch1, 0, 0, 0, 0);
  // load low parts of shadow cell epochs into epoch_vect:
  // epoch_vect[0:31]   = shadow0[0:31]
  // epoch_vect[32:63]  = shadow0[64:95]
  // epoch_vect[64:95]  = shadow1[0:31]
  // epoch_vect[96:127] = shadow1[64:95]
  const m128 epoch_vect = SHUF(shadow0, shadow1, 0, 2, 0, 2);
  // epoch_vect > sync_epoch? (signed 32-bit greater-than, per lane)
  const m128 epoch_res  = _mm_cmpgt_epi32(epoch_vect, epoch);
  // addr_res & epoch_res
  const m128 res        = _mm_and_si128(addr_res, epoch_res);
  // mask[0] = res[7]
  // mask[1] = res[15]
  // ...
  // mask[15] = res[127]
  const int mask        = _mm_movemask_epi8(res);
  return mask != 0;
}
#endif

// Dispatches to the SSE implementation when __SSE3__ is defined and to the
// portable one otherwise.
ALWAYS_INLINE
bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
#if defined(__SSE3__)
  bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write);
  // NOTE: this check can fail if the shadow is concurrently mutated
  // by other threads. But it still can be useful if you modify
  // ContainsSameAccessFast and want to ensure that it's not completely broken.
  // DCHECK_EQ(res, ContainsSameAccessSlow(s, a, sync_epoch, is_write));
  return res;
#else
  return ContainsSameAccessSlow(s, a, sync_epoch, is_write);
#endif
}

// Main entry point for a single memory access of size 1 << kAccessSizeLog at
// addr. Returns early (only updating statistics) for .rodata accesses, for
// threads with the ignore bit set, and when the shadow already contains the
// same access; otherwise advances the epoch, adds a trace event (when
// history is collected) and runs MemoryAccessImpl1.
ALWAYS_INLINE USED
void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic) {
  u64 *shadow_mem = (u64*)MemToShadow(addr);
  DPrintf2("#%d: MemoryAccess: @%p %p size=%d"
      " is_write=%d shadow_mem=%p {%zx, %zx, %zx, %zx}\n",
      (int)thr->fast_state.tid(), (void*)pc, (void*)addr,
      (int)(1 << kAccessSizeLog), kAccessIsWrite, shadow_mem,
      (uptr)shadow_mem[0], (uptr)shadow_mem[1],
      (uptr)shadow_mem[2], (uptr)shadow_mem[3]);
#if SANITIZER_DEBUG
  if (!IsAppMem(addr)) {
    Printf("Access to non app mem %zx\n", addr);
    DCHECK(IsAppMem(addr));
  }
  if (!IsShadowMem((uptr)shadow_mem)) {
    Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
    DCHECK(IsShadowMem((uptr)shadow_mem));
  }
#endif

  if (kCppMode && *shadow_mem == kShadowRodata) {
    // Access to .rodata section, no races here.
    // Measurements show that it can be 10-20% of all memory accesses.
    StatInc(thr, StatMop);
    StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
    StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
    StatInc(thr, StatMopRodata);
    return;
  }

  // Work on a local copy of the fast state; it is written back below only
  // if the epoch is advanced.
  FastState fast_state = thr->fast_state;
  if (fast_state.GetIgnoreBit()) {
    StatInc(thr, StatMop);
    StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
    StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
    StatInc(thr, StatMopIgnored);
    return;
  }

  // Build the shadow value describing the current access.
  Shadow cur(fast_state);
  cur.SetAddr0AndSizeLog(addr & 7, kAccessSizeLog);
  cur.SetWrite(kAccessIsWrite);
  cur.SetAtomic(kIsAtomic);

  if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
      thr->fast_synch_epoch, kAccessIsWrite))) {
    StatInc(thr, StatMop);
    StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
    StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
    StatInc(thr, StatMopSame);
    return;
  }

  if (kCollectHistory) {
    fast_state.IncrementEpoch();
    thr->fast_state = fast_state;
    TraceAddEvent(thr, fast_state, EventTypeMop, pc);
    // Keep the epoch in cur in step with the thread's fast state.
    cur.IncrementEpoch();
  }

  MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
      shadow_mem, cur);
}

// Called by MemoryAccessRange in tsan_rtl_thread.cc
// Variant of MemoryAccess for callers that have already computed shadow_mem
// and cur: performs only the same-access check and the slow path.
ALWAYS_INLINE USED
void MemoryAccessImpl(ThreadState *thr, uptr addr,
    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
    u64 *shadow_mem, Shadow cur) {
  if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
      thr->fast_synch_epoch, kAccessIsWrite))) {
    StatInc(thr, StatMop);
    StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
    StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
    StatInc(thr, StatMopSame);
    return;
  }

  MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
      shadow_mem, cur);
}

// Sets the shadow for the application range [addr, addr + size): in every
// shadow cell the first slot gets val and the remaining slots get 0.
// A leading partial cell is skipped; the size is rounded up to whole cells.
// Small ranges (and all ranges in Go mode) are written directly; for large
// ranges only the beginning and the end are written and the page-aligned
// middle is unmapped and mapped again. thr and pc are unused.
static void MemoryRangeSet(ThreadState *thr, uptr pc, uptr addr, uptr size,
                           u64 val) {
  (void)thr;
  (void)pc;
  if (size == 0)
    return;
  // FIXME: fix me.
  // Skip forward to the next cell boundary if addr is not cell-aligned.
  uptr offset = addr % kShadowCell;
  if (offset) {
    offset = kShadowCell - offset;
    if (size <= offset)
      return;
    addr += offset;
    size -= offset;
  }
  DCHECK_EQ(addr % 8, 0);
  // If a user passes some insane arguments (memset(0)),
  // let it just crash as usual.
  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
    return;
  // Don't want to touch lots of shadow memory.
  // If a program maps 10MB stack, there is no need to reset the whole range.
  size = (size + (kShadowCell - 1)) & ~(kShadowCell - 1);
  // UnmapOrDie/MmapFixedNoReserve does not work on Windows,
  // so we do it only for C/C++.
  if (kGoMode || size < common_flags()->clear_shadow_mmap_threshold) {
    u64 *p = (u64*)MemToShadow(addr);
    CHECK(IsShadowMem((uptr)p));
    CHECK(IsShadowMem((uptr)(p + size * kShadowCnt / kShadowCell - 1)));
    // FIXME: may overwrite a part outside the region
    for (uptr i = 0; i < size / kShadowCell * kShadowCnt;) {
      p[i++] = val;
      for (uptr j = 1; j < kShadowCnt; j++)
        p[i++] = 0;
    }
  } else {
    // The region is big, reset only beginning and end.
    const uptr kPageSize = GetPageSizeCached();
    u64 *begin = (u64*)MemToShadow(addr);
    u64 *end = begin + size / kShadowCell * kShadowCnt;
    u64 *p = begin;
    // Set at least first kPageSize/2 to page boundary.
    while ((p < begin + kPageSize / kShadowSize / 2) || ((uptr)p % kPageSize)) {
      *p++ = val;
      for (uptr j = 1; j < kShadowCnt; j++)
        *p++ = 0;
    }
    // Reset middle part.
    u64 *p1 = p;
    p = RoundDown(end, kPageSize);
    UnmapOrDie((void*)p1, (uptr)p - (uptr)p1);
    MmapFixedNoReserve((uptr)p1, (uptr)p - (uptr)p1);
    // Set the ending.
    while (p < end) {
      *p++ = val;
      for (uptr j = 1; j < kShadowCnt; j++)
        *p++ = 0;
    }
  }
}

// Resets the shadow of [addr, addr + size) to 0.
void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size) {
  MemoryRangeSet(thr, pc, addr, size, 0);
}

// Handles freeing of [addr, addr + size): first processes the range (capped
// at 1024 bytes) as a write access with thr->is_freeing set, then stamps the
// shadow with a "freed" 8-byte write by the current thread.
void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size) {
  // Processing more than 1k (4k of shadow) is expensive,
  // can cause excessive memory consumption (user does not necessarily touch
  // the whole range) and most likely unnecessary.
  if (size > 1024)
    size = 1024;
  CHECK_EQ(thr->is_freeing, false);
  thr->is_freeing = true;
  MemoryAccessRange(thr, pc, addr, size, true);
  thr->is_freeing = false;
  if (kCollectHistory) {
    thr->fast_state.IncrementEpoch();
    TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
  }
  Shadow s(thr->fast_state);
  s.ClearIgnoreBit();
  s.MarkAsFreed();
  s.SetWrite(true);
  s.SetAddr0AndSizeLog(0, 3);
  MemoryRangeSet(thr, pc, addr, size, s.raw());
}

// Stamps the shadow of [addr, addr + size) with an 8-byte write by the
// current thread without checking the previous shadow contents.
void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size) {
  if (kCollectHistory) {
    thr->fast_state.IncrementEpoch();
    TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
  }
  Shadow s(thr->fast_state);
  s.ClearIgnoreBit();
  s.SetWrite(true);
  s.SetAddr0AndSizeLog(0, 3);
  MemoryRangeSet(thr, pc, addr, size, s.raw());
}

// Function entry: adds a trace event (when history is collected) and pushes
// pc on the thread's shadow stack. In Go mode the stack is grown on demand.
ALWAYS_INLINE USED
void FuncEntry(ThreadState *thr, uptr pc) {
  StatInc(thr, StatFuncEnter);
  DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.tid(), (void*)pc);
  if (kCollectHistory) {
    thr->fast_state.IncrementEpoch();
    TraceAddEvent(thr, thr->fast_state, EventTypeFuncEnter, pc);
  }

  // Shadow stack maintenance can be replaced with
  // stack unwinding during trace switch (which presumably must be faster).
  DCHECK_GE(thr->shadow_stack_pos, thr->shadow_stack);
#ifndef SANITIZER_GO
  DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
#else
  if (thr->shadow_stack_pos == thr->shadow_stack_end)
    GrowShadowStack(thr);
#endif
  thr->shadow_stack_pos[0] = pc;
  thr->shadow_stack_pos++;
}

// Function exit: adds a trace event (when history is collected) and pops the
// top entry of the thread's shadow stack.
ALWAYS_INLINE USED
void FuncExit(ThreadState *thr) {
  StatInc(thr, StatFuncExit);
  DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.tid());
  if (kCollectHistory) {
    thr->fast_state.IncrementEpoch();
    TraceAddEvent(thr, thr->fast_state, EventTypeFuncExit, 0);
  }

  DCHECK_GT(thr->shadow_stack_pos, thr->shadow_stack);
#ifndef SANITIZER_GO
  DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
#endif
  thr->shadow_stack_pos--;
}

// Begins (possibly nested) ignoring of memory accesses for thr: bumps the
// nesting counter, sets the ignore bit and, unless running after a
// multi-threaded fork, records the current stack in mop_ignore_set.
void ThreadIgnoreBegin(ThreadState *thr, uptr pc) {
  DPrintf("#%d: ThreadIgnoreBegin\n", thr->tid);
  thr->ignore_reads_and_writes++;
  CHECK_GT(thr->ignore_reads_and_writes, 0);
  thr->fast_state.SetIgnoreBit();
#ifndef SANITIZER_GO
  if (!ctx->after_multithreaded_fork)
    thr->mop_ignore_set.Add(CurrentStackId(thr, pc));
#endif
}

// Ends one level of memory access ignoring; when the nesting counter drops
// to zero the ignore bit is cleared and mop_ignore_set is reset.
void ThreadIgnoreEnd(ThreadState *thr, uptr pc) {
  DPrintf("#%d: ThreadIgnoreEnd\n", thr->tid);
  thr->ignore_reads_and_writes--;
  CHECK_GE(thr->ignore_reads_and_writes, 0);
  if (thr->ignore_reads_and_writes == 0) {
    thr->fast_state.ClearIgnoreBit();
#ifndef SANITIZER_GO
    thr->mop_ignore_set.Reset();
#endif
  }
}

// Begins (possibly nested) ignoring of synchronization for thr and, unless
// running after a multi-threaded fork, records the current stack in
// sync_ignore_set.
void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc) {
  DPrintf("#%d: ThreadIgnoreSyncBegin\n", thr->tid);
  thr->ignore_sync++;
  CHECK_GT(thr->ignore_sync, 0);
#ifndef SANITIZER_GO
  if (!ctx->after_multithreaded_fork)
    thr->sync_ignore_set.Add(CurrentStackId(thr, pc));
#endif
}

// Ends one level of synchronization ignoring; sync_ignore_set is reset when
// the nesting counter drops to zero.
void ThreadIgnoreSyncEnd(ThreadState *thr, uptr pc) {
  DPrintf("#%d: ThreadIgnoreSyncEnd\n", thr->tid);
  thr->ignore_sync--;
  CHECK_GE(thr->ignore_sync, 0);
#ifndef SANITIZER_GO
  if (thr->ignore_sync == 0)
    thr->sync_ignore_set.Reset();
#endif
}

// Two hashes are equal when both 64-bit halves are equal.
bool MD5Hash::operator==(const MD5Hash &other) const {
  return hash[0] == other.hash[0] && hash[1] == other.hash[1];
}

// Exactly one symbol of each pair below is defined, depending on the build
// configuration.
// NOTE(review): presumably referenced from elsewhere so that mixing
// differently configured objects fails at link time - confirm.
#if SANITIZER_DEBUG
void build_consistency_debug() {}
#else
void build_consistency_release() {}
#endif

#if TSAN_COLLECT_STATS
void build_consistency_stats() {}
#else
void build_consistency_nostats() {}
#endif

}  // namespace __tsan

#ifndef SANITIZER_GO
// Must be included in this file to make sure everything is inlined.
#include "tsan_interface_inl.h"
#endif