Home | History | Annotate | Download | only in Instrumentation
      1 //===-- ThreadSanitizer.cpp - race detector -------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file is a part of ThreadSanitizer, a race detector.
     11 //
     12 // The tool is under development, for the details about previous versions see
     13 // http://code.google.com/p/data-race-test
     14 //
     15 // The instrumentation phase is quite simple:
     16 //   - Insert calls to run-time library before every memory access.
     17 //      - Optimizations may apply to avoid instrumenting some of the accesses.
     18 //   - Insert calls at function entry/exit.
     19 // The rest is handled by the run-time library.
     20 //===----------------------------------------------------------------------===//
     21 
     22 #define DEBUG_TYPE "tsan"
     23 
     24 #include "FunctionBlackList.h"
     25 #include "llvm/ADT/SmallSet.h"
     26 #include "llvm/ADT/SmallString.h"
     27 #include "llvm/ADT/SmallVector.h"
     28 #include "llvm/ADT/StringExtras.h"
     29 #include "llvm/Intrinsics.h"
     30 #include "llvm/Function.h"
     31 #include "llvm/LLVMContext.h"
     32 #include "llvm/Metadata.h"
     33 #include "llvm/Module.h"
     34 #include "llvm/Support/CommandLine.h"
     35 #include "llvm/Support/Debug.h"
     36 #include "llvm/Support/IRBuilder.h"
     37 #include "llvm/Support/MathExtras.h"
     38 #include "llvm/Support/raw_ostream.h"
     39 #include "llvm/Target/TargetData.h"
     40 #include "llvm/Transforms/Instrumentation.h"
     41 #include "llvm/Transforms/Utils/ModuleUtils.h"
     42 #include "llvm/Type.h"
     43 
     44 using namespace llvm;
     45 
     46 static cl::opt<std::string>  ClBlackListFile("tsan-blacklist",
     47        cl::desc("Blacklist file"), cl::Hidden);
     48 
     49 static cl::opt<bool> ClPrintStats("tsan-print-stats",
     50        cl::desc("Print ThreadSanitizer instrumentation stats"), cl::Hidden);
     51 
     52 namespace {
     53 
     54 // Stats counters for ThreadSanitizer instrumentation.
     55 struct ThreadSanitizerStats {
     56   size_t NumInstrumentedReads;
     57   size_t NumInstrumentedWrites;
     58   size_t NumOmittedReadsBeforeWrite;
     59   size_t NumAccessesWithBadSize;
     60   size_t NumInstrumentedVtableWrites;
     61   size_t NumOmittedReadsFromConstantGlobals;
     62   size_t NumOmittedReadsFromVtable;
     63 };
     64 
     65 /// ThreadSanitizer: instrument the code in module to find races.
     66 struct ThreadSanitizer : public FunctionPass {
     67   ThreadSanitizer();
     68   bool runOnFunction(Function &F);
     69   bool doInitialization(Module &M);
     70   bool doFinalization(Module &M);
     71   bool instrumentLoadOrStore(Instruction *I);
     72   static char ID;  // Pass identification, replacement for typeid.
     73 
     74  private:
     75   void choseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local,
     76                                      SmallVectorImpl<Instruction*> &All);
     77   bool addrPointsToConstantData(Value *Addr);
     78 
     79   TargetData *TD;
     80   OwningPtr<FunctionBlackList> BL;
     81   // Callbacks to run-time library are computed in doInitialization.
     82   Value *TsanFuncEntry;
     83   Value *TsanFuncExit;
     84   // Accesses sizes are powers of two: 1, 2, 4, 8, 16.
     85   static const size_t kNumberOfAccessSizes = 5;
     86   Value *TsanRead[kNumberOfAccessSizes];
     87   Value *TsanWrite[kNumberOfAccessSizes];
     88   Value *TsanVptrUpdate;
     89 
     90   // Stats are modified w/o synchronization.
     91   ThreadSanitizerStats stats;
     92 };
     93 }  // namespace
     94 
     95 char ThreadSanitizer::ID = 0;
     96 INITIALIZE_PASS(ThreadSanitizer, "tsan",
     97     "ThreadSanitizer: detects data races.",
     98     false, false)
     99 
    100 ThreadSanitizer::ThreadSanitizer()
    101   : FunctionPass(ID),
    102   TD(NULL) {
    103 }
    104 
    105 FunctionPass *llvm::createThreadSanitizerPass() {
    106   return new ThreadSanitizer();
    107 }
    108 
    109 bool ThreadSanitizer::doInitialization(Module &M) {
    110   TD = getAnalysisIfAvailable<TargetData>();
    111   if (!TD)
    112     return false;
    113   BL.reset(new FunctionBlackList(ClBlackListFile));
    114   memset(&stats, 0, sizeof(stats));
    115 
    116   // Always insert a call to __tsan_init into the module's CTORs.
    117   IRBuilder<> IRB(M.getContext());
    118   Value *TsanInit = M.getOrInsertFunction("__tsan_init",
    119                                           IRB.getVoidTy(), NULL);
    120   appendToGlobalCtors(M, cast<Function>(TsanInit), 0);
    121 
    122   // Initialize the callbacks.
    123   TsanFuncEntry = M.getOrInsertFunction("__tsan_func_entry", IRB.getVoidTy(),
    124                                         IRB.getInt8PtrTy(), NULL);
    125   TsanFuncExit = M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(),
    126                                        NULL);
    127   for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
    128     SmallString<32> ReadName("__tsan_read");
    129     ReadName += itostr(1 << i);
    130     TsanRead[i] = M.getOrInsertFunction(ReadName, IRB.getVoidTy(),
    131                                         IRB.getInt8PtrTy(), NULL);
    132     SmallString<32> WriteName("__tsan_write");
    133     WriteName += itostr(1 << i);
    134     TsanWrite[i] = M.getOrInsertFunction(WriteName, IRB.getVoidTy(),
    135                                          IRB.getInt8PtrTy(), NULL);
    136   }
    137   TsanVptrUpdate = M.getOrInsertFunction("__tsan_vptr_update", IRB.getVoidTy(),
    138                                          IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
    139                                          NULL);
    140   return true;
    141 }
    142 
    143 bool ThreadSanitizer::doFinalization(Module &M) {
    144   if (ClPrintStats) {
    145     errs() << "ThreadSanitizerStats " << M.getModuleIdentifier()
    146            << ": wr " << stats.NumInstrumentedWrites
    147            << "; rd " << stats.NumInstrumentedReads
    148            << "; vt " << stats.NumInstrumentedVtableWrites
    149            << "; bs " << stats.NumAccessesWithBadSize
    150            << "; rbw " << stats.NumOmittedReadsBeforeWrite
    151            << "; rcg " << stats.NumOmittedReadsFromConstantGlobals
    152            << "; rvt " << stats.NumOmittedReadsFromVtable
    153            << "\n";
    154   }
    155   return true;
    156 }
    157 
    158 static bool isVtableAccess(Instruction *I) {
    159   if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) {
    160     if (Tag->getNumOperands() < 1) return false;
    161     if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
    162       if (Tag1->getString() == "vtable pointer") return true;
    163     }
    164   }
    165   return false;
    166 }
    167 
    168 bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) {
    169   // If this is a GEP, just analyze its pointer operand.
    170   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr))
    171     Addr = GEP->getPointerOperand();
    172 
    173   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
    174     if (GV->isConstant()) {
    175       // Reads from constant globals can not race with any writes.
    176       stats.NumOmittedReadsFromConstantGlobals++;
    177       return true;
    178     }
    179   } else if(LoadInst *L = dyn_cast<LoadInst>(Addr)) {
    180     if (isVtableAccess(L)) {
    181       // Reads from a vtable pointer can not race with any writes.
    182       stats.NumOmittedReadsFromVtable++;
    183       return true;
    184     }
    185   }
    186   return false;
    187 }
    188 
    189 // Instrumenting some of the accesses may be proven redundant.
    190 // Currently handled:
    191 //  - read-before-write (within same BB, no calls between)
    192 //
    193 // We do not handle some of the patterns that should not survive
    194 // after the classic compiler optimizations.
    195 // E.g. two reads from the same temp should be eliminated by CSE,
    196 // two writes should be eliminated by DSE, etc.
    197 //
    198 // 'Local' is a vector of insns within the same BB (no calls between).
    199 // 'All' is a vector of insns that will be instrumented.
    200 void ThreadSanitizer::choseInstructionsToInstrument(
    201     SmallVectorImpl<Instruction*> &Local,
    202     SmallVectorImpl<Instruction*> &All) {
    203   SmallSet<Value*, 8> WriteTargets;
    204   // Iterate from the end.
    205   for (SmallVectorImpl<Instruction*>::reverse_iterator It = Local.rbegin(),
    206        E = Local.rend(); It != E; ++It) {
    207     Instruction *I = *It;
    208     if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
    209       WriteTargets.insert(Store->getPointerOperand());
    210     } else {
    211       LoadInst *Load = cast<LoadInst>(I);
    212       Value *Addr = Load->getPointerOperand();
    213       if (WriteTargets.count(Addr)) {
    214         // We will write to this temp, so no reason to analyze the read.
    215         stats.NumOmittedReadsBeforeWrite++;
    216         continue;
    217       }
    218       if (addrPointsToConstantData(Addr)) {
    219         // Addr points to some constant data -- it can not race with any writes.
    220         continue;
    221       }
    222     }
    223     All.push_back(I);
    224   }
    225   Local.clear();
    226 }
    227 
    228 bool ThreadSanitizer::runOnFunction(Function &F) {
    229   if (!TD) return false;
    230   if (BL->isIn(F)) return false;
    231   SmallVector<Instruction*, 8> RetVec;
    232   SmallVector<Instruction*, 8> AllLoadsAndStores;
    233   SmallVector<Instruction*, 8> LocalLoadsAndStores;
    234   bool Res = false;
    235   bool HasCalls = false;
    236 
    237   // Traverse all instructions, collect loads/stores/returns, check for calls.
    238   for (Function::iterator FI = F.begin(), FE = F.end();
    239        FI != FE; ++FI) {
    240     BasicBlock &BB = *FI;
    241     for (BasicBlock::iterator BI = BB.begin(), BE = BB.end();
    242          BI != BE; ++BI) {
    243       if (isa<LoadInst>(BI) || isa<StoreInst>(BI))
    244         LocalLoadsAndStores.push_back(BI);
    245       else if (isa<ReturnInst>(BI))
    246         RetVec.push_back(BI);
    247       else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
    248         HasCalls = true;
    249         choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
    250       }
    251     }
    252     choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
    253   }
    254 
    255   // We have collected all loads and stores.
    256   // FIXME: many of these accesses do not need to be checked for races
    257   // (e.g. variables that do not escape, etc).
    258 
    259   // Instrument memory accesses.
    260   for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) {
    261     Res |= instrumentLoadOrStore(AllLoadsAndStores[i]);
    262   }
    263 
    264   // Instrument function entry/exit points if there were instrumented accesses.
    265   if (Res || HasCalls) {
    266     IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
    267     Value *ReturnAddress = IRB.CreateCall(
    268         Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress),
    269         IRB.getInt32(0));
    270     IRB.CreateCall(TsanFuncEntry, ReturnAddress);
    271     for (size_t i = 0, n = RetVec.size(); i < n; ++i) {
    272       IRBuilder<> IRBRet(RetVec[i]);
    273       IRBRet.CreateCall(TsanFuncExit);
    274     }
    275     Res = true;
    276   }
    277   return Res;
    278 }
    279 
    280 bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
    281   IRBuilder<> IRB(I);
    282   bool IsWrite = isa<StoreInst>(*I);
    283   Value *Addr = IsWrite
    284       ? cast<StoreInst>(I)->getPointerOperand()
    285       : cast<LoadInst>(I)->getPointerOperand();
    286   Type *OrigPtrTy = Addr->getType();
    287   Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
    288   assert(OrigTy->isSized());
    289   uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy);
    290   if (TypeSize != 8  && TypeSize != 16 &&
    291       TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
    292     stats.NumAccessesWithBadSize++;
    293     // Ignore all unusual sizes.
    294     return false;
    295   }
    296   if (IsWrite && isVtableAccess(I)) {
    297     Value *StoredValue = cast<StoreInst>(I)->getValueOperand();
    298     IRB.CreateCall2(TsanVptrUpdate,
    299                     IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
    300                     IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy()));
    301     stats.NumInstrumentedVtableWrites++;
    302     return true;
    303   }
    304   size_t Idx = CountTrailingZeros_32(TypeSize / 8);
    305   assert(Idx < kNumberOfAccessSizes);
    306   Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
    307   IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
    308   if (IsWrite) stats.NumInstrumentedWrites++;
    309   else         stats.NumInstrumentedReads++;
    310   return true;
    311 }
    312