//===-- EfficiencySanitizer.cpp - performance tuner -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of EfficiencySanitizer, a family of performance tuners
// that detects multiple performance issues via separate sub-tools.
//
// The instrumentation phase is straightforward:
//   - Take action on every memory access: either inlined instrumentation,
//     or inserted calls to our run-time library.
//   - Optimizations may apply to avoid instrumenting some of the accesses.
//   - Turn mem{set,cpy,move} intrinsics into library calls.
// The rest is handled by the run-time library.
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

using namespace llvm;

#define DEBUG_TYPE "esan"

// The tool type must be just one of these ClTool* options, as the tools
// cannot be combined due to shadow memory constraints.
static cl::opt<bool>
    ClToolCacheFrag("esan-cache-frag", cl::init(false),
                    cl::desc("Detect data cache fragmentation"), cl::Hidden);
static cl::opt<bool>
    ClToolWorkingSet("esan-working-set", cl::init(false),
                     cl::desc("Measure the working set size"), cl::Hidden);
// Each new tool will get its own opt flag here.
// These are converted to EfficiencySanitizerOptions for use
// in the code.

static cl::opt<bool> ClInstrumentLoadsAndStores(
    "esan-instrument-loads-and-stores", cl::init(true),
    cl::desc("Instrument loads and stores"), cl::Hidden);
static cl::opt<bool> ClInstrumentMemIntrinsics(
    "esan-instrument-memintrinsics", cl::init(true),
    cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
static cl::opt<bool> ClInstrumentFastpath(
    "esan-instrument-fastpath", cl::init(true),
    cl::desc("Instrument fastpath"), cl::Hidden);
static cl::opt<bool> ClAuxFieldInfo(
    "esan-aux-field-info", cl::init(true),
    cl::desc("Generate binary with auxiliary struct field information"),
    cl::Hidden);

// Experiments show that the performance difference can be 2x or more,
// and accuracy loss is typically negligible, so we turn this on by default.
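// (Illustrative example, assuming the 64-byte cache lines our shadow mapping
// uses: an 8-byte access at offset 60 within a line covers bytes 60..67 and
// thus straddles two lines; with this option we only update the shadow for
// the line containing the start address.)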
static cl::opt<bool> ClAssumeIntraCacheLine(
    "esan-assume-intra-cache-line", cl::init(true),
    cl::desc("Assume each memory access touches just one cache line, for "
             "better performance but with a potential loss of accuracy."),
    cl::Hidden);

STATISTIC(NumInstrumentedLoads, "Number of instrumented loads");
STATISTIC(NumInstrumentedStores, "Number of instrumented stores");
STATISTIC(NumFastpaths, "Number of instrumented fastpaths");
STATISTIC(NumAccessesWithIrregularSize,
          "Number of accesses with a size outside our targeted callout sizes");
STATISTIC(NumIgnoredStructs, "Number of ignored structs");
STATISTIC(NumIgnoredGEPs, "Number of ignored GEP instructions");
STATISTIC(NumInstrumentedGEPs, "Number of instrumented GEP instructions");
STATISTIC(NumAssumedIntraCacheLine,
          "Number of accesses assumed to be intra-cache-line");

static const uint64_t EsanCtorAndDtorPriority = 0;
static const char *const EsanModuleCtorName = "esan.module_ctor";
static const char *const EsanModuleDtorName = "esan.module_dtor";
static const char *const EsanInitName = "__esan_init";
static const char *const EsanExitName = "__esan_exit";

// In some cases we need to specify the tool to the runtime before the ctor
// is called, so we also record it in a global variable.
static const char *const EsanWhichToolName = "__esan_which_tool";

// We must keep these Shadow* constants consistent with the esan runtime.
// FIXME: Try to place these shadow constants, the names of the __esan_*
// interface functions, and the ToolType enum into a header shared between
// llvm and compiler-rt.
static const uint64_t ShadowMask = 0x00000fffffffffffull;
static const uint64_t ShadowOffs[3] = { // Indexed by scale
    0x0000130000000000ull,
    0x0000220000000000ull,
    0x0000440000000000ull,
};
// This array is indexed by the ToolType enum.
static const int ShadowScale[] = {
    0, // ESAN_None.
    2, // ESAN_CacheFrag: 4B:1B, so 4 to 1 == >>2.
    6, // ESAN_WorkingSet: 64B:1B, so 64 to 1 == >>6.
};

// MaxStructCounterNameSize is a soft size limit to avoid insanely long
// names for those extremely large structs.
static const unsigned MaxStructCounterNameSize = 512;

namespace {

static EfficiencySanitizerOptions
OverrideOptionsFromCL(EfficiencySanitizerOptions Options) {
  if (ClToolCacheFrag)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
  else if (ClToolWorkingSet)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet;

  // Direct opt invocation with no params will have the default ESAN_None.
  // We run the default tool in that case.
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_None)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;

  return Options;
}

// Create a constant for Str so that we can pass it to the run-time lib.
static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str,
                                                    bool AllowMerging) {
  Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
  // We use private linkage for module-local strings. If they can be merged
  // with another one, we set the unnamed_addr attribute.
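  // (unnamed_addr tells the linker the address of this constant is not
  // significant, so identical strings can be merged across compilation
  // units, much like ordinary string literals.)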
  GlobalVariable *GV =
      new GlobalVariable(M, StrConst->getType(), true,
                         GlobalValue::PrivateLinkage, StrConst, "");
  if (AllowMerging)
    GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
  return GV;
}

/// EfficiencySanitizer: instrument each module to find performance issues.
class EfficiencySanitizer : public ModulePass {
public:
  EfficiencySanitizer(
      const EfficiencySanitizerOptions &Opts = EfficiencySanitizerOptions())
      : ModulePass(ID), Options(OverrideOptionsFromCL(Opts)) {}
  const char *getPassName() const override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnModule(Module &M) override;
  static char ID;

private:
  bool initOnModule(Module &M);
  void initializeCallbacks(Module &M);
  bool shouldIgnoreStructType(StructType *StructTy);
  void createStructCounterName(
      StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr);
  void createCacheFragAuxGV(
      Module &M, const DataLayout &DL, StructType *StructTy,
      GlobalVariable *&TypeName, GlobalVariable *&Offset,
      GlobalVariable *&Size);
  GlobalVariable *createCacheFragInfoGV(Module &M, const DataLayout &DL,
                                        Constant *UnitName);
  Constant *createEsanInitToolInfoArg(Module &M, const DataLayout &DL);
  void createDestructor(Module &M, Constant *ToolInfoArg);
  bool runOnFunction(Function &F, Module &M);
  bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
  bool instrumentMemIntrinsic(MemIntrinsic *MI);
  bool instrumentGetElementPtr(Instruction *I, Module &M);
  bool insertCounterUpdate(Instruction *I, StructType *StructTy,
                           unsigned CounterIdx);
  unsigned getFieldCounterIdx(StructType *StructTy) {
    return 0;
  }
  unsigned getArrayCounterIdx(StructType *StructTy) {
    return StructTy->getNumElements();
  }
  unsigned getStructCounterSize(StructType *StructTy) {
    // The struct counter array includes:
    // - one counter for each struct field,
    // - one counter for the struct access within an array.
    return (StructTy->getNumElements(/*field*/) + 1/*array*/);
  }
  bool shouldIgnoreMemoryAccess(Instruction *I);
  int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
  Value *appToShadow(Value *Shadow, IRBuilder<> &IRB);
  bool instrumentFastpath(Instruction *I, const DataLayout &DL, bool IsStore,
                          Value *Addr, unsigned Alignment);
  // Each tool has its own fastpath routine:
  bool instrumentFastpathCacheFrag(Instruction *I, const DataLayout &DL,
                                   Value *Addr, unsigned Alignment);
  bool instrumentFastpathWorkingSet(Instruction *I, const DataLayout &DL,
                                    Value *Addr, unsigned Alignment);

  EfficiencySanitizerOptions Options;
  LLVMContext *Ctx;
  Type *IntptrTy;
  // Our slowpath involves callouts to the runtime library.
  // Access sizes are powers of two: 1, 2, 4, 8, 16.
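  // (The callback array index is log2 of the byte size, as computed by
  // getMemoryAccessFuncIndex below: 1B->0, 2B->1, 4B->2, 8B->3, 16B->4.)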
  static const size_t NumberOfAccessSizes = 5;
  Function *EsanAlignedLoad[NumberOfAccessSizes];
  Function *EsanAlignedStore[NumberOfAccessSizes];
  Function *EsanUnalignedLoad[NumberOfAccessSizes];
  Function *EsanUnalignedStore[NumberOfAccessSizes];
  // For irregular sizes of any alignment:
  Function *EsanUnalignedLoadN, *EsanUnalignedStoreN;
  Function *MemmoveFn, *MemcpyFn, *MemsetFn;
  Function *EsanCtorFunction;
  Function *EsanDtorFunction;
  // Remember the counter variable for each struct type to avoid
  // recomputing the variable name later during instrumentation.
  std::map<Type *, GlobalVariable *> StructTyMap;
};
} // namespace

char EfficiencySanitizer::ID = 0;
INITIALIZE_PASS_BEGIN(
    EfficiencySanitizer, "esan",
    "EfficiencySanitizer: finds performance issues.", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    EfficiencySanitizer, "esan",
    "EfficiencySanitizer: finds performance issues.", false, false)

const char *EfficiencySanitizer::getPassName() const {
  return "EfficiencySanitizer";
}

void EfficiencySanitizer::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetLibraryInfoWrapperPass>();
}

ModulePass *
llvm::createEfficiencySanitizerPass(const EfficiencySanitizerOptions &Options) {
  return new EfficiencySanitizer(Options);
}

void EfficiencySanitizer::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(M.getContext());
  // Initialize the callbacks.
  for (size_t Idx = 0; Idx < NumberOfAccessSizes; ++Idx) {
    const unsigned ByteSize = 1U << Idx;
    std::string ByteSizeStr = utostr(ByteSize);
    // We'll inline the most common (i.e., aligned and frequent sizes)
    // load + store instrumentation: these callouts are for the slowpath.
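    // (For Idx == 2, for instance, this declares __esan_aligned_load4,
    // __esan_aligned_store4, __esan_unaligned_load4, and
    // __esan_unaligned_store4, each taking a single i8* argument.)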
    SmallString<32> AlignedLoadName("__esan_aligned_load" + ByteSizeStr);
    EsanAlignedLoad[Idx] =
        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
            AlignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
    SmallString<32> AlignedStoreName("__esan_aligned_store" + ByteSizeStr);
    EsanAlignedStore[Idx] =
        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
            AlignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
    SmallString<32> UnalignedLoadName("__esan_unaligned_load" + ByteSizeStr);
    EsanUnalignedLoad[Idx] =
        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
            UnalignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
    SmallString<32> UnalignedStoreName("__esan_unaligned_store" + ByteSizeStr);
    EsanUnalignedStore[Idx] =
        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
            UnalignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
  }
  EsanUnalignedLoadN = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("__esan_unaligned_loadN", IRB.getVoidTy(),
                            IRB.getInt8PtrTy(), IntptrTy, nullptr));
  EsanUnalignedStoreN = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("__esan_unaligned_storeN", IRB.getVoidTy(),
                            IRB.getInt8PtrTy(), IntptrTy, nullptr));
  MemmoveFn = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                            IRB.getInt8PtrTy(), IntptrTy, nullptr));
  MemcpyFn = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                            IRB.getInt8PtrTy(), IntptrTy, nullptr));
  MemsetFn = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                            IRB.getInt32Ty(), IntptrTy, nullptr));
}

bool EfficiencySanitizer::shouldIgnoreStructType(StructType *StructTy) {
  if (StructTy == nullptr || StructTy->isOpaque() /* no struct body */)
    return true;
  return false;
}

void EfficiencySanitizer::createStructCounterName(
    StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr) {
  // Append NumFields and the field type ids to avoid conflicts between
  // structs with the same name but different fields.
  if (StructTy->hasName())
    NameStr += StructTy->getName();
  else
    NameStr += "struct.anon";
  // MaxStructCounterNameSize is a soft limit: we always append #NumFields
  // and at least one field type id, so the final name may be longer.
  // Append #NumFields.
  NameStr += "#";
  Twine(StructTy->getNumElements()).toVector(NameStr);
  // Append struct field type ids in the reverse order.
  for (int i = StructTy->getNumElements() - 1; i >= 0; --i) {
    NameStr += "#";
    Twine(StructTy->getElementType(i)->getTypeID()).toVector(NameStr);
    if (NameStr.size() >= MaxStructCounterNameSize)
      break;
  }
  if (StructTy->isLiteral()) {
    // End with # for literal struct.
    NameStr += "#";
  }
}

// Create global variables with auxiliary information (e.g., struct field size,
// offset, and type name) for better user reports.
void EfficiencySanitizer::createCacheFragAuxGV(
    Module &M, const DataLayout &DL, StructType *StructTy,
    GlobalVariable *&TypeName, GlobalVariable *&Offset,
    GlobalVariable *&Size) {
  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  auto *Int32Ty = Type::getInt32Ty(*Ctx);
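  // (Illustrative example, assuming a typical x86-64 data layout: for
  // struct { int i; double d; } the three parallel arrays built below would
  // hold TypeName = {"i32", "double"}, Offset = {0, 8}, and Size = {4, 8}.)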
  // FieldTypeName.
  auto *TypeNameArrayTy = ArrayType::get(Int8PtrTy, StructTy->getNumElements());
  TypeName = new GlobalVariable(M, TypeNameArrayTy, true,
                                GlobalVariable::InternalLinkage, nullptr);
  SmallVector<Constant *, 16> TypeNameVec;
  // FieldOffset.
  auto *OffsetArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
  Offset = new GlobalVariable(M, OffsetArrayTy, true,
                              GlobalVariable::InternalLinkage, nullptr);
  SmallVector<Constant *, 16> OffsetVec;
  // FieldSize.
  auto *SizeArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
  Size = new GlobalVariable(M, SizeArrayTy, true,
                            GlobalVariable::InternalLinkage, nullptr);
  SmallVector<Constant *, 16> SizeVec;
  for (unsigned i = 0; i < StructTy->getNumElements(); ++i) {
    Type *Ty = StructTy->getElementType(i);
    std::string Str;
    raw_string_ostream StrOS(Str);
    Ty->print(StrOS);
    TypeNameVec.push_back(
        ConstantExpr::getPointerCast(
            createPrivateGlobalForString(M, StrOS.str(), true),
            Int8PtrTy));
    OffsetVec.push_back(
        ConstantInt::get(Int32Ty,
                         DL.getStructLayout(StructTy)->getElementOffset(i)));
    SizeVec.push_back(ConstantInt::get(Int32Ty,
                                       DL.getTypeAllocSize(Ty)));
  }
  TypeName->setInitializer(ConstantArray::get(TypeNameArrayTy, TypeNameVec));
  Offset->setInitializer(ConstantArray::get(OffsetArrayTy, OffsetVec));
  Size->setInitializer(ConstantArray::get(SizeArrayTy, SizeVec));
}

// Create the global variable for the cache-fragmentation tool.
GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV(
    Module &M, const DataLayout &DL, Constant *UnitName) {
  assert(Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag);

  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  auto *Int8PtrPtrTy = Int8PtrTy->getPointerTo();
  auto *Int32Ty = Type::getInt32Ty(*Ctx);
  auto *Int32PtrTy = Type::getInt32PtrTy(*Ctx);
  auto *Int64Ty = Type::getInt64Ty(*Ctx);
  auto *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
  // This structure should be kept consistent with the StructInfo struct
  // in the runtime library.
  // struct StructInfo {
  //   const char *StructName;
  //   u32 Size;
  //   u32 NumFields;
  //   u32 *FieldOffset;           // auxiliary struct field info.
  //   u32 *FieldSize;             // auxiliary struct field info.
  //   const char **FieldTypeName; // auxiliary struct field info.
  //   u64 *FieldCounters;
  //   u64 *ArrayCounter;
  // };
  auto *StructInfoTy =
      StructType::get(Int8PtrTy, Int32Ty, Int32Ty, Int32PtrTy, Int32PtrTy,
                      Int8PtrPtrTy, Int64PtrTy, Int64PtrTy, nullptr);
  auto *StructInfoPtrTy = StructInfoTy->getPointerTo();
  // This structure should be kept consistent with the CacheFragInfo struct
  // in the runtime library.
  // struct CacheFragInfo {
  //   const char *UnitName;
  //   u32 NumStructs;
  //   StructInfo *Structs;
  // };
  auto *CacheFragInfoTy =
      StructType::get(Int8PtrTy, Int32Ty, StructInfoPtrTy, nullptr);

  std::vector<StructType *> Vec = M.getIdentifiedStructTypes();
  unsigned NumStructs = 0;
  SmallVector<Constant *, 16> Initializers;

  for (auto &StructTy : Vec) {
    if (shouldIgnoreStructType(StructTy)) {
      ++NumIgnoredStructs;
      continue;
    }
    ++NumStructs;

    // StructName.
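    // (Example, with type ids assumed from this LLVM version: a struct
    // named "struct.Foo" with two integer fields gets the counter name
    // "struct.Foo#2#11#11" -- the name, then #NumFields, then each field's
    // Type::TypeID in reverse order; see createStructCounterName.)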
    SmallString<MaxStructCounterNameSize> CounterNameStr;
    createStructCounterName(StructTy, CounterNameStr);
    GlobalVariable *StructCounterName = createPrivateGlobalForString(
        M, CounterNameStr, /*AllowMerging*/true);

    // Counters.
    // We create the counter array with StructCounterName and weak linkage
    // so that the structs with the same name and layout from different
    // compilation units will be merged into one.
    auto *CounterArrayTy = ArrayType::get(Int64Ty,
                                          getStructCounterSize(StructTy));
    GlobalVariable *Counters =
        new GlobalVariable(M, CounterArrayTy, false,
                           GlobalVariable::WeakAnyLinkage,
                           ConstantAggregateZero::get(CounterArrayTy),
                           CounterNameStr);

    // Remember the counter variable for each struct type.
    StructTyMap.insert(std::pair<Type *, GlobalVariable *>(StructTy, Counters));

    // We pass the field type name array, offset array, and size array to
    // the runtime for better reporting.
    GlobalVariable *TypeName = nullptr, *Offset = nullptr, *Size = nullptr;
    if (ClAuxFieldInfo)
      createCacheFragAuxGV(M, DL, StructTy, TypeName, Offset, Size);

    Constant *FieldCounterIdx[2];
    FieldCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
    FieldCounterIdx[1] = ConstantInt::get(Int32Ty,
                                          getFieldCounterIdx(StructTy));
    Constant *ArrayCounterIdx[2];
    ArrayCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
    ArrayCounterIdx[1] = ConstantInt::get(Int32Ty,
                                          getArrayCounterIdx(StructTy));
    Initializers.push_back(
        ConstantStruct::get(
            StructInfoTy,
            ConstantExpr::getPointerCast(StructCounterName, Int8PtrTy),
            ConstantInt::get(Int32Ty,
                             DL.getStructLayout(StructTy)->getSizeInBytes()),
            ConstantInt::get(Int32Ty, StructTy->getNumElements()),
            Offset == nullptr ? ConstantPointerNull::get(Int32PtrTy) :
                ConstantExpr::getPointerCast(Offset, Int32PtrTy),
            Size == nullptr ? ConstantPointerNull::get(Int32PtrTy) :
                ConstantExpr::getPointerCast(Size, Int32PtrTy),
            TypeName == nullptr ? ConstantPointerNull::get(Int8PtrPtrTy) :
                ConstantExpr::getPointerCast(TypeName, Int8PtrPtrTy),
            ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
                                           FieldCounterIdx),
            ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
                                           ArrayCounterIdx),
            nullptr));
  }
  // Structs.
  Constant *StructInfo;
  if (NumStructs == 0) {
    StructInfo = ConstantPointerNull::get(StructInfoPtrTy);
  } else {
    auto *StructInfoArrayTy = ArrayType::get(StructInfoTy, NumStructs);
    StructInfo = ConstantExpr::getPointerCast(
        new GlobalVariable(M, StructInfoArrayTy, false,
                           GlobalVariable::InternalLinkage,
                           ConstantArray::get(StructInfoArrayTy, Initializers)),
        StructInfoPtrTy);
  }

  auto *CacheFragInfoGV = new GlobalVariable(
      M, CacheFragInfoTy, true, GlobalVariable::InternalLinkage,
      ConstantStruct::get(CacheFragInfoTy,
                          UnitName,
                          ConstantInt::get(Int32Ty, NumStructs),
                          StructInfo,
                          nullptr));
  return CacheFragInfoGV;
}

// Create the tool-specific argument passed to EsanInit and EsanExit.
Constant *EfficiencySanitizer::createEsanInitToolInfoArg(Module &M,
                                                         const DataLayout &DL) {
  // This structure contains tool-specific information about each compilation
  // unit (module) and is passed to the runtime library.
  GlobalVariable *ToolInfoGV = nullptr;

  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  // Compilation unit name.
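  // (The module identifier is typically the input file path, so the runtime
  // can attribute per-struct counters to their translation unit.)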
  auto *UnitName = ConstantExpr::getPointerCast(
      createPrivateGlobalForString(M, M.getModuleIdentifier(), true),
      Int8PtrTy);

  // Create the tool-specific variable.
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag)
    ToolInfoGV = createCacheFragInfoGV(M, DL, UnitName);

  if (ToolInfoGV != nullptr)
    return ConstantExpr::getPointerCast(ToolInfoGV, Int8PtrTy);

  // Return a null pointer if no tool-specific variable was created.
  return ConstantPointerNull::get(Int8PtrTy);
}

void EfficiencySanitizer::createDestructor(Module &M, Constant *ToolInfoArg) {
  PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  EsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*Ctx),
                                                        false),
                                      GlobalValue::InternalLinkage,
                                      EsanModuleDtorName, &M);
  ReturnInst::Create(*Ctx, BasicBlock::Create(*Ctx, "", EsanDtorFunction));
  IRBuilder<> IRB_Dtor(EsanDtorFunction->getEntryBlock().getTerminator());
  Function *EsanExit = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction(EsanExitName, IRB_Dtor.getVoidTy(),
                            Int8PtrTy, nullptr));
  EsanExit->setLinkage(Function::ExternalLinkage);
  IRB_Dtor.CreateCall(EsanExit, {ToolInfoArg});
  appendToGlobalDtors(M, EsanDtorFunction, EsanCtorAndDtorPriority);
}

bool EfficiencySanitizer::initOnModule(Module &M) {
  Ctx = &M.getContext();
  const DataLayout &DL = M.getDataLayout();
  IRBuilder<> IRB(M.getContext());
  IntegerType *OrdTy = IRB.getInt32Ty();
  PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  IntptrTy = DL.getIntPtrType(M.getContext());
  // Create the variable passed to EsanInit and EsanExit.
  Constant *ToolInfoArg = createEsanInitToolInfoArg(M, DL);
  // Constructor
  // We specify the tool type both in the EsanWhichToolName global
  // and as an arg to the init routine as a sanity check.
  std::tie(EsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
      M, EsanModuleCtorName, EsanInitName, /*InitArgTypes=*/{OrdTy, Int8PtrTy},
      /*InitArgs=*/{
          ConstantInt::get(OrdTy, static_cast<int>(Options.ToolType)),
          ToolInfoArg});
  appendToGlobalCtors(M, EsanCtorFunction, EsanCtorAndDtorPriority);

  createDestructor(M, ToolInfoArg);

  new GlobalVariable(M, OrdTy, true,
                     GlobalValue::WeakAnyLinkage,
                     ConstantInt::get(OrdTy,
                                      static_cast<int>(Options.ToolType)),
                     EsanWhichToolName);

  return true;
}
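// A worked example of the app-to-shadow mapping computed below, for the
// working-set tool (Scale == 6): the mask keeps the low 44 address bits,
// Offs == ShadowOffs[0] << 6 relocates the result into the shadow region,
// and the final >> 6 collapses each aligned 64-byte cache line onto a
// single shadow byte, so App and App+63 within one line map to the same
// shadow address.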
Value *EfficiencySanitizer::appToShadow(Value *Shadow, IRBuilder<> &IRB) {
  // Shadow = ((App & Mask) + Offs) >> Scale
  Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, ShadowMask));
  uint64_t Offs;
  int Scale = ShadowScale[Options.ToolType];
  if (Scale <= 2)
    Offs = ShadowOffs[Scale];
  else
    Offs = ShadowOffs[0] << Scale;
  Shadow = IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Offs));
  if (Scale > 0)
    Shadow = IRB.CreateLShr(Shadow, Scale);
  return Shadow;
}

bool EfficiencySanitizer::shouldIgnoreMemoryAccess(Instruction *I) {
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    // We'd like to know about cache fragmentation in vtable accesses and
    // constant data references, so we do not currently ignore anything.
    return false;
  } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
    // TODO: the instrumentation disturbs the data layout on the stack, so we
    // may want to add an option to ignore stack references (if we can
    // distinguish them) to reduce overhead.
  }
  // TODO(bruening): future tools will be returning true for some cases.
  return false;
}

bool EfficiencySanitizer::runOnModule(Module &M) {
  bool Res = initOnModule(M);
  initializeCallbacks(M);
  for (auto &F : M) {
    Res |= runOnFunction(F, M);
  }
  return Res;
}

bool EfficiencySanitizer::runOnFunction(Function &F, Module &M) {
  // This is required to prevent instrumenting the call to __esan_init from
  // within the module constructor.
  if (&F == EsanCtorFunction)
    return false;
  SmallVector<Instruction *, 8> LoadsAndStores;
  SmallVector<Instruction *, 8> MemIntrinCalls;
  SmallVector<Instruction *, 8> GetElementPtrs;
  bool Res = false;
  const DataLayout &DL = M.getDataLayout();
  const TargetLibraryInfo *TLI =
      &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

  for (auto &BB : F) {
    for (auto &Inst : BB) {
      if ((isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
           isa<AtomicRMWInst>(Inst) || isa<AtomicCmpXchgInst>(Inst)) &&
          !shouldIgnoreMemoryAccess(&Inst))
        LoadsAndStores.push_back(&Inst);
      else if (isa<MemIntrinsic>(Inst))
        MemIntrinCalls.push_back(&Inst);
      else if (isa<GetElementPtrInst>(Inst))
        GetElementPtrs.push_back(&Inst);
      else if (CallInst *CI = dyn_cast<CallInst>(&Inst))
        maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
    }
  }

  if (ClInstrumentLoadsAndStores) {
    for (auto Inst : LoadsAndStores) {
      Res |= instrumentLoadOrStore(Inst, DL);
    }
  }

  if (ClInstrumentMemIntrinsics) {
    for (auto Inst : MemIntrinCalls) {
      Res |= instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
    }
  }

  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    for (auto Inst : GetElementPtrs) {
      Res |= instrumentGetElementPtr(Inst, M);
    }
  }

  return Res;
}

bool EfficiencySanitizer::instrumentLoadOrStore(Instruction *I,
                                                const DataLayout &DL) {
  IRBuilder<> IRB(I);
  bool IsStore;
  Value *Addr;
  unsigned Alignment;
  if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
    IsStore = false;
    Alignment = Load->getAlignment();
    Addr = Load->getPointerOperand();
  } else if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
    IsStore = true;
    Alignment = Store->getAlignment();
    Addr = Store->getPointerOperand();
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    IsStore = true;
    Alignment = 0;
    Addr = RMW->getPointerOperand();
  } else if (AtomicCmpXchgInst *Xchg = dyn_cast<AtomicCmpXchgInst>(I)) {
    IsStore = true;
    Alignment = 0;
    Addr = Xchg->getPointerOperand();
  } else
    llvm_unreachable("Unsupported mem access type");

  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
  const uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
  Value *OnAccessFunc = nullptr;

  // Convert 0 to the default alignment.
  if (Alignment == 0)
    Alignment = DL.getPrefTypeAlignment(OrigTy);

  if (IsStore)
    NumInstrumentedStores++;
  else
    NumInstrumentedLoads++;
  int Idx = getMemoryAccessFuncIndex(Addr, DL);
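  // Idx < 0 means an irregular size (e.g., a 10-byte x86_fp80 store), which
  // takes the generic *N routine with an explicit size argument. Otherwise,
  // the slowpath callout looks roughly like this for a 4-byte aligned load
  // (illustrative IR; value names are made up):
  //   %p8 = bitcast i32* %p to i8*
  //   call void @__esan_aligned_load4(i8* %p8)
  //   %val = load i32, i32* %p, align 4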
  if (Idx < 0) {
    OnAccessFunc = IsStore ? EsanUnalignedStoreN : EsanUnalignedLoadN;
    IRB.CreateCall(OnAccessFunc,
                   {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                    ConstantInt::get(IntptrTy, TypeSizeBytes)});
  } else {
    if (ClInstrumentFastpath &&
        instrumentFastpath(I, DL, IsStore, Addr, Alignment)) {
      NumFastpaths++;
      return true;
    }
    if (Alignment == 0 || (Alignment % TypeSizeBytes) == 0)
      OnAccessFunc = IsStore ? EsanAlignedStore[Idx] : EsanAlignedLoad[Idx];
    else
      OnAccessFunc = IsStore ? EsanUnalignedStore[Idx] : EsanUnalignedLoad[Idx];
    IRB.CreateCall(OnAccessFunc,
                   IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
  }
  return true;
}

// It's simplest to replace the memset/memmove/memcpy intrinsics with
// calls that the runtime library intercepts.
// Our pass is late enough that calls should not turn back into intrinsics.
bool EfficiencySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
  IRBuilder<> IRB(MI);
  bool Res = false;
  if (isa<MemSetInst>(MI)) {
    IRB.CreateCall(
        MemsetFn,
        {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(MI->getArgOperand(1), IRB.getInt32Ty(), false),
         IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
    MI->eraseFromParent();
    Res = true;
  } else if (isa<MemTransferInst>(MI)) {
    IRB.CreateCall(
        isa<MemCpyInst>(MI) ? MemcpyFn : MemmoveFn,
        {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
         IRB.CreatePointerCast(MI->getArgOperand(1), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
    MI->eraseFromParent();
    Res = true;
  } else
    llvm_unreachable("Unsupported mem intrinsic type");
  return Res;
}

bool EfficiencySanitizer::instrumentGetElementPtr(Instruction *I, Module &M) {
  GetElementPtrInst *GepInst = dyn_cast<GetElementPtrInst>(I);
  bool Res = false;
  if (GepInst == nullptr || GepInst->getNumIndices() == 1) {
    ++NumIgnoredGEPs;
    return false;
  }
  Type *SourceTy = GepInst->getSourceElementType();
  StructType *StructTy;
  ConstantInt *Idx;
  // Check if the GEP calculates an address within a struct array.
  if (isa<StructType>(SourceTy)) {
    StructTy = cast<StructType>(SourceTy);
    Idx = dyn_cast<ConstantInt>(GepInst->getOperand(1));
    if ((Idx == nullptr || Idx->getSExtValue() != 0) &&
        !shouldIgnoreStructType(StructTy) && StructTyMap.count(StructTy) != 0)
      Res |= insertCounterUpdate(I, StructTy, getArrayCounterIdx(StructTy));
  }
  // Iterate all (except the first and the last) idx within each GEP instruction
  // for possible nested struct field address calculation.
  for (unsigned i = 1; i < GepInst->getNumIndices(); ++i) {
    SmallVector<Value *, 8> IdxVec(GepInst->idx_begin(),
                                   GepInst->idx_begin() + i);
    Type *Ty = GetElementPtrInst::getIndexedType(SourceTy, IdxVec);
    unsigned CounterIdx = 0;
    if (isa<ArrayType>(Ty)) {
      ArrayType *ArrayTy = cast<ArrayType>(Ty);
      StructTy = dyn_cast<StructType>(ArrayTy->getElementType());
      if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
        continue;
      // The last counter for struct array access.
      CounterIdx = getArrayCounterIdx(StructTy);
    } else if (isa<StructType>(Ty)) {
      StructTy = cast<StructType>(Ty);
      if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
        continue;
      // Get the StructTy's subfield index.
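      // (Index operand i+1 selects the field within Ty; e.g., for a GEP
      // with indices (0, 1, 2), i == 1 examines the outer struct and
      // operand 2 -- the constant 1 -- picks its field-1 counter, while
      // i == 2 does the same for field 2 of the nested struct, if any.)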
      Idx = cast<ConstantInt>(GepInst->getOperand(i+1));
      assert(Idx->getSExtValue() >= 0 &&
             Idx->getSExtValue() < StructTy->getNumElements());
      CounterIdx = getFieldCounterIdx(StructTy) + Idx->getSExtValue();
    }
    Res |= insertCounterUpdate(I, StructTy, CounterIdx);
  }
  if (Res)
    ++NumInstrumentedGEPs;
  else
    ++NumIgnoredGEPs;
  return Res;
}

bool EfficiencySanitizer::insertCounterUpdate(Instruction *I,
                                              StructType *StructTy,
                                              unsigned CounterIdx) {
  GlobalVariable *CounterArray = StructTyMap[StructTy];
  if (CounterArray == nullptr)
    return false;
  IRBuilder<> IRB(I);
  Constant *Indices[2];
  // Xref http://llvm.org/docs/LangRef.html#i-getelementptr and
  // http://llvm.org/docs/GetElementPtr.html.
  // The first index of the GEP instruction steps through the first operand,
  // i.e., the array itself.
  Indices[0] = ConstantInt::get(IRB.getInt32Ty(), 0);
  // The second index is the index within the array.
  Indices[1] = ConstantInt::get(IRB.getInt32Ty(), CounterIdx);
  Constant *Counter =
      ConstantExpr::getGetElementPtr(
          ArrayType::get(IRB.getInt64Ty(), getStructCounterSize(StructTy)),
          CounterArray, Indices);
  Value *Load = IRB.CreateLoad(Counter);
  IRB.CreateStore(IRB.CreateAdd(Load, ConstantInt::get(IRB.getInt64Ty(), 1)),
                  Counter);
  return true;
}

int EfficiencySanitizer::getMemoryAccessFuncIndex(Value *Addr,
                                                  const DataLayout &DL) {
  Type *OrigPtrTy = Addr->getType();
  Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
  assert(OrigTy->isSized());
  // The size in bits is always a multiple of 8, i.e., a whole number of bytes.
  uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
  if (TypeSizeBytes != 1 && TypeSizeBytes != 2 && TypeSizeBytes != 4 &&
      TypeSizeBytes != 8 && TypeSizeBytes != 16) {
    // Irregular sizes do not have per-size call targets.
    NumAccessesWithIrregularSize++;
    return -1;
  }
  size_t Idx = countTrailingZeros(TypeSizeBytes);
  assert(Idx < NumberOfAccessSizes);
  return Idx;
}

bool EfficiencySanitizer::instrumentFastpath(Instruction *I,
                                             const DataLayout &DL, bool IsStore,
                                             Value *Addr, unsigned Alignment) {
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    return instrumentFastpathCacheFrag(I, DL, Addr, Alignment);
  } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
    return instrumentFastpathWorkingSet(I, DL, Addr, Alignment);
  }
  return false;
}

bool EfficiencySanitizer::instrumentFastpathCacheFrag(Instruction *I,
                                                      const DataLayout &DL,
                                                      Value *Addr,
                                                      unsigned Alignment) {
  // Do nothing.
  return true; // Return true to avoid slowpath instrumentation.
}

bool EfficiencySanitizer::instrumentFastpathWorkingSet(
    Instruction *I, const DataLayout &DL, Value *Addr, unsigned Alignment) {
  assert(ShadowScale[Options.ToolType] == 6); // The code below assumes this.
  IRBuilder<> IRB(I);
  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
  const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
  // Bail to the slowpath if the access might touch multiple cache lines.
  // An access aligned to its size is guaranteed to be intra-cache-line.
  // getMemoryAccessFuncIndex has already ruled out a size larger than 16
  // and thus larger than a cache line for platforms this tool targets
  // (and our shadow memory setup assumes 64-byte cache lines).
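  // For example, a 4-byte access with Alignment 4 (or any multiple) cannot
  // cross a 64-byte line boundary, while one with Alignment 2 might; in the
  // latter case we either assume it stays intra-line (the default, via
  // ClAssumeIntraCacheLine) or fall back to the slowpath callout.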
  assert(TypeSize <= 128);
  if (!(TypeSize == 8 ||
        (Alignment % (TypeSize / 8)) == 0)) {
    if (ClAssumeIntraCacheLine)
      ++NumAssumedIntraCacheLine;
    else
      return false;
  }

  // We inline instrumentation to set the corresponding shadow bits for
  // each cache line touched by the application. Here we handle a single
  // load or store where we've already ruled out the possibility that it
  // might touch more than one cache line and thus we simply update the
  // shadow memory for a single cache line.
  // Our shadow memory model is fine with races when manipulating shadow values.
  // We generate the following code:
  //
  //   const char BitMask = 0x81;
  //   char *ShadowAddr = appToShadow(AppAddr);
  //   if ((*ShadowAddr & BitMask) != BitMask)
  //     *ShadowAddr |= BitMask;
  //
  Value *AddrPtr = IRB.CreatePointerCast(Addr, IntptrTy);
  Value *ShadowPtr = appToShadow(AddrPtr, IRB);
  Type *ShadowTy = IntegerType::get(*Ctx, 8U);
  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
  // The bottom bit is used for the current sampling period's working set.
  // The top bit is used for the total working set. We set both on each
  // memory access, if they are not already set.
  Value *ValueMask = ConstantInt::get(ShadowTy, 0x81); // 10000001B

  Value *OldValue = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
  // The AND and CMP will be turned into a TEST instruction by the compiler.
  Value *Cmp = IRB.CreateICmpNE(IRB.CreateAnd(OldValue, ValueMask), ValueMask);
  TerminatorInst *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false);
  // FIXME: do we need to call SetCurrentDebugLocation?
  IRB.SetInsertPoint(CmpTerm);
  // We use OR to set the shadow bits to avoid corrupting the middle 6 bits,
  // which are used by the runtime library.
  Value *NewVal = IRB.CreateOr(OldValue, ValueMask);
  IRB.CreateStore(NewVal, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
  IRB.SetInsertPoint(I);

  return true;
}