Home | History | Annotate | Download | only in Instrumentation
      1 //===-- EfficiencySanitizer.cpp - performance tuner -----------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file is a part of EfficiencySanitizer, a family of performance tuners
     11 // that detects multiple performance issues via separate sub-tools.
     12 //
     13 // The instrumentation phase is straightforward:
     14 //   - Take action on every memory access: either inlined instrumentation,
      15 //     or inserted calls to our run-time library.
     16 //   - Optimizations may apply to avoid instrumenting some of the accesses.
      17 //   - Turn mem{set,cpy,move} intrinsics into library calls.
     18 // The rest is handled by the run-time library.
     19 //===----------------------------------------------------------------------===//
     20 
     21 #include "llvm/Transforms/Instrumentation.h"
     22 #include "llvm/ADT/SmallString.h"
     23 #include "llvm/ADT/SmallVector.h"
     24 #include "llvm/ADT/Statistic.h"
     25 #include "llvm/ADT/StringExtras.h"
     26 #include "llvm/Analysis/TargetLibraryInfo.h"
     27 #include "llvm/IR/Function.h"
     28 #include "llvm/IR/IRBuilder.h"
     29 #include "llvm/IR/IntrinsicInst.h"
     30 #include "llvm/IR/Module.h"
     31 #include "llvm/IR/Type.h"
     32 #include "llvm/Support/CommandLine.h"
     33 #include "llvm/Support/Debug.h"
     34 #include "llvm/Support/raw_ostream.h"
     35 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
     36 #include "llvm/Transforms/Utils/Local.h"
     37 #include "llvm/Transforms/Utils/ModuleUtils.h"
     38 
     39 using namespace llvm;
     40 
     41 #define DEBUG_TYPE "esan"
     42 
     43 // The tool type must be just one of these ClTool* options, as the tools
     44 // cannot be combined due to shadow memory constraints.
     45 static cl::opt<bool>
     46     ClToolCacheFrag("esan-cache-frag", cl::init(false),
     47                     cl::desc("Detect data cache fragmentation"), cl::Hidden);
     48 static cl::opt<bool>
     49     ClToolWorkingSet("esan-working-set", cl::init(false),
     50                     cl::desc("Measure the working set size"), cl::Hidden);
     51 // Each new tool will get its own opt flag here.
     52 // These are converted to EfficiencySanitizerOptions for use
     53 // in the code.
     54 
     55 static cl::opt<bool> ClInstrumentLoadsAndStores(
     56     "esan-instrument-loads-and-stores", cl::init(true),
     57     cl::desc("Instrument loads and stores"), cl::Hidden);
     58 static cl::opt<bool> ClInstrumentMemIntrinsics(
     59     "esan-instrument-memintrinsics", cl::init(true),
     60     cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
     61 static cl::opt<bool> ClInstrumentFastpath(
     62     "esan-instrument-fastpath", cl::init(true),
     63     cl::desc("Instrument fastpath"), cl::Hidden);
     64 static cl::opt<bool> ClAuxFieldInfo(
     65     "esan-aux-field-info", cl::init(true),
     66     cl::desc("Generate binary with auxiliary struct field information"),
     67     cl::Hidden);
     68 
     69 // Experiments show that the performance difference can be 2x or more,
     70 // and accuracy loss is typically negligible, so we turn this on by default.
     71 static cl::opt<bool> ClAssumeIntraCacheLine(
     72     "esan-assume-intra-cache-line", cl::init(true),
     73     cl::desc("Assume each memory access touches just one cache line, for "
     74              "better performance but with a potential loss of accuracy."),
     75     cl::Hidden);
     76 
     77 STATISTIC(NumInstrumentedLoads, "Number of instrumented loads");
     78 STATISTIC(NumInstrumentedStores, "Number of instrumented stores");
     79 STATISTIC(NumFastpaths, "Number of instrumented fastpaths");
     80 STATISTIC(NumAccessesWithIrregularSize,
     81           "Number of accesses with a size outside our targeted callout sizes");
     82 STATISTIC(NumIgnoredStructs, "Number of ignored structs");
     83 STATISTIC(NumIgnoredGEPs, "Number of ignored GEP instructions");
     84 STATISTIC(NumInstrumentedGEPs, "Number of instrumented GEP instructions");
     85 STATISTIC(NumAssumedIntraCacheLine,
     86           "Number of accesses assumed to be intra-cache-line");
     87 
     88 static const uint64_t EsanCtorAndDtorPriority = 0;
     89 static const char *const EsanModuleCtorName = "esan.module_ctor";
     90 static const char *const EsanModuleDtorName = "esan.module_dtor";
     91 static const char *const EsanInitName = "__esan_init";
     92 static const char *const EsanExitName = "__esan_exit";
     93 
     94 // We need to specify the tool to the runtime earlier than
     95 // the ctor is called in some cases, so we set a global variable.
     96 static const char *const EsanWhichToolName = "__esan_which_tool";
     97 
     98 // We must keep these Shadow* constants consistent with the esan runtime.
     99 // FIXME: Try to place these shadow constants, the names of the __esan_*
    100 // interface functions, and the ToolType enum into a header shared between
    101 // llvm and compiler-rt.
    102 static const uint64_t ShadowMask = 0x00000fffffffffffull;
    103 static const uint64_t ShadowOffs[3] = { // Indexed by scale
    104   0x0000130000000000ull,
    105   0x0000220000000000ull,
    106   0x0000440000000000ull,
    107 };
    108 // This array is indexed by the ToolType enum.
    109 static const int ShadowScale[] = {
    110   0, // ESAN_None.
    111   2, // ESAN_CacheFrag: 4B:1B, so 4 to 1 == >>2.
    112   6, // ESAN_WorkingSet: 64B:1B, so 64 to 1 == >>6.
    113 };
    114 
    115 // MaxStructCounterNameSize is a soft size limit to avoid insanely long
    116 // names for those extremely large structs.
    117 static const unsigned MaxStructCounterNameSize = 512;
    118 
    119 namespace {
    120 
    121 static EfficiencySanitizerOptions
    122 OverrideOptionsFromCL(EfficiencySanitizerOptions Options) {
    123   if (ClToolCacheFrag)
    124     Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
    125   else if (ClToolWorkingSet)
    126     Options.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet;
    127 
    128   // Direct opt invocation with no params will have the default ESAN_None.
    129   // We run the default tool in that case.
    130   if (Options.ToolType == EfficiencySanitizerOptions::ESAN_None)
    131     Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
    132 
    133   return Options;
    134 }
    135 
    136 // Create a constant for Str so that we can pass it to the run-time lib.
    137 static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str,
    138                                                     bool AllowMerging) {
    139   Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
    140   // We use private linkage for module-local strings. If they can be merged
    141   // with another one, we set the unnamed_addr attribute.
    142   GlobalVariable *GV =
    143     new GlobalVariable(M, StrConst->getType(), true,
    144                        GlobalValue::PrivateLinkage, StrConst, "");
    145   if (AllowMerging)
    146     GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    147   GV->setAlignment(1);  // Strings may not be merged w/o setting align 1.
    148   return GV;
    149 }
    150 
/// EfficiencySanitizer: instrument each module to find performance issues.
class EfficiencySanitizer : public ModulePass {
public:
  EfficiencySanitizer(
      const EfficiencySanitizerOptions &Opts = EfficiencySanitizerOptions())
      : ModulePass(ID), Options(OverrideOptionsFromCL(Opts)) {}
  const char *getPassName() const override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnModule(Module &M) override;
  static char ID;

private:
  // Per-module setup: records Ctx/IntptrTy and emits the esan ctor/dtor
  // plus the __esan_which_tool global.
  bool initOnModule(Module &M);
  // Declares the __esan_* slowpath callouts and the mem* replacements.
  void initializeCallbacks(Module &M);
  // True for structs we cannot instrument (null or opaque).
  bool shouldIgnoreStructType(StructType *StructTy);
  // Builds a (mostly) unique name for StructTy's counter array.
  void createStructCounterName(
      StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr);
  // Emits per-field type-name/offset/size arrays for better reports.
  void createCacheFragAuxGV(
    Module &M, const DataLayout &DL, StructType *StructTy,
    GlobalVariable *&TypeNames, GlobalVariable *&Offsets, GlobalVariable *&Size);
  // Emits the per-module CacheFragInfo global passed to the runtime.
  GlobalVariable *createCacheFragInfoGV(Module &M, const DataLayout &DL,
                                        Constant *UnitName);
  // Builds the tool-specific i8* argument for __esan_init/__esan_exit.
  Constant *createEsanInitToolInfoArg(Module &M, const DataLayout &DL);
  // Emits esan.module_dtor, which calls __esan_exit(ToolInfoArg).
  void createDestructor(Module &M, Constant *ToolInfoArg);
  // Instruments one function; called from runOnModule for every function.
  bool runOnFunction(Function &F, Module &M);
  bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
  bool instrumentMemIntrinsic(MemIntrinsic *MI);
  bool instrumentGetElementPtr(Instruction *I, Module &M);
  bool insertCounterUpdate(Instruction *I, StructType *StructTy,
                           unsigned CounterIdx);
  // Field counters occupy slots [0, NumElements); the array counter is
  // the final slot.  These helpers encode that layout.
  unsigned getFieldCounterIdx(StructType *StructTy) {
    return 0;
  }
  unsigned getArrayCounterIdx(StructType *StructTy) {
    return StructTy->getNumElements();
  }
  unsigned getStructCounterSize(StructType *StructTy) {
    // The struct counter array includes:
    // - one counter for each struct field,
    // - one counter for the struct access within an array.
    return (StructTy->getNumElements()/*field*/ + 1/*array*/);
  }
  bool shouldIgnoreMemoryAccess(Instruction *I);
  int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
  Value *appToShadow(Value *Shadow, IRBuilder<> &IRB);
  bool instrumentFastpath(Instruction *I, const DataLayout &DL, bool IsStore,
                          Value *Addr, unsigned Alignment);
  // Each tool has its own fastpath routine:
  bool instrumentFastpathCacheFrag(Instruction *I, const DataLayout &DL,
                                   Value *Addr, unsigned Alignment);
  bool instrumentFastpathWorkingSet(Instruction *I, const DataLayout &DL,
                                    Value *Addr, unsigned Alignment);

  EfficiencySanitizerOptions Options;
  LLVMContext *Ctx;
  // Pointer-sized integer type for the target, set in initOnModule.
  Type *IntptrTy;
  // Our slowpath involves callouts to the runtime library.
  // Access sizes are powers of two: 1, 2, 4, 8, 16.
  static const size_t NumberOfAccessSizes = 5;
  Function *EsanAlignedLoad[NumberOfAccessSizes];
  Function *EsanAlignedStore[NumberOfAccessSizes];
  Function *EsanUnalignedLoad[NumberOfAccessSizes];
  Function *EsanUnalignedStore[NumberOfAccessSizes];
  // For irregular sizes of any alignment:
  Function *EsanUnalignedLoadN, *EsanUnalignedStoreN;
  Function *MemmoveFn, *MemcpyFn, *MemsetFn;
  Function *EsanCtorFunction;
  Function *EsanDtorFunction;
  // Remember the counter variable for each struct type to avoid
  // recomputing the variable name later during instrumentation.
  std::map<Type *, GlobalVariable *> StructTyMap;
};
} // namespace
    223 } // namespace
    224 
// Pass registration boilerplate.  The TargetLibraryInfo dependency is
// consumed in runOnFunction to mark library calls no-builtin.
char EfficiencySanitizer::ID = 0;
INITIALIZE_PASS_BEGIN(
    EfficiencySanitizer, "esan",
    "EfficiencySanitizer: finds performance issues.", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    EfficiencySanitizer, "esan",
    "EfficiencySanitizer: finds performance issues.", false, false)
    233 
    234 const char *EfficiencySanitizer::getPassName() const {
    235   return "EfficiencySanitizer";
    236 }
    237 
// Declare required analyses: TargetLibraryInfo is consulted when marking
// library calls as no-builtin during function instrumentation.
void EfficiencySanitizer::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetLibraryInfoWrapperPass>();
}
    241 
    242 ModulePass *
    243 llvm::createEfficiencySanitizerPass(const EfficiencySanitizerOptions &Options) {
    244   return new EfficiencySanitizer(Options);
    245 }
    246 
    247 void EfficiencySanitizer::initializeCallbacks(Module &M) {
    248   IRBuilder<> IRB(M.getContext());
    249   // Initialize the callbacks.
    250   for (size_t Idx = 0; Idx < NumberOfAccessSizes; ++Idx) {
    251     const unsigned ByteSize = 1U << Idx;
    252     std::string ByteSizeStr = utostr(ByteSize);
    253     // We'll inline the most common (i.e., aligned and frequent sizes)
    254     // load + store instrumentation: these callouts are for the slowpath.
    255     SmallString<32> AlignedLoadName("__esan_aligned_load" + ByteSizeStr);
    256     EsanAlignedLoad[Idx] =
    257         checkSanitizerInterfaceFunction(M.getOrInsertFunction(
    258             AlignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
    259     SmallString<32> AlignedStoreName("__esan_aligned_store" + ByteSizeStr);
    260     EsanAlignedStore[Idx] =
    261         checkSanitizerInterfaceFunction(M.getOrInsertFunction(
    262             AlignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
    263     SmallString<32> UnalignedLoadName("__esan_unaligned_load" + ByteSizeStr);
    264     EsanUnalignedLoad[Idx] =
    265         checkSanitizerInterfaceFunction(M.getOrInsertFunction(
    266             UnalignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
    267     SmallString<32> UnalignedStoreName("__esan_unaligned_store" + ByteSizeStr);
    268     EsanUnalignedStore[Idx] =
    269         checkSanitizerInterfaceFunction(M.getOrInsertFunction(
    270             UnalignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
    271   }
    272   EsanUnalignedLoadN = checkSanitizerInterfaceFunction(
    273       M.getOrInsertFunction("__esan_unaligned_loadN", IRB.getVoidTy(),
    274                             IRB.getInt8PtrTy(), IntptrTy, nullptr));
    275   EsanUnalignedStoreN = checkSanitizerInterfaceFunction(
    276       M.getOrInsertFunction("__esan_unaligned_storeN", IRB.getVoidTy(),
    277                             IRB.getInt8PtrTy(), IntptrTy, nullptr));
    278   MemmoveFn = checkSanitizerInterfaceFunction(
    279       M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
    280                             IRB.getInt8PtrTy(), IntptrTy, nullptr));
    281   MemcpyFn = checkSanitizerInterfaceFunction(
    282       M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
    283                             IRB.getInt8PtrTy(), IntptrTy, nullptr));
    284   MemsetFn = checkSanitizerInterfaceFunction(
    285       M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
    286                             IRB.getInt32Ty(), IntptrTy, nullptr));
    287 }
    288 
    289 bool EfficiencySanitizer::shouldIgnoreStructType(StructType *StructTy) {
    290   if (StructTy == nullptr || StructTy->isOpaque() /* no struct body */)
    291     return true;
    292   return false;
    293 }
    294 
    295 void EfficiencySanitizer::createStructCounterName(
    296     StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr) {
    297   // Append NumFields and field type ids to avoid struct conflicts
    298   // with the same name but different fields.
    299   if (StructTy->hasName())
    300     NameStr += StructTy->getName();
    301   else
    302     NameStr += "struct.anon";
    303   // We allow the actual size of the StructCounterName to be larger than
    304   // MaxStructCounterNameSize and append #NumFields and at least one
    305   // field type id.
    306   // Append #NumFields.
    307   NameStr += "#";
    308   Twine(StructTy->getNumElements()).toVector(NameStr);
    309   // Append struct field type ids in the reverse order.
    310   for (int i = StructTy->getNumElements() - 1; i >= 0; --i) {
    311     NameStr += "#";
    312     Twine(StructTy->getElementType(i)->getTypeID()).toVector(NameStr);
    313     if (NameStr.size() >= MaxStructCounterNameSize)
    314       break;
    315   }
    316   if (StructTy->isLiteral()) {
    317     // End with # for literal struct.
    318     NameStr += "#";
    319   }
    320 }
    321 
    322 // Create global variables with auxiliary information (e.g., struct field size,
    323 // offset, and type name) for better user report.
    324 void EfficiencySanitizer::createCacheFragAuxGV(
    325     Module &M, const DataLayout &DL, StructType *StructTy,
    326     GlobalVariable *&TypeName, GlobalVariable *&Offset,
    327     GlobalVariable *&Size) {
    328   auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
    329   auto *Int32Ty = Type::getInt32Ty(*Ctx);
    330   // FieldTypeName.
    331   auto *TypeNameArrayTy = ArrayType::get(Int8PtrTy, StructTy->getNumElements());
    332   TypeName = new GlobalVariable(M, TypeNameArrayTy, true,
    333                                  GlobalVariable::InternalLinkage, nullptr);
    334   SmallVector<Constant *, 16> TypeNameVec;
    335   // FieldOffset.
    336   auto *OffsetArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
    337   Offset = new GlobalVariable(M, OffsetArrayTy, true,
    338                               GlobalVariable::InternalLinkage, nullptr);
    339   SmallVector<Constant *, 16> OffsetVec;
    340   // FieldSize
    341   auto *SizeArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
    342   Size = new GlobalVariable(M, SizeArrayTy, true,
    343                             GlobalVariable::InternalLinkage, nullptr);
    344   SmallVector<Constant *, 16> SizeVec;
    345   for (unsigned i = 0; i < StructTy->getNumElements(); ++i) {
    346     Type *Ty = StructTy->getElementType(i);
    347     std::string Str;
    348     raw_string_ostream StrOS(Str);
    349     Ty->print(StrOS);
    350     TypeNameVec.push_back(
    351         ConstantExpr::getPointerCast(
    352             createPrivateGlobalForString(M, StrOS.str(), true),
    353             Int8PtrTy));
    354     OffsetVec.push_back(
    355         ConstantInt::get(Int32Ty,
    356                          DL.getStructLayout(StructTy)->getElementOffset(i)));
    357     SizeVec.push_back(ConstantInt::get(Int32Ty,
    358                                        DL.getTypeAllocSize(Ty)));
    359     }
    360   TypeName->setInitializer(ConstantArray::get(TypeNameArrayTy, TypeNameVec));
    361   Offset->setInitializer(ConstantArray::get(OffsetArrayTy, OffsetVec));
    362   Size->setInitializer(ConstantArray::get(SizeArrayTy, SizeVec));
    363 }
    364 
// Create the global variable for the cache-fragmentation tool.
// Builds one StructInfo per identified (non-ignored) struct type in M,
// wraps them in a CacheFragInfo, and returns the resulting global.
GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV(
    Module &M, const DataLayout &DL, Constant *UnitName) {
  assert(Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag);

  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  auto *Int8PtrPtrTy = Int8PtrTy->getPointerTo();
  auto *Int32Ty = Type::getInt32Ty(*Ctx);
  auto *Int32PtrTy = Type::getInt32PtrTy(*Ctx);
  auto *Int64Ty = Type::getInt64Ty(*Ctx);
  auto *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
  // This structure should be kept consistent with the StructInfo struct
  // in the runtime library.
  // struct StructInfo {
  //   const char *StructName;
  //   u32 Size;
  //   u32 NumFields;
  //   u32 *FieldOffset;           // auxiliary struct field info.
  //   u32 *FieldSize;             // auxiliary struct field info.
  //   const char **FieldTypeName; // auxiliary struct field info.
  //   u64 *FieldCounters;
  //   u64 *ArrayCounter;
  // };
  auto *StructInfoTy =
    StructType::get(Int8PtrTy, Int32Ty, Int32Ty, Int32PtrTy, Int32PtrTy,
                    Int8PtrPtrTy, Int64PtrTy, Int64PtrTy, nullptr);
  auto *StructInfoPtrTy = StructInfoTy->getPointerTo();
  // This structure should be kept consistent with the CacheFragInfo struct
  // in the runtime library.
  // struct CacheFragInfo {
  //   const char *UnitName;
  //   u32 NumStructs;
  //   StructInfo *Structs;
  // };
  auto *CacheFragInfoTy =
    StructType::get(Int8PtrTy, Int32Ty, StructInfoPtrTy, nullptr);

  std::vector<StructType *> Vec = M.getIdentifiedStructTypes();
  unsigned NumStructs = 0;
  SmallVector<Constant *, 16> Initializers;

  for (auto &StructTy : Vec) {
    // Opaque/null structs cannot be instrumented; count and skip them.
    if (shouldIgnoreStructType(StructTy)) {
      ++NumIgnoredStructs;
      continue;
    }
    ++NumStructs;

    // StructName.
    SmallString<MaxStructCounterNameSize> CounterNameStr;
    createStructCounterName(StructTy, CounterNameStr);
    GlobalVariable *StructCounterName = createPrivateGlobalForString(
        M, CounterNameStr, /*AllowMerging*/true);

    // Counters.
    // We create the counter array with StructCounterName and weak linkage
    // so that the structs with the same name and layout from different
    // compilation units will be merged into one.
    auto *CounterArrayTy = ArrayType::get(Int64Ty,
                                          getStructCounterSize(StructTy));
    GlobalVariable *Counters =
      new GlobalVariable(M, CounterArrayTy, false,
                         GlobalVariable::WeakAnyLinkage,
                         ConstantAggregateZero::get(CounterArrayTy),
                         CounterNameStr);

    // Remember the counter variable for each struct type.
    StructTyMap.insert(std::pair<Type *, GlobalVariable *>(StructTy, Counters));

    // We pass the field type name array, offset array, and size array to
    // the runtime for better reporting.
    GlobalVariable *TypeName = nullptr, *Offset = nullptr, *Size = nullptr;
    if (ClAuxFieldInfo)
      createCacheFragAuxGV(M, DL, StructTy, TypeName, Offset, Size);

    // GEP indices into the counter array: [0][field-base] and [0][array].
    Constant *FieldCounterIdx[2];
    FieldCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
    FieldCounterIdx[1] = ConstantInt::get(Int32Ty,
                                          getFieldCounterIdx(StructTy));
    Constant *ArrayCounterIdx[2];
    ArrayCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
    ArrayCounterIdx[1] = ConstantInt::get(Int32Ty,
                                          getArrayCounterIdx(StructTy));
    // Auxiliary arrays are null when ClAuxFieldInfo is off.
    Initializers.push_back(
        ConstantStruct::get(
            StructInfoTy,
            ConstantExpr::getPointerCast(StructCounterName, Int8PtrTy),
            ConstantInt::get(Int32Ty,
                             DL.getStructLayout(StructTy)->getSizeInBytes()),
            ConstantInt::get(Int32Ty, StructTy->getNumElements()),
            Offset == nullptr ? ConstantPointerNull::get(Int32PtrTy) :
                ConstantExpr::getPointerCast(Offset, Int32PtrTy),
            Size == nullptr ? ConstantPointerNull::get(Int32PtrTy) :
                ConstantExpr::getPointerCast(Size, Int32PtrTy),
            TypeName == nullptr ? ConstantPointerNull::get(Int8PtrPtrTy) :
                ConstantExpr::getPointerCast(TypeName, Int8PtrPtrTy),
            ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
                                           FieldCounterIdx),
            ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
                                           ArrayCounterIdx),
            nullptr));
  }
  // Structs.  A null pointer is passed when no struct was instrumented.
  Constant *StructInfo;
  if (NumStructs == 0) {
    StructInfo = ConstantPointerNull::get(StructInfoPtrTy);
  } else {
    auto *StructInfoArrayTy = ArrayType::get(StructInfoTy, NumStructs);
    StructInfo = ConstantExpr::getPointerCast(
        new GlobalVariable(M, StructInfoArrayTy, false,
                           GlobalVariable::InternalLinkage,
                           ConstantArray::get(StructInfoArrayTy, Initializers)),
        StructInfoPtrTy);
  }

  auto *CacheFragInfoGV = new GlobalVariable(
      M, CacheFragInfoTy, true, GlobalVariable::InternalLinkage,
      ConstantStruct::get(CacheFragInfoTy,
                          UnitName,
                          ConstantInt::get(Int32Ty, NumStructs),
                          StructInfo,
                          nullptr));
  return CacheFragInfoGV;
}
    489 
    490 // Create the tool-specific argument passed to EsanInit and EsanExit.
    491 Constant *EfficiencySanitizer::createEsanInitToolInfoArg(Module &M,
    492                                                          const DataLayout &DL) {
    493   // This structure contains tool-specific information about each compilation
    494   // unit (module) and is passed to the runtime library.
    495   GlobalVariable *ToolInfoGV = nullptr;
    496 
    497   auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
    498   // Compilation unit name.
    499   auto *UnitName = ConstantExpr::getPointerCast(
    500       createPrivateGlobalForString(M, M.getModuleIdentifier(), true),
    501       Int8PtrTy);
    502 
    503   // Create the tool-specific variable.
    504   if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag)
    505     ToolInfoGV = createCacheFragInfoGV(M, DL, UnitName);
    506 
    507   if (ToolInfoGV != nullptr)
    508     return ConstantExpr::getPointerCast(ToolInfoGV, Int8PtrTy);
    509 
    510   // Create the null pointer if no tool-specific variable created.
    511   return ConstantPointerNull::get(Int8PtrTy);
    512 }
    513 
    514 void EfficiencySanitizer::createDestructor(Module &M, Constant *ToolInfoArg) {
    515   PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
    516   EsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*Ctx),
    517                                                         false),
    518                                       GlobalValue::InternalLinkage,
    519                                       EsanModuleDtorName, &M);
    520   ReturnInst::Create(*Ctx, BasicBlock::Create(*Ctx, "", EsanDtorFunction));
    521   IRBuilder<> IRB_Dtor(EsanDtorFunction->getEntryBlock().getTerminator());
    522   Function *EsanExit = checkSanitizerInterfaceFunction(
    523       M.getOrInsertFunction(EsanExitName, IRB_Dtor.getVoidTy(),
    524                             Int8PtrTy, nullptr));
    525   EsanExit->setLinkage(Function::ExternalLinkage);
    526   IRB_Dtor.CreateCall(EsanExit, {ToolInfoArg});
    527   appendToGlobalDtors(M, EsanDtorFunction, EsanCtorAndDtorPriority);
    528 }
    529 
    530 bool EfficiencySanitizer::initOnModule(Module &M) {
    531   Ctx = &M.getContext();
    532   const DataLayout &DL = M.getDataLayout();
    533   IRBuilder<> IRB(M.getContext());
    534   IntegerType *OrdTy = IRB.getInt32Ty();
    535   PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
    536   IntptrTy = DL.getIntPtrType(M.getContext());
    537   // Create the variable passed to EsanInit and EsanExit.
    538   Constant *ToolInfoArg = createEsanInitToolInfoArg(M, DL);
    539   // Constructor
    540   // We specify the tool type both in the EsanWhichToolName global
    541   // and as an arg to the init routine as a sanity check.
    542   std::tie(EsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
    543       M, EsanModuleCtorName, EsanInitName, /*InitArgTypes=*/{OrdTy, Int8PtrTy},
    544       /*InitArgs=*/{
    545         ConstantInt::get(OrdTy, static_cast<int>(Options.ToolType)),
    546         ToolInfoArg});
    547   appendToGlobalCtors(M, EsanCtorFunction, EsanCtorAndDtorPriority);
    548 
    549   createDestructor(M, ToolInfoArg);
    550 
    551   new GlobalVariable(M, OrdTy, true,
    552                      GlobalValue::WeakAnyLinkage,
    553                      ConstantInt::get(OrdTy,
    554                                       static_cast<int>(Options.ToolType)),
    555                      EsanWhichToolName);
    556 
    557   return true;
    558 }
    559 
    560 Value *EfficiencySanitizer::appToShadow(Value *Shadow, IRBuilder<> &IRB) {
    561   // Shadow = ((App & Mask) + Offs) >> Scale
    562   Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, ShadowMask));
    563   uint64_t Offs;
    564   int Scale = ShadowScale[Options.ToolType];
    565   if (Scale <= 2)
    566     Offs = ShadowOffs[Scale];
    567   else
    568     Offs = ShadowOffs[0] << Scale;
    569   Shadow = IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Offs));
    570   if (Scale > 0)
    571     Shadow = IRB.CreateLShr(Shadow, Scale);
    572   return Shadow;
    573 }
    574 
    575 bool EfficiencySanitizer::shouldIgnoreMemoryAccess(Instruction *I) {
    576   if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    577     // We'd like to know about cache fragmentation in vtable accesses and
    578     // constant data references, so we do not currently ignore anything.
    579     return false;
    580   } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
    581     // TODO: the instrumentation disturbs the data layout on the stack, so we
    582     // may want to add an option to ignore stack references (if we can
    583     // distinguish them) to reduce overhead.
    584   }
    585   // TODO(bruening): future tools will be returning true for some cases.
    586   return false;
    587 }
    588 
    589 bool EfficiencySanitizer::runOnModule(Module &M) {
    590   bool Res = initOnModule(M);
    591   initializeCallbacks(M);
    592   for (auto &F : M) {
    593     Res |= runOnFunction(F, M);
    594   }
    595   return Res;
    596 }
    597 
    598 bool EfficiencySanitizer::runOnFunction(Function &F, Module &M) {
    599   // This is required to prevent instrumenting the call to __esan_init from
    600   // within the module constructor.
    601   if (&F == EsanCtorFunction)
    602     return false;
    603   SmallVector<Instruction *, 8> LoadsAndStores;
    604   SmallVector<Instruction *, 8> MemIntrinCalls;
    605   SmallVector<Instruction *, 8> GetElementPtrs;
    606   bool Res = false;
    607   const DataLayout &DL = M.getDataLayout();
    608   const TargetLibraryInfo *TLI =
    609       &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    610 
    611   for (auto &BB : F) {
    612     for (auto &Inst : BB) {
    613       if ((isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
    614            isa<AtomicRMWInst>(Inst) || isa<AtomicCmpXchgInst>(Inst)) &&
    615           !shouldIgnoreMemoryAccess(&Inst))
    616         LoadsAndStores.push_back(&Inst);
    617       else if (isa<MemIntrinsic>(Inst))
    618         MemIntrinCalls.push_back(&Inst);
    619       else if (isa<GetElementPtrInst>(Inst))
    620         GetElementPtrs.push_back(&Inst);
    621       else if (CallInst *CI = dyn_cast<CallInst>(&Inst))
    622         maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
    623     }
    624   }
    625 
    626   if (ClInstrumentLoadsAndStores) {
    627     for (auto Inst : LoadsAndStores) {
    628       Res |= instrumentLoadOrStore(Inst, DL);
    629     }
    630   }
    631 
    632   if (ClInstrumentMemIntrinsics) {
    633     for (auto Inst : MemIntrinCalls) {
    634       Res |= instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
    635     }
    636   }
    637 
    638   if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    639     for (auto Inst : GetElementPtrs) {
    640       Res |= instrumentGetElementPtr(Inst, M);
    641     }
    642   }
    643 
    644   return Res;
    645 }
    646 
// Instrument a single load, store, atomicrmw, or cmpxchg by inserting a call
// to the matching esan runtime entry point (or inline fastpath code) right
// before the instruction.  Returns true if instrumentation was inserted.
bool EfficiencySanitizer::instrumentLoadOrStore(Instruction *I,
                                                const DataLayout &DL) {
  IRBuilder<> IRB(I);
  bool IsStore;
  Value *Addr;
  unsigned Alignment;
  // Extract the address, alignment, and read/write direction per opcode.
  if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
    IsStore = false;
    Alignment = Load->getAlignment();
    Addr = Load->getPointerOperand();
  } else if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
    IsStore = true;
    Alignment = Store->getAlignment();
    Addr = Store->getPointerOperand();
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    // Atomics are instrumented as stores; alignment 0 is replaced with the
    // type's preferred alignment below.
    IsStore = true;
    Alignment = 0;
    Addr = RMW->getPointerOperand();
  } else if (AtomicCmpXchgInst *Xchg = dyn_cast<AtomicCmpXchgInst>(I)) {
    IsStore = true;
    Alignment = 0;
    Addr = Xchg->getPointerOperand();
  } else
    llvm_unreachable("Unsupported mem access type");

  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
  const uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
  Value *OnAccessFunc = nullptr;

  // Convert 0 to the default alignment.
  if (Alignment == 0)
    Alignment = DL.getPrefTypeAlignment(OrigTy);

  if (IsStore)
    NumInstrumentedStores++;
  else
    NumInstrumentedLoads++;
  int Idx = getMemoryAccessFuncIndex(Addr, DL);
  if (Idx < 0) {
    // Irregular sizes have no per-size entry point: call the generic "N"
    // variant that takes an explicit byte count.
    OnAccessFunc = IsStore ? EsanUnalignedStoreN : EsanUnalignedLoadN;
    IRB.CreateCall(OnAccessFunc,
                   {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                    ConstantInt::get(IntptrTy, TypeSizeBytes)});
  } else {
    // Prefer the tool's inlined fastpath when enabled and applicable.
    if (ClInstrumentFastpath &&
        instrumentFastpath(I, DL, IsStore, Addr, Alignment)) {
      NumFastpaths++;
      return true;
    }
    // An alignment that is a multiple of the access size (or unknown, 0)
    // selects the aligned entry point for this size index.
    if (Alignment == 0 || (Alignment % TypeSizeBytes) == 0)
      OnAccessFunc = IsStore ? EsanAlignedStore[Idx] : EsanAlignedLoad[Idx];
    else
      OnAccessFunc = IsStore ? EsanUnalignedStore[Idx] : EsanUnalignedLoad[Idx];
    IRB.CreateCall(OnAccessFunc,
                   IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
  }
  return true;
}
    705 
    706 // It's simplest to replace the memset/memmove/memcpy intrinsics with
    707 // calls that the runtime library intercepts.
    708 // Our pass is late enough that calls should not turn back into intrinsics.
    709 bool EfficiencySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
    710   IRBuilder<> IRB(MI);
    711   bool Res = false;
    712   if (isa<MemSetInst>(MI)) {
    713     IRB.CreateCall(
    714         MemsetFn,
    715         {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
    716          IRB.CreateIntCast(MI->getArgOperand(1), IRB.getInt32Ty(), false),
    717          IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
    718     MI->eraseFromParent();
    719     Res = true;
    720   } else if (isa<MemTransferInst>(MI)) {
    721     IRB.CreateCall(
    722         isa<MemCpyInst>(MI) ? MemcpyFn : MemmoveFn,
    723         {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
    724          IRB.CreatePointerCast(MI->getArgOperand(1), IRB.getInt8PtrTy()),
    725          IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
    726     MI->eraseFromParent();
    727     Res = true;
    728   } else
    729     llvm_unreachable("Unsupported mem intrinsic type");
    730   return Res;
    731 }
    732 
    733 bool EfficiencySanitizer::instrumentGetElementPtr(Instruction *I, Module &M) {
    734   GetElementPtrInst *GepInst = dyn_cast<GetElementPtrInst>(I);
    735   bool Res = false;
    736   if (GepInst == nullptr || GepInst->getNumIndices() == 1) {
    737     ++NumIgnoredGEPs;
    738     return false;
    739   }
    740   Type *SourceTy = GepInst->getSourceElementType();
    741   StructType *StructTy;
    742   ConstantInt *Idx;
    743   // Check if GEP calculates address from a struct array.
    744   if (isa<StructType>(SourceTy)) {
    745     StructTy = cast<StructType>(SourceTy);
    746     Idx = dyn_cast<ConstantInt>(GepInst->getOperand(1));
    747     if ((Idx == nullptr || Idx->getSExtValue() != 0) &&
    748         !shouldIgnoreStructType(StructTy) && StructTyMap.count(StructTy) != 0)
    749       Res |= insertCounterUpdate(I, StructTy, getArrayCounterIdx(StructTy));
    750   }
    751   // Iterate all (except the first and the last) idx within each GEP instruction
    752   // for possible nested struct field address calculation.
    753   for (unsigned i = 1; i < GepInst->getNumIndices(); ++i) {
    754     SmallVector<Value *, 8> IdxVec(GepInst->idx_begin(),
    755                                    GepInst->idx_begin() + i);
    756     Type *Ty = GetElementPtrInst::getIndexedType(SourceTy, IdxVec);
    757     unsigned CounterIdx = 0;
    758     if (isa<ArrayType>(Ty)) {
    759       ArrayType *ArrayTy = cast<ArrayType>(Ty);
    760       StructTy = dyn_cast<StructType>(ArrayTy->getElementType());
    761       if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
    762         continue;
    763       // The last counter for struct array access.
    764       CounterIdx = getArrayCounterIdx(StructTy);
    765     } else if (isa<StructType>(Ty)) {
    766       StructTy = cast<StructType>(Ty);
    767       if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
    768         continue;
    769       // Get the StructTy's subfield index.
    770       Idx = cast<ConstantInt>(GepInst->getOperand(i+1));
    771       assert(Idx->getSExtValue() >= 0 &&
    772              Idx->getSExtValue() < StructTy->getNumElements());
    773       CounterIdx = getFieldCounterIdx(StructTy) + Idx->getSExtValue();
    774     }
    775     Res |= insertCounterUpdate(I, StructTy, CounterIdx);
    776   }
    777   if (Res)
    778     ++NumInstrumentedGEPs;
    779   else
    780     ++NumIgnoredGEPs;
    781   return Res;
    782 }
    783 
    784 bool EfficiencySanitizer::insertCounterUpdate(Instruction *I,
    785                                               StructType *StructTy,
    786                                               unsigned CounterIdx) {
    787   GlobalVariable *CounterArray = StructTyMap[StructTy];
    788   if (CounterArray == nullptr)
    789     return false;
    790   IRBuilder<> IRB(I);
    791   Constant *Indices[2];
    792   // Xref http://llvm.org/docs/LangRef.html#i-getelementptr and
    793   // http://llvm.org/docs/GetElementPtr.html.
    794   // The first index of the GEP instruction steps through the first operand,
    795   // i.e., the array itself.
    796   Indices[0] = ConstantInt::get(IRB.getInt32Ty(), 0);
    797   // The second index is the index within the array.
    798   Indices[1] = ConstantInt::get(IRB.getInt32Ty(), CounterIdx);
    799   Constant *Counter =
    800     ConstantExpr::getGetElementPtr(
    801         ArrayType::get(IRB.getInt64Ty(), getStructCounterSize(StructTy)),
    802         CounterArray, Indices);
    803   Value *Load = IRB.CreateLoad(Counter);
    804   IRB.CreateStore(IRB.CreateAdd(Load, ConstantInt::get(IRB.getInt64Ty(), 1)),
    805                   Counter);
    806   return true;
    807 }
    808 
    809 int EfficiencySanitizer::getMemoryAccessFuncIndex(Value *Addr,
    810                                                   const DataLayout &DL) {
    811   Type *OrigPtrTy = Addr->getType();
    812   Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
    813   assert(OrigTy->isSized());
    814   // The size is always a multiple of 8.
    815   uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
    816   if (TypeSizeBytes != 1 && TypeSizeBytes != 2 && TypeSizeBytes != 4 &&
    817       TypeSizeBytes != 8 && TypeSizeBytes != 16) {
    818     // Irregular sizes do not have per-size call targets.
    819     NumAccessesWithIrregularSize++;
    820     return -1;
    821   }
    822   size_t Idx = countTrailingZeros(TypeSizeBytes);
    823   assert(Idx < NumberOfAccessSizes);
    824   return Idx;
    825 }
    826 
    827 bool EfficiencySanitizer::instrumentFastpath(Instruction *I,
    828                                              const DataLayout &DL, bool IsStore,
    829                                              Value *Addr, unsigned Alignment) {
    830   if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    831     return instrumentFastpathCacheFrag(I, DL, Addr, Alignment);
    832   } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
    833     return instrumentFastpathWorkingSet(I, DL, Addr, Alignment);
    834   }
    835   return false;
    836 }
    837 
    838 bool EfficiencySanitizer::instrumentFastpathCacheFrag(Instruction *I,
    839                                                       const DataLayout &DL,
    840                                                       Value *Addr,
    841                                                       unsigned Alignment) {
    842   // Do nothing.
    843   return true; // Return true to avoid slowpath instrumentation.
    844 }
    845 
// Working-set tool fastpath: emit inline code that sets the shadow bits for
// the single cache line this access touches.  Returns true when inline
// instrumentation was emitted (the caller then skips the slowpath call);
// false to fall back to the slowpath.
bool EfficiencySanitizer::instrumentFastpathWorkingSet(
    Instruction *I, const DataLayout &DL, Value *Addr, unsigned Alignment) {
  assert(ShadowScale[Options.ToolType] == 6); // The code below assumes this
  IRBuilder<> IRB(I);
  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
  const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy); // Size in bits.
  // Bail to the slowpath if the access might touch multiple cache lines.
  // An access aligned to its size is guaranteed to be intra-cache-line.
  // getMemoryAccessFuncIndex has already ruled out a size larger than 16
  // and thus larger than a cache line for platforms this tool targets
  // (and our shadow memory setup assumes 64-byte cache lines).
  assert(TypeSize <= 128);
  if (!(TypeSize == 8 ||
        (Alignment % (TypeSize / 8)) == 0)) {
    // Possibly line-straddling access: either optimistically assume it is
    // intra-line (and count how often we assumed), or defer to slowpath.
    if (ClAssumeIntraCacheLine)
      ++NumAssumedIntraCacheLine;
    else
      return false;
  }

  // We inline instrumentation to set the corresponding shadow bits for
  // each cache line touched by the application.  Here we handle a single
  // load or store where we've already ruled out the possibility that it
  // might touch more than one cache line and thus we simply update the
  // shadow memory for a single cache line.
  // Our shadow memory model is fine with races when manipulating shadow values.
  // We generate the following code:
  //
  //   const char BitMask = 0x81;
  //   char *ShadowAddr = appToShadow(AppAddr);
  //   if ((*ShadowAddr & BitMask) != BitMask)
  //     *ShadowAddr |= BitMask;
  //
  Value *AddrPtr = IRB.CreatePointerCast(Addr, IntptrTy);
  Value *ShadowPtr = appToShadow(AddrPtr, IRB);
  Type *ShadowTy = IntegerType::get(*Ctx, 8U);
  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
  // The bottom bit is used for the current sampling period's working set.
  // The top bit is used for the total working set.  We set both on each
  // memory access, if they are not already set.
  Value *ValueMask = ConstantInt::get(ShadowTy, 0x81); // 10000001B

  Value *OldValue = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
  // The AND and CMP will be turned into a TEST instruction by the compiler.
  Value *Cmp = IRB.CreateICmpNE(IRB.CreateAnd(OldValue, ValueMask), ValueMask);
  // The store is only executed when the bits are not already set, keeping
  // the common (already-marked) path read-only.
  TerminatorInst *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false);
  // FIXME: do I need to call SetCurrentDebugLocation?
  IRB.SetInsertPoint(CmpTerm);
  // We use OR to set the shadow bits to avoid corrupting the middle 6 bits,
  // which are used by the runtime library.
  Value *NewVal = IRB.CreateOr(OldValue, ValueMask);
  IRB.CreateStore(NewVal, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
  IRB.SetInsertPoint(I);

  return true;
}
    902