1 //===-- SpecialCaseList.cpp - special case list for sanitizers ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This is a utility class for instrumentation passes (like AddressSanitizer 11 // or ThreadSanitizer) to avoid instrumenting some functions or global 12 // variables, or to instrument some functions or global variables in a specific 13 // way, based on a user-supplied list. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Transforms/Utils/SpecialCaseList.h" 18 #include "llvm/ADT/OwningPtr.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/StringExtras.h" 22 #include "llvm/ADT/StringSet.h" 23 #include "llvm/IR/DerivedTypes.h" 24 #include "llvm/IR/Function.h" 25 #include "llvm/IR/GlobalVariable.h" 26 #include "llvm/IR/Module.h" 27 #include "llvm/Support/MemoryBuffer.h" 28 #include "llvm/Support/Regex.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include "llvm/Support/system_error.h" 31 #include <string> 32 #include <utility> 33 34 namespace llvm { 35 36 /// Represents a set of regular expressions. Regular expressions which are 37 /// "literal" (i.e. no regex metacharacters) are stored in Strings, while all 38 /// others are represented as a single pipe-separated regex in RegEx. The 39 /// reason for doing so is efficiency; StringSet is much faster at matching 40 /// literal strings than Regex. 41 struct SpecialCaseList::Entry { 42 StringSet<> Strings; 43 Regex *RegEx; 44 45 Entry() : RegEx(0) {} 46 47 bool match(StringRef Query) const { 48 return Strings.count(Query) || (RegEx && RegEx->match(Query)); 49 } 50 }; 51 52 SpecialCaseList::SpecialCaseList(const StringRef Path) { 53 // Validate and open blacklist file. 54 if (Path.empty()) return; 55 OwningPtr<MemoryBuffer> File; 56 if (error_code EC = MemoryBuffer::getFile(Path, File)) { 57 report_fatal_error("Can't open blacklist file: " + Path + ": " + 58 EC.message()); 59 } 60 61 init(File.get()); 62 } 63 64 SpecialCaseList::SpecialCaseList(const MemoryBuffer *MB) { 65 init(MB); 66 } 67 68 void SpecialCaseList::init(const MemoryBuffer *MB) { 69 // Iterate through each line in the blacklist file. 70 SmallVector<StringRef, 16> Lines; 71 SplitString(MB->getBuffer(), Lines, "\n\r"); 72 StringMap<StringMap<std::string> > Regexps; 73 for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end(); 74 I != E; ++I) { 75 // Ignore empty lines and lines starting with "#" 76 if (I->empty() || I->startswith("#")) 77 continue; 78 // Get our prefix and unparsed regexp. 79 std::pair<StringRef, StringRef> SplitLine = I->split(":"); 80 StringRef Prefix = SplitLine.first; 81 if (SplitLine.second.empty()) { 82 // Missing ':' in the line. 83 report_fatal_error("malformed blacklist line: " + SplitLine.first); 84 } 85 86 std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("="); 87 std::string Regexp = SplitRegexp.first; 88 StringRef Category = SplitRegexp.second; 89 90 // Backwards compatibility. 91 if (Prefix == "global-init") { 92 Prefix = "global"; 93 Category = "init"; 94 } else if (Prefix == "global-init-type") { 95 Prefix = "type"; 96 Category = "init"; 97 } else if (Prefix == "global-init-src") { 98 Prefix = "src"; 99 Category = "init"; 100 } 101 102 // See if we can store Regexp in Strings. 103 if (Regex::isLiteralERE(Regexp)) { 104 Entries[Prefix][Category].Strings.insert(Regexp); 105 continue; 106 } 107 108 // Replace * with .* 109 for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos; 110 pos += strlen(".*")) { 111 Regexp.replace(pos, strlen("*"), ".*"); 112 } 113 114 // Check that the regexp is valid. 115 Regex CheckRE(Regexp); 116 std::string Error; 117 if (!CheckRE.isValid(Error)) { 118 report_fatal_error("malformed blacklist regex: " + SplitLine.second + 119 ": " + Error); 120 } 121 122 // Add this regexp into the proper group by its prefix. 123 if (!Regexps[Prefix][Category].empty()) 124 Regexps[Prefix][Category] += "|"; 125 Regexps[Prefix][Category] += "^" + Regexp + "$"; 126 } 127 128 // Iterate through each of the prefixes, and create Regexs for them. 129 for (StringMap<StringMap<std::string> >::const_iterator I = Regexps.begin(), 130 E = Regexps.end(); 131 I != E; ++I) { 132 for (StringMap<std::string>::const_iterator II = I->second.begin(), 133 IE = I->second.end(); 134 II != IE; ++II) { 135 Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue()); 136 } 137 } 138 } 139 140 SpecialCaseList::~SpecialCaseList() { 141 for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(), 142 E = Entries.end(); 143 I != E; ++I) { 144 for (StringMap<Entry>::const_iterator II = I->second.begin(), 145 IE = I->second.end(); 146 II != IE; ++II) { 147 delete II->second.RegEx; 148 } 149 } 150 } 151 152 bool SpecialCaseList::findCategory(const Function &F, 153 StringRef &Category) const { 154 return findCategory(*F.getParent(), Category) || 155 findCategory("fun", F.getName(), Category); 156 } 157 158 bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const { 159 return isIn(*F.getParent(), Category) || 160 inSectionCategory("fun", F.getName(), Category); 161 } 162 163 static StringRef GetGVTypeString(const GlobalVariable &G) { 164 // Types of GlobalVariables are always pointer types. 165 Type *GType = G.getType()->getElementType(); 166 // For now we support blacklisting struct types only. 167 if (StructType *SGType = dyn_cast<StructType>(GType)) { 168 if (!SGType->isLiteral()) 169 return SGType->getName(); 170 } 171 return "<unknown type>"; 172 } 173 174 bool SpecialCaseList::findCategory(const GlobalVariable &G, 175 StringRef &Category) const { 176 return findCategory(*G.getParent(), Category) || 177 findCategory("global", G.getName(), Category) || 178 findCategory("type", GetGVTypeString(G), Category); 179 } 180 181 bool SpecialCaseList::isIn(const GlobalVariable &G, 182 const StringRef Category) const { 183 return isIn(*G.getParent(), Category) || 184 inSectionCategory("global", G.getName(), Category) || 185 inSectionCategory("type", GetGVTypeString(G), Category); 186 } 187 188 bool SpecialCaseList::findCategory(const Module &M, StringRef &Category) const { 189 return findCategory("src", M.getModuleIdentifier(), Category); 190 } 191 192 bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const { 193 return inSectionCategory("src", M.getModuleIdentifier(), Category); 194 } 195 196 bool SpecialCaseList::findCategory(const StringRef Section, 197 const StringRef Query, 198 StringRef &Category) const { 199 StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section); 200 if (I == Entries.end()) return false; 201 202 for (StringMap<Entry>::const_iterator II = I->second.begin(), 203 IE = I->second.end(); 204 II != IE; ++II) { 205 if (II->getValue().match(Query)) { 206 Category = II->first(); 207 return true; 208 } 209 } 210 211 return false; 212 } 213 214 bool SpecialCaseList::inSectionCategory(const StringRef Section, 215 const StringRef Query, 216 const StringRef Category) const { 217 StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section); 218 if (I == Entries.end()) return false; 219 StringMap<Entry>::const_iterator II = I->second.find(Category); 220 if (II == I->second.end()) return false; 221 222 return II->getValue().match(Query); 223 } 224 225 } // namespace llvm 226