1 //===-- SpecialCaseList.h - special case list for sanitizers ----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 //===----------------------------------------------------------------------===// 8 // 9 // This is a utility class used to parse user-provided text files with 10 // "special case lists" for code sanitizers. Such files are used to 11 // define an "ABI list" for DataFlowSanitizer and blacklists for sanitizers 12 // like AddressSanitizer or UndefinedBehaviorSanitizer. 13 // 14 // Empty lines and lines starting with "#" are ignored. Sections are defined 15 // using a '[section_name]' header and can be used to specify sanitizers the 16 // entries below it apply to. Section names are regular expressions, and 17 // entries without a section header match all sections (e.g. an '[*]' header 18 // is assumed.) 19 // The remaining lines should have the form: 20 // prefix:wildcard_expression[=category] 21 // If category is not specified, it is assumed to be empty string. 22 // Definitions of "prefix" and "category" are sanitizer-specific. For example, 23 // sanitizer blacklists support prefixes "src", "fun" and "global". 24 // Wildcard expressions define, respectively, source files, functions or 25 // globals which shouldn't be instrumented. 26 // Examples of categories: 27 // "functional": used in DFSan to list functions with pure functional 28 // semantics. 29 // "init": used in ASan blacklist to disable initialization-order bugs 30 // detection for certain globals or source files. 31 // Full special case list file example: 32 // --- 33 // [address] 34 // # Blacklisted items: 35 // fun:*_ZN4base6subtle* 36 // global:*global_with_bad_access_or_initialization* 37 // global:*global_with_initialization_issues*=init 38 // type:*Namespace::ClassName*=init 39 // src:file_with_tricky_code.cc 40 // src:ignore-global-initializers-issues.cc=init 41 // 42 // [dataflow] 43 // # Functions with pure functional semantics: 44 // fun:cos=functional 45 // fun:sin=functional 46 // --- 47 // Note that the wild card is in fact an llvm::Regex, but * is automatically 48 // replaced with .* 49 // 50 //===----------------------------------------------------------------------===// 51 52 #ifndef LLVM_SUPPORT_SPECIALCASELIST_H 53 #define LLVM_SUPPORT_SPECIALCASELIST_H 54 55 #include "llvm/ADT/StringMap.h" 56 #include "llvm/ADT/StringSet.h" 57 #include "llvm/Support/Regex.h" 58 #include "llvm/Support/TrigramIndex.h" 59 #include <string> 60 #include <vector> 61 62 namespace llvm { 63 class MemoryBuffer; 64 class Regex; 65 class StringRef; 66 67 class SpecialCaseList { 68 public: 69 /// Parses the special case list entries from files. On failure, returns 70 /// 0 and writes an error message to string. 71 static std::unique_ptr<SpecialCaseList> 72 create(const std::vector<std::string> &Paths, std::string &Error); 73 /// Parses the special case list from a memory buffer. On failure, returns 74 /// 0 and writes an error message to string. 75 static std::unique_ptr<SpecialCaseList> create(const MemoryBuffer *MB, 76 std::string &Error); 77 /// Parses the special case list entries from files. On failure, reports a 78 /// fatal error. 79 static std::unique_ptr<SpecialCaseList> 80 createOrDie(const std::vector<std::string> &Paths); 81 82 ~SpecialCaseList(); 83 84 /// Returns true, if special case list contains a line 85 /// \code 86 /// @Prefix:<E>=@Category 87 /// \endcode 88 /// where @Query satisfies wildcard expression <E> in a given @Section. 89 bool inSection(StringRef Section, StringRef Prefix, StringRef Query, 90 StringRef Category = StringRef()) const; 91 92 protected: 93 // Implementations of the create*() functions that can also be used by derived 94 // classes. 95 bool createInternal(const std::vector<std::string> &Paths, 96 std::string &Error); 97 bool createInternal(const MemoryBuffer *MB, std::string &Error); 98 99 SpecialCaseList(SpecialCaseList const &) = delete; 100 SpecialCaseList &operator=(SpecialCaseList const &) = delete; 101 102 /// Represents a set of regular expressions. Regular expressions which are 103 /// "literal" (i.e. no regex metacharacters) are stored in Strings, while all 104 /// others are represented as a single pipe-separated regex in RegEx. The 105 /// reason for doing so is efficiency; StringSet is much faster at matching 106 /// literal strings than Regex. 107 class Matcher { 108 public: 109 bool insert(std::string Regexp, std::string &REError); 110 void compile(); 111 bool match(StringRef Query) const; 112 113 private: 114 StringSet<> Strings; 115 TrigramIndex Trigrams; 116 std::unique_ptr<Regex> RegEx; 117 std::string UncompiledRegEx; 118 }; 119 120 using SectionEntries = StringMap<StringMap<Matcher>>; 121 122 struct Section { 123 Section(std::unique_ptr<Matcher> M) : SectionMatcher(std::move(M)){}; 124 125 std::unique_ptr<Matcher> SectionMatcher; 126 SectionEntries Entries; 127 }; 128 129 std::vector<Section> Sections; 130 bool IsCompiled; 131 132 SpecialCaseList(); 133 /// Parses just-constructed SpecialCaseList entries from a memory buffer. 134 bool parse(const MemoryBuffer *MB, StringMap<size_t> &SectionsMap, 135 std::string &Error); 136 /// compile() should be called once, after parsing all the memory buffers. 137 void compile(); 138 139 // Helper method for derived classes to search by Prefix, Query, and Category 140 // once they have already resolved a section entry. 141 bool inSection(const SectionEntries &Entries, StringRef Prefix, 142 StringRef Query, StringRef Category) const; 143 }; 144 145 } // namespace llvm 146 147 #endif // LLVM_SUPPORT_SPECIALCASELIST_H 148 149