Home | History | Annotate | Download | only in Support
      1 //===-- SpecialCaseList.h - special case list for sanitizers ----*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This is a utility class used to parse user-provided text files with
     10 // "special case lists" for code sanitizers. Such files are used to
     11 // define an "ABI list" for DataFlowSanitizer and blacklists for sanitizers
     12 // like AddressSanitizer or UndefinedBehaviorSanitizer.
     13 //
     14 // Empty lines and lines starting with "#" are ignored. Sections are defined
     15 // using a '[section_name]' header and can be used to specify sanitizers the
     16 // entries below it apply to. Section names are regular expressions, and
     17 // entries without a section header match all sections (i.e. an '[*]' header
     18 // is assumed).
     19 // The remaining lines should have the form:
     20 //   prefix:wildcard_expression[=category]
     21 // If category is not specified, it is assumed to be empty string.
     22 // Definitions of "prefix" and "category" are sanitizer-specific. For example,
     23 // sanitizer blacklists support prefixes "src", "fun" and "global".
     24 // Wildcard expressions define, respectively, source files, functions or
     25 // globals which shouldn't be instrumented.
     26 // Examples of categories:
     27 //   "functional": used in DFSan to list functions with pure functional
     28 //                 semantics.
     29 //   "init": used in ASan blacklist to disable initialization-order bugs
     30 //           detection for certain globals or source files.
     31 // Full special case list file example:
     32 // ---
     33 // [address]
     34 // # Blacklisted items:
     35 // fun:*_ZN4base6subtle*
     36 // global:*global_with_bad_access_or_initialization*
     37 // global:*global_with_initialization_issues*=init
     38 // type:*Namespace::ClassName*=init
     39 // src:file_with_tricky_code.cc
     40 // src:ignore-global-initializers-issues.cc=init
     41 //
     42 // [dataflow]
     43 // # Functions with pure functional semantics:
     44 // fun:cos=functional
     45 // fun:sin=functional
     46 // ---
     47 // Note that the wild card is in fact an llvm::Regex, but * is automatically
     48 // replaced with .*
     49 //
     50 //===----------------------------------------------------------------------===//
     51 
     52 #ifndef LLVM_SUPPORT_SPECIALCASELIST_H
     53 #define LLVM_SUPPORT_SPECIALCASELIST_H
     54 
     55 #include "llvm/ADT/StringMap.h"
     56 #include "llvm/ADT/StringSet.h"
     57 #include "llvm/Support/Regex.h"
     58 #include "llvm/Support/TrigramIndex.h"
     59 #include <string>
     60 #include <vector>
     61 
     62 namespace llvm {
     63 class MemoryBuffer;
     64 class Regex;
     65 class StringRef;
     66 
     67 class SpecialCaseList {
     68 public:
     69   /// Parses the special case list entries from files. On failure, returns
     70   /// 0 and writes an error message to string.
     71   static std::unique_ptr<SpecialCaseList>
     72   create(const std::vector<std::string> &Paths, std::string &Error);
     73   /// Parses the special case list from a memory buffer. On failure, returns
     74   /// 0 and writes an error message to string.
     75   static std::unique_ptr<SpecialCaseList> create(const MemoryBuffer *MB,
     76                                                  std::string &Error);
     77   /// Parses the special case list entries from files. On failure, reports a
     78   /// fatal error.
     79   static std::unique_ptr<SpecialCaseList>
     80   createOrDie(const std::vector<std::string> &Paths);
     81 
     82   ~SpecialCaseList();
     83 
     84   /// Returns true, if special case list contains a line
     85   /// \code
     86   ///   @Prefix:<E>=@Category
     87   /// \endcode
     88   /// where @Query satisfies wildcard expression <E> in a given @Section.
     89   bool inSection(StringRef Section, StringRef Prefix, StringRef Query,
     90                  StringRef Category = StringRef()) const;
     91 
     92 protected:
     93   // Implementations of the create*() functions that can also be used by derived
     94   // classes.
     95   bool createInternal(const std::vector<std::string> &Paths,
     96                       std::string &Error);
     97   bool createInternal(const MemoryBuffer *MB, std::string &Error);
     98 
     99   SpecialCaseList(SpecialCaseList const &) = delete;
    100   SpecialCaseList &operator=(SpecialCaseList const &) = delete;
    101 
    102   /// Represents a set of regular expressions.  Regular expressions which are
    103   /// "literal" (i.e. no regex metacharacters) are stored in Strings, while all
    104   /// others are represented as a single pipe-separated regex in RegEx.  The
    105   /// reason for doing so is efficiency; StringSet is much faster at matching
    106   /// literal strings than Regex.
    107   class Matcher {
    108   public:
    109     bool insert(std::string Regexp, std::string &REError);
    110     void compile();
    111     bool match(StringRef Query) const;
    112 
    113   private:
    114     StringSet<> Strings;
    115     TrigramIndex Trigrams;
    116     std::unique_ptr<Regex> RegEx;
    117     std::string UncompiledRegEx;
    118   };
    119 
    120   using SectionEntries = StringMap<StringMap<Matcher>>;
    121 
    122   struct Section {
    123     Section(std::unique_ptr<Matcher> M) : SectionMatcher(std::move(M)){};
    124 
    125     std::unique_ptr<Matcher> SectionMatcher;
    126     SectionEntries Entries;
    127   };
    128 
    129   std::vector<Section> Sections;
    130   bool IsCompiled;
    131 
    132   SpecialCaseList();
    133   /// Parses just-constructed SpecialCaseList entries from a memory buffer.
    134   bool parse(const MemoryBuffer *MB, StringMap<size_t> &SectionsMap,
    135              std::string &Error);
    136   /// compile() should be called once, after parsing all the memory buffers.
    137   void compile();
    138 
    139   // Helper method for derived classes to search by Prefix, Query, and Category
    140   // once they have already resolved a section entry.
    141   bool inSection(const SectionEntries &Entries, StringRef Prefix,
    142                  StringRef Query, StringRef Category) const;
    143 };
    144 
    145 }  // namespace llvm
    146 
    147 #endif  // LLVM_SUPPORT_SPECIALCASELIST_H
    148 
    149