//===--- Parser.h - Matcher expression parser -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Simple matcher expression parser.
///
/// The parser understands matcher expressions of the form:
///   MatcherName(Arg0, Arg1, ..., ArgN)
/// as well as simple types like strings.
/// The parser does not know how to process the matchers. It delegates this task
/// to a Sema object received as an argument.
///
/// \code
/// Grammar for the expressions supported:
/// <Expression>        := <Literal> | <NamedValue> | <MatcherExpression>
/// <Literal>           := <StringLiteral> | <Boolean> | <Double> | <Unsigned>
/// <StringLiteral>     := "quoted string"
/// <Boolean>           := true | false
/// <Double>            := [0-9]+.[0-9]* | [0-9]+.[0-9]*[eE][-+]?[0-9]+
/// <Unsigned>          := [0-9]+
/// <NamedValue>        := <Identifier>
/// <MatcherExpression> := <Identifier>(<ArgumentList>) |
///                        <Identifier>(<ArgumentList>).bind(<StringLiteral>)
/// <Identifier>        := [a-zA-Z]+
/// <ArgumentList>      := <Expression> | <Expression>,<ArgumentList>
/// \endcode
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H
#define LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H

#include "clang/ASTMatchers/Dynamic/Diagnostics.h"
#include "clang/ASTMatchers/Dynamic/Registry.h"
#include "clang/ASTMatchers/Dynamic/VariantValue.h"
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include <utility>
#include <vector>

namespace clang {
namespace ast_matchers {
namespace dynamic {

/// \brief Matcher expression parser.
52 class Parser { 53 public: 54 /// \brief Interface to connect the parser with the registry and more. 55 /// 56 /// The parser uses the Sema instance passed into 57 /// parseMatcherExpression() to handle all matcher tokens. The simplest 58 /// processor implementation would simply call into the registry to create 59 /// the matchers. 60 /// However, a more complex processor might decide to intercept the matcher 61 /// creation and do some extra work. For example, it could apply some 62 /// transformation to the matcher by adding some id() nodes, or could detect 63 /// specific matcher nodes for more efficient lookup. 64 class Sema { 65 public: 66 virtual ~Sema(); 67 68 /// \brief Process a matcher expression. 69 /// 70 /// All the arguments passed here have already been processed. 71 /// 72 /// \param Ctor A matcher constructor looked up by lookupMatcherCtor. 73 /// 74 /// \param NameRange The location of the name in the matcher source. 75 /// Useful for error reporting. 76 /// 77 /// \param BindID The ID to use to bind the matcher, or a null \c StringRef 78 /// if no ID is specified. 79 /// 80 /// \param Args The argument list for the matcher. 81 /// 82 /// \return The matcher objects constructed by the processor, or a null 83 /// matcher if an error occurred. In that case, \c Error will contain a 84 /// description of the error. 85 virtual VariantMatcher actOnMatcherExpression(MatcherCtor Ctor, 86 SourceRange NameRange, 87 StringRef BindID, 88 ArrayRef<ParserValue> Args, 89 Diagnostics *Error) = 0; 90 91 /// \brief Look up a matcher by name. 92 /// 93 /// \param MatcherName The matcher name found by the parser. 94 /// 95 /// \return The matcher constructor, or Optional<MatcherCtor>() if not 96 /// found. 97 virtual llvm::Optional<MatcherCtor> 98 lookupMatcherCtor(StringRef MatcherName) = 0; 99 100 /// \brief Compute the list of completion types for \p Context. 
101 /// 102 /// Each element of \p Context represents a matcher invocation, going from 103 /// outermost to innermost. Elements are pairs consisting of a reference to 104 /// the matcher constructor and the index of the next element in the 105 /// argument list of that matcher (or for the last element, the index of 106 /// the completion point in the argument list). An empty list requests 107 /// completion for the root matcher. 108 virtual std::vector<ArgKind> getAcceptedCompletionTypes( 109 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context); 110 111 /// \brief Compute the list of completions that match any of 112 /// \p AcceptedTypes. 113 /// 114 /// \param AcceptedTypes All types accepted for this completion. 115 /// 116 /// \return All completions for the specified types. 117 /// Completions should be valid when used in \c lookupMatcherCtor(). 118 /// The matcher constructed from the return of \c lookupMatcherCtor() 119 /// should be convertible to some type in \p AcceptedTypes. 120 virtual std::vector<MatcherCompletion> 121 getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes); 122 }; 123 124 /// \brief Sema implementation that uses the matcher registry to process the 125 /// tokens. 126 class RegistrySema : public Parser::Sema { 127 public: 128 ~RegistrySema() override; 129 130 llvm::Optional<MatcherCtor> 131 lookupMatcherCtor(StringRef MatcherName) override; 132 133 VariantMatcher actOnMatcherExpression(MatcherCtor Ctor, 134 SourceRange NameRange, 135 StringRef BindID, 136 ArrayRef<ParserValue> Args, 137 Diagnostics *Error) override; 138 139 std::vector<ArgKind> getAcceptedCompletionTypes( 140 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) override; 141 142 std::vector<MatcherCompletion> 143 getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) override; 144 }; 145 146 typedef llvm::StringMap<VariantValue> NamedValueMap; 147 148 /// \brief Parse a matcher expression. 
149 /// 150 /// \param MatcherCode The matcher expression to parse. 151 /// 152 /// \param S The Sema instance that will help the parser 153 /// construct the matchers. If null, it uses the default registry. 154 /// 155 /// \param NamedValues A map of precomputed named values. This provides 156 /// the dictionary for the <NamedValue> rule of the grammar. 157 /// If null, it is ignored. 158 /// 159 /// \return The matcher object constructed by the processor, or an empty 160 /// Optional if an error occurred. In that case, \c Error will contain a 161 /// description of the error. 162 /// The caller takes ownership of the DynTypedMatcher object returned. 163 static llvm::Optional<DynTypedMatcher> 164 parseMatcherExpression(StringRef MatcherCode, Sema *S, 165 const NamedValueMap *NamedValues, 166 Diagnostics *Error); 167 static llvm::Optional<DynTypedMatcher> 168 parseMatcherExpression(StringRef MatcherCode, Sema *S, 169 Diagnostics *Error) { 170 return parseMatcherExpression(MatcherCode, S, nullptr, Error); 171 } 172 static llvm::Optional<DynTypedMatcher> 173 parseMatcherExpression(StringRef MatcherCode, Diagnostics *Error) { 174 return parseMatcherExpression(MatcherCode, nullptr, Error); 175 } 176 177 /// \brief Parse an expression. 178 /// 179 /// Parses any expression supported by this parser. In general, the 180 /// \c parseMatcherExpression function is a better approach to get a matcher 181 /// object. 182 /// 183 /// \param S The Sema instance that will help the parser 184 /// construct the matchers. If null, it uses the default registry. 185 /// 186 /// \param NamedValues A map of precomputed named values. This provides 187 /// the dictionary for the <NamedValue> rule of the grammar. 188 /// If null, it is ignored. 
189 static bool parseExpression(StringRef Code, Sema *S, 190 const NamedValueMap *NamedValues, 191 VariantValue *Value, Diagnostics *Error); 192 static bool parseExpression(StringRef Code, Sema *S, 193 VariantValue *Value, Diagnostics *Error) { 194 return parseExpression(Code, S, nullptr, Value, Error); 195 } 196 static bool parseExpression(StringRef Code, VariantValue *Value, 197 Diagnostics *Error) { 198 return parseExpression(Code, nullptr, Value, Error); 199 } 200 201 /// \brief Complete an expression at the given offset. 202 /// 203 /// \param S The Sema instance that will help the parser 204 /// construct the matchers. If null, it uses the default registry. 205 /// 206 /// \param NamedValues A map of precomputed named values. This provides 207 /// the dictionary for the <NamedValue> rule of the grammar. 208 /// If null, it is ignored. 209 /// 210 /// \return The list of completions, which may be empty if there are no 211 /// available completions or if an error occurred. 212 static std::vector<MatcherCompletion> 213 completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S, 214 const NamedValueMap *NamedValues); 215 static std::vector<MatcherCompletion> 216 completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S) { 217 return completeExpression(Code, CompletionOffset, S, nullptr); 218 } 219 static std::vector<MatcherCompletion> 220 completeExpression(StringRef Code, unsigned CompletionOffset) { 221 return completeExpression(Code, CompletionOffset, nullptr); 222 } 223 224 private: 225 class CodeTokenizer; 226 struct ScopedContextEntry; 227 struct TokenInfo; 228 229 Parser(CodeTokenizer *Tokenizer, Sema *S, 230 const NamedValueMap *NamedValues, 231 Diagnostics *Error); 232 233 bool parseExpressionImpl(VariantValue *Value); 234 bool parseMatcherExpressionImpl(const TokenInfo &NameToken, 235 VariantValue *Value); 236 bool parseIdentifierPrefixImpl(VariantValue *Value); 237 238 void addCompletion(const TokenInfo &CompToken, 239 const 
MatcherCompletion &Completion); 240 void addExpressionCompletions(); 241 242 std::vector<MatcherCompletion> 243 getNamedValueCompletions(ArrayRef<ArgKind> AcceptedTypes); 244 245 CodeTokenizer *const Tokenizer; 246 Sema *const S; 247 const NamedValueMap *const NamedValues; 248 Diagnostics *const Error; 249 250 typedef std::vector<std::pair<MatcherCtor, unsigned> > ContextStackTy; 251 ContextStackTy ContextStack; 252 std::vector<MatcherCompletion> Completions; 253 }; 254 255 } // namespace dynamic 256 } // namespace ast_matchers 257 } // namespace clang 258 259 #endif // LLVM_CLANG_AST_MATCHERS_DYNAMIC_PARSER_H 260