1 //===--- Parser.cpp - Matcher expression parser -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief Recursive parser implementation for the matcher expression grammar. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/ASTMatchers/Dynamic/Parser.h" 16 #include "clang/ASTMatchers/Dynamic/Registry.h" 17 #include "clang/Basic/CharInfo.h" 18 #include "llvm/ADT/Optional.h" 19 #include "llvm/ADT/Twine.h" 20 #include <string> 21 #include <vector> 22 23 namespace clang { 24 namespace ast_matchers { 25 namespace dynamic { 26 27 /// \brief Simple structure to hold information for one token from the parser. 28 struct Parser::TokenInfo { 29 /// \brief Different possible tokens. 30 enum TokenKind { 31 TK_Eof, 32 TK_OpenParen, 33 TK_CloseParen, 34 TK_Comma, 35 TK_Period, 36 TK_Literal, 37 TK_Ident, 38 TK_InvalidChar, 39 TK_Error, 40 TK_CodeCompletion 41 }; 42 43 /// \brief Some known identifiers. 44 static const char* const ID_Bind; 45 46 TokenInfo() : Text(), Kind(TK_Eof), Range(), Value() {} 47 48 StringRef Text; 49 TokenKind Kind; 50 SourceRange Range; 51 VariantValue Value; 52 }; 53 54 const char* const Parser::TokenInfo::ID_Bind = "bind"; 55 56 /// \brief Simple tokenizer for the parser. 57 class Parser::CodeTokenizer { 58 public: 59 explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error) 60 : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error), 61 CodeCompletionLocation(nullptr) { 62 NextToken = getNextToken(); 63 } 64 65 CodeTokenizer(StringRef MatcherCode, Diagnostics *Error, 66 unsigned CodeCompletionOffset) 67 : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error), 68 CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) { 69 NextToken = getNextToken(); 70 } 71 72 /// \brief Returns but doesn't consume the next token. 73 const TokenInfo &peekNextToken() const { return NextToken; } 74 75 /// \brief Consumes and returns the next token. 76 TokenInfo consumeNextToken() { 77 TokenInfo ThisToken = NextToken; 78 NextToken = getNextToken(); 79 return ThisToken; 80 } 81 82 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; } 83 84 private: 85 TokenInfo getNextToken() { 86 consumeWhitespace(); 87 TokenInfo Result; 88 Result.Range.Start = currentLocation(); 89 90 if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) { 91 Result.Kind = TokenInfo::TK_CodeCompletion; 92 Result.Text = StringRef(CodeCompletionLocation, 0); 93 CodeCompletionLocation = nullptr; 94 return Result; 95 } 96 97 if (Code.empty()) { 98 Result.Kind = TokenInfo::TK_Eof; 99 Result.Text = ""; 100 return Result; 101 } 102 103 switch (Code[0]) { 104 case ',': 105 Result.Kind = TokenInfo::TK_Comma; 106 Result.Text = Code.substr(0, 1); 107 Code = Code.drop_front(); 108 break; 109 case '.': 110 Result.Kind = TokenInfo::TK_Period; 111 Result.Text = Code.substr(0, 1); 112 Code = Code.drop_front(); 113 break; 114 case '(': 115 Result.Kind = TokenInfo::TK_OpenParen; 116 Result.Text = Code.substr(0, 1); 117 Code = Code.drop_front(); 118 break; 119 case ')': 120 Result.Kind = TokenInfo::TK_CloseParen; 121 Result.Text = Code.substr(0, 1); 122 Code = Code.drop_front(); 123 break; 124 125 case '"': 126 case '\'': 127 // Parse a string literal. 128 consumeStringLiteral(&Result); 129 break; 130 131 case '0': case '1': case '2': case '3': case '4': 132 case '5': case '6': case '7': case '8': case '9': 133 // Parse an unsigned literal. 134 consumeUnsignedLiteral(&Result); 135 break; 136 137 default: 138 if (isAlphanumeric(Code[0])) { 139 // Parse an identifier 140 size_t TokenLength = 1; 141 while (1) { 142 // A code completion location in/immediately after an identifier will 143 // cause the portion of the identifier before the code completion 144 // location to become a code completion token. 145 if (CodeCompletionLocation == Code.data() + TokenLength) { 146 CodeCompletionLocation = nullptr; 147 Result.Kind = TokenInfo::TK_CodeCompletion; 148 Result.Text = Code.substr(0, TokenLength); 149 Code = Code.drop_front(TokenLength); 150 return Result; 151 } 152 if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength])) 153 break; 154 ++TokenLength; 155 } 156 Result.Kind = TokenInfo::TK_Ident; 157 Result.Text = Code.substr(0, TokenLength); 158 Code = Code.drop_front(TokenLength); 159 } else { 160 Result.Kind = TokenInfo::TK_InvalidChar; 161 Result.Text = Code.substr(0, 1); 162 Code = Code.drop_front(1); 163 } 164 break; 165 } 166 167 Result.Range.End = currentLocation(); 168 return Result; 169 } 170 171 /// \brief Consume an unsigned literal. 172 void consumeUnsignedLiteral(TokenInfo *Result) { 173 unsigned Length = 1; 174 if (Code.size() > 1) { 175 // Consume the 'x' or 'b' radix modifier, if present. 176 switch (toLowercase(Code[1])) { 177 case 'x': case 'b': Length = 2; 178 } 179 } 180 while (Length < Code.size() && isHexDigit(Code[Length])) 181 ++Length; 182 183 Result->Text = Code.substr(0, Length); 184 Code = Code.drop_front(Length); 185 186 unsigned Value; 187 if (!Result->Text.getAsInteger(0, Value)) { 188 Result->Kind = TokenInfo::TK_Literal; 189 Result->Value = Value; 190 } else { 191 SourceRange Range; 192 Range.Start = Result->Range.Start; 193 Range.End = currentLocation(); 194 Error->addError(Range, Error->ET_ParserUnsignedError) << Result->Text; 195 Result->Kind = TokenInfo::TK_Error; 196 } 197 } 198 199 /// \brief Consume a string literal. 200 /// 201 /// \c Code must be positioned at the start of the literal (the opening 202 /// quote). Consumed until it finds the same closing quote character. 203 void consumeStringLiteral(TokenInfo *Result) { 204 bool InEscape = false; 205 const char Marker = Code[0]; 206 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) { 207 if (InEscape) { 208 InEscape = false; 209 continue; 210 } 211 if (Code[Length] == '\\') { 212 InEscape = true; 213 continue; 214 } 215 if (Code[Length] == Marker) { 216 Result->Kind = TokenInfo::TK_Literal; 217 Result->Text = Code.substr(0, Length + 1); 218 Result->Value = Code.substr(1, Length - 1).str(); 219 Code = Code.drop_front(Length + 1); 220 return; 221 } 222 } 223 224 StringRef ErrorText = Code; 225 Code = Code.drop_front(Code.size()); 226 SourceRange Range; 227 Range.Start = Result->Range.Start; 228 Range.End = currentLocation(); 229 Error->addError(Range, Error->ET_ParserStringError) << ErrorText; 230 Result->Kind = TokenInfo::TK_Error; 231 } 232 233 /// \brief Consume all leading whitespace from \c Code. 234 void consumeWhitespace() { 235 while (!Code.empty() && isWhitespace(Code[0])) { 236 if (Code[0] == '\n') { 237 ++Line; 238 StartOfLine = Code.drop_front(); 239 } 240 Code = Code.drop_front(); 241 } 242 } 243 244 SourceLocation currentLocation() { 245 SourceLocation Location; 246 Location.Line = Line; 247 Location.Column = Code.data() - StartOfLine.data() + 1; 248 return Location; 249 } 250 251 StringRef Code; 252 StringRef StartOfLine; 253 unsigned Line; 254 Diagnostics *Error; 255 TokenInfo NextToken; 256 const char *CodeCompletionLocation; 257 }; 258 259 Parser::Sema::~Sema() {} 260 261 VariantValue Parser::Sema::getNamedValue(StringRef Name) { 262 return VariantValue(); 263 } 264 265 struct Parser::ScopedContextEntry { 266 Parser *P; 267 268 ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) { 269 P->ContextStack.push_back(std::make_pair(C, 0u)); 270 } 271 272 ~ScopedContextEntry() { 273 P->ContextStack.pop_back(); 274 } 275 276 void nextArg() { 277 ++P->ContextStack.back().second; 278 } 279 }; 280 281 /// \brief Parse expressions that start with an identifier. 282 /// 283 /// This function can parse named values and matchers. 284 /// In case of failure it will try to determine the user's intent to give 285 /// an appropriate error message. 286 bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) { 287 const TokenInfo NameToken = Tokenizer->consumeNextToken(); 288 289 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) { 290 // Parse as a named value. 291 if (const VariantValue NamedValue = S->getNamedValue(NameToken.Text)) { 292 *Value = NamedValue; 293 return true; 294 } 295 // If the syntax is correct and the name is not a matcher either, report 296 // unknown named value. 297 if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma || 298 Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen || 299 Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) && 300 !S->lookupMatcherCtor(NameToken.Text)) { 301 Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound) 302 << NameToken.Text; 303 return false; 304 } 305 // Otherwise, fallback to the matcher parser. 306 } 307 308 // Parse as a matcher expression. 309 return parseMatcherExpressionImpl(NameToken, Value); 310 } 311 312 /// \brief Parse and validate a matcher expression. 313 /// \return \c true on success, in which case \c Value has the matcher parsed. 314 /// If the input is malformed, or some argument has an error, it 315 /// returns \c false. 316 bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken, 317 VariantValue *Value) { 318 assert(NameToken.Kind == TokenInfo::TK_Ident); 319 const TokenInfo OpenToken = Tokenizer->consumeNextToken(); 320 if (OpenToken.Kind != TokenInfo::TK_OpenParen) { 321 Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen) 322 << OpenToken.Text; 323 return false; 324 } 325 326 llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text); 327 328 if (!Ctor) { 329 Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound) 330 << NameToken.Text; 331 // Do not return here. We need to continue to give completion suggestions. 332 } 333 334 std::vector<ParserValue> Args; 335 TokenInfo EndToken; 336 337 { 338 ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr); 339 340 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { 341 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { 342 // End of args. 343 EndToken = Tokenizer->consumeNextToken(); 344 break; 345 } 346 if (Args.size() > 0) { 347 // We must find a , token to continue. 348 const TokenInfo CommaToken = Tokenizer->consumeNextToken(); 349 if (CommaToken.Kind != TokenInfo::TK_Comma) { 350 Error->addError(CommaToken.Range, Error->ET_ParserNoComma) 351 << CommaToken.Text; 352 return false; 353 } 354 } 355 356 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error, 357 NameToken.Text, NameToken.Range, 358 Args.size() + 1); 359 ParserValue ArgValue; 360 ArgValue.Text = Tokenizer->peekNextToken().Text; 361 ArgValue.Range = Tokenizer->peekNextToken().Range; 362 if (!parseExpressionImpl(&ArgValue.Value)) { 363 return false; 364 } 365 366 Args.push_back(ArgValue); 367 SCE.nextArg(); 368 } 369 } 370 371 if (EndToken.Kind == TokenInfo::TK_Eof) { 372 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen); 373 return false; 374 } 375 376 std::string BindID; 377 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) { 378 // Parse .bind("foo") 379 Tokenizer->consumeNextToken(); // consume the period. 380 const TokenInfo BindToken = Tokenizer->consumeNextToken(); 381 if (BindToken.Kind == TokenInfo::TK_CodeCompletion) { 382 addCompletion(BindToken, "bind(\"", "bind"); 383 return false; 384 } 385 386 const TokenInfo OpenToken = Tokenizer->consumeNextToken(); 387 const TokenInfo IDToken = Tokenizer->consumeNextToken(); 388 const TokenInfo CloseToken = Tokenizer->consumeNextToken(); 389 390 // TODO: We could use different error codes for each/some to be more 391 // explicit about the syntax error. 392 if (BindToken.Kind != TokenInfo::TK_Ident || 393 BindToken.Text != TokenInfo::ID_Bind) { 394 Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr); 395 return false; 396 } 397 if (OpenToken.Kind != TokenInfo::TK_OpenParen) { 398 Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr); 399 return false; 400 } 401 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) { 402 Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr); 403 return false; 404 } 405 if (CloseToken.Kind != TokenInfo::TK_CloseParen) { 406 Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr); 407 return false; 408 } 409 BindID = IDToken.Value.getString(); 410 } 411 412 if (!Ctor) 413 return false; 414 415 // Merge the start and end infos. 416 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 417 NameToken.Text, NameToken.Range); 418 SourceRange MatcherRange = NameToken.Range; 419 MatcherRange.End = EndToken.Range.End; 420 VariantMatcher Result = S->actOnMatcherExpression( 421 *Ctor, MatcherRange, BindID, Args, Error); 422 if (Result.isNull()) return false; 423 424 *Value = Result; 425 return true; 426 } 427 428 // If the prefix of this completion matches the completion token, add it to 429 // Completions minus the prefix. 430 void Parser::addCompletion(const TokenInfo &CompToken, StringRef TypedText, 431 StringRef Decl) { 432 if (TypedText.size() >= CompToken.Text.size() && 433 TypedText.substr(0, CompToken.Text.size()) == CompToken.Text) { 434 Completions.push_back( 435 MatcherCompletion(TypedText.substr(CompToken.Text.size()), Decl)); 436 } 437 } 438 439 void Parser::addExpressionCompletions() { 440 const TokenInfo CompToken = Tokenizer->consumeNextToken(); 441 assert(CompToken.Kind == TokenInfo::TK_CodeCompletion); 442 443 // We cannot complete code if there is an invalid element on the context 444 // stack. 445 for (ContextStackTy::iterator I = ContextStack.begin(), 446 E = ContextStack.end(); 447 I != E; ++I) { 448 if (!I->first) 449 return; 450 } 451 452 std::vector<MatcherCompletion> RegCompletions = 453 Registry::getCompletions(ContextStack); 454 for (std::vector<MatcherCompletion>::iterator I = RegCompletions.begin(), 455 E = RegCompletions.end(); 456 I != E; ++I) { 457 addCompletion(CompToken, I->TypedText, I->MatcherDecl); 458 } 459 } 460 461 /// \brief Parse an <Expresssion> 462 bool Parser::parseExpressionImpl(VariantValue *Value) { 463 switch (Tokenizer->nextTokenKind()) { 464 case TokenInfo::TK_Literal: 465 *Value = Tokenizer->consumeNextToken().Value; 466 return true; 467 468 case TokenInfo::TK_Ident: 469 return parseIdentifierPrefixImpl(Value); 470 471 case TokenInfo::TK_CodeCompletion: 472 addExpressionCompletions(); 473 return false; 474 475 case TokenInfo::TK_Eof: 476 Error->addError(Tokenizer->consumeNextToken().Range, 477 Error->ET_ParserNoCode); 478 return false; 479 480 case TokenInfo::TK_Error: 481 // This error was already reported by the tokenizer. 482 return false; 483 484 case TokenInfo::TK_OpenParen: 485 case TokenInfo::TK_CloseParen: 486 case TokenInfo::TK_Comma: 487 case TokenInfo::TK_Period: 488 case TokenInfo::TK_InvalidChar: 489 const TokenInfo Token = Tokenizer->consumeNextToken(); 490 Error->addError(Token.Range, Error->ET_ParserInvalidToken) << Token.Text; 491 return false; 492 } 493 494 llvm_unreachable("Unknown token kind."); 495 } 496 497 Parser::Parser(CodeTokenizer *Tokenizer, Sema *S, 498 Diagnostics *Error) 499 : Tokenizer(Tokenizer), S(S), Error(Error) {} 500 501 Parser::RegistrySema::~RegistrySema() {} 502 503 llvm::Optional<MatcherCtor> 504 Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) { 505 return Registry::lookupMatcherCtor(MatcherName); 506 } 507 508 VariantMatcher Parser::RegistrySema::actOnMatcherExpression( 509 MatcherCtor Ctor, const SourceRange &NameRange, StringRef BindID, 510 ArrayRef<ParserValue> Args, Diagnostics *Error) { 511 if (BindID.empty()) { 512 return Registry::constructMatcher(Ctor, NameRange, Args, Error); 513 } else { 514 return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args, 515 Error); 516 } 517 } 518 519 bool Parser::parseExpression(StringRef Code, VariantValue *Value, 520 Diagnostics *Error) { 521 RegistrySema S; 522 return parseExpression(Code, &S, Value, Error); 523 } 524 525 bool Parser::parseExpression(StringRef Code, Sema *S, 526 VariantValue *Value, Diagnostics *Error) { 527 CodeTokenizer Tokenizer(Code, Error); 528 if (!Parser(&Tokenizer, S, Error).parseExpressionImpl(Value)) return false; 529 if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) { 530 Error->addError(Tokenizer.peekNextToken().Range, 531 Error->ET_ParserTrailingCode); 532 return false; 533 } 534 return true; 535 } 536 537 std::vector<MatcherCompletion> 538 Parser::completeExpression(StringRef Code, unsigned CompletionOffset) { 539 Diagnostics Error; 540 CodeTokenizer Tokenizer(Code, &Error, CompletionOffset); 541 RegistrySema S; 542 Parser P(&Tokenizer, &S, &Error); 543 VariantValue Dummy; 544 P.parseExpressionImpl(&Dummy); 545 546 return P.Completions; 547 } 548 549 llvm::Optional<DynTypedMatcher> 550 Parser::parseMatcherExpression(StringRef Code, Diagnostics *Error) { 551 RegistrySema S; 552 return parseMatcherExpression(Code, &S, Error); 553 } 554 555 llvm::Optional<DynTypedMatcher> 556 Parser::parseMatcherExpression(StringRef Code, Parser::Sema *S, 557 Diagnostics *Error) { 558 VariantValue Value; 559 if (!parseExpression(Code, S, &Value, Error)) 560 return llvm::Optional<DynTypedMatcher>(); 561 if (!Value.isMatcher()) { 562 Error->addError(SourceRange(), Error->ET_ParserNotAMatcher); 563 return llvm::Optional<DynTypedMatcher>(); 564 } 565 llvm::Optional<DynTypedMatcher> Result = 566 Value.getMatcher().getSingleMatcher(); 567 if (!Result.hasValue()) { 568 Error->addError(SourceRange(), Error->ET_ParserOverloadedType) 569 << Value.getTypeAsString(); 570 } 571 return Result; 572 } 573 574 } // namespace dynamic 575 } // namespace ast_matchers 576 } // namespace clang 577