1 //===--- Parser.cpp - Matcher expression parser -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief Recursive parser implementation for the matcher expression grammar. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/ASTMatchers/Dynamic/Parser.h" 16 #include "clang/ASTMatchers/Dynamic/Registry.h" 17 #include "clang/Basic/CharInfo.h" 18 #include "llvm/ADT/Optional.h" 19 #include "llvm/ADT/Twine.h" 20 #include "llvm/Support/ManagedStatic.h" 21 #include <string> 22 #include <vector> 23 24 namespace clang { 25 namespace ast_matchers { 26 namespace dynamic { 27 28 /// \brief Simple structure to hold information for one token from the parser. 29 struct Parser::TokenInfo { 30 /// \brief Different possible tokens. 31 enum TokenKind { 32 TK_Eof, 33 TK_OpenParen, 34 TK_CloseParen, 35 TK_Comma, 36 TK_Period, 37 TK_Literal, 38 TK_Ident, 39 TK_InvalidChar, 40 TK_Error, 41 TK_CodeCompletion 42 }; 43 44 /// \brief Some known identifiers. 45 static const char* const ID_Bind; 46 47 TokenInfo() : Text(), Kind(TK_Eof), Range(), Value() {} 48 49 StringRef Text; 50 TokenKind Kind; 51 SourceRange Range; 52 VariantValue Value; 53 }; 54 55 const char* const Parser::TokenInfo::ID_Bind = "bind"; 56 57 /// \brief Simple tokenizer for the parser. 58 class Parser::CodeTokenizer { 59 public: 60 explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error) 61 : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error), 62 CodeCompletionLocation(nullptr) { 63 NextToken = getNextToken(); 64 } 65 66 CodeTokenizer(StringRef MatcherCode, Diagnostics *Error, 67 unsigned CodeCompletionOffset) 68 : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error), 69 CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) { 70 NextToken = getNextToken(); 71 } 72 73 /// \brief Returns but doesn't consume the next token. 74 const TokenInfo &peekNextToken() const { return NextToken; } 75 76 /// \brief Consumes and returns the next token. 77 TokenInfo consumeNextToken() { 78 TokenInfo ThisToken = NextToken; 79 NextToken = getNextToken(); 80 return ThisToken; 81 } 82 83 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; } 84 85 private: 86 TokenInfo getNextToken() { 87 consumeWhitespace(); 88 TokenInfo Result; 89 Result.Range.Start = currentLocation(); 90 91 if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) { 92 Result.Kind = TokenInfo::TK_CodeCompletion; 93 Result.Text = StringRef(CodeCompletionLocation, 0); 94 CodeCompletionLocation = nullptr; 95 return Result; 96 } 97 98 if (Code.empty()) { 99 Result.Kind = TokenInfo::TK_Eof; 100 Result.Text = ""; 101 return Result; 102 } 103 104 switch (Code[0]) { 105 case ',': 106 Result.Kind = TokenInfo::TK_Comma; 107 Result.Text = Code.substr(0, 1); 108 Code = Code.drop_front(); 109 break; 110 case '.': 111 Result.Kind = TokenInfo::TK_Period; 112 Result.Text = Code.substr(0, 1); 113 Code = Code.drop_front(); 114 break; 115 case '(': 116 Result.Kind = TokenInfo::TK_OpenParen; 117 Result.Text = Code.substr(0, 1); 118 Code = Code.drop_front(); 119 break; 120 case ')': 121 Result.Kind = TokenInfo::TK_CloseParen; 122 Result.Text = Code.substr(0, 1); 123 Code = Code.drop_front(); 124 break; 125 126 case '"': 127 case '\'': 128 // Parse a string literal. 129 consumeStringLiteral(&Result); 130 break; 131 132 case '0': case '1': case '2': case '3': case '4': 133 case '5': case '6': case '7': case '8': case '9': 134 // Parse an unsigned literal. 135 consumeUnsignedLiteral(&Result); 136 break; 137 138 default: 139 if (isAlphanumeric(Code[0])) { 140 // Parse an identifier 141 size_t TokenLength = 1; 142 while (1) { 143 // A code completion location in/immediately after an identifier will 144 // cause the portion of the identifier before the code completion 145 // location to become a code completion token. 146 if (CodeCompletionLocation == Code.data() + TokenLength) { 147 CodeCompletionLocation = nullptr; 148 Result.Kind = TokenInfo::TK_CodeCompletion; 149 Result.Text = Code.substr(0, TokenLength); 150 Code = Code.drop_front(TokenLength); 151 return Result; 152 } 153 if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength])) 154 break; 155 ++TokenLength; 156 } 157 Result.Kind = TokenInfo::TK_Ident; 158 Result.Text = Code.substr(0, TokenLength); 159 Code = Code.drop_front(TokenLength); 160 } else { 161 Result.Kind = TokenInfo::TK_InvalidChar; 162 Result.Text = Code.substr(0, 1); 163 Code = Code.drop_front(1); 164 } 165 break; 166 } 167 168 Result.Range.End = currentLocation(); 169 return Result; 170 } 171 172 /// \brief Consume an unsigned literal. 173 void consumeUnsignedLiteral(TokenInfo *Result) { 174 unsigned Length = 1; 175 if (Code.size() > 1) { 176 // Consume the 'x' or 'b' radix modifier, if present. 177 switch (toLowercase(Code[1])) { 178 case 'x': case 'b': Length = 2; 179 } 180 } 181 while (Length < Code.size() && isHexDigit(Code[Length])) 182 ++Length; 183 184 Result->Text = Code.substr(0, Length); 185 Code = Code.drop_front(Length); 186 187 unsigned Value; 188 if (!Result->Text.getAsInteger(0, Value)) { 189 Result->Kind = TokenInfo::TK_Literal; 190 Result->Value = Value; 191 } else { 192 SourceRange Range; 193 Range.Start = Result->Range.Start; 194 Range.End = currentLocation(); 195 Error->addError(Range, Error->ET_ParserUnsignedError) << Result->Text; 196 Result->Kind = TokenInfo::TK_Error; 197 } 198 } 199 200 /// \brief Consume a string literal. 201 /// 202 /// \c Code must be positioned at the start of the literal (the opening 203 /// quote). Consumed until it finds the same closing quote character. 204 void consumeStringLiteral(TokenInfo *Result) { 205 bool InEscape = false; 206 const char Marker = Code[0]; 207 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) { 208 if (InEscape) { 209 InEscape = false; 210 continue; 211 } 212 if (Code[Length] == '\\') { 213 InEscape = true; 214 continue; 215 } 216 if (Code[Length] == Marker) { 217 Result->Kind = TokenInfo::TK_Literal; 218 Result->Text = Code.substr(0, Length + 1); 219 Result->Value = Code.substr(1, Length - 1); 220 Code = Code.drop_front(Length + 1); 221 return; 222 } 223 } 224 225 StringRef ErrorText = Code; 226 Code = Code.drop_front(Code.size()); 227 SourceRange Range; 228 Range.Start = Result->Range.Start; 229 Range.End = currentLocation(); 230 Error->addError(Range, Error->ET_ParserStringError) << ErrorText; 231 Result->Kind = TokenInfo::TK_Error; 232 } 233 234 /// \brief Consume all leading whitespace from \c Code. 235 void consumeWhitespace() { 236 while (!Code.empty() && isWhitespace(Code[0])) { 237 if (Code[0] == '\n') { 238 ++Line; 239 StartOfLine = Code.drop_front(); 240 } 241 Code = Code.drop_front(); 242 } 243 } 244 245 SourceLocation currentLocation() { 246 SourceLocation Location; 247 Location.Line = Line; 248 Location.Column = Code.data() - StartOfLine.data() + 1; 249 return Location; 250 } 251 252 StringRef Code; 253 StringRef StartOfLine; 254 unsigned Line; 255 Diagnostics *Error; 256 TokenInfo NextToken; 257 const char *CodeCompletionLocation; 258 }; 259 260 Parser::Sema::~Sema() {} 261 262 std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes( 263 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) { 264 return std::vector<ArgKind>(); 265 } 266 267 std::vector<MatcherCompletion> 268 Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) { 269 return std::vector<MatcherCompletion>(); 270 } 271 272 struct Parser::ScopedContextEntry { 273 Parser *P; 274 275 ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) { 276 P->ContextStack.push_back(std::make_pair(C, 0u)); 277 } 278 279 ~ScopedContextEntry() { 280 P->ContextStack.pop_back(); 281 } 282 283 void nextArg() { 284 ++P->ContextStack.back().second; 285 } 286 }; 287 288 /// \brief Parse expressions that start with an identifier. 289 /// 290 /// This function can parse named values and matchers. 291 /// In case of failure it will try to determine the user's intent to give 292 /// an appropriate error message. 293 bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) { 294 const TokenInfo NameToken = Tokenizer->consumeNextToken(); 295 296 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) { 297 // Parse as a named value. 298 if (const VariantValue NamedValue = 299 NamedValues ? NamedValues->lookup(NameToken.Text) 300 : VariantValue()) { 301 *Value = NamedValue; 302 return true; 303 } 304 // If the syntax is correct and the name is not a matcher either, report 305 // unknown named value. 306 if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma || 307 Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen || 308 Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) && 309 !S->lookupMatcherCtor(NameToken.Text)) { 310 Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound) 311 << NameToken.Text; 312 return false; 313 } 314 // Otherwise, fallback to the matcher parser. 315 } 316 317 // Parse as a matcher expression. 318 return parseMatcherExpressionImpl(NameToken, Value); 319 } 320 321 /// \brief Parse and validate a matcher expression. 322 /// \return \c true on success, in which case \c Value has the matcher parsed. 323 /// If the input is malformed, or some argument has an error, it 324 /// returns \c false. 325 bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken, 326 VariantValue *Value) { 327 assert(NameToken.Kind == TokenInfo::TK_Ident); 328 const TokenInfo OpenToken = Tokenizer->consumeNextToken(); 329 if (OpenToken.Kind != TokenInfo::TK_OpenParen) { 330 Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen) 331 << OpenToken.Text; 332 return false; 333 } 334 335 llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text); 336 337 if (!Ctor) { 338 Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound) 339 << NameToken.Text; 340 // Do not return here. We need to continue to give completion suggestions. 341 } 342 343 std::vector<ParserValue> Args; 344 TokenInfo EndToken; 345 346 { 347 ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr); 348 349 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { 350 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { 351 // End of args. 352 EndToken = Tokenizer->consumeNextToken(); 353 break; 354 } 355 if (Args.size() > 0) { 356 // We must find a , token to continue. 357 const TokenInfo CommaToken = Tokenizer->consumeNextToken(); 358 if (CommaToken.Kind != TokenInfo::TK_Comma) { 359 Error->addError(CommaToken.Range, Error->ET_ParserNoComma) 360 << CommaToken.Text; 361 return false; 362 } 363 } 364 365 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error, 366 NameToken.Text, NameToken.Range, 367 Args.size() + 1); 368 ParserValue ArgValue; 369 ArgValue.Text = Tokenizer->peekNextToken().Text; 370 ArgValue.Range = Tokenizer->peekNextToken().Range; 371 if (!parseExpressionImpl(&ArgValue.Value)) { 372 return false; 373 } 374 375 Args.push_back(ArgValue); 376 SCE.nextArg(); 377 } 378 } 379 380 if (EndToken.Kind == TokenInfo::TK_Eof) { 381 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen); 382 return false; 383 } 384 385 std::string BindID; 386 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) { 387 // Parse .bind("foo") 388 Tokenizer->consumeNextToken(); // consume the period. 389 const TokenInfo BindToken = Tokenizer->consumeNextToken(); 390 if (BindToken.Kind == TokenInfo::TK_CodeCompletion) { 391 addCompletion(BindToken, MatcherCompletion("bind(\"", "bind", 1)); 392 return false; 393 } 394 395 const TokenInfo OpenToken = Tokenizer->consumeNextToken(); 396 const TokenInfo IDToken = Tokenizer->consumeNextToken(); 397 const TokenInfo CloseToken = Tokenizer->consumeNextToken(); 398 399 // TODO: We could use different error codes for each/some to be more 400 // explicit about the syntax error. 401 if (BindToken.Kind != TokenInfo::TK_Ident || 402 BindToken.Text != TokenInfo::ID_Bind) { 403 Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr); 404 return false; 405 } 406 if (OpenToken.Kind != TokenInfo::TK_OpenParen) { 407 Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr); 408 return false; 409 } 410 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) { 411 Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr); 412 return false; 413 } 414 if (CloseToken.Kind != TokenInfo::TK_CloseParen) { 415 Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr); 416 return false; 417 } 418 BindID = IDToken.Value.getString(); 419 } 420 421 if (!Ctor) 422 return false; 423 424 // Merge the start and end infos. 425 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 426 NameToken.Text, NameToken.Range); 427 SourceRange MatcherRange = NameToken.Range; 428 MatcherRange.End = EndToken.Range.End; 429 VariantMatcher Result = S->actOnMatcherExpression( 430 *Ctor, MatcherRange, BindID, Args, Error); 431 if (Result.isNull()) return false; 432 433 *Value = Result; 434 return true; 435 } 436 437 // If the prefix of this completion matches the completion token, add it to 438 // Completions minus the prefix. 439 void Parser::addCompletion(const TokenInfo &CompToken, 440 const MatcherCompletion& Completion) { 441 if (StringRef(Completion.TypedText).startswith(CompToken.Text) && 442 Completion.Specificity > 0) { 443 Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()), 444 Completion.MatcherDecl, Completion.Specificity); 445 } 446 } 447 448 std::vector<MatcherCompletion> Parser::getNamedValueCompletions( 449 ArrayRef<ArgKind> AcceptedTypes) { 450 if (!NamedValues) return std::vector<MatcherCompletion>(); 451 std::vector<MatcherCompletion> Result; 452 for (const auto &Entry : *NamedValues) { 453 unsigned Specificity; 454 if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) { 455 std::string Decl = 456 (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str(); 457 Result.emplace_back(Entry.getKey(), Decl, Specificity); 458 } 459 } 460 return Result; 461 } 462 463 void Parser::addExpressionCompletions() { 464 const TokenInfo CompToken = Tokenizer->consumeNextToken(); 465 assert(CompToken.Kind == TokenInfo::TK_CodeCompletion); 466 467 // We cannot complete code if there is an invalid element on the context 468 // stack. 469 for (ContextStackTy::iterator I = ContextStack.begin(), 470 E = ContextStack.end(); 471 I != E; ++I) { 472 if (!I->first) 473 return; 474 } 475 476 auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack); 477 for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) { 478 addCompletion(CompToken, Completion); 479 } 480 481 for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) { 482 addCompletion(CompToken, Completion); 483 } 484 } 485 486 /// \brief Parse an <Expresssion> 487 bool Parser::parseExpressionImpl(VariantValue *Value) { 488 switch (Tokenizer->nextTokenKind()) { 489 case TokenInfo::TK_Literal: 490 *Value = Tokenizer->consumeNextToken().Value; 491 return true; 492 493 case TokenInfo::TK_Ident: 494 return parseIdentifierPrefixImpl(Value); 495 496 case TokenInfo::TK_CodeCompletion: 497 addExpressionCompletions(); 498 return false; 499 500 case TokenInfo::TK_Eof: 501 Error->addError(Tokenizer->consumeNextToken().Range, 502 Error->ET_ParserNoCode); 503 return false; 504 505 case TokenInfo::TK_Error: 506 // This error was already reported by the tokenizer. 507 return false; 508 509 case TokenInfo::TK_OpenParen: 510 case TokenInfo::TK_CloseParen: 511 case TokenInfo::TK_Comma: 512 case TokenInfo::TK_Period: 513 case TokenInfo::TK_InvalidChar: 514 const TokenInfo Token = Tokenizer->consumeNextToken(); 515 Error->addError(Token.Range, Error->ET_ParserInvalidToken) << Token.Text; 516 return false; 517 } 518 519 llvm_unreachable("Unknown token kind."); 520 } 521 522 static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema; 523 524 Parser::Parser(CodeTokenizer *Tokenizer, Sema *S, 525 const NamedValueMap *NamedValues, Diagnostics *Error) 526 : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema), 527 NamedValues(NamedValues), Error(Error) {} 528 529 Parser::RegistrySema::~RegistrySema() {} 530 531 llvm::Optional<MatcherCtor> 532 Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) { 533 return Registry::lookupMatcherCtor(MatcherName); 534 } 535 536 VariantMatcher Parser::RegistrySema::actOnMatcherExpression( 537 MatcherCtor Ctor, SourceRange NameRange, StringRef BindID, 538 ArrayRef<ParserValue> Args, Diagnostics *Error) { 539 if (BindID.empty()) { 540 return Registry::constructMatcher(Ctor, NameRange, Args, Error); 541 } else { 542 return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args, 543 Error); 544 } 545 } 546 547 std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes( 548 ArrayRef<std::pair<MatcherCtor, unsigned>> Context) { 549 return Registry::getAcceptedCompletionTypes(Context); 550 } 551 552 std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions( 553 ArrayRef<ArgKind> AcceptedTypes) { 554 return Registry::getMatcherCompletions(AcceptedTypes); 555 } 556 557 bool Parser::parseExpression(StringRef Code, Sema *S, 558 const NamedValueMap *NamedValues, 559 VariantValue *Value, Diagnostics *Error) { 560 CodeTokenizer Tokenizer(Code, Error); 561 if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value)) 562 return false; 563 if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) { 564 Error->addError(Tokenizer.peekNextToken().Range, 565 Error->ET_ParserTrailingCode); 566 return false; 567 } 568 return true; 569 } 570 571 std::vector<MatcherCompletion> 572 Parser::completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S, 573 const NamedValueMap *NamedValues) { 574 Diagnostics Error; 575 CodeTokenizer Tokenizer(Code, &Error, CompletionOffset); 576 Parser P(&Tokenizer, S, NamedValues, &Error); 577 VariantValue Dummy; 578 P.parseExpressionImpl(&Dummy); 579 580 // Sort by specificity, then by name. 581 std::sort(P.Completions.begin(), P.Completions.end(), 582 [](const MatcherCompletion &A, const MatcherCompletion &B) { 583 if (A.Specificity != B.Specificity) 584 return A.Specificity > B.Specificity; 585 return A.TypedText < B.TypedText; 586 }); 587 588 return P.Completions; 589 } 590 591 llvm::Optional<DynTypedMatcher> 592 Parser::parseMatcherExpression(StringRef Code, Sema *S, 593 const NamedValueMap *NamedValues, 594 Diagnostics *Error) { 595 VariantValue Value; 596 if (!parseExpression(Code, S, NamedValues, &Value, Error)) 597 return llvm::Optional<DynTypedMatcher>(); 598 if (!Value.isMatcher()) { 599 Error->addError(SourceRange(), Error->ET_ParserNotAMatcher); 600 return llvm::Optional<DynTypedMatcher>(); 601 } 602 llvm::Optional<DynTypedMatcher> Result = 603 Value.getMatcher().getSingleMatcher(); 604 if (!Result.hasValue()) { 605 Error->addError(SourceRange(), Error->ET_ParserOverloadedType) 606 << Value.getTypeAsString(); 607 } 608 return Result; 609 } 610 611 } // namespace dynamic 612 } // namespace ast_matchers 613 } // namespace clang 614