1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the Preprocessor interface. 11 // 12 //===----------------------------------------------------------------------===// 13 // 14 // Options to support: 15 // -H - Print the name of each header file used. 16 // -d[DNI] - Dump various things. 17 // -fworking-directory - #line's with preprocessor's working dir. 18 // -fpreprocessed 19 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD 20 // -W* 21 // -w 22 // 23 // Messages to emit: 24 // "Multiple include guards may be useful for:\n" 25 // 26 //===----------------------------------------------------------------------===// 27 28 #include "clang/Lex/Preprocessor.h" 29 #include "clang/Basic/FileManager.h" 30 #include "clang/Basic/FileSystemStatCache.h" 31 #include "clang/Basic/SourceManager.h" 32 #include "clang/Basic/TargetInfo.h" 33 #include "clang/Lex/CodeCompletionHandler.h" 34 #include "clang/Lex/ExternalPreprocessorSource.h" 35 #include "clang/Lex/HeaderSearch.h" 36 #include "clang/Lex/LexDiagnostic.h" 37 #include "clang/Lex/LiteralSupport.h" 38 #include "clang/Lex/MacroArgs.h" 39 #include "clang/Lex/MacroInfo.h" 40 #include "clang/Lex/ModuleLoader.h" 41 #include "clang/Lex/PTHManager.h" 42 #include "clang/Lex/Pragma.h" 43 #include "clang/Lex/PreprocessingRecord.h" 44 #include "clang/Lex/PreprocessorOptions.h" 45 #include "clang/Lex/ScratchBuffer.h" 46 #include "llvm/ADT/APFloat.h" 47 #include "llvm/ADT/STLExtras.h" 48 #include "llvm/ADT/SmallString.h" 49 #include "llvm/ADT/StringExtras.h" 50 #include "llvm/Support/Capacity.h" 51 #include "llvm/Support/ConvertUTF.h" 52 #include "llvm/Support/MemoryBuffer.h" 53 #include "llvm/Support/raw_ostream.h" 54 using namespace clang; 55 56 //===----------------------------------------------------------------------===// 57 ExternalPreprocessorSource::~ExternalPreprocessorSource() { } 58 59 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts, 60 DiagnosticsEngine &diags, LangOptions &opts, 61 SourceManager &SM, HeaderSearch &Headers, 62 ModuleLoader &TheModuleLoader, 63 IdentifierInfoLookup *IILookup, bool OwnsHeaders, 64 TranslationUnitKind TUKind) 65 : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr), 66 AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM), 67 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers), 68 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr), 69 Identifiers(opts, IILookup), 70 PragmaHandlers(new PragmaNamespace(StringRef())), 71 IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr), 72 CodeCompletionFile(nullptr), CodeCompletionOffset(0), 73 LastTokenWasAt(false), ModuleImportExpectsIdentifier(false), 74 CodeCompletionReached(0), MainFileDir(nullptr), 75 SkipMainFilePreamble(0, true), CurPPLexer(nullptr), CurDirLookup(nullptr), 76 CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), Callbacks(nullptr), 77 CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr), 78 Record(nullptr), MIChainHead(nullptr), DeserialMIChainHead(nullptr) { 79 OwnsHeaderSearch = OwnsHeaders; 80 81 CounterValue = 0; // __COUNTER__ starts at 0. 82 83 // Clear stats. 84 NumDirectives = NumDefined = NumUndefined = NumPragma = 0; 85 NumIf = NumElse = NumEndif = 0; 86 NumEnteredSourceFiles = 0; 87 NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; 88 NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; 89 MaxIncludeStackDepth = 0; 90 NumSkipped = 0; 91 92 // Default to discarding comments. 93 KeepComments = false; 94 KeepMacroComments = false; 95 SuppressIncludeNotFoundError = false; 96 97 // Macro expansion is enabled. 98 DisableMacroExpansion = false; 99 MacroExpansionInDirectivesOverride = false; 100 InMacroArgs = false; 101 InMacroArgPreExpansion = false; 102 NumCachedTokenLexers = 0; 103 PragmasEnabled = true; 104 ParsingIfOrElifDirective = false; 105 PreprocessedOutput = false; 106 107 CachedLexPos = 0; 108 109 // We haven't read anything from the external source. 110 ReadMacrosFromExternalSource = false; 111 112 // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. 113 // This gets unpoisoned where it is allowed. 114 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); 115 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); 116 117 // Initialize the pragma handlers. 118 RegisterBuiltinPragmas(); 119 120 // Initialize builtin macros like __LINE__ and friends. 121 RegisterBuiltinMacros(); 122 123 if(LangOpts.Borland) { 124 Ident__exception_info = getIdentifierInfo("_exception_info"); 125 Ident___exception_info = getIdentifierInfo("__exception_info"); 126 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); 127 Ident__exception_code = getIdentifierInfo("_exception_code"); 128 Ident___exception_code = getIdentifierInfo("__exception_code"); 129 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode"); 130 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination"); 131 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination"); 132 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination"); 133 } else { 134 Ident__exception_info = Ident__exception_code = nullptr; 135 Ident__abnormal_termination = Ident___exception_info = nullptr; 136 Ident___exception_code = Ident___abnormal_termination = nullptr; 137 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr; 138 Ident_AbnormalTermination = nullptr; 139 } 140 } 141 142 Preprocessor::~Preprocessor() { 143 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); 144 145 IncludeMacroStack.clear(); 146 147 // Destroy any macro definitions. 148 while (MacroInfoChain *I = MIChainHead) { 149 MIChainHead = I->Next; 150 I->~MacroInfoChain(); 151 } 152 153 // Free any cached macro expanders. 154 // This populates MacroArgCache, so all TokenLexers need to be destroyed 155 // before the code below that frees up the MacroArgCache list. 156 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr); 157 CurTokenLexer.reset(); 158 159 while (DeserializedMacroInfoChain *I = DeserialMIChainHead) { 160 DeserialMIChainHead = I->Next; 161 I->~DeserializedMacroInfoChain(); 162 } 163 164 // Free any cached MacroArgs. 165 for (MacroArgs *ArgList = MacroArgCache; ArgList;) 166 ArgList = ArgList->deallocate(); 167 168 // Delete the header search info, if we own it. 169 if (OwnsHeaderSearch) 170 delete &HeaderInfo; 171 } 172 173 void Preprocessor::Initialize(const TargetInfo &Target, 174 const TargetInfo *AuxTarget) { 175 assert((!this->Target || this->Target == &Target) && 176 "Invalid override of target information"); 177 this->Target = &Target; 178 179 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) && 180 "Invalid override of aux target information."); 181 this->AuxTarget = AuxTarget; 182 183 // Initialize information about built-ins. 184 BuiltinInfo.InitializeTarget(Target, AuxTarget); 185 HeaderInfo.setTarget(Target); 186 } 187 188 void Preprocessor::InitializeForModelFile() { 189 NumEnteredSourceFiles = 0; 190 191 // Reset pragmas 192 PragmaHandlersBackup = std::move(PragmaHandlers); 193 PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef()); 194 RegisterBuiltinPragmas(); 195 196 // Reset PredefinesFileID 197 PredefinesFileID = FileID(); 198 } 199 200 void Preprocessor::FinalizeForModelFile() { 201 NumEnteredSourceFiles = 1; 202 203 PragmaHandlers = std::move(PragmaHandlersBackup); 204 } 205 206 void Preprocessor::setPTHManager(PTHManager* pm) { 207 PTH.reset(pm); 208 FileMgr.addStatCache(PTH->createStatCache()); 209 } 210 211 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { 212 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" 213 << getSpelling(Tok) << "'"; 214 215 if (!DumpFlags) return; 216 217 llvm::errs() << "\t"; 218 if (Tok.isAtStartOfLine()) 219 llvm::errs() << " [StartOfLine]"; 220 if (Tok.hasLeadingSpace()) 221 llvm::errs() << " [LeadingSpace]"; 222 if (Tok.isExpandDisabled()) 223 llvm::errs() << " [ExpandDisabled]"; 224 if (Tok.needsCleaning()) { 225 const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); 226 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) 227 << "']"; 228 } 229 230 llvm::errs() << "\tLoc=<"; 231 DumpLocation(Tok.getLocation()); 232 llvm::errs() << ">"; 233 } 234 235 void Preprocessor::DumpLocation(SourceLocation Loc) const { 236 Loc.dump(SourceMgr); 237 } 238 239 void Preprocessor::DumpMacro(const MacroInfo &MI) const { 240 llvm::errs() << "MACRO: "; 241 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { 242 DumpToken(MI.getReplacementToken(i)); 243 llvm::errs() << " "; 244 } 245 llvm::errs() << "\n"; 246 } 247 248 void Preprocessor::PrintStats() { 249 llvm::errs() << "\n*** Preprocessor Stats:\n"; 250 llvm::errs() << NumDirectives << " directives found:\n"; 251 llvm::errs() << " " << NumDefined << " #define.\n"; 252 llvm::errs() << " " << NumUndefined << " #undef.\n"; 253 llvm::errs() << " #include/#include_next/#import:\n"; 254 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; 255 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; 256 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; 257 llvm::errs() << " " << NumElse << " #else/#elif.\n"; 258 llvm::errs() << " " << NumEndif << " #endif.\n"; 259 llvm::errs() << " " << NumPragma << " #pragma.\n"; 260 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; 261 262 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" 263 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " 264 << NumFastMacroExpanded << " on the fast path.\n"; 265 llvm::errs() << (NumFastTokenPaste+NumTokenPaste) 266 << " token paste (##) operations performed, " 267 << NumFastTokenPaste << " on the fast path.\n"; 268 269 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total"; 270 271 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory(); 272 llvm::errs() << "\n Macro Expanded Tokens: " 273 << llvm::capacity_in_bytes(MacroExpandedTokens); 274 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity(); 275 // FIXME: List information for all submodules. 276 llvm::errs() << "\n Macros: " 277 << llvm::capacity_in_bytes(CurSubmoduleState->Macros); 278 llvm::errs() << "\n #pragma push_macro Info: " 279 << llvm::capacity_in_bytes(PragmaPushMacroInfo); 280 llvm::errs() << "\n Poison Reasons: " 281 << llvm::capacity_in_bytes(PoisonReasons); 282 llvm::errs() << "\n Comment Handlers: " 283 << llvm::capacity_in_bytes(CommentHandlers) << "\n"; 284 } 285 286 Preprocessor::macro_iterator 287 Preprocessor::macro_begin(bool IncludeExternalMacros) const { 288 if (IncludeExternalMacros && ExternalSource && 289 !ReadMacrosFromExternalSource) { 290 ReadMacrosFromExternalSource = true; 291 ExternalSource->ReadDefinedMacros(); 292 } 293 294 // Make sure we cover all macros in visible modules. 295 for (const ModuleMacro &Macro : ModuleMacros) 296 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState())); 297 298 return CurSubmoduleState->Macros.begin(); 299 } 300 301 size_t Preprocessor::getTotalMemory() const { 302 return BP.getTotalMemory() 303 + llvm::capacity_in_bytes(MacroExpandedTokens) 304 + Predefines.capacity() /* Predefines buffer. */ 305 // FIXME: Include sizes from all submodules, and include MacroInfo sizes, 306 // and ModuleMacros. 307 + llvm::capacity_in_bytes(CurSubmoduleState->Macros) 308 + llvm::capacity_in_bytes(PragmaPushMacroInfo) 309 + llvm::capacity_in_bytes(PoisonReasons) 310 + llvm::capacity_in_bytes(CommentHandlers); 311 } 312 313 Preprocessor::macro_iterator 314 Preprocessor::macro_end(bool IncludeExternalMacros) const { 315 if (IncludeExternalMacros && ExternalSource && 316 !ReadMacrosFromExternalSource) { 317 ReadMacrosFromExternalSource = true; 318 ExternalSource->ReadDefinedMacros(); 319 } 320 321 return CurSubmoduleState->Macros.end(); 322 } 323 324 /// \brief Compares macro tokens with a specified token value sequence. 325 static bool MacroDefinitionEquals(const MacroInfo *MI, 326 ArrayRef<TokenValue> Tokens) { 327 return Tokens.size() == MI->getNumTokens() && 328 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); 329 } 330 331 StringRef Preprocessor::getLastMacroWithSpelling( 332 SourceLocation Loc, 333 ArrayRef<TokenValue> Tokens) const { 334 SourceLocation BestLocation; 335 StringRef BestSpelling; 336 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); 337 I != E; ++I) { 338 const MacroDirective::DefInfo 339 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr); 340 if (!Def || !Def.getMacroInfo()) 341 continue; 342 if (!Def.getMacroInfo()->isObjectLike()) 343 continue; 344 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) 345 continue; 346 SourceLocation Location = Def.getLocation(); 347 // Choose the macro defined latest. 348 if (BestLocation.isInvalid() || 349 (Location.isValid() && 350 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { 351 BestLocation = Location; 352 BestSpelling = I->first->getName(); 353 } 354 } 355 return BestSpelling; 356 } 357 358 void Preprocessor::recomputeCurLexerKind() { 359 if (CurLexer) 360 CurLexerKind = CLK_Lexer; 361 else if (CurPTHLexer) 362 CurLexerKind = CLK_PTHLexer; 363 else if (CurTokenLexer) 364 CurLexerKind = CLK_TokenLexer; 365 else 366 CurLexerKind = CLK_CachingLexer; 367 } 368 369 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 370 unsigned CompleteLine, 371 unsigned CompleteColumn) { 372 assert(File); 373 assert(CompleteLine && CompleteColumn && "Starts from 1:1"); 374 assert(!CodeCompletionFile && "Already set"); 375 376 using llvm::MemoryBuffer; 377 378 // Load the actual file's contents. 379 bool Invalid = false; 380 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); 381 if (Invalid) 382 return true; 383 384 // Find the byte position of the truncation point. 385 const char *Position = Buffer->getBufferStart(); 386 for (unsigned Line = 1; Line < CompleteLine; ++Line) { 387 for (; *Position; ++Position) { 388 if (*Position != '\r' && *Position != '\n') 389 continue; 390 391 // Eat \r\n or \n\r as a single line. 392 if ((Position[1] == '\r' || Position[1] == '\n') && 393 Position[0] != Position[1]) 394 ++Position; 395 ++Position; 396 break; 397 } 398 } 399 400 Position += CompleteColumn - 1; 401 402 // If pointing inside the preamble, adjust the position at the beginning of 403 // the file after the preamble. 404 if (SkipMainFilePreamble.first && 405 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) { 406 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first) 407 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first; 408 } 409 410 if (Position > Buffer->getBufferEnd()) 411 Position = Buffer->getBufferEnd(); 412 413 CodeCompletionFile = File; 414 CodeCompletionOffset = Position - Buffer->getBufferStart(); 415 416 std::unique_ptr<MemoryBuffer> NewBuffer = 417 MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1, 418 Buffer->getBufferIdentifier()); 419 char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart()); 420 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); 421 *NewPos = '\0'; 422 std::copy(Position, Buffer->getBufferEnd(), NewPos+1); 423 SourceMgr.overrideFileContents(File, std::move(NewBuffer)); 424 425 return false; 426 } 427 428 void Preprocessor::CodeCompleteNaturalLanguage() { 429 if (CodeComplete) 430 CodeComplete->CodeCompleteNaturalLanguage(); 431 setCodeCompletionReached(); 432 } 433 434 /// getSpelling - This method is used to get the spelling of a token into a 435 /// SmallVector. Note that the returned StringRef may not point to the 436 /// supplied buffer if a copy can be avoided. 437 StringRef Preprocessor::getSpelling(const Token &Tok, 438 SmallVectorImpl<char> &Buffer, 439 bool *Invalid) const { 440 // NOTE: this has to be checked *before* testing for an IdentifierInfo. 441 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { 442 // Try the fast path. 443 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) 444 return II->getName(); 445 } 446 447 // Resize the buffer if we need to copy into it. 448 if (Tok.needsCleaning()) 449 Buffer.resize(Tok.getLength()); 450 451 const char *Ptr = Buffer.data(); 452 unsigned Len = getSpelling(Tok, Ptr, Invalid); 453 return StringRef(Ptr, Len); 454 } 455 456 /// CreateString - Plop the specified string into a scratch buffer and return a 457 /// location for it. If specified, the source location provides a source 458 /// location for the token. 459 void Preprocessor::CreateString(StringRef Str, Token &Tok, 460 SourceLocation ExpansionLocStart, 461 SourceLocation ExpansionLocEnd) { 462 Tok.setLength(Str.size()); 463 464 const char *DestPtr; 465 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr); 466 467 if (ExpansionLocStart.isValid()) 468 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, 469 ExpansionLocEnd, Str.size()); 470 Tok.setLocation(Loc); 471 472 // If this is a raw identifier or a literal token, set the pointer data. 473 if (Tok.is(tok::raw_identifier)) 474 Tok.setRawIdentifierData(DestPtr); 475 else if (Tok.isLiteral()) 476 Tok.setLiteralData(DestPtr); 477 } 478 479 Module *Preprocessor::getCurrentModule() { 480 if (getLangOpts().CurrentModule.empty()) 481 return nullptr; 482 483 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); 484 } 485 486 //===----------------------------------------------------------------------===// 487 // Preprocessor Initialization Methods 488 //===----------------------------------------------------------------------===// 489 490 491 /// EnterMainSourceFile - Enter the specified FileID as the main source file, 492 /// which implicitly adds the builtin defines etc. 493 void Preprocessor::EnterMainSourceFile() { 494 // We do not allow the preprocessor to reenter the main file. Doing so will 495 // cause FileID's to accumulate information from both runs (e.g. #line 496 // information) and predefined macros aren't guaranteed to be set properly. 497 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); 498 FileID MainFileID = SourceMgr.getMainFileID(); 499 500 // If MainFileID is loaded it means we loaded an AST file, no need to enter 501 // a main file. 502 if (!SourceMgr.isLoadedFileID(MainFileID)) { 503 // Enter the main file source buffer. 504 EnterSourceFile(MainFileID, nullptr, SourceLocation()); 505 506 // If we've been asked to skip bytes in the main file (e.g., as part of a 507 // precompiled preamble), do so now. 508 if (SkipMainFilePreamble.first > 0) 509 CurLexer->SkipBytes(SkipMainFilePreamble.first, 510 SkipMainFilePreamble.second); 511 512 // Tell the header info that the main file was entered. If the file is later 513 // #imported, it won't be re-entered. 514 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) 515 HeaderInfo.IncrementIncludeCount(FE); 516 } 517 518 // Preprocess Predefines to populate the initial preprocessor state. 519 std::unique_ptr<llvm::MemoryBuffer> SB = 520 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); 521 assert(SB && "Cannot create predefined source buffer"); 522 FileID FID = SourceMgr.createFileID(std::move(SB)); 523 assert(FID.isValid() && "Could not create FileID for predefines?"); 524 setPredefinesFileID(FID); 525 526 // Start parsing the predefines. 527 EnterSourceFile(FID, nullptr, SourceLocation()); 528 } 529 530 void Preprocessor::EndSourceFile() { 531 // Notify the client that we reached the end of the source file. 532 if (Callbacks) 533 Callbacks->EndOfMainFile(); 534 } 535 536 //===----------------------------------------------------------------------===// 537 // Lexer Event Handling. 538 //===----------------------------------------------------------------------===// 539 540 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the 541 /// identifier information for the token and install it into the token, 542 /// updating the token kind accordingly. 543 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { 544 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); 545 546 // Look up this token, see if it is a macro, or if it is a language keyword. 547 IdentifierInfo *II; 548 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { 549 // No cleaning needed, just use the characters from the lexed buffer. 550 II = getIdentifierInfo(Identifier.getRawIdentifier()); 551 } else { 552 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 553 SmallString<64> IdentifierBuffer; 554 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); 555 556 if (Identifier.hasUCN()) { 557 SmallString<64> UCNIdentifierBuffer; 558 expandUCNs(UCNIdentifierBuffer, CleanedStr); 559 II = getIdentifierInfo(UCNIdentifierBuffer); 560 } else { 561 II = getIdentifierInfo(CleanedStr); 562 } 563 } 564 565 // Update the token info (identifier info and appropriate token kind). 566 Identifier.setIdentifierInfo(II); 567 Identifier.setKind(II->getTokenID()); 568 569 return II; 570 } 571 572 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { 573 PoisonReasons[II] = DiagID; 574 } 575 576 void Preprocessor::PoisonSEHIdentifiers(bool Poison) { 577 assert(Ident__exception_code && Ident__exception_info); 578 assert(Ident___exception_code && Ident___exception_info); 579 Ident__exception_code->setIsPoisoned(Poison); 580 Ident___exception_code->setIsPoisoned(Poison); 581 Ident_GetExceptionCode->setIsPoisoned(Poison); 582 Ident__exception_info->setIsPoisoned(Poison); 583 Ident___exception_info->setIsPoisoned(Poison); 584 Ident_GetExceptionInfo->setIsPoisoned(Poison); 585 Ident__abnormal_termination->setIsPoisoned(Poison); 586 Ident___abnormal_termination->setIsPoisoned(Poison); 587 Ident_AbnormalTermination->setIsPoisoned(Poison); 588 } 589 590 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { 591 assert(Identifier.getIdentifierInfo() && 592 "Can't handle identifiers without identifier info!"); 593 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = 594 PoisonReasons.find(Identifier.getIdentifierInfo()); 595 if(it == PoisonReasons.end()) 596 Diag(Identifier, diag::err_pp_used_poisoned_id); 597 else 598 Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); 599 } 600 601 /// \brief Returns a diagnostic message kind for reporting a future keyword as 602 /// appropriate for the identifier and specified language. 603 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, 604 const LangOptions &LangOpts) { 605 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed"); 606 607 if (LangOpts.CPlusPlus) 608 return llvm::StringSwitch<diag::kind>(II.getName()) 609 #define CXX11_KEYWORD(NAME, FLAGS) \ 610 .Case(#NAME, diag::warn_cxx11_keyword) 611 #include "clang/Basic/TokenKinds.def" 612 ; 613 614 llvm_unreachable( 615 "Keyword not known to come from a newer Standard or proposed Standard"); 616 } 617 618 /// HandleIdentifier - This callback is invoked when the lexer reads an 619 /// identifier. This callback looks up the identifier in the map and/or 620 /// potentially macro expands it or turns it into a named token (like 'for'). 621 /// 622 /// Note that callers of this method are guarded by checking the 623 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the 624 /// IdentifierInfo methods that compute these properties will need to change to 625 /// match. 626 bool Preprocessor::HandleIdentifier(Token &Identifier) { 627 assert(Identifier.getIdentifierInfo() && 628 "Can't handle identifiers without identifier info!"); 629 630 IdentifierInfo &II = *Identifier.getIdentifierInfo(); 631 632 // If the information about this identifier is out of date, update it from 633 // the external source. 634 // We have to treat __VA_ARGS__ in a special way, since it gets 635 // serialized with isPoisoned = true, but our preprocessor may have 636 // unpoisoned it if we're defining a C99 macro. 637 if (II.isOutOfDate()) { 638 bool CurrentIsPoisoned = false; 639 if (&II == Ident__VA_ARGS__) 640 CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned(); 641 642 ExternalSource->updateOutOfDateIdentifier(II); 643 Identifier.setKind(II.getTokenID()); 644 645 if (&II == Ident__VA_ARGS__) 646 II.setIsPoisoned(CurrentIsPoisoned); 647 } 648 649 // If this identifier was poisoned, and if it was not produced from a macro 650 // expansion, emit an error. 651 if (II.isPoisoned() && CurPPLexer) { 652 HandlePoisonedIdentifier(Identifier); 653 } 654 655 // If this is a macro to be expanded, do it. 656 if (MacroDefinition MD = getMacroDefinition(&II)) { 657 auto *MI = MD.getMacroInfo(); 658 assert(MI && "macro definition with no macro info?"); 659 if (!DisableMacroExpansion) { 660 if (!Identifier.isExpandDisabled() && MI->isEnabled()) { 661 // C99 6.10.3p10: If the preprocessing token immediately after the 662 // macro name isn't a '(', this macro should not be expanded. 663 if (!MI->isFunctionLike() || isNextPPTokenLParen()) 664 return HandleMacroExpandedIdentifier(Identifier, MD); 665 } else { 666 // C99 6.10.3.4p2 says that a disabled macro may never again be 667 // expanded, even if it's in a context where it could be expanded in the 668 // future. 669 Identifier.setFlag(Token::DisableExpand); 670 if (MI->isObjectLike() || isNextPPTokenLParen()) 671 Diag(Identifier, diag::pp_disabled_macro_expansion); 672 } 673 } 674 } 675 676 // If this identifier is a keyword in a newer Standard or proposed Standard, 677 // produce a warning. Don't warn if we're not considering macro expansion, 678 // since this identifier might be the name of a macro. 679 // FIXME: This warning is disabled in cases where it shouldn't be, like 680 // "#define constexpr constexpr", "int constexpr;" 681 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) { 682 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts())) 683 << II.getName(); 684 // Don't diagnose this keyword again in this translation unit. 685 II.setIsFutureCompatKeyword(false); 686 } 687 688 // C++ 2.11p2: If this is an alternative representation of a C++ operator, 689 // then we act as if it is the actual operator and not the textual 690 // representation of it. 691 if (II.isCPlusPlusOperatorKeyword()) 692 Identifier.setIdentifierInfo(nullptr); 693 694 // If this is an extension token, diagnose its use. 695 // We avoid diagnosing tokens that originate from macro definitions. 696 // FIXME: This warning is disabled in cases where it shouldn't be, 697 // like "#define TY typeof", "TY(1) x". 698 if (II.isExtensionToken() && !DisableMacroExpansion) 699 Diag(Identifier, diag::ext_token_used); 700 701 // If this is the 'import' contextual keyword following an '@', note 702 // that the next token indicates a module name. 703 // 704 // Note that we do not treat 'import' as a contextual 705 // keyword when we're in a caching lexer, because caching lexers only get 706 // used in contexts where import declarations are disallowed. 707 if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs && 708 !DisableMacroExpansion && 709 (getLangOpts().Modules || getLangOpts().DebuggerSupport) && 710 CurLexerKind != CLK_CachingLexer) { 711 ModuleImportLoc = Identifier.getLocation(); 712 ModuleImportPath.clear(); 713 ModuleImportExpectsIdentifier = true; 714 CurLexerKind = CLK_LexAfterModuleImport; 715 } 716 return true; 717 } 718 719 void Preprocessor::Lex(Token &Result) { 720 // We loop here until a lex function returns a token; this avoids recursion. 721 bool ReturnedToken; 722 do { 723 switch (CurLexerKind) { 724 case CLK_Lexer: 725 ReturnedToken = CurLexer->Lex(Result); 726 break; 727 case CLK_PTHLexer: 728 ReturnedToken = CurPTHLexer->Lex(Result); 729 break; 730 case CLK_TokenLexer: 731 ReturnedToken = CurTokenLexer->Lex(Result); 732 break; 733 case CLK_CachingLexer: 734 CachingLex(Result); 735 ReturnedToken = true; 736 break; 737 case CLK_LexAfterModuleImport: 738 LexAfterModuleImport(Result); 739 ReturnedToken = true; 740 break; 741 } 742 } while (!ReturnedToken); 743 744 LastTokenWasAt = Result.is(tok::at); 745 } 746 747 748 /// \brief Lex a token following the 'import' contextual keyword. 749 /// 750 void Preprocessor::LexAfterModuleImport(Token &Result) { 751 // Figure out what kind of lexer we actually have. 752 recomputeCurLexerKind(); 753 754 // Lex the next token. 755 Lex(Result); 756 757 // The token sequence 758 // 759 // import identifier (. identifier)* 760 // 761 // indicates a module import directive. We already saw the 'import' 762 // contextual keyword, so now we're looking for the identifiers. 763 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { 764 // We expected to see an identifier here, and we did; continue handling 765 // identifiers. 766 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), 767 Result.getLocation())); 768 ModuleImportExpectsIdentifier = false; 769 CurLexerKind = CLK_LexAfterModuleImport; 770 return; 771 } 772 773 // If we're expecting a '.' or a ';', and we got a '.', then wait until we 774 // see the next identifier. 775 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { 776 ModuleImportExpectsIdentifier = true; 777 CurLexerKind = CLK_LexAfterModuleImport; 778 return; 779 } 780 781 // If we have a non-empty module path, load the named module. 782 if (!ModuleImportPath.empty()) { 783 Module *Imported = nullptr; 784 if (getLangOpts().Modules) { 785 Imported = TheModuleLoader.loadModule(ModuleImportLoc, 786 ModuleImportPath, 787 Module::Hidden, 788 /*IsIncludeDirective=*/false); 789 if (Imported) 790 makeModuleVisible(Imported, ModuleImportLoc); 791 } 792 if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport)) 793 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); 794 } 795 } 796 797 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { 798 CurSubmoduleState->VisibleModules.setVisible( 799 M, Loc, [](Module *) {}, 800 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) { 801 // FIXME: Include the path in the diagnostic. 802 // FIXME: Include the import location for the conflicting module. 803 Diag(ModuleImportLoc, diag::warn_module_conflict) 804 << Path[0]->getFullModuleName() 805 << Conflict->getFullModuleName() 806 << Message; 807 }); 808 809 // Add this module to the imports list of the currently-built submodule. 810 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M) 811 BuildingSubmoduleStack.back().M->Imports.insert(M); 812 } 813 814 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, 815 const char *DiagnosticTag, 816 bool AllowMacroExpansion) { 817 // We need at least one string literal. 818 if (Result.isNot(tok::string_literal)) { 819 Diag(Result, diag::err_expected_string_literal) 820 << /*Source='in...'*/0 << DiagnosticTag; 821 return false; 822 } 823 824 // Lex string literal tokens, optionally with macro expansion. 825 SmallVector<Token, 4> StrToks; 826 do { 827 StrToks.push_back(Result); 828 829 if (Result.hasUDSuffix()) 830 Diag(Result, diag::err_invalid_string_udl); 831 832 if (AllowMacroExpansion) 833 Lex(Result); 834 else 835 LexUnexpandedToken(Result); 836 } while (Result.is(tok::string_literal)); 837 838 // Concatenate and parse the strings. 839 StringLiteralParser Literal(StrToks, *this); 840 assert(Literal.isAscii() && "Didn't allow wide strings in"); 841 842 if (Literal.hadError) 843 return false; 844 845 if (Literal.Pascal) { 846 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal) 847 << /*Source='in...'*/0 << DiagnosticTag; 848 return false; 849 } 850 851 String = Literal.GetString(); 852 return true; 853 } 854 855 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { 856 assert(Tok.is(tok::numeric_constant)); 857 SmallString<8> IntegerBuffer; 858 bool NumberInvalid = false; 859 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid); 860 if (NumberInvalid) 861 return false; 862 NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this); 863 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix()) 864 return false; 865 llvm::APInt APVal(64, 0); 866 if (Literal.GetIntegerValue(APVal)) 867 return false; 868 Lex(Tok); 869 Value = APVal.getLimitedValue(); 870 return true; 871 } 872 873 void Preprocessor::addCommentHandler(CommentHandler *Handler) { 874 assert(Handler && "NULL comment handler"); 875 assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) == 876 CommentHandlers.end() && "Comment handler already registered"); 877 CommentHandlers.push_back(Handler); 878 } 879 880 void Preprocessor::removeCommentHandler(CommentHandler *Handler) { 881 std::vector<CommentHandler *>::iterator Pos 882 = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); 883 assert(Pos != CommentHandlers.end() && "Comment handler not registered"); 884 CommentHandlers.erase(Pos); 885 } 886 887 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { 888 bool AnyPendingTokens = false; 889 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), 890 HEnd = CommentHandlers.end(); 891 H != HEnd; ++H) { 892 if ((*H)->HandleComment(*this, Comment)) 893 AnyPendingTokens = true; 894 } 895 if (!AnyPendingTokens || getCommentRetentionState()) 896 return false; 897 Lex(result); 898 return true; 899 } 900 901 ModuleLoader::~ModuleLoader() { } 902 903 CommentHandler::~CommentHandler() { } 904 905 CodeCompletionHandler::~CodeCompletionHandler() { } 906 907 void Preprocessor::createPreprocessingRecord() { 908 if (Record) 909 return; 910 911 Record = new PreprocessingRecord(getSourceManager()); 912 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record)); 913 } 914