1 //===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This code rewrites include invocations into their expansions. This gives you 11 // a file with all included files merged into it. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Rewrite/Frontend/Rewriters.h" 16 #include "clang/Basic/SourceManager.h" 17 #include "clang/Frontend/PreprocessorOutputOptions.h" 18 #include "clang/Lex/HeaderSearch.h" 19 #include "clang/Lex/Pragma.h" 20 #include "clang/Lex/Preprocessor.h" 21 #include "llvm/ADT/SmallString.h" 22 #include "llvm/Support/raw_ostream.h" 23 24 using namespace clang; 25 using namespace llvm; 26 27 namespace { 28 29 class InclusionRewriter : public PPCallbacks { 30 /// Information about which #includes were actually performed, 31 /// created by preprocessor callbacks. 32 struct FileChange { 33 const Module *Mod; 34 SourceLocation From; 35 FileID Id; 36 SrcMgr::CharacteristicKind FileType; 37 FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) { 38 } 39 }; 40 Preprocessor &PP; ///< Used to find inclusion directives. 41 SourceManager &SM; ///< Used to read and manage source files. 42 raw_ostream &OS; ///< The destination stream for rewritten contents. 43 const llvm::MemoryBuffer *PredefinesBuffer; ///< The preprocessor predefines. 44 bool ShowLineMarkers; ///< Show #line markers. 45 bool UseLineDirective; ///< Use of line directives or line markers. 46 typedef std::map<unsigned, FileChange> FileChangeMap; 47 FileChangeMap FileChanges; ///< Tracks which files were included where. 48 /// Used transitively for building up the FileChanges mapping over the 49 /// various \c PPCallbacks callbacks. 50 FileChangeMap::iterator LastInsertedFileChange; 51 public: 52 InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers); 53 bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType); 54 void setPredefinesBuffer(const llvm::MemoryBuffer *Buf) { 55 PredefinesBuffer = Buf; 56 } 57 private: 58 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, 59 SrcMgr::CharacteristicKind FileType, 60 FileID PrevFID); 61 virtual void FileSkipped(const FileEntry &ParentFile, 62 const Token &FilenameTok, 63 SrcMgr::CharacteristicKind FileType); 64 virtual void InclusionDirective(SourceLocation HashLoc, 65 const Token &IncludeTok, 66 StringRef FileName, 67 bool IsAngled, 68 CharSourceRange FilenameRange, 69 const FileEntry *File, 70 StringRef SearchPath, 71 StringRef RelativePath, 72 const Module *Imported); 73 void WriteLineInfo(const char *Filename, int Line, 74 SrcMgr::CharacteristicKind FileType, 75 StringRef EOL, StringRef Extra = StringRef()); 76 void WriteImplicitModuleImport(const Module *Mod, StringRef EOL); 77 void OutputContentUpTo(const MemoryBuffer &FromFile, 78 unsigned &WriteFrom, unsigned WriteTo, 79 StringRef EOL, int &lines, 80 bool EnsureNewline = false); 81 void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken, 82 const MemoryBuffer &FromFile, StringRef EOL, 83 unsigned &NextToWrite, int &Lines); 84 bool HandleHasInclude(FileID FileId, Lexer &RawLex, 85 const DirectoryLookup *Lookup, Token &Tok, 86 bool &FileExists); 87 const FileChange *FindFileChangeLocation(SourceLocation Loc) const; 88 StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken); 89 }; 90 91 } // end anonymous namespace 92 93 /// Initializes an InclusionRewriter with a \p PP source and \p OS destination. 94 InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS, 95 bool ShowLineMarkers) 96 : PP(PP), SM(PP.getSourceManager()), OS(OS), PredefinesBuffer(0), 97 ShowLineMarkers(ShowLineMarkers), 98 LastInsertedFileChange(FileChanges.end()) { 99 // If we're in microsoft mode, use normal #line instead of line markers. 100 UseLineDirective = PP.getLangOpts().MicrosoftExt; 101 } 102 103 /// Write appropriate line information as either #line directives or GNU line 104 /// markers depending on what mode we're in, including the \p Filename and 105 /// \p Line we are located at, using the specified \p EOL line separator, and 106 /// any \p Extra context specifiers in GNU line directives. 107 void InclusionRewriter::WriteLineInfo(const char *Filename, int Line, 108 SrcMgr::CharacteristicKind FileType, 109 StringRef EOL, StringRef Extra) { 110 if (!ShowLineMarkers) 111 return; 112 if (UseLineDirective) { 113 OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"'; 114 } else { 115 // Use GNU linemarkers as described here: 116 // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html 117 OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"'; 118 if (!Extra.empty()) 119 OS << Extra; 120 if (FileType == SrcMgr::C_System) 121 // "`3' This indicates that the following text comes from a system header 122 // file, so certain warnings should be suppressed." 123 OS << " 3"; 124 else if (FileType == SrcMgr::C_ExternCSystem) 125 // as above for `3', plus "`4' This indicates that the following text 126 // should be treated as being wrapped in an implicit extern "C" block." 127 OS << " 3 4"; 128 } 129 OS << EOL; 130 } 131 132 void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod, 133 StringRef EOL) { 134 OS << "@import " << Mod->getFullModuleName() << ";" 135 << " /* clang -frewrite-includes: implicit import */" << EOL; 136 } 137 138 /// FileChanged - Whenever the preprocessor enters or exits a #include file 139 /// it invokes this handler. 140 void InclusionRewriter::FileChanged(SourceLocation Loc, 141 FileChangeReason Reason, 142 SrcMgr::CharacteristicKind NewFileType, 143 FileID) { 144 if (Reason != EnterFile) 145 return; 146 if (LastInsertedFileChange == FileChanges.end()) 147 // we didn't reach this file (eg: the main file) via an inclusion directive 148 return; 149 LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID(); 150 LastInsertedFileChange->second.FileType = NewFileType; 151 LastInsertedFileChange = FileChanges.end(); 152 } 153 154 /// Called whenever an inclusion is skipped due to canonical header protection 155 /// macros. 156 void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/, 157 const Token &/*FilenameTok*/, 158 SrcMgr::CharacteristicKind /*FileType*/) { 159 assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't " 160 "found via an inclusion directive, was skipped"); 161 FileChanges.erase(LastInsertedFileChange); 162 LastInsertedFileChange = FileChanges.end(); 163 } 164 165 /// This should be called whenever the preprocessor encounters include 166 /// directives. It does not say whether the file has been included, but it 167 /// provides more information about the directive (hash location instead 168 /// of location inside the included file). It is assumed that the matching 169 /// FileChanged() or FileSkipped() is called after this. 170 void InclusionRewriter::InclusionDirective(SourceLocation HashLoc, 171 const Token &/*IncludeTok*/, 172 StringRef /*FileName*/, 173 bool /*IsAngled*/, 174 CharSourceRange /*FilenameRange*/, 175 const FileEntry * /*File*/, 176 StringRef /*SearchPath*/, 177 StringRef /*RelativePath*/, 178 const Module *Imported) { 179 assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion " 180 "directive was found before the previous one was processed"); 181 std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert( 182 std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported))); 183 assert(p.second && "Unexpected revisitation of the same include directive"); 184 if (!Imported) 185 LastInsertedFileChange = p.first; 186 } 187 188 /// Simple lookup for a SourceLocation (specifically one denoting the hash in 189 /// an inclusion directive) in the map of inclusion information, FileChanges. 190 const InclusionRewriter::FileChange * 191 InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const { 192 FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding()); 193 if (I != FileChanges.end()) 194 return &I->second; 195 return NULL; 196 } 197 198 /// Detect the likely line ending style of \p FromFile by examining the first 199 /// newline found within it. 200 static StringRef DetectEOL(const MemoryBuffer &FromFile) { 201 // detect what line endings the file uses, so that added content does not mix 202 // the style 203 const char *Pos = strchr(FromFile.getBufferStart(), '\n'); 204 if (Pos == NULL) 205 return "\n"; 206 if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r') 207 return "\n\r"; 208 if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r') 209 return "\r\n"; 210 return "\n"; 211 } 212 213 /// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at 214 /// \p WriteTo - 1. 215 void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile, 216 unsigned &WriteFrom, unsigned WriteTo, 217 StringRef EOL, int &Line, 218 bool EnsureNewline) { 219 if (WriteTo <= WriteFrom) 220 return; 221 if (&FromFile == PredefinesBuffer) { 222 // Ignore the #defines of the predefines buffer. 223 WriteFrom = WriteTo; 224 return; 225 } 226 OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom); 227 // count lines manually, it's faster than getPresumedLoc() 228 Line += std::count(FromFile.getBufferStart() + WriteFrom, 229 FromFile.getBufferStart() + WriteTo, '\n'); 230 if (EnsureNewline) { 231 char LastChar = FromFile.getBufferStart()[WriteTo - 1]; 232 if (LastChar != '\n' && LastChar != '\r') 233 OS << EOL; 234 } 235 WriteFrom = WriteTo; 236 } 237 238 /// Print characters from \p FromFile starting at \p NextToWrite up until the 239 /// inclusion directive at \p StartToken, then print out the inclusion 240 /// inclusion directive disabled by a #if directive, updating \p NextToWrite 241 /// and \p Line to track the number of source lines visited and the progress 242 /// through the \p FromFile buffer. 243 void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex, 244 const Token &StartToken, 245 const MemoryBuffer &FromFile, 246 StringRef EOL, 247 unsigned &NextToWrite, int &Line) { 248 OutputContentUpTo(FromFile, NextToWrite, 249 SM.getFileOffset(StartToken.getLocation()), EOL, Line); 250 Token DirectiveToken; 251 do { 252 DirectiveLex.LexFromRawLexer(DirectiveToken); 253 } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof)); 254 OS << "#if 0 /* expanded by -frewrite-includes */" << EOL; 255 OutputContentUpTo(FromFile, NextToWrite, 256 SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(), 257 EOL, Line); 258 OS << "#endif /* expanded by -frewrite-includes */" << EOL; 259 } 260 261 /// Find the next identifier in the pragma directive specified by \p RawToken. 262 StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex, 263 Token &RawToken) { 264 RawLex.LexFromRawLexer(RawToken); 265 if (RawToken.is(tok::raw_identifier)) 266 PP.LookUpIdentifierInfo(RawToken); 267 if (RawToken.is(tok::identifier)) 268 return RawToken.getIdentifierInfo()->getName(); 269 return StringRef(); 270 } 271 272 // Expand __has_include and __has_include_next if possible. If there's no 273 // definitive answer return false. 274 bool InclusionRewriter::HandleHasInclude( 275 FileID FileId, Lexer &RawLex, const DirectoryLookup *Lookup, Token &Tok, 276 bool &FileExists) { 277 // Lex the opening paren. 278 RawLex.LexFromRawLexer(Tok); 279 if (Tok.isNot(tok::l_paren)) 280 return false; 281 282 RawLex.LexFromRawLexer(Tok); 283 284 SmallString<128> FilenameBuffer; 285 StringRef Filename; 286 // Since the raw lexer doesn't give us angle_literals we have to parse them 287 // ourselves. 288 // FIXME: What to do if the file name is a macro? 289 if (Tok.is(tok::less)) { 290 RawLex.LexFromRawLexer(Tok); 291 292 FilenameBuffer += '<'; 293 do { 294 if (Tok.is(tok::eod)) // Sanity check. 295 return false; 296 297 if (Tok.is(tok::raw_identifier)) 298 PP.LookUpIdentifierInfo(Tok); 299 300 // Get the string piece. 301 SmallVector<char, 128> TmpBuffer; 302 bool Invalid = false; 303 StringRef TmpName = PP.getSpelling(Tok, TmpBuffer, &Invalid); 304 if (Invalid) 305 return false; 306 307 FilenameBuffer += TmpName; 308 309 RawLex.LexFromRawLexer(Tok); 310 } while (Tok.isNot(tok::greater)); 311 312 FilenameBuffer += '>'; 313 Filename = FilenameBuffer; 314 } else { 315 if (Tok.isNot(tok::string_literal)) 316 return false; 317 318 bool Invalid = false; 319 Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid); 320 if (Invalid) 321 return false; 322 } 323 324 // Lex the closing paren. 325 RawLex.LexFromRawLexer(Tok); 326 if (Tok.isNot(tok::r_paren)) 327 return false; 328 329 // Now ask HeaderInfo if it knows about the header. 330 // FIXME: Subframeworks aren't handled here. Do we care? 331 bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename); 332 const DirectoryLookup *CurDir; 333 const FileEntry *File = PP.getHeaderSearchInfo().LookupFile( 334 Filename, isAngled, 0, CurDir, 335 PP.getSourceManager().getFileEntryForID(FileId), 0, 0, 0, false); 336 337 FileExists = File != 0; 338 return true; 339 } 340 341 /// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it 342 /// and including content of included files recursively. 343 bool InclusionRewriter::Process(FileID FileId, 344 SrcMgr::CharacteristicKind FileType) 345 { 346 bool Invalid; 347 const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid); 348 if (Invalid) // invalid inclusion 349 return false; 350 const char *FileName = FromFile.getBufferIdentifier(); 351 Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts()); 352 RawLex.SetCommentRetentionState(false); 353 354 StringRef EOL = DetectEOL(FromFile); 355 356 // Per the GNU docs: "1" indicates the start of a new file. 357 WriteLineInfo(FileName, 1, FileType, EOL, " 1"); 358 359 if (SM.getFileIDSize(FileId) == 0) 360 return false; 361 362 // The next byte to be copied from the source file 363 unsigned NextToWrite = 0; 364 int Line = 1; // The current input file line number. 365 366 Token RawToken; 367 RawLex.LexFromRawLexer(RawToken); 368 369 // TODO: Consider adding a switch that strips possibly unimportant content, 370 // such as comments, to reduce the size of repro files. 371 while (RawToken.isNot(tok::eof)) { 372 if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) { 373 RawLex.setParsingPreprocessorDirective(true); 374 Token HashToken = RawToken; 375 RawLex.LexFromRawLexer(RawToken); 376 if (RawToken.is(tok::raw_identifier)) 377 PP.LookUpIdentifierInfo(RawToken); 378 if (RawToken.getIdentifierInfo() != NULL) { 379 switch (RawToken.getIdentifierInfo()->getPPKeywordID()) { 380 case tok::pp_include: 381 case tok::pp_include_next: 382 case tok::pp_import: { 383 CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite, 384 Line); 385 StringRef LineInfoExtra; 386 if (const FileChange *Change = FindFileChangeLocation( 387 HashToken.getLocation())) { 388 if (Change->Mod) { 389 WriteImplicitModuleImport(Change->Mod, EOL); 390 391 // else now include and recursively process the file 392 } else if (Process(Change->Id, Change->FileType)) { 393 // and set lineinfo back to this file, if the nested one was 394 // actually included 395 // `2' indicates returning to a file (after having included 396 // another file. 397 LineInfoExtra = " 2"; 398 } 399 } 400 // fix up lineinfo (since commented out directive changed line 401 // numbers) for inclusions that were skipped due to header guards 402 WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra); 403 break; 404 } 405 case tok::pp_pragma: { 406 StringRef Identifier = NextIdentifierName(RawLex, RawToken); 407 if (Identifier == "clang" || Identifier == "GCC") { 408 if (NextIdentifierName(RawLex, RawToken) == "system_header") { 409 // keep the directive in, commented out 410 CommentOutDirective(RawLex, HashToken, FromFile, EOL, 411 NextToWrite, Line); 412 // update our own type 413 FileType = SM.getFileCharacteristic(RawToken.getLocation()); 414 WriteLineInfo(FileName, Line, FileType, EOL); 415 } 416 } else if (Identifier == "once") { 417 // keep the directive in, commented out 418 CommentOutDirective(RawLex, HashToken, FromFile, EOL, 419 NextToWrite, Line); 420 WriteLineInfo(FileName, Line, FileType, EOL); 421 } 422 break; 423 } 424 case tok::pp_if: 425 case tok::pp_elif: { 426 bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() == 427 tok::pp_elif); 428 // Rewrite special builtin macros to avoid pulling in host details. 429 do { 430 // Walk over the directive. 431 RawLex.LexFromRawLexer(RawToken); 432 if (RawToken.is(tok::raw_identifier)) 433 PP.LookUpIdentifierInfo(RawToken); 434 435 if (RawToken.is(tok::identifier)) { 436 bool HasFile; 437 SourceLocation Loc = RawToken.getLocation(); 438 439 // Rewrite __has_include(x) 440 if (RawToken.getIdentifierInfo()->isStr("__has_include")) { 441 if (!HandleHasInclude(FileId, RawLex, 0, RawToken, HasFile)) 442 continue; 443 // Rewrite __has_include_next(x) 444 } else if (RawToken.getIdentifierInfo()->isStr( 445 "__has_include_next")) { 446 const DirectoryLookup *Lookup = PP.GetCurDirLookup(); 447 if (Lookup) 448 ++Lookup; 449 450 if (!HandleHasInclude(FileId, RawLex, Lookup, RawToken, 451 HasFile)) 452 continue; 453 } else { 454 continue; 455 } 456 // Replace the macro with (0) or (1), followed by the commented 457 // out macro for reference. 458 OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc), 459 EOL, Line); 460 OS << '(' << (int) HasFile << ")/*"; 461 OutputContentUpTo(FromFile, NextToWrite, 462 SM.getFileOffset(RawToken.getLocation()) + 463 RawToken.getLength(), 464 EOL, Line); 465 OS << "*/"; 466 } 467 } while (RawToken.isNot(tok::eod)); 468 if (elif) { 469 OutputContentUpTo(FromFile, NextToWrite, 470 SM.getFileOffset(RawToken.getLocation()) + 471 RawToken.getLength(), 472 EOL, Line, /*EnsureNewLine*/ true); 473 WriteLineInfo(FileName, Line, FileType, EOL); 474 } 475 break; 476 } 477 case tok::pp_endif: 478 case tok::pp_else: { 479 // We surround every #include by #if 0 to comment it out, but that 480 // changes line numbers. These are fixed up right after that, but 481 // the whole #include could be inside a preprocessor conditional 482 // that is not processed. So it is necessary to fix the line 483 // numbers one the next line after each #else/#endif as well. 484 RawLex.SetKeepWhitespaceMode(true); 485 do { 486 RawLex.LexFromRawLexer(RawToken); 487 } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof)); 488 OutputContentUpTo( 489 FromFile, NextToWrite, 490 SM.getFileOffset(RawToken.getLocation()) + RawToken.getLength(), 491 EOL, Line, /*EnsureNewLine*/ true); 492 WriteLineInfo(FileName, Line, FileType, EOL); 493 RawLex.SetKeepWhitespaceMode(false); 494 } 495 default: 496 break; 497 } 498 } 499 RawLex.setParsingPreprocessorDirective(false); 500 } 501 RawLex.LexFromRawLexer(RawToken); 502 } 503 OutputContentUpTo(FromFile, NextToWrite, 504 SM.getFileOffset(SM.getLocForEndOfFile(FileId)), EOL, Line, 505 /*EnsureNewline*/true); 506 return true; 507 } 508 509 /// InclusionRewriterInInput - Implement -frewrite-includes mode. 510 void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS, 511 const PreprocessorOutputOptions &Opts) { 512 SourceManager &SM = PP.getSourceManager(); 513 InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS, 514 Opts.ShowLineMarkers); 515 PP.addPPCallbacks(Rewrite); 516 // Ignore all pragmas, otherwise there will be warnings about unknown pragmas 517 // (because there's nothing to handle them). 518 PP.AddPragmaHandler(new EmptyPragmaHandler()); 519 // Ignore also all pragma in all namespaces created 520 // in Preprocessor::RegisterBuiltinPragmas(). 521 PP.AddPragmaHandler("GCC", new EmptyPragmaHandler()); 522 PP.AddPragmaHandler("clang", new EmptyPragmaHandler()); 523 524 // First let the preprocessor process the entire file and call callbacks. 525 // Callbacks will record which #include's were actually performed. 526 PP.EnterMainSourceFile(); 527 Token Tok; 528 // Only preprocessor directives matter here, so disable macro expansion 529 // everywhere else as an optimization. 530 // TODO: It would be even faster if the preprocessor could be switched 531 // to a mode where it would parse only preprocessor directives and comments, 532 // nothing else matters for parsing or processing. 533 PP.SetMacroExpansionOnlyInDirectives(); 534 do { 535 PP.Lex(Tok); 536 } while (Tok.isNot(tok::eof)); 537 Rewrite->setPredefinesBuffer(SM.getBuffer(PP.getPredefinesFileID())); 538 Rewrite->Process(PP.getPredefinesFileID(), SrcMgr::C_User); 539 Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User); 540 OS->flush(); 541 } 542