Home | History | Annotate | Download | only in Frontend
      1 //===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This code rewrites include invocations into their expansions.  This gives you
     11 // a file with all included files merged into it.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "clang/Rewrite/Frontend/Rewriters.h"
     16 #include "clang/Basic/SourceManager.h"
     17 #include "clang/Frontend/PreprocessorOutputOptions.h"
     18 #include "clang/Lex/Preprocessor.h"
     19 #include "llvm/Support/raw_ostream.h"
     20 
     21 using namespace clang;
     22 using namespace llvm;
     23 
     24 namespace {
     25 
     26 class InclusionRewriter : public PPCallbacks {
     27   /// Information about which #includes were actually performed,
     28   /// created by preprocessor callbacks.
     29   struct FileChange {
     30     SourceLocation From;
     31     FileID Id;
     32     SrcMgr::CharacteristicKind FileType;
     33     FileChange(SourceLocation From) : From(From) {
     34     }
     35   };
     36   Preprocessor &PP; ///< Used to find inclusion directives.
     37   SourceManager &SM; ///< Used to read and manage source files.
     38   raw_ostream &OS; ///< The destination stream for rewritten contents.
     39   bool ShowLineMarkers; ///< Show #line markers.
     40   bool UseLineDirective; ///< Use of line directives or line markers.
     41   typedef std::map<unsigned, FileChange> FileChangeMap;
     42   FileChangeMap FileChanges; ///< Tracks which files were included where.
     43   /// Used transitively for building up the FileChanges mapping over the
     44   /// various \c PPCallbacks callbacks.
     45   FileChangeMap::iterator LastInsertedFileChange;
     46 public:
     47   InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
     48   bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
     49 private:
     50   virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
     51                            SrcMgr::CharacteristicKind FileType,
     52                            FileID PrevFID);
     53   virtual void FileSkipped(const FileEntry &ParentFile,
     54                            const Token &FilenameTok,
     55                            SrcMgr::CharacteristicKind FileType);
     56   virtual void InclusionDirective(SourceLocation HashLoc,
     57                                   const Token &IncludeTok,
     58                                   StringRef FileName,
     59                                   bool IsAngled,
     60                                   CharSourceRange FilenameRange,
     61                                   const FileEntry *File,
     62                                   StringRef SearchPath,
     63                                   StringRef RelativePath,
     64                                   const Module *Imported);
     65   void WriteLineInfo(const char *Filename, int Line,
     66                      SrcMgr::CharacteristicKind FileType,
     67                      StringRef EOL, StringRef Extra = StringRef());
     68   void OutputContentUpTo(const MemoryBuffer &FromFile,
     69                          unsigned &WriteFrom, unsigned WriteTo,
     70                          StringRef EOL, int &lines,
     71                          bool EnsureNewline = false);
     72   void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
     73                            const MemoryBuffer &FromFile, StringRef EOL,
     74                            unsigned &NextToWrite, int &Lines);
     75   const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
     76   StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
     77 };
     78 
     79 }  // end anonymous namespace
     80 
     81 /// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
     82 InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
     83                                      bool ShowLineMarkers)
     84     : PP(PP), SM(PP.getSourceManager()), OS(OS),
     85     ShowLineMarkers(ShowLineMarkers),
     86     LastInsertedFileChange(FileChanges.end()) {
     87   // If we're in microsoft mode, use normal #line instead of line markers.
     88   UseLineDirective = PP.getLangOpts().MicrosoftExt;
     89 }
     90 
     91 /// Write appropriate line information as either #line directives or GNU line
     92 /// markers depending on what mode we're in, including the \p Filename and
     93 /// \p Line we are located at, using the specified \p EOL line separator, and
     94 /// any \p Extra context specifiers in GNU line directives.
     95 void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
     96                                       SrcMgr::CharacteristicKind FileType,
     97                                       StringRef EOL, StringRef Extra) {
     98   if (!ShowLineMarkers)
     99     return;
    100   if (UseLineDirective) {
    101     OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"';
    102   } else {
    103     // Use GNU linemarkers as described here:
    104     // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
    105     OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"';
    106     if (!Extra.empty())
    107       OS << Extra;
    108     if (FileType == SrcMgr::C_System)
    109       // "`3' This indicates that the following text comes from a system header
    110       // file, so certain warnings should be suppressed."
    111       OS << " 3";
    112     else if (FileType == SrcMgr::C_ExternCSystem)
    113       // as above for `3', plus "`4' This indicates that the following text
    114       // should be treated as being wrapped in an implicit extern "C" block."
    115       OS << " 3 4";
    116   }
    117   OS << EOL;
    118 }
    119 
    120 /// FileChanged - Whenever the preprocessor enters or exits a #include file
    121 /// it invokes this handler.
    122 void InclusionRewriter::FileChanged(SourceLocation Loc,
    123                                     FileChangeReason Reason,
    124                                     SrcMgr::CharacteristicKind NewFileType,
    125                                     FileID) {
    126   if (Reason != EnterFile)
    127     return;
    128   if (LastInsertedFileChange == FileChanges.end())
    129     // we didn't reach this file (eg: the main file) via an inclusion directive
    130     return;
    131   LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
    132   LastInsertedFileChange->second.FileType = NewFileType;
    133   LastInsertedFileChange = FileChanges.end();
    134 }
    135 
    136 /// Called whenever an inclusion is skipped due to canonical header protection
    137 /// macros.
    138 void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
    139                                     const Token &/*FilenameTok*/,
    140                                     SrcMgr::CharacteristicKind /*FileType*/) {
    141   assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
    142     "found via an inclusion directive, was skipped");
    143   FileChanges.erase(LastInsertedFileChange);
    144   LastInsertedFileChange = FileChanges.end();
    145 }
    146 
    147 /// This should be called whenever the preprocessor encounters include
    148 /// directives. It does not say whether the file has been included, but it
    149 /// provides more information about the directive (hash location instead
    150 /// of location inside the included file). It is assumed that the matching
    151 /// FileChanged() or FileSkipped() is called after this.
    152 void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
    153                                            const Token &/*IncludeTok*/,
    154                                            StringRef /*FileName*/,
    155                                            bool /*IsAngled*/,
    156                                            CharSourceRange /*FilenameRange*/,
    157                                            const FileEntry * /*File*/,
    158                                            StringRef /*SearchPath*/,
    159                                            StringRef /*RelativePath*/,
    160                                            const Module * /*Imported*/) {
    161   assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
    162     "directive was found before the previous one was processed");
    163   std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
    164     std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc)));
    165   assert(p.second && "Unexpected revisitation of the same include directive");
    166   LastInsertedFileChange = p.first;
    167 }
    168 
    169 /// Simple lookup for a SourceLocation (specifically one denoting the hash in
    170 /// an inclusion directive) in the map of inclusion information, FileChanges.
    171 const InclusionRewriter::FileChange *
    172 InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
    173   FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
    174   if (I != FileChanges.end())
    175     return &I->second;
    176   return NULL;
    177 }
    178 
    179 /// Detect the likely line ending style of \p FromFile by examining the first
    180 /// newline found within it.
    181 static StringRef DetectEOL(const MemoryBuffer &FromFile) {
    182   // detect what line endings the file uses, so that added content does not mix
    183   // the style
    184   const char *Pos = strchr(FromFile.getBufferStart(), '\n');
    185   if (Pos == NULL)
    186     return "\n";
    187   if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
    188     return "\n\r";
    189   if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
    190     return "\r\n";
    191   return "\n";
    192 }
    193 
    194 /// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
    195 /// \p WriteTo - 1.
    196 void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
    197                                           unsigned &WriteFrom, unsigned WriteTo,
    198                                           StringRef EOL, int &Line,
    199                                           bool EnsureNewline) {
    200   if (WriteTo <= WriteFrom)
    201     return;
    202   OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
    203   // count lines manually, it's faster than getPresumedLoc()
    204   Line += std::count(FromFile.getBufferStart() + WriteFrom,
    205                      FromFile.getBufferStart() + WriteTo, '\n');
    206   if (EnsureNewline) {
    207     char LastChar = FromFile.getBufferStart()[WriteTo - 1];
    208     if (LastChar != '\n' && LastChar != '\r')
    209       OS << EOL;
    210   }
    211   WriteFrom = WriteTo;
    212 }
    213 
    214 /// Print characters from \p FromFile starting at \p NextToWrite up until the
    215 /// inclusion directive at \p StartToken, then print out the inclusion
    216 /// inclusion directive disabled by a #if directive, updating \p NextToWrite
    217 /// and \p Line to track the number of source lines visited and the progress
    218 /// through the \p FromFile buffer.
    219 void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
    220                                             const Token &StartToken,
    221                                             const MemoryBuffer &FromFile,
    222                                             StringRef EOL,
    223                                             unsigned &NextToWrite, int &Line) {
    224   OutputContentUpTo(FromFile, NextToWrite,
    225     SM.getFileOffset(StartToken.getLocation()), EOL, Line);
    226   Token DirectiveToken;
    227   do {
    228     DirectiveLex.LexFromRawLexer(DirectiveToken);
    229   } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
    230   OS << "#if 0 /* expanded by -frewrite-includes */" << EOL;
    231   OutputContentUpTo(FromFile, NextToWrite,
    232     SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
    233     EOL, Line);
    234   OS << "#endif /* expanded by -frewrite-includes */" << EOL;
    235 }
    236 
    237 /// Find the next identifier in the pragma directive specified by \p RawToken.
    238 StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
    239                                                 Token &RawToken) {
    240   RawLex.LexFromRawLexer(RawToken);
    241   if (RawToken.is(tok::raw_identifier))
    242     PP.LookUpIdentifierInfo(RawToken);
    243   if (RawToken.is(tok::identifier))
    244     return RawToken.getIdentifierInfo()->getName();
    245   return StringRef();
    246 }
    247 
    248 /// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it
    249 /// and including content of included files recursively.
    250 bool InclusionRewriter::Process(FileID FileId,
    251                                 SrcMgr::CharacteristicKind FileType)
    252 {
    253   bool Invalid;
    254   const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
    255   if (Invalid) // invalid inclusion
    256     return true;
    257   const char *FileName = FromFile.getBufferIdentifier();
    258   Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
    259   RawLex.SetCommentRetentionState(false);
    260 
    261   StringRef EOL = DetectEOL(FromFile);
    262 
    263   // Per the GNU docs: "1" indicates the start of a new file.
    264   WriteLineInfo(FileName, 1, FileType, EOL, " 1");
    265 
    266   if (SM.getFileIDSize(FileId) == 0)
    267     return true;
    268 
    269   // The next byte to be copied from the source file
    270   unsigned NextToWrite = 0;
    271   int Line = 1; // The current input file line number.
    272 
    273   Token RawToken;
    274   RawLex.LexFromRawLexer(RawToken);
    275 
    276   // TODO: Consider adding a switch that strips possibly unimportant content,
    277   // such as comments, to reduce the size of repro files.
    278   while (RawToken.isNot(tok::eof)) {
    279     if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
    280       RawLex.setParsingPreprocessorDirective(true);
    281       Token HashToken = RawToken;
    282       RawLex.LexFromRawLexer(RawToken);
    283       if (RawToken.is(tok::raw_identifier))
    284         PP.LookUpIdentifierInfo(RawToken);
    285       if (RawToken.is(tok::identifier)) {
    286         switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
    287           case tok::pp_include:
    288           case tok::pp_include_next:
    289           case tok::pp_import: {
    290             CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
    291               Line);
    292             if (const FileChange *Change = FindFileChangeLocation(
    293                 HashToken.getLocation())) {
    294               // now include and recursively process the file
    295               if (Process(Change->Id, Change->FileType))
    296                 // and set lineinfo back to this file, if the nested one was
    297                 // actually included
    298                 // `2' indicates returning to a file (after having included
    299                 // another file.
    300                 WriteLineInfo(FileName, Line, FileType, EOL, " 2");
    301             } else
    302               // fix up lineinfo (since commented out directive changed line
    303               // numbers) for inclusions that were skipped due to header guards
    304               WriteLineInfo(FileName, Line, FileType, EOL);
    305             break;
    306           }
    307           case tok::pp_pragma: {
    308             StringRef Identifier = NextIdentifierName(RawLex, RawToken);
    309             if (Identifier == "clang" || Identifier == "GCC") {
    310               if (NextIdentifierName(RawLex, RawToken) == "system_header") {
    311                 // keep the directive in, commented out
    312                 CommentOutDirective(RawLex, HashToken, FromFile, EOL,
    313                   NextToWrite, Line);
    314                 // update our own type
    315                 FileType = SM.getFileCharacteristic(RawToken.getLocation());
    316                 WriteLineInfo(FileName, Line, FileType, EOL);
    317               }
    318             } else if (Identifier == "once") {
    319               // keep the directive in, commented out
    320               CommentOutDirective(RawLex, HashToken, FromFile, EOL,
    321                 NextToWrite, Line);
    322               WriteLineInfo(FileName, Line, FileType, EOL);
    323             }
    324             break;
    325           }
    326           default:
    327             break;
    328         }
    329       }
    330       RawLex.setParsingPreprocessorDirective(false);
    331     }
    332     RawLex.LexFromRawLexer(RawToken);
    333   }
    334   OutputContentUpTo(FromFile, NextToWrite,
    335     SM.getFileOffset(SM.getLocForEndOfFile(FileId)) + 1, EOL, Line,
    336     /*EnsureNewline*/true);
    337   return true;
    338 }
    339 
    340 /// InclusionRewriterInInput - Implement -frewrite-includes mode.
    341 void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
    342                                    const PreprocessorOutputOptions &Opts) {
    343   SourceManager &SM = PP.getSourceManager();
    344   InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
    345                                                      Opts.ShowLineMarkers);
    346   PP.addPPCallbacks(Rewrite);
    347 
    348   // First let the preprocessor process the entire file and call callbacks.
    349   // Callbacks will record which #include's were actually performed.
    350   PP.EnterMainSourceFile();
    351   Token Tok;
    352   // Only preprocessor directives matter here, so disable macro expansion
    353   // everywhere else as an optimization.
    354   // TODO: It would be even faster if the preprocessor could be switched
    355   // to a mode where it would parse only preprocessor directives and comments,
    356   // nothing else matters for parsing or processing.
    357   PP.SetMacroExpansionOnlyInDirectives();
    358   do {
    359     PP.Lex(Tok);
    360   } while (Tok.isNot(tok::eof));
    361   Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
    362   OS->flush();
    363 }
    364