Home | History | Annotate | Download | only in Rewrite
      1 //===--- Rewriter.cpp - Code rewriting interface --------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 //  This file defines the Rewriter class, which is used for code
     11 //  transformations.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "clang/Rewrite/Rewriter.h"
     16 #include "clang/AST/Stmt.h"
     17 #include "clang/AST/Decl.h"
     18 #include "clang/Lex/Lexer.h"
     19 #include "clang/Basic/SourceManager.h"
     20 #include "llvm/Support/raw_ostream.h"
     21 using namespace clang;
     22 
     23 llvm::raw_ostream &RewriteBuffer::write(llvm::raw_ostream &os) const {
     24   // FIXME: eliminate the copy by writing out each chunk at a time
     25   os << std::string(begin(), end());
     26   return os;
     27 }
     28 
     29 /// \brief Return true if this character is non-new-line whitespace:
     30 /// ' ', '\t', '\f', '\v', '\r'.
     31 static inline bool isWhitespace(unsigned char c) {
     32   switch (c) {
     33   case ' ':
     34   case '\t':
     35   case '\f':
     36   case '\v':
     37   case '\r':
     38     return true;
     39   default:
     40     return false;
     41   }
     42 }
     43 
     44 void RewriteBuffer::RemoveText(unsigned OrigOffset, unsigned Size,
     45                                bool removeLineIfEmpty) {
     46   // Nothing to remove, exit early.
     47   if (Size == 0) return;
     48 
     49   unsigned RealOffset = getMappedOffset(OrigOffset, true);
     50   assert(RealOffset+Size < Buffer.size() && "Invalid location");
     51 
     52   // Remove the dead characters.
     53   Buffer.erase(RealOffset, Size);
     54 
     55   // Add a delta so that future changes are offset correctly.
     56   AddReplaceDelta(OrigOffset, -Size);
     57 
     58   if (removeLineIfEmpty) {
     59     // Find the line that the remove occurred and if it is completely empty
     60     // remove the line as well.
     61 
     62     iterator curLineStart = begin();
     63     unsigned curLineStartOffs = 0;
     64     iterator posI = begin();
     65     for (unsigned i = 0; i != RealOffset; ++i) {
     66       if (*posI == '\n') {
     67         curLineStart = posI;
     68         ++curLineStart;
     69         curLineStartOffs = i + 1;
     70       }
     71       ++posI;
     72     }
     73 
     74     unsigned lineSize = 0;
     75     posI = curLineStart;
     76     while (posI != end() && isWhitespace(*posI)) {
     77       ++posI;
     78       ++lineSize;
     79     }
     80     if (posI != end() && *posI == '\n') {
     81       Buffer.erase(curLineStartOffs, lineSize + 1/* + '\n'*/);
     82       AddReplaceDelta(curLineStartOffs, -(lineSize + 1/* + '\n'*/));
     83     }
     84   }
     85 }
     86 
     87 void RewriteBuffer::InsertText(unsigned OrigOffset, llvm::StringRef Str,
     88                                bool InsertAfter) {
     89 
     90   // Nothing to insert, exit early.
     91   if (Str.empty()) return;
     92 
     93   unsigned RealOffset = getMappedOffset(OrigOffset, InsertAfter);
     94   Buffer.insert(RealOffset, Str.begin(), Str.end());
     95 
     96   // Add a delta so that future changes are offset correctly.
     97   AddInsertDelta(OrigOffset, Str.size());
     98 }
     99 
    100 /// ReplaceText - This method replaces a range of characters in the input
    101 /// buffer with a new string.  This is effectively a combined "remove+insert"
    102 /// operation.
    103 void RewriteBuffer::ReplaceText(unsigned OrigOffset, unsigned OrigLength,
    104                                 llvm::StringRef NewStr) {
    105   unsigned RealOffset = getMappedOffset(OrigOffset, true);
    106   Buffer.erase(RealOffset, OrigLength);
    107   Buffer.insert(RealOffset, NewStr.begin(), NewStr.end());
    108   if (OrigLength != NewStr.size())
    109     AddReplaceDelta(OrigOffset, NewStr.size() - OrigLength);
    110 }
    111 
    112 
    113 //===----------------------------------------------------------------------===//
    114 // Rewriter class
    115 //===----------------------------------------------------------------------===//
    116 
    117 /// getRangeSize - Return the size in bytes of the specified range if they
    118 /// are in the same file.  If not, this returns -1.
    119 int Rewriter::getRangeSize(const CharSourceRange &Range,
    120                            RewriteOptions opts) const {
    121   if (!isRewritable(Range.getBegin()) ||
    122       !isRewritable(Range.getEnd())) return -1;
    123 
    124   FileID StartFileID, EndFileID;
    125   unsigned StartOff, EndOff;
    126 
    127   StartOff = getLocationOffsetAndFileID(Range.getBegin(), StartFileID);
    128   EndOff   = getLocationOffsetAndFileID(Range.getEnd(), EndFileID);
    129 
    130   if (StartFileID != EndFileID)
    131     return -1;
    132 
    133   // If edits have been made to this buffer, the delta between the range may
    134   // have changed.
    135   std::map<FileID, RewriteBuffer>::const_iterator I =
    136     RewriteBuffers.find(StartFileID);
    137   if (I != RewriteBuffers.end()) {
    138     const RewriteBuffer &RB = I->second;
    139     EndOff = RB.getMappedOffset(EndOff, opts.IncludeInsertsAtEndOfRange);
    140     StartOff = RB.getMappedOffset(StartOff, !opts.IncludeInsertsAtBeginOfRange);
    141   }
    142 
    143 
    144   // Adjust the end offset to the end of the last token, instead of being the
    145   // start of the last token if this is a token range.
    146   if (Range.isTokenRange())
    147     EndOff += Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr, *LangOpts);
    148 
    149   return EndOff-StartOff;
    150 }
    151 
    152 int Rewriter::getRangeSize(SourceRange Range, RewriteOptions opts) const {
    153   return getRangeSize(CharSourceRange::getTokenRange(Range), opts);
    154 }
    155 
    156 
    157 /// getRewrittenText - Return the rewritten form of the text in the specified
    158 /// range.  If the start or end of the range was unrewritable or if they are
    159 /// in different buffers, this returns an empty string.
    160 ///
    161 /// Note that this method is not particularly efficient.
    162 ///
    163 std::string Rewriter::getRewrittenText(SourceRange Range) const {
    164   if (!isRewritable(Range.getBegin()) ||
    165       !isRewritable(Range.getEnd()))
    166     return "";
    167 
    168   FileID StartFileID, EndFileID;
    169   unsigned StartOff, EndOff;
    170   StartOff = getLocationOffsetAndFileID(Range.getBegin(), StartFileID);
    171   EndOff   = getLocationOffsetAndFileID(Range.getEnd(), EndFileID);
    172 
    173   if (StartFileID != EndFileID)
    174     return ""; // Start and end in different buffers.
    175 
    176   // If edits have been made to this buffer, the delta between the range may
    177   // have changed.
    178   std::map<FileID, RewriteBuffer>::const_iterator I =
    179     RewriteBuffers.find(StartFileID);
    180   if (I == RewriteBuffers.end()) {
    181     // If the buffer hasn't been rewritten, just return the text from the input.
    182     const char *Ptr = SourceMgr->getCharacterData(Range.getBegin());
    183 
    184     // Adjust the end offset to the end of the last token, instead of being the
    185     // start of the last token.
    186     EndOff += Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr, *LangOpts);
    187     return std::string(Ptr, Ptr+EndOff-StartOff);
    188   }
    189 
    190   const RewriteBuffer &RB = I->second;
    191   EndOff = RB.getMappedOffset(EndOff, true);
    192   StartOff = RB.getMappedOffset(StartOff);
    193 
    194   // Adjust the end offset to the end of the last token, instead of being the
    195   // start of the last token.
    196   EndOff += Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr, *LangOpts);
    197 
    198   // Advance the iterators to the right spot, yay for linear time algorithms.
    199   RewriteBuffer::iterator Start = RB.begin();
    200   std::advance(Start, StartOff);
    201   RewriteBuffer::iterator End = Start;
    202   std::advance(End, EndOff-StartOff);
    203 
    204   return std::string(Start, End);
    205 }
    206 
    207 unsigned Rewriter::getLocationOffsetAndFileID(SourceLocation Loc,
    208                                               FileID &FID) const {
    209   assert(Loc.isValid() && "Invalid location");
    210   std::pair<FileID,unsigned> V = SourceMgr->getDecomposedLoc(Loc);
    211   FID = V.first;
    212   return V.second;
    213 }
    214 
    215 
    216 /// getEditBuffer - Get or create a RewriteBuffer for the specified FileID.
    217 ///
    218 RewriteBuffer &Rewriter::getEditBuffer(FileID FID) {
    219   std::map<FileID, RewriteBuffer>::iterator I =
    220     RewriteBuffers.lower_bound(FID);
    221   if (I != RewriteBuffers.end() && I->first == FID)
    222     return I->second;
    223   I = RewriteBuffers.insert(I, std::make_pair(FID, RewriteBuffer()));
    224 
    225   llvm::StringRef MB = SourceMgr->getBufferData(FID);
    226   I->second.Initialize(MB.begin(), MB.end());
    227 
    228   return I->second;
    229 }
    230 
    231 /// InsertText - Insert the specified string at the specified location in the
    232 /// original buffer.
    233 bool Rewriter::InsertText(SourceLocation Loc, llvm::StringRef Str,
    234                           bool InsertAfter, bool indentNewLines) {
    235   using llvm::StringRef;
    236 
    237   if (!isRewritable(Loc)) return true;
    238   FileID FID;
    239   unsigned StartOffs = getLocationOffsetAndFileID(Loc, FID);
    240 
    241   llvm::SmallString<128> indentedStr;
    242   if (indentNewLines && Str.find('\n') != StringRef::npos) {
    243     StringRef MB = SourceMgr->getBufferData(FID);
    244 
    245     unsigned lineNo = SourceMgr->getLineNumber(FID, StartOffs) - 1;
    246     const SrcMgr::ContentCache *
    247         Content = SourceMgr->getSLocEntry(FID).getFile().getContentCache();
    248     unsigned lineOffs = Content->SourceLineCache[lineNo];
    249 
    250     // Find the whitespace at the start of the line.
    251     StringRef indentSpace;
    252     {
    253       unsigned i = lineOffs;
    254       while (isWhitespace(MB[i]))
    255         ++i;
    256       indentSpace = MB.substr(lineOffs, i-lineOffs);
    257     }
    258 
    259     llvm::SmallVector<StringRef, 4> lines;
    260     Str.split(lines, "\n");
    261 
    262     for (unsigned i = 0, e = lines.size(); i != e; ++i) {
    263       indentedStr += lines[i];
    264       if (i < e-1) {
    265         indentedStr += '\n';
    266         indentedStr += indentSpace;
    267       }
    268     }
    269     Str = indentedStr.str();
    270   }
    271 
    272   getEditBuffer(FID).InsertText(StartOffs, Str, InsertAfter);
    273   return false;
    274 }
    275 
    276 bool Rewriter::InsertTextAfterToken(SourceLocation Loc, llvm::StringRef Str) {
    277   if (!isRewritable(Loc)) return true;
    278   FileID FID;
    279   unsigned StartOffs = getLocationOffsetAndFileID(Loc, FID);
    280   RewriteOptions rangeOpts;
    281   rangeOpts.IncludeInsertsAtBeginOfRange = false;
    282   StartOffs += getRangeSize(SourceRange(Loc, Loc), rangeOpts);
    283   getEditBuffer(FID).InsertText(StartOffs, Str, /*InsertAfter*/true);
    284   return false;
    285 }
    286 
    287 /// RemoveText - Remove the specified text region.
    288 bool Rewriter::RemoveText(SourceLocation Start, unsigned Length,
    289                           RewriteOptions opts) {
    290   if (!isRewritable(Start)) return true;
    291   FileID FID;
    292   unsigned StartOffs = getLocationOffsetAndFileID(Start, FID);
    293   getEditBuffer(FID).RemoveText(StartOffs, Length, opts.RemoveLineIfEmpty);
    294   return false;
    295 }
    296 
    297 /// ReplaceText - This method replaces a range of characters in the input
    298 /// buffer with a new string.  This is effectively a combined "remove/insert"
    299 /// operation.
    300 bool Rewriter::ReplaceText(SourceLocation Start, unsigned OrigLength,
    301                            llvm::StringRef NewStr) {
    302   if (!isRewritable(Start)) return true;
    303   FileID StartFileID;
    304   unsigned StartOffs = getLocationOffsetAndFileID(Start, StartFileID);
    305 
    306   getEditBuffer(StartFileID).ReplaceText(StartOffs, OrigLength, NewStr);
    307   return false;
    308 }
    309 
    310 bool Rewriter::ReplaceText(SourceRange range, SourceRange replacementRange) {
    311   if (!isRewritable(range.getBegin())) return true;
    312   if (!isRewritable(range.getEnd())) return true;
    313   if (replacementRange.isInvalid()) return true;
    314   SourceLocation start = range.getBegin();
    315   unsigned origLength = getRangeSize(range);
    316   unsigned newLength = getRangeSize(replacementRange);
    317   FileID FID;
    318   unsigned newOffs = getLocationOffsetAndFileID(replacementRange.getBegin(),
    319                                                 FID);
    320   llvm::StringRef MB = SourceMgr->getBufferData(FID);
    321   return ReplaceText(start, origLength, MB.substr(newOffs, newLength));
    322 }
    323 
    324 /// ReplaceStmt - This replaces a Stmt/Expr with another, using the pretty
    325 /// printer to generate the replacement code.  This returns true if the input
    326 /// could not be rewritten, or false if successful.
    327 bool Rewriter::ReplaceStmt(Stmt *From, Stmt *To) {
    328   // Measaure the old text.
    329   int Size = getRangeSize(From->getSourceRange());
    330   if (Size == -1)
    331     return true;
    332 
    333   // Get the new text.
    334   std::string SStr;
    335   llvm::raw_string_ostream S(SStr);
    336   To->printPretty(S, 0, PrintingPolicy(*LangOpts));
    337   const std::string &Str = S.str();
    338 
    339   ReplaceText(From->getLocStart(), Size, Str);
    340   return false;
    341 }
    342 
    343 std::string Rewriter::ConvertToString(Stmt *From) {
    344   std::string SStr;
    345   llvm::raw_string_ostream S(SStr);
    346   From->printPretty(S, 0, PrintingPolicy(*LangOpts));
    347   return S.str();
    348 }
    349 
    350 bool Rewriter::IncreaseIndentation(CharSourceRange range,
    351                                    SourceLocation parentIndent) {
    352   using llvm::StringRef;
    353 
    354   if (range.isInvalid()) return true;
    355   if (!isRewritable(range.getBegin())) return true;
    356   if (!isRewritable(range.getEnd())) return true;
    357   if (!isRewritable(parentIndent)) return true;
    358 
    359   FileID StartFileID, EndFileID, parentFileID;
    360   unsigned StartOff, EndOff, parentOff;
    361 
    362   StartOff = getLocationOffsetAndFileID(range.getBegin(), StartFileID);
    363   EndOff   = getLocationOffsetAndFileID(range.getEnd(), EndFileID);
    364   parentOff = getLocationOffsetAndFileID(parentIndent, parentFileID);
    365 
    366   if (StartFileID != EndFileID || StartFileID != parentFileID)
    367     return true;
    368   if (StartOff > EndOff)
    369     return true;
    370 
    371   FileID FID = StartFileID;
    372   StringRef MB = SourceMgr->getBufferData(FID);
    373 
    374   unsigned parentLineNo = SourceMgr->getLineNumber(FID, parentOff) - 1;
    375   unsigned startLineNo = SourceMgr->getLineNumber(FID, StartOff) - 1;
    376   unsigned endLineNo = SourceMgr->getLineNumber(FID, EndOff) - 1;
    377 
    378   const SrcMgr::ContentCache *
    379       Content = SourceMgr->getSLocEntry(FID).getFile().getContentCache();
    380 
    381   // Find where the lines start.
    382   unsigned parentLineOffs = Content->SourceLineCache[parentLineNo];
    383   unsigned startLineOffs = Content->SourceLineCache[startLineNo];
    384 
    385   // Find the whitespace at the start of each line.
    386   StringRef parentSpace, startSpace;
    387   {
    388     unsigned i = parentLineOffs;
    389     while (isWhitespace(MB[i]))
    390       ++i;
    391     parentSpace = MB.substr(parentLineOffs, i-parentLineOffs);
    392 
    393     i = startLineOffs;
    394     while (isWhitespace(MB[i]))
    395       ++i;
    396     startSpace = MB.substr(startLineOffs, i-startLineOffs);
    397   }
    398   if (parentSpace.size() >= startSpace.size())
    399     return true;
    400   if (!startSpace.startswith(parentSpace))
    401     return true;
    402 
    403   llvm::StringRef indent = startSpace.substr(parentSpace.size());
    404 
    405   // Indent the lines between start/end offsets.
    406   RewriteBuffer &RB = getEditBuffer(FID);
    407   for (unsigned lineNo = startLineNo; lineNo <= endLineNo; ++lineNo) {
    408     unsigned offs = Content->SourceLineCache[lineNo];
    409     unsigned i = offs;
    410     while (isWhitespace(MB[i]))
    411       ++i;
    412     StringRef origIndent = MB.substr(offs, i-offs);
    413     if (origIndent.startswith(startSpace))
    414       RB.InsertText(offs, indent, /*InsertAfter=*/false);
    415   }
    416 
    417   return false;
    418 }
    419