Home | History | Annotate | Download | only in AST
      1 //===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #include "clang/AST/RawCommentList.h"
     11 #include "clang/AST/ASTContext.h"
     12 #include "clang/AST/Comment.h"
     13 #include "clang/AST/CommentBriefParser.h"
     14 #include "clang/AST/CommentCommandTraits.h"
     15 #include "clang/AST/CommentLexer.h"
     16 #include "clang/AST/CommentParser.h"
     17 #include "clang/AST/CommentSema.h"
     18 #include "clang/Basic/CharInfo.h"
     19 #include "llvm/ADT/STLExtras.h"
     20 
     21 using namespace clang;
     22 
     23 namespace {
     24 /// Get comment kind and bool describing if it is a trailing comment.
     25 std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment,
     26                                                         bool ParseAllComments) {
     27   const size_t MinCommentLength = ParseAllComments ? 2 : 3;
     28   if ((Comment.size() < MinCommentLength) || Comment[0] != '/')
     29     return std::make_pair(RawComment::RCK_Invalid, false);
     30 
     31   RawComment::CommentKind K;
     32   if (Comment[1] == '/') {
     33     if (Comment.size() < 3)
     34       return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
     35 
     36     if (Comment[2] == '/')
     37       K = RawComment::RCK_BCPLSlash;
     38     else if (Comment[2] == '!')
     39       K = RawComment::RCK_BCPLExcl;
     40     else
     41       return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
     42   } else {
     43     assert(Comment.size() >= 4);
     44 
     45     // Comment lexer does not understand escapes in comment markers, so pretend
     46     // that this is not a comment.
     47     if (Comment[1] != '*' ||
     48         Comment[Comment.size() - 2] != '*' ||
     49         Comment[Comment.size() - 1] != '/')
     50       return std::make_pair(RawComment::RCK_Invalid, false);
     51 
     52     if (Comment[2] == '*')
     53       K = RawComment::RCK_JavaDoc;
     54     else if (Comment[2] == '!')
     55       K = RawComment::RCK_Qt;
     56     else
     57       return std::make_pair(RawComment::RCK_OrdinaryC, false);
     58   }
     59   const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<');
     60   return std::make_pair(K, TrailingComment);
     61 }
     62 
     63 bool mergedCommentIsTrailingComment(StringRef Comment) {
     64   return (Comment.size() > 3) && (Comment[3] == '<');
     65 }
     66 
     67 /// Returns true if R1 and R2 both have valid locations that start on the same
     68 /// column.
     69 bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
     70                                const RawComment &R2) {
     71   SourceLocation L1 = R1.getLocStart();
     72   SourceLocation L2 = R2.getLocStart();
     73   bool Invalid = false;
     74   unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid);
     75   if (!Invalid) {
     76     unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid);
     77     return !Invalid && (C1 == C2);
     78   }
     79   return false;
     80 }
     81 } // unnamed namespace
     82 
     83 /// \brief Determines whether there is only whitespace in `Buffer` between `P`
     84 /// and the previous line.
     85 /// \param Buffer The buffer to search in.
     86 /// \param P The offset from the beginning of `Buffer` to start from.
     87 /// \return true if all of the characters in `Buffer` ranging from the closest
     88 /// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
     89 /// are whitespace.
     90 static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
     91   // Search backwards until we see linefeed or carriage return.
     92   for (unsigned I = P; I != 0; --I) {
     93     char C = Buffer[I - 1];
     94     if (isVerticalWhitespace(C))
     95       return true;
     96     if (!isHorizontalWhitespace(C))
     97       return false;
     98   }
     99   // We hit the beginning of the buffer.
    100   return true;
    101 }
    102 
    103 /// Returns whether `K` is an ordinary comment kind.
    104 static bool isOrdinaryKind(RawComment::CommentKind K) {
    105   return (K == RawComment::RCK_OrdinaryBCPL) ||
    106          (K == RawComment::RCK_OrdinaryC);
    107 }
    108 
    109 RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
    110                        bool Merged, bool ParseAllComments) :
    111     Range(SR), RawTextValid(false), BriefTextValid(false),
    112     IsAttached(false), IsTrailingComment(false), IsAlmostTrailingComment(false),
    113     ParseAllComments(ParseAllComments) {
    114   // Extract raw comment text, if possible.
    115   if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
    116     Kind = RCK_Invalid;
    117     return;
    118   }
    119 
    120   // Guess comment kind.
    121   std::pair<CommentKind, bool> K = getCommentKind(RawText, ParseAllComments);
    122 
    123   // Guess whether an ordinary comment is trailing.
    124   if (ParseAllComments && isOrdinaryKind(K.first)) {
    125     FileID BeginFileID;
    126     unsigned BeginOffset;
    127     std::tie(BeginFileID, BeginOffset) =
    128         SourceMgr.getDecomposedLoc(Range.getBegin());
    129     if (BeginOffset != 0) {
    130       bool Invalid = false;
    131       const char *Buffer =
    132           SourceMgr.getBufferData(BeginFileID, &Invalid).data();
    133       IsTrailingComment |=
    134           (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset));
    135     }
    136   }
    137 
    138   if (!Merged) {
    139     Kind = K.first;
    140     IsTrailingComment |= K.second;
    141 
    142     IsAlmostTrailingComment = RawText.startswith("//<") ||
    143                                  RawText.startswith("/*<");
    144   } else {
    145     Kind = RCK_Merged;
    146     IsTrailingComment =
    147         IsTrailingComment || mergedCommentIsTrailingComment(RawText);
    148   }
    149 }
    150 
    151 StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const {
    152   FileID BeginFileID;
    153   FileID EndFileID;
    154   unsigned BeginOffset;
    155   unsigned EndOffset;
    156 
    157   std::tie(BeginFileID, BeginOffset) =
    158       SourceMgr.getDecomposedLoc(Range.getBegin());
    159   std::tie(EndFileID, EndOffset) = SourceMgr.getDecomposedLoc(Range.getEnd());
    160 
    161   const unsigned Length = EndOffset - BeginOffset;
    162   if (Length < 2)
    163     return StringRef();
    164 
    165   // The comment can't begin in one file and end in another.
    166   assert(BeginFileID == EndFileID);
    167 
    168   bool Invalid = false;
    169   const char *BufferStart = SourceMgr.getBufferData(BeginFileID,
    170                                                     &Invalid).data();
    171   if (Invalid)
    172     return StringRef();
    173 
    174   return StringRef(BufferStart + BeginOffset, Length);
    175 }
    176 
    177 const char *RawComment::extractBriefText(const ASTContext &Context) const {
    178   // Make sure that RawText is valid.
    179   getRawText(Context.getSourceManager());
    180 
    181   // Since we will be copying the resulting text, all allocations made during
    182   // parsing are garbage after resulting string is formed.  Thus we can use
    183   // a separate allocator for all temporary stuff.
    184   llvm::BumpPtrAllocator Allocator;
    185 
    186   comments::Lexer L(Allocator, Context.getDiagnostics(),
    187                     Context.getCommentCommandTraits(),
    188                     Range.getBegin(),
    189                     RawText.begin(), RawText.end());
    190   comments::BriefParser P(L, Context.getCommentCommandTraits());
    191 
    192   const std::string Result = P.Parse();
    193   const unsigned BriefTextLength = Result.size();
    194   char *BriefTextPtr = new (Context) char[BriefTextLength + 1];
    195   memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1);
    196   BriefText = BriefTextPtr;
    197   BriefTextValid = true;
    198 
    199   return BriefTextPtr;
    200 }
    201 
    202 comments::FullComment *RawComment::parse(const ASTContext &Context,
    203                                          const Preprocessor *PP,
    204                                          const Decl *D) const {
    205   // Make sure that RawText is valid.
    206   getRawText(Context.getSourceManager());
    207 
    208   comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(),
    209                     Context.getCommentCommandTraits(),
    210                     getSourceRange().getBegin(),
    211                     RawText.begin(), RawText.end());
    212   comments::Sema S(Context.getAllocator(), Context.getSourceManager(),
    213                    Context.getDiagnostics(),
    214                    Context.getCommentCommandTraits(),
    215                    PP);
    216   S.setDecl(D);
    217   comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(),
    218                      Context.getDiagnostics(),
    219                      Context.getCommentCommandTraits());
    220 
    221   return P.parseFullComment();
    222 }
    223 
    224 static bool onlyWhitespaceBetween(SourceManager &SM,
    225                                   SourceLocation Loc1, SourceLocation Loc2,
    226                                   unsigned MaxNewlinesAllowed) {
    227   std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1);
    228   std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2);
    229 
    230   // Question does not make sense if locations are in different files.
    231   if (Loc1Info.first != Loc2Info.first)
    232     return false;
    233 
    234   bool Invalid = false;
    235   const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data();
    236   if (Invalid)
    237     return false;
    238 
    239   unsigned NumNewlines = 0;
    240   assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!");
    241   // Look for non-whitespace characters and remember any newlines seen.
    242   for (unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) {
    243     switch (Buffer[I]) {
    244     default:
    245       return false;
    246     case ' ':
    247     case '\t':
    248     case '\f':
    249     case '\v':
    250       break;
    251     case '\r':
    252     case '\n':
    253       ++NumNewlines;
    254 
    255       // Check if we have found more than the maximum allowed number of
    256       // newlines.
    257       if (NumNewlines > MaxNewlinesAllowed)
    258         return false;
    259 
    260       // Collapse \r\n and \n\r into a single newline.
    261       if (I + 1 != Loc2Info.second &&
    262           (Buffer[I + 1] == '\n' || Buffer[I + 1] == '\r') &&
    263           Buffer[I] != Buffer[I + 1])
    264         ++I;
    265       break;
    266     }
    267   }
    268 
    269   return true;
    270 }
    271 
    272 void RawCommentList::addComment(const RawComment &RC,
    273                                 llvm::BumpPtrAllocator &Allocator) {
    274   if (RC.isInvalid())
    275     return;
    276 
    277   // Check if the comments are not in source order.
    278   while (!Comments.empty() &&
    279          !SourceMgr.isBeforeInTranslationUnit(Comments.back()->getLocStart(),
    280                                               RC.getLocStart())) {
    281     // If they are, just pop a few last comments that don't fit.
    282     // This happens if an \#include directive contains comments.
    283     Comments.pop_back();
    284   }
    285 
    286   // Ordinary comments are not interesting for us.
    287   if (RC.isOrdinary())
    288     return;
    289 
    290   // If this is the first Doxygen comment, save it (because there isn't
    291   // anything to merge it with).
    292   if (Comments.empty()) {
    293     Comments.push_back(new (Allocator) RawComment(RC));
    294     return;
    295   }
    296 
    297   const RawComment &C1 = *Comments.back();
    298   const RawComment &C2 = RC;
    299 
    300   // Merge comments only if there is only whitespace between them.
    301   // Can't merge trailing and non-trailing comments unless the second is
    302   // non-trailing ordinary in the same column, as in the case:
    303   //   int x; // documents x
    304   //          // more text
    305   // versus:
    306   //   int x; // documents x
    307   //   int y; // documents y
    308   // or:
    309   //   int x; // documents x
    310   //   // documents y
    311   //   int y;
    312   // Merge comments if they are on same or consecutive lines.
    313   if ((C1.isTrailingComment() == C2.isTrailingComment() ||
    314        (C1.isTrailingComment() && !C2.isTrailingComment() &&
    315         isOrdinaryKind(C2.getKind()) &&
    316         commentsStartOnSameColumn(SourceMgr, C1, C2))) &&
    317       onlyWhitespaceBetween(SourceMgr, C1.getLocEnd(), C2.getLocStart(),
    318                             /*MaxNewlinesAllowed=*/1)) {
    319     SourceRange MergedRange(C1.getLocStart(), C2.getLocEnd());
    320     *Comments.back() = RawComment(SourceMgr, MergedRange, true,
    321                                   RC.isParseAllComments());
    322   } else {
    323     Comments.push_back(new (Allocator) RawComment(RC));
    324   }
    325 }
    326 
    327 void RawCommentList::addDeserializedComments(ArrayRef<RawComment *> DeserializedComments) {
    328   std::vector<RawComment *> MergedComments;
    329   MergedComments.reserve(Comments.size() + DeserializedComments.size());
    330 
    331   std::merge(Comments.begin(), Comments.end(),
    332              DeserializedComments.begin(), DeserializedComments.end(),
    333              std::back_inserter(MergedComments),
    334              BeforeThanCompare<RawComment>(SourceMgr));
    335   std::swap(Comments, MergedComments);
    336 }
    337 
    338