1 //===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "clang/AST/RawCommentList.h" 11 #include "clang/AST/ASTContext.h" 12 #include "clang/AST/Comment.h" 13 #include "clang/AST/CommentBriefParser.h" 14 #include "clang/AST/CommentCommandTraits.h" 15 #include "clang/AST/CommentLexer.h" 16 #include "clang/AST/CommentParser.h" 17 #include "clang/AST/CommentSema.h" 18 #include "llvm/ADT/STLExtras.h" 19 20 using namespace clang; 21 22 namespace { 23 /// Get comment kind and bool describing if it is a trailing comment. 24 std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment, 25 bool ParseAllComments) { 26 const size_t MinCommentLength = ParseAllComments ? 2 : 3; 27 if ((Comment.size() < MinCommentLength) || Comment[0] != '/') 28 return std::make_pair(RawComment::RCK_Invalid, false); 29 30 RawComment::CommentKind K; 31 if (Comment[1] == '/') { 32 if (Comment.size() < 3) 33 return std::make_pair(RawComment::RCK_OrdinaryBCPL, false); 34 35 if (Comment[2] == '/') 36 K = RawComment::RCK_BCPLSlash; 37 else if (Comment[2] == '!') 38 K = RawComment::RCK_BCPLExcl; 39 else 40 return std::make_pair(RawComment::RCK_OrdinaryBCPL, false); 41 } else { 42 assert(Comment.size() >= 4); 43 44 // Comment lexer does not understand escapes in comment markers, so pretend 45 // that this is not a comment. 46 if (Comment[1] != '*' || 47 Comment[Comment.size() - 2] != '*' || 48 Comment[Comment.size() - 1] != '/') 49 return std::make_pair(RawComment::RCK_Invalid, false); 50 51 if (Comment[2] == '*') 52 K = RawComment::RCK_JavaDoc; 53 else if (Comment[2] == '!') 54 K = RawComment::RCK_Qt; 55 else 56 return std::make_pair(RawComment::RCK_OrdinaryC, false); 57 } 58 const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<'); 59 return std::make_pair(K, TrailingComment); 60 } 61 62 bool mergedCommentIsTrailingComment(StringRef Comment) { 63 return (Comment.size() > 3) && (Comment[3] == '<'); 64 } 65 } // unnamed namespace 66 67 RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR, 68 bool Merged, bool ParseAllComments) : 69 Range(SR), RawTextValid(false), BriefTextValid(false), 70 IsAttached(false), IsAlmostTrailingComment(false), 71 ParseAllComments(ParseAllComments) { 72 // Extract raw comment text, if possible. 73 if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) { 74 Kind = RCK_Invalid; 75 return; 76 } 77 78 if (!Merged) { 79 // Guess comment kind. 80 std::pair<CommentKind, bool> K = getCommentKind(RawText, ParseAllComments); 81 Kind = K.first; 82 IsTrailingComment = K.second; 83 84 IsAlmostTrailingComment = RawText.startswith("//<") || 85 RawText.startswith("/*<"); 86 } else { 87 Kind = RCK_Merged; 88 IsTrailingComment = mergedCommentIsTrailingComment(RawText); 89 } 90 } 91 92 StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const { 93 FileID BeginFileID; 94 FileID EndFileID; 95 unsigned BeginOffset; 96 unsigned EndOffset; 97 98 std::tie(BeginFileID, BeginOffset) = 99 SourceMgr.getDecomposedLoc(Range.getBegin()); 100 std::tie(EndFileID, EndOffset) = SourceMgr.getDecomposedLoc(Range.getEnd()); 101 102 const unsigned Length = EndOffset - BeginOffset; 103 if (Length < 2) 104 return StringRef(); 105 106 // The comment can't begin in one file and end in another. 107 assert(BeginFileID == EndFileID); 108 109 bool Invalid = false; 110 const char *BufferStart = SourceMgr.getBufferData(BeginFileID, 111 &Invalid).data(); 112 if (Invalid) 113 return StringRef(); 114 115 return StringRef(BufferStart + BeginOffset, Length); 116 } 117 118 const char *RawComment::extractBriefText(const ASTContext &Context) const { 119 // Make sure that RawText is valid. 120 getRawText(Context.getSourceManager()); 121 122 // Since we will be copying the resulting text, all allocations made during 123 // parsing are garbage after resulting string is formed. Thus we can use 124 // a separate allocator for all temporary stuff. 125 llvm::BumpPtrAllocator Allocator; 126 127 comments::Lexer L(Allocator, Context.getDiagnostics(), 128 Context.getCommentCommandTraits(), 129 Range.getBegin(), 130 RawText.begin(), RawText.end()); 131 comments::BriefParser P(L, Context.getCommentCommandTraits()); 132 133 const std::string Result = P.Parse(); 134 const unsigned BriefTextLength = Result.size(); 135 char *BriefTextPtr = new (Context) char[BriefTextLength + 1]; 136 memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1); 137 BriefText = BriefTextPtr; 138 BriefTextValid = true; 139 140 return BriefTextPtr; 141 } 142 143 comments::FullComment *RawComment::parse(const ASTContext &Context, 144 const Preprocessor *PP, 145 const Decl *D) const { 146 // Make sure that RawText is valid. 147 getRawText(Context.getSourceManager()); 148 149 comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(), 150 Context.getCommentCommandTraits(), 151 getSourceRange().getBegin(), 152 RawText.begin(), RawText.end()); 153 comments::Sema S(Context.getAllocator(), Context.getSourceManager(), 154 Context.getDiagnostics(), 155 Context.getCommentCommandTraits(), 156 PP); 157 S.setDecl(D); 158 comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(), 159 Context.getDiagnostics(), 160 Context.getCommentCommandTraits()); 161 162 return P.parseFullComment(); 163 } 164 165 static bool onlyWhitespaceBetween(SourceManager &SM, 166 SourceLocation Loc1, SourceLocation Loc2, 167 unsigned MaxNewlinesAllowed) { 168 std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1); 169 std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2); 170 171 // Question does not make sense if locations are in different files. 172 if (Loc1Info.first != Loc2Info.first) 173 return false; 174 175 bool Invalid = false; 176 const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data(); 177 if (Invalid) 178 return false; 179 180 unsigned NumNewlines = 0; 181 assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!"); 182 // Look for non-whitespace characters and remember any newlines seen. 183 for (unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) { 184 switch (Buffer[I]) { 185 default: 186 return false; 187 case ' ': 188 case '\t': 189 case '\f': 190 case '\v': 191 break; 192 case '\r': 193 case '\n': 194 ++NumNewlines; 195 196 // Check if we have found more than the maximum allowed number of 197 // newlines. 198 if (NumNewlines > MaxNewlinesAllowed) 199 return false; 200 201 // Collapse \r\n and \n\r into a single newline. 202 if (I + 1 != Loc2Info.second && 203 (Buffer[I + 1] == '\n' || Buffer[I + 1] == '\r') && 204 Buffer[I] != Buffer[I + 1]) 205 ++I; 206 break; 207 } 208 } 209 210 return true; 211 } 212 213 void RawCommentList::addComment(const RawComment &RC, 214 llvm::BumpPtrAllocator &Allocator) { 215 if (RC.isInvalid()) 216 return; 217 218 // Check if the comments are not in source order. 219 while (!Comments.empty() && 220 !SourceMgr.isBeforeInTranslationUnit(Comments.back()->getLocStart(), 221 RC.getLocStart())) { 222 // If they are, just pop a few last comments that don't fit. 223 // This happens if an \#include directive contains comments. 224 Comments.pop_back(); 225 } 226 227 // Ordinary comments are not interesting for us. 228 if (RC.isOrdinary()) 229 return; 230 231 // If this is the first Doxygen comment, save it (because there isn't 232 // anything to merge it with). 233 if (Comments.empty()) { 234 Comments.push_back(new (Allocator) RawComment(RC)); 235 return; 236 } 237 238 const RawComment &C1 = *Comments.back(); 239 const RawComment &C2 = RC; 240 241 // Merge comments only if there is only whitespace between them. 242 // Can't merge trailing and non-trailing comments. 243 // Merge comments if they are on same or consecutive lines. 244 if (C1.isTrailingComment() == C2.isTrailingComment() && 245 onlyWhitespaceBetween(SourceMgr, C1.getLocEnd(), C2.getLocStart(), 246 /*MaxNewlinesAllowed=*/1)) { 247 SourceRange MergedRange(C1.getLocStart(), C2.getLocEnd()); 248 *Comments.back() = RawComment(SourceMgr, MergedRange, true, 249 RC.isParseAllComments()); 250 } else { 251 Comments.push_back(new (Allocator) RawComment(RC)); 252 } 253 } 254 255 void RawCommentList::addDeserializedComments(ArrayRef<RawComment *> DeserializedComments) { 256 std::vector<RawComment *> MergedComments; 257 MergedComments.reserve(Comments.size() + DeserializedComments.size()); 258 259 std::merge(Comments.begin(), Comments.end(), 260 DeserializedComments.begin(), DeserializedComments.end(), 261 std::back_inserter(MergedComments), 262 BeforeThanCompare<RawComment>(SourceMgr)); 263 std::swap(Comments, MergedComments); 264 } 265 266