1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // This implements a Clang tool to rewrite all instances of 6 // scoped_refptr<T>'s implicit cast to T (operator T*) to an explicit call to 7 // the .get() method. 8 9 #include <assert.h> 10 #include <algorithm> 11 #include <memory> 12 #include <string> 13 14 #include "clang/AST/ASTContext.h" 15 #include "clang/ASTMatchers/ASTMatchers.h" 16 #include "clang/ASTMatchers/ASTMatchersMacros.h" 17 #include "clang/ASTMatchers/ASTMatchFinder.h" 18 #include "clang/Basic/SourceManager.h" 19 #include "clang/Frontend/FrontendActions.h" 20 #include "clang/Lex/Lexer.h" 21 #include "clang/Tooling/CommonOptionsParser.h" 22 #include "clang/Tooling/Refactoring.h" 23 #include "clang/Tooling/Tooling.h" 24 #include "llvm/Support/CommandLine.h" 25 #include "llvm/Support/TargetSelect.h" 26 27 using namespace clang::ast_matchers; 28 using clang::tooling::CommonOptionsParser; 29 using clang::tooling::Replacement; 30 using clang::tooling::Replacements; 31 using llvm::StringRef; 32 33 namespace clang { 34 namespace ast_matchers { 35 36 const internal::VariadicDynCastAllOfMatcher<Decl, CXXConversionDecl> 37 conversionDecl; 38 39 AST_MATCHER(QualType, isBoolean) { 40 return Node->isBooleanType(); 41 } 42 43 } // namespace ast_matchers 44 } // namespace clang 45 46 namespace { 47 48 // Returns true if expr needs to be put in parens (eg: when it is an operator 49 // syntactically). 50 bool NeedsParens(const clang::Expr* expr) { 51 if (llvm::dyn_cast<clang::UnaryOperator>(expr) || 52 llvm::dyn_cast<clang::BinaryOperator>(expr) || 53 llvm::dyn_cast<clang::ConditionalOperator>(expr)) { 54 return true; 55 } 56 // Calls to an overloaded operator also need parens, except for foo(...) and 57 // foo[...] expressions. 58 if (const clang::CXXOperatorCallExpr* op = 59 llvm::dyn_cast<clang::CXXOperatorCallExpr>(expr)) { 60 return op->getOperator() != clang::OO_Call && 61 op->getOperator() != clang::OO_Subscript; 62 } 63 return false; 64 } 65 66 Replacement RewriteImplicitToExplicitConversion( 67 const MatchFinder::MatchResult& result, 68 const clang::Expr* expr) { 69 clang::CharSourceRange range = clang::CharSourceRange::getTokenRange( 70 result.SourceManager->getSpellingLoc(expr->getLocStart()), 71 result.SourceManager->getSpellingLoc(expr->getLocEnd())); 72 assert(range.isValid() && "Invalid range!"); 73 74 // Handle cases where an implicit cast is being done by dereferencing a 75 // pointer to a scoped_refptr<> (sadly, it happens...) 76 // 77 // This rewrites both "*foo" and "*(foo)" as "foo->get()". 78 if (const clang::UnaryOperator* op = 79 llvm::dyn_cast<clang::UnaryOperator>(expr)) { 80 if (op->getOpcode() == clang::UO_Deref) { 81 const clang::Expr* const sub_expr = 82 op->getSubExpr()->IgnoreParenImpCasts(); 83 clang::CharSourceRange sub_expr_range = 84 clang::CharSourceRange::getTokenRange( 85 result.SourceManager->getSpellingLoc(sub_expr->getLocStart()), 86 result.SourceManager->getSpellingLoc(sub_expr->getLocEnd())); 87 assert(sub_expr_range.isValid() && "Invalid subexpression range!"); 88 89 std::string inner_text = clang::Lexer::getSourceText( 90 sub_expr_range, *result.SourceManager, result.Context->getLangOpts()); 91 assert(!inner_text.empty() && "No text for subexpression!"); 92 if (NeedsParens(sub_expr)) { 93 inner_text.insert(0, "("); 94 inner_text.append(")"); 95 } 96 inner_text.append("->get()"); 97 return Replacement(*result.SourceManager, range, inner_text); 98 } 99 } 100 101 std::string text = clang::Lexer::getSourceText( 102 range, *result.SourceManager, result.Context->getLangOpts()); 103 assert(!text.empty() && "No text for expression!"); 104 105 // Unwrap any temporaries - for example, custom iterators that return 106 // scoped_refptr<T> as part of operator*. Any such iterators should also 107 // be declaring a scoped_refptr<T>* operator->, per C++03 24.4.1.1 (Table 72) 108 if (const clang::CXXBindTemporaryExpr* op = 109 llvm::dyn_cast<clang::CXXBindTemporaryExpr>(expr)) { 110 expr = op->getSubExpr(); 111 } 112 113 // Handle iterators (which are operator* calls, followed by implicit 114 // conversions) by rewriting *it as it->get() 115 if (const clang::CXXOperatorCallExpr* op = 116 llvm::dyn_cast<clang::CXXOperatorCallExpr>(expr)) { 117 if (op->getOperator() == clang::OO_Star) { 118 // Note that this doesn't rewrite **it correctly, since it should be 119 // rewritten using parens, e.g. (*it)->get(). However, this shouldn't 120 // happen frequently, if at all, since it would likely indicate code is 121 // storing pointers to a scoped_refptr in a container. 122 text.erase(0, 1); 123 text.append("->get()"); 124 return Replacement(*result.SourceManager, range, text); 125 } 126 } 127 128 // The only remaining calls should be non-dereferencing calls (eg: member 129 // calls), so a simple ".get()" appending should suffice. 130 if (NeedsParens(expr)) { 131 text.insert(0, "("); 132 text.append(")"); 133 } 134 text.append(".get()"); 135 return Replacement(*result.SourceManager, range, text); 136 } 137 138 Replacement RewriteRawPtrToScopedRefptr(const MatchFinder::MatchResult& result, 139 clang::SourceLocation begin, 140 clang::SourceLocation end) { 141 clang::CharSourceRange range = clang::CharSourceRange::getTokenRange( 142 result.SourceManager->getSpellingLoc(begin), 143 result.SourceManager->getSpellingLoc(end)); 144 assert(range.isValid() && "Invalid range!"); 145 146 std::string text = clang::Lexer::getSourceText( 147 range, *result.SourceManager, result.Context->getLangOpts()); 148 text.erase(text.rfind('*')); 149 150 std::string replacement_text("scoped_refptr<"); 151 replacement_text += text; 152 replacement_text += ">"; 153 154 return Replacement(*result.SourceManager, range, replacement_text); 155 } 156 157 class GetRewriterCallback : public MatchFinder::MatchCallback { 158 public: 159 explicit GetRewriterCallback(Replacements* replacements) 160 : replacements_(replacements) {} 161 virtual void run(const MatchFinder::MatchResult& result) override; 162 163 private: 164 Replacements* const replacements_; 165 }; 166 167 void GetRewriterCallback::run(const MatchFinder::MatchResult& result) { 168 const clang::Expr* arg = result.Nodes.getNodeAs<clang::Expr>("arg"); 169 assert(arg && "Unexpected match! No Expr captured!"); 170 auto err = 171 replacements_->add(RewriteImplicitToExplicitConversion(result, arg)); 172 assert(!err); 173 } 174 175 class VarRewriterCallback : public MatchFinder::MatchCallback { 176 public: 177 explicit VarRewriterCallback(Replacements* replacements) 178 : replacements_(replacements) {} 179 virtual void run(const MatchFinder::MatchResult& result) override; 180 181 private: 182 Replacements* const replacements_; 183 }; 184 185 void VarRewriterCallback::run(const MatchFinder::MatchResult& result) { 186 const clang::DeclaratorDecl* const var_decl = 187 result.Nodes.getNodeAs<clang::DeclaratorDecl>("var"); 188 assert(var_decl && "Unexpected match! No VarDecl captured!"); 189 190 const clang::TypeSourceInfo* tsi = var_decl->getTypeSourceInfo(); 191 192 // TODO(dcheng): This mishandles a case where a variable has multiple 193 // declarations, e.g.: 194 // 195 // in .h: 196 // Foo* my_global_magical_foo; 197 // 198 // in .cc: 199 // Foo* my_global_magical_foo = CreateFoo(); 200 // 201 // In this case, it will only rewrite the .cc definition. Oh well. This should 202 // be rare enough that these cases can be manually handled, since the style 203 // guide prohibits globals of non-POD type. 204 auto err = replacements_->add(RewriteRawPtrToScopedRefptr( 205 result, tsi->getTypeLoc().getBeginLoc(), tsi->getTypeLoc().getEndLoc())); 206 assert(!err); 207 } 208 209 class FunctionRewriterCallback : public MatchFinder::MatchCallback { 210 public: 211 explicit FunctionRewriterCallback(Replacements* replacements) 212 : replacements_(replacements) {} 213 virtual void run(const MatchFinder::MatchResult& result) override; 214 215 private: 216 Replacements* const replacements_; 217 }; 218 219 void FunctionRewriterCallback::run(const MatchFinder::MatchResult& result) { 220 const clang::FunctionDecl* const function_decl = 221 result.Nodes.getNodeAs<clang::FunctionDecl>("fn"); 222 assert(function_decl && "Unexpected match! No FunctionDecl captured!"); 223 224 // If matched against an implicit conversion to a DeclRefExpr, make sure the 225 // referenced declaration is of class type, e.g. the tool skips trying to 226 // chase pointers/references to determine if the pointee is a scoped_refptr<T> 227 // with local storage. Instead, let a human manually handle those cases. 228 const clang::VarDecl* const var_decl = 229 result.Nodes.getNodeAs<clang::VarDecl>("var"); 230 if (var_decl && !var_decl->getTypeSourceInfo()->getType()->isClassType()) { 231 return; 232 } 233 234 for (clang::FunctionDecl* f : function_decl->redecls()) { 235 clang::SourceRange range = f->getReturnTypeSourceRange(); 236 auto err = replacements_->add( 237 RewriteRawPtrToScopedRefptr(result, range.getBegin(), range.getEnd())); 238 assert(!err); 239 } 240 } 241 242 class MacroRewriterCallback : public MatchFinder::MatchCallback { 243 public: 244 explicit MacroRewriterCallback(Replacements* replacements) 245 : replacements_(replacements) {} 246 virtual void run(const MatchFinder::MatchResult& result) override; 247 248 private: 249 Replacements* const replacements_; 250 }; 251 252 void MacroRewriterCallback::run(const MatchFinder::MatchResult& result) { 253 const clang::Expr* const expr = result.Nodes.getNodeAs<clang::Expr>("expr"); 254 assert(expr && "Unexpected match! No Expr captured!"); 255 auto err = 256 replacements_->add(RewriteImplicitToExplicitConversion(result, expr)); 257 assert(!err); 258 } 259 260 } // namespace 261 262 static llvm::cl::extrahelp common_help(CommonOptionsParser::HelpMessage); 263 264 int main(int argc, const char* argv[]) { 265 // TODO(dcheng): Clang tooling should do this itself. 266 // http://llvm.org/bugs/show_bug.cgi?id=21627 267 llvm::InitializeNativeTarget(); 268 llvm::InitializeNativeTargetAsmParser(); 269 llvm::cl::OptionCategory category("Remove scoped_refptr conversions"); 270 CommonOptionsParser options(argc, argv, category); 271 clang::tooling::ClangTool tool(options.getCompilations(), 272 options.getSourcePathList()); 273 274 MatchFinder match_finder; 275 Replacements replacements; 276 277 auto is_scoped_refptr = cxxRecordDecl(isSameOrDerivedFrom("::scoped_refptr"), 278 isTemplateInstantiation()); 279 280 // Finds all calls to conversion operator member function. This catches calls 281 // to "operator T*", "operator Testable", and "operator bool" equally. 282 auto base_matcher = 283 cxxMemberCallExpr(thisPointerType(is_scoped_refptr), 284 callee(conversionDecl()), on(id("arg", expr()))); 285 286 // The heuristic for whether or not converting a temporary is 'unsafe'. An 287 // unsafe conversion is one where a temporary scoped_refptr<T> is converted to 288 // another type. The matcher provides an exception for a temporary 289 // scoped_refptr that is the result of an operator call. In this case, assume 290 // that it's the result of an iterator dereference, and the container itself 291 // retains the necessary reference, since this is a common idiom to see in 292 // loop bodies. 293 auto is_unsafe_temporary_conversion = 294 on(cxxBindTemporaryExpr(unless(has(cxxOperatorCallExpr())))); 295 296 // Returning a scoped_refptr<T> as a T* is considered unsafe if either are 297 // true: 298 // - The scoped_refptr<T> is a temporary. 299 // - The scoped_refptr<T> has local lifetime. 300 auto returned_as_raw_ptr = hasParent( 301 returnStmt(hasAncestor(id("fn", functionDecl(returns(pointerType())))))); 302 // This matcher intentionally matches more than it should. For example, this 303 // will match: 304 // scoped_refptr<Foo>& foo = some_other_foo; 305 // return foo; 306 // The matcher callback filters out VarDecls that aren't a scoped_refptr<T>, 307 // so those cases can be manually handled. 308 auto is_local_variable = 309 on(declRefExpr(to(id("var", varDecl(hasLocalStorage()))))); 310 auto is_unsafe_return = 311 anyOf(allOf(hasParent(implicitCastExpr(returned_as_raw_ptr)), 312 is_local_variable), 313 allOf(hasParent(implicitCastExpr( 314 hasParent(exprWithCleanups(returned_as_raw_ptr)))), 315 is_unsafe_temporary_conversion)); 316 317 // This catches both user-defined conversions (eg: "operator bool") and 318 // standard conversion sequence (C++03 13.3.3.1.1), such as converting a 319 // pointer to a bool. 320 auto implicit_to_bool = 321 implicitCastExpr(hasImplicitDestinationType(isBoolean())); 322 323 // Avoid converting calls to of "operator Testable" -> "bool" and calls of 324 // "operator T*" -> "bool". 325 auto bool_conversion_matcher = hasParent( 326 expr(anyOf(implicit_to_bool, expr(hasParent(implicit_to_bool))))); 327 328 auto is_logging_helper = 329 functionDecl(anyOf(hasName("CheckEQImpl"), hasName("CheckNEImpl"))); 330 auto is_gtest_helper = functionDecl( 331 anyOf(cxxMethodDecl(ofClass(cxxRecordDecl(isSameOrDerivedFrom( 332 hasName("::testing::internal::EqHelper")))), 333 hasName("Compare")), 334 hasName("::testing::internal::CmpHelperNE"))); 335 auto is_gtest_assertion_result_ctor = 336 cxxConstructorDecl(ofClass(cxxRecordDecl( 337 isSameOrDerivedFrom(hasName("::testing::AssertionResult"))))); 338 339 // Find all calls to an operator overload that are 'safe'. 340 // 341 // All bool conversions will be handled with the Testable trick, but that 342 // can only be used once "operator T*" is removed, since otherwise it leaves 343 // the call ambiguous. 344 GetRewriterCallback get_callback(&replacements); 345 match_finder.addMatcher( 346 cxxMemberCallExpr( 347 base_matcher, 348 // Excluded since the conversion may be unsafe. 349 unless(anyOf(is_unsafe_temporary_conversion, is_unsafe_return)), 350 // Excluded since the conversion occurs inside a helper function that 351 // the macro wraps. Letting this callback handle the rewrite would 352 // result in an incorrect replacement that changes the helper function 353 // itself. Instead, the right replacement is to rewrite the macro's 354 // arguments. 355 unless(hasAncestor(decl(anyOf(is_logging_helper, is_gtest_helper, 356 is_gtest_assertion_result_ctor))))), 357 &get_callback); 358 359 // Find temporary scoped_refptr<T>'s being unsafely assigned to a T*. 360 VarRewriterCallback var_callback(&replacements); 361 auto initialized_with_temporary = has(ignoringImpCasts( 362 cxxMemberCallExpr(base_matcher, is_unsafe_temporary_conversion))); 363 match_finder.addMatcher( 364 id("var", varDecl(hasInitializer(initialized_with_temporary), 365 hasType(pointerType()))), 366 &var_callback); 367 match_finder.addMatcher( 368 cxxConstructorDecl(forEachConstructorInitializer( 369 allOf(withInitializer(initialized_with_temporary), 370 forField(id("var", fieldDecl(hasType(pointerType()))))))), 371 &var_callback); 372 373 // Rewrite functions that unsafely turn a scoped_refptr<T> into a T* when 374 // returning a value. 375 FunctionRewriterCallback fn_callback(&replacements); 376 match_finder.addMatcher(cxxMemberCallExpr(base_matcher, is_unsafe_return), 377 &fn_callback); 378 379 // Rewrite logging / gtest expressions that result in an implicit conversion. 380 // Luckily, the matchers don't need to handle the case where one of the macro 381 // arguments is NULL, such as: 382 // CHECK_EQ(my_scoped_refptr, NULL) 383 // because it simply doesn't compile--since NULL is actually of integral type, 384 // this doesn't trigger scoped_refptr<T>'s implicit conversion. Since there is 385 // no comparison overload for scoped_refptr<T> and int, this fails to compile. 386 MacroRewriterCallback macro_callback(&replacements); 387 // CHECK_EQ/CHECK_NE helpers. 388 match_finder.addMatcher( 389 callExpr(callee(is_logging_helper), argumentCountIs(3), 390 hasAnyArgument(ignoringParenImpCasts( 391 id("expr", expr(hasType(is_scoped_refptr))))), 392 hasAnyArgument(ignoringParenImpCasts(hasType(pointerType()))), 393 hasArgument(2, stringLiteral())), 394 ¯o_callback); 395 // ASSERT_EQ/ASSERT_NE/EXPECT_EQ/EXPECT_EQ, which use the same underlying 396 // helper functions. Even though gtest has special handling for pointer to 397 // NULL comparisons, it doesn't trigger in this case, so no special handling 398 // is needed for the replacements. 399 match_finder.addMatcher( 400 callExpr(callee(is_gtest_helper), 401 argumentCountIs(4), 402 hasArgument(0, stringLiteral()), 403 hasArgument(1, stringLiteral()), 404 hasAnyArgument(id("expr", expr(hasType(is_scoped_refptr)))), 405 hasAnyArgument(hasType(pointerType()))), 406 ¯o_callback); 407 // ASSERT_TRUE/EXPECT_TRUE helpers. Note that this matcher doesn't need to 408 // handle ASSERT_FALSE/EXPECT_FALSE, because it gets coerced to bool before 409 // being passed as an argument to AssertionResult's constructor. As a result, 410 // GetRewriterCallback handles this case properly since the conversion isn't 411 // hidden inside AssertionResult, and the generated replacement properly 412 // rewrites the macro argument. 413 // However, the tool does need to handle the _TRUE counterparts, since the 414 // conversion occurs inside the constructor in those cases. 415 match_finder.addMatcher( 416 cxxConstructExpr( 417 argumentCountIs(2), 418 hasArgument(0, id("expr", expr(hasType(is_scoped_refptr)))), 419 hasDeclaration(is_gtest_assertion_result_ctor)), 420 ¯o_callback); 421 422 std::unique_ptr<clang::tooling::FrontendActionFactory> factory = 423 clang::tooling::newFrontendActionFactory(&match_finder); 424 int result = tool.run(factory.get()); 425 if (result != 0) 426 return result; 427 428 // Serialization format is documented in tools/clang/scripts/run_tool.py 429 llvm::outs() << "==== BEGIN EDITS ====\n"; 430 for (const auto& r : replacements) { 431 std::string replacement_text = r.getReplacementText().str(); 432 std::replace(replacement_text.begin(), replacement_text.end(), '\n', '\0'); 433 llvm::outs() << "r:::" << r.getFilePath() << ":::" << r.getOffset() << ":::" 434 << r.getLength() << ":::" << replacement_text << "\n"; 435 } 436 llvm::outs() << "==== END EDITS ====\n"; 437 438 return 0; 439 } 440