1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // This implements a Clang tool to rewrite all instances of 6 // scoped_refptr<T>'s implicit cast to T (operator T*) to an explicit call to 7 // the .get() method. 8 9 #include <assert.h> 10 #include <algorithm> 11 #include <memory> 12 #include <string> 13 14 #include "clang/AST/ASTContext.h" 15 #include "clang/ASTMatchers/ASTMatchers.h" 16 #include "clang/ASTMatchers/ASTMatchersMacros.h" 17 #include "clang/ASTMatchers/ASTMatchFinder.h" 18 #include "clang/Basic/SourceManager.h" 19 #include "clang/Frontend/FrontendActions.h" 20 #include "clang/Lex/Lexer.h" 21 #include "clang/Tooling/CommonOptionsParser.h" 22 #include "clang/Tooling/Refactoring.h" 23 #include "clang/Tooling/Tooling.h" 24 #include "llvm/Support/CommandLine.h" 25 26 using namespace clang::ast_matchers; 27 using clang::tooling::CommonOptionsParser; 28 using clang::tooling::Replacement; 29 using clang::tooling::Replacements; 30 using llvm::StringRef; 31 32 namespace clang { 33 namespace ast_matchers { 34 35 const internal::VariadicDynCastAllOfMatcher<Decl, CXXConversionDecl> 36 conversionDecl; 37 38 AST_MATCHER(QualType, isBoolean) { 39 return Node->isBooleanType(); 40 } 41 42 } // namespace ast_matchers 43 } // namespace clang 44 45 namespace { 46 47 // Returns true if expr needs to be put in parens (eg: when it is an operator 48 // syntactically). 49 bool NeedsParens(const clang::Expr* expr) { 50 if (llvm::dyn_cast<clang::UnaryOperator>(expr) || 51 llvm::dyn_cast<clang::BinaryOperator>(expr) || 52 llvm::dyn_cast<clang::ConditionalOperator>(expr)) { 53 return true; 54 } 55 // Calls to an overloaded operator also need parens, except for foo(...) and 56 // foo[...] expressions. 57 if (const clang::CXXOperatorCallExpr* op = 58 llvm::dyn_cast<clang::CXXOperatorCallExpr>(expr)) { 59 return op->getOperator() != clang::OO_Call && 60 op->getOperator() != clang::OO_Subscript; 61 } 62 return false; 63 } 64 65 Replacement RewriteImplicitToExplicitConversion( 66 const MatchFinder::MatchResult& result, 67 const clang::Expr* expr) { 68 clang::CharSourceRange range = clang::CharSourceRange::getTokenRange( 69 result.SourceManager->getSpellingLoc(expr->getLocStart()), 70 result.SourceManager->getSpellingLoc(expr->getLocEnd())); 71 assert(range.isValid() && "Invalid range!"); 72 73 // Handle cases where an implicit cast is being done by dereferencing a 74 // pointer to a scoped_refptr<> (sadly, it happens...) 75 // 76 // This rewrites both "*foo" and "*(foo)" as "foo->get()". 77 if (const clang::UnaryOperator* op = 78 llvm::dyn_cast<clang::UnaryOperator>(expr)) { 79 if (op->getOpcode() == clang::UO_Deref) { 80 const clang::Expr* const sub_expr = 81 op->getSubExpr()->IgnoreParenImpCasts(); 82 clang::CharSourceRange sub_expr_range = 83 clang::CharSourceRange::getTokenRange( 84 result.SourceManager->getSpellingLoc(sub_expr->getLocStart()), 85 result.SourceManager->getSpellingLoc(sub_expr->getLocEnd())); 86 assert(sub_expr_range.isValid() && "Invalid subexpression range!"); 87 88 std::string inner_text = clang::Lexer::getSourceText( 89 sub_expr_range, *result.SourceManager, result.Context->getLangOpts()); 90 assert(!inner_text.empty() && "No text for subexpression!"); 91 if (NeedsParens(sub_expr)) { 92 inner_text.insert(0, "("); 93 inner_text.append(")"); 94 } 95 inner_text.append("->get()"); 96 return Replacement(*result.SourceManager, range, inner_text); 97 } 98 } 99 100 std::string text = clang::Lexer::getSourceText( 101 range, *result.SourceManager, result.Context->getLangOpts()); 102 assert(!text.empty() && "No text for expression!"); 103 104 // Unwrap any temporaries - for example, custom iterators that return 105 // scoped_refptr<T> as part of operator*. Any such iterators should also 106 // be declaring a scoped_refptr<T>* operator->, per C++03 24.4.1.1 (Table 72) 107 if (const clang::CXXBindTemporaryExpr* op = 108 llvm::dyn_cast<clang::CXXBindTemporaryExpr>(expr)) { 109 expr = op->getSubExpr(); 110 } 111 112 // Handle iterators (which are operator* calls, followed by implicit 113 // conversions) by rewriting *it as it->get() 114 if (const clang::CXXOperatorCallExpr* op = 115 llvm::dyn_cast<clang::CXXOperatorCallExpr>(expr)) { 116 if (op->getOperator() == clang::OO_Star) { 117 // Note that this doesn't rewrite **it correctly, since it should be 118 // rewritten using parens, e.g. (*it)->get(). However, this shouldn't 119 // happen frequently, if at all, since it would likely indicate code is 120 // storing pointers to a scoped_refptr in a container. 121 text.erase(0, 1); 122 text.append("->get()"); 123 return Replacement(*result.SourceManager, range, text); 124 } 125 } 126 127 // The only remaining calls should be non-dereferencing calls (eg: member 128 // calls), so a simple ".get()" appending should suffice. 129 if (NeedsParens(expr)) { 130 text.insert(0, "("); 131 text.append(")"); 132 } 133 text.append(".get()"); 134 return Replacement(*result.SourceManager, range, text); 135 } 136 137 Replacement RewriteRawPtrToScopedRefptr(const MatchFinder::MatchResult& result, 138 clang::SourceLocation begin, 139 clang::SourceLocation end) { 140 clang::CharSourceRange range = clang::CharSourceRange::getTokenRange( 141 result.SourceManager->getSpellingLoc(begin), 142 result.SourceManager->getSpellingLoc(end)); 143 assert(range.isValid() && "Invalid range!"); 144 145 std::string text = clang::Lexer::getSourceText( 146 range, *result.SourceManager, result.Context->getLangOpts()); 147 text.erase(text.rfind('*')); 148 149 std::string replacement_text("scoped_refptr<"); 150 replacement_text += text; 151 replacement_text += ">"; 152 153 return Replacement(*result.SourceManager, range, replacement_text); 154 } 155 156 class GetRewriterCallback : public MatchFinder::MatchCallback { 157 public: 158 explicit GetRewriterCallback(Replacements* replacements) 159 : replacements_(replacements) {} 160 virtual void run(const MatchFinder::MatchResult& result) override; 161 162 private: 163 Replacements* const replacements_; 164 }; 165 166 void GetRewriterCallback::run(const MatchFinder::MatchResult& result) { 167 const clang::Expr* arg = result.Nodes.getNodeAs<clang::Expr>("arg"); 168 assert(arg && "Unexpected match! No Expr captured!"); 169 replacements_->insert(RewriteImplicitToExplicitConversion(result, arg)); 170 } 171 172 class VarRewriterCallback : public MatchFinder::MatchCallback { 173 public: 174 explicit VarRewriterCallback(Replacements* replacements) 175 : replacements_(replacements) {} 176 virtual void run(const MatchFinder::MatchResult& result) override; 177 178 private: 179 Replacements* const replacements_; 180 }; 181 182 void VarRewriterCallback::run(const MatchFinder::MatchResult& result) { 183 const clang::DeclaratorDecl* const var_decl = 184 result.Nodes.getNodeAs<clang::DeclaratorDecl>("var"); 185 assert(var_decl && "Unexpected match! No VarDecl captured!"); 186 187 const clang::TypeSourceInfo* tsi = var_decl->getTypeSourceInfo(); 188 189 // TODO(dcheng): This mishandles a case where a variable has multiple 190 // declarations, e.g.: 191 // 192 // in .h: 193 // Foo* my_global_magical_foo; 194 // 195 // in .cc: 196 // Foo* my_global_magical_foo = CreateFoo(); 197 // 198 // In this case, it will only rewrite the .cc definition. Oh well. This should 199 // be rare enough that these cases can be manually handled, since the style 200 // guide prohibits globals of non-POD type. 201 replacements_->insert(RewriteRawPtrToScopedRefptr( 202 result, tsi->getTypeLoc().getBeginLoc(), tsi->getTypeLoc().getEndLoc())); 203 } 204 205 class FunctionRewriterCallback : public MatchFinder::MatchCallback { 206 public: 207 explicit FunctionRewriterCallback(Replacements* replacements) 208 : replacements_(replacements) {} 209 virtual void run(const MatchFinder::MatchResult& result) override; 210 211 private: 212 Replacements* const replacements_; 213 }; 214 215 void FunctionRewriterCallback::run(const MatchFinder::MatchResult& result) { 216 const clang::FunctionDecl* const function_decl = 217 result.Nodes.getNodeAs<clang::FunctionDecl>("fn"); 218 assert(function_decl && "Unexpected match! No FunctionDecl captured!"); 219 220 // If matched against an implicit conversion to a DeclRefExpr, make sure the 221 // referenced declaration is of class type, e.g. the tool skips trying to 222 // chase pointers/references to determine if the pointee is a scoped_refptr<T> 223 // with local storage. Instead, let a human manually handle those cases. 224 const clang::VarDecl* const var_decl = 225 result.Nodes.getNodeAs<clang::VarDecl>("var"); 226 if (var_decl && !var_decl->getTypeSourceInfo()->getType()->isClassType()) { 227 return; 228 } 229 230 for (clang::FunctionDecl* f : function_decl->redecls()) { 231 clang::SourceRange range = f->getReturnTypeSourceRange(); 232 replacements_->insert( 233 RewriteRawPtrToScopedRefptr(result, range.getBegin(), range.getEnd())); 234 } 235 } 236 237 class MacroRewriterCallback : public MatchFinder::MatchCallback { 238 public: 239 explicit MacroRewriterCallback(Replacements* replacements) 240 : replacements_(replacements) {} 241 virtual void run(const MatchFinder::MatchResult& result) override; 242 243 private: 244 Replacements* const replacements_; 245 }; 246 247 void MacroRewriterCallback::run(const MatchFinder::MatchResult& result) { 248 const clang::Expr* const expr = result.Nodes.getNodeAs<clang::Expr>("expr"); 249 assert(expr && "Unexpected match! No Expr captured!"); 250 replacements_->insert(RewriteImplicitToExplicitConversion(result, expr)); 251 } 252 253 } // namespace 254 255 static llvm::cl::extrahelp common_help(CommonOptionsParser::HelpMessage); 256 257 int main(int argc, const char* argv[]) { 258 llvm::cl::OptionCategory category("Remove scoped_refptr conversions"); 259 CommonOptionsParser options(argc, argv, category); 260 clang::tooling::ClangTool tool(options.getCompilations(), 261 options.getSourcePathList()); 262 263 MatchFinder match_finder; 264 Replacements replacements; 265 266 auto is_scoped_refptr = recordDecl(isSameOrDerivedFrom("::scoped_refptr"), 267 isTemplateInstantiation()); 268 269 // Finds all calls to conversion operator member function. This catches calls 270 // to "operator T*", "operator Testable", and "operator bool" equally. 271 auto base_matcher = memberCallExpr(thisPointerType(is_scoped_refptr), 272 callee(conversionDecl()), 273 on(id("arg", expr()))); 274 275 // The heuristic for whether or not converting a temporary is 'unsafe'. An 276 // unsafe conversion is one where a temporary scoped_refptr<T> is converted to 277 // another type. The matcher provides an exception for a temporary 278 // scoped_refptr that is the result of an operator call. In this case, assume 279 // that it's the result of an iterator dereference, and the container itself 280 // retains the necessary reference, since this is a common idiom to see in 281 // loop bodies. 282 auto is_unsafe_temporary_conversion = 283 on(bindTemporaryExpr(unless(has(operatorCallExpr())))); 284 285 // Returning a scoped_refptr<T> as a T* is considered unsafe if either are 286 // true: 287 // - The scoped_refptr<T> is a temporary. 288 // - The scoped_refptr<T> has local lifetime. 289 auto returned_as_raw_ptr = hasParent( 290 returnStmt(hasAncestor(id("fn", functionDecl(returns(pointerType())))))); 291 // This matcher intentionally matches more than it should. For example, this 292 // will match: 293 // scoped_refptr<Foo>& foo = some_other_foo; 294 // return foo; 295 // The matcher callback filters out VarDecls that aren't a scoped_refptr<T>, 296 // so those cases can be manually handled. 297 auto is_local_variable = 298 on(declRefExpr(to(id("var", varDecl(hasLocalStorage()))))); 299 auto is_unsafe_return = 300 anyOf(allOf(hasParent(implicitCastExpr(returned_as_raw_ptr)), 301 is_local_variable), 302 allOf(hasParent(implicitCastExpr( 303 hasParent(exprWithCleanups(returned_as_raw_ptr)))), 304 is_unsafe_temporary_conversion)); 305 306 // This catches both user-defined conversions (eg: "operator bool") and 307 // standard conversion sequence (C++03 13.3.3.1.1), such as converting a 308 // pointer to a bool. 309 auto implicit_to_bool = 310 implicitCastExpr(hasImplicitDestinationType(isBoolean())); 311 312 // Avoid converting calls to of "operator Testable" -> "bool" and calls of 313 // "operator T*" -> "bool". 314 auto bool_conversion_matcher = hasParent( 315 expr(anyOf(implicit_to_bool, expr(hasParent(implicit_to_bool))))); 316 317 auto is_logging_helper = 318 functionDecl(anyOf(hasName("CheckEQImpl"), hasName("CheckNEImpl"))); 319 auto is_gtest_helper = functionDecl( 320 anyOf(methodDecl(ofClass(recordDecl(isSameOrDerivedFrom( 321 hasName("::testing::internal::EqHelper")))), 322 hasName("Compare")), 323 hasName("::testing::internal::CmpHelperNE"))); 324 auto is_gtest_assertion_result_ctor = constructorDecl(ofClass( 325 recordDecl(isSameOrDerivedFrom(hasName("::testing::AssertionResult"))))); 326 327 // Find all calls to an operator overload that are 'safe'. 328 // 329 // All bool conversions will be handled with the Testable trick, but that 330 // can only be used once "operator T*" is removed, since otherwise it leaves 331 // the call ambiguous. 332 GetRewriterCallback get_callback(&replacements); 333 match_finder.addMatcher( 334 memberCallExpr( 335 base_matcher, 336 // Excluded since the conversion may be unsafe. 337 unless(anyOf(is_unsafe_temporary_conversion, is_unsafe_return)), 338 // Excluded since the conversion occurs inside a helper function that 339 // the macro wraps. Letting this callback handle the rewrite would 340 // result in an incorrect replacement that changes the helper function 341 // itself. Instead, the right replacement is to rewrite the macro's 342 // arguments. 343 unless(hasAncestor(decl(anyOf(is_logging_helper, 344 is_gtest_helper, 345 is_gtest_assertion_result_ctor))))), 346 &get_callback); 347 348 // Find temporary scoped_refptr<T>'s being unsafely assigned to a T*. 349 VarRewriterCallback var_callback(&replacements); 350 auto initialized_with_temporary = ignoringImpCasts(exprWithCleanups( 351 has(memberCallExpr(base_matcher, is_unsafe_temporary_conversion)))); 352 match_finder.addMatcher(id("var", 353 varDecl(hasInitializer(initialized_with_temporary), 354 hasType(pointerType()))), 355 &var_callback); 356 match_finder.addMatcher( 357 constructorDecl(forEachConstructorInitializer( 358 allOf(withInitializer(initialized_with_temporary), 359 forField(id("var", fieldDecl(hasType(pointerType()))))))), 360 &var_callback); 361 362 // Rewrite functions that unsafely turn a scoped_refptr<T> into a T* when 363 // returning a value. 364 FunctionRewriterCallback fn_callback(&replacements); 365 match_finder.addMatcher(memberCallExpr(base_matcher, is_unsafe_return), 366 &fn_callback); 367 368 // Rewrite logging / gtest expressions that result in an implicit conversion. 369 // Luckily, the matchers don't need to handle the case where one of the macro 370 // arguments is NULL, such as: 371 // CHECK_EQ(my_scoped_refptr, NULL) 372 // because it simply doesn't compile--since NULL is actually of integral type, 373 // this doesn't trigger scoped_refptr<T>'s implicit conversion. Since there is 374 // no comparison overload for scoped_refptr<T> and int, this fails to compile. 375 MacroRewriterCallback macro_callback(&replacements); 376 // CHECK_EQ/CHECK_NE helpers. 377 match_finder.addMatcher( 378 callExpr(callee(is_logging_helper), 379 argumentCountIs(3), 380 hasAnyArgument(id("expr", expr(hasType(is_scoped_refptr)))), 381 hasAnyArgument(hasType(pointerType())), 382 hasArgument(2, stringLiteral())), 383 ¯o_callback); 384 // ASSERT_EQ/ASSERT_NE/EXPECT_EQ/EXPECT_EQ, which use the same underlying 385 // helper functions. Even though gtest has special handling for pointer to 386 // NULL comparisons, it doesn't trigger in this case, so no special handling 387 // is needed for the replacements. 388 match_finder.addMatcher( 389 callExpr(callee(is_gtest_helper), 390 argumentCountIs(4), 391 hasArgument(0, stringLiteral()), 392 hasArgument(1, stringLiteral()), 393 hasAnyArgument(id("expr", expr(hasType(is_scoped_refptr)))), 394 hasAnyArgument(hasType(pointerType()))), 395 ¯o_callback); 396 // ASSERT_TRUE/EXPECT_TRUE helpers. Note that this matcher doesn't need to 397 // handle ASSERT_FALSE/EXPECT_FALSE, because it gets coerced to bool before 398 // being passed as an argument to AssertionResult's constructor. As a result, 399 // GetRewriterCallback handles this case properly since the conversion isn't 400 // hidden inside AssertionResult, and the generated replacement properly 401 // rewrites the macro argument. 402 // However, the tool does need to handle the _TRUE counterparts, since the 403 // conversion occurs inside the constructor in those cases. 404 match_finder.addMatcher( 405 constructExpr( 406 argumentCountIs(2), 407 hasArgument(0, id("expr", expr(hasType(is_scoped_refptr)))), 408 hasDeclaration(is_gtest_assertion_result_ctor)), 409 ¯o_callback); 410 411 std::unique_ptr<clang::tooling::FrontendActionFactory> factory = 412 clang::tooling::newFrontendActionFactory(&match_finder); 413 int result = tool.run(factory.get()); 414 if (result != 0) 415 return result; 416 417 // Serialization format is documented in tools/clang/scripts/run_tool.py 418 llvm::outs() << "==== BEGIN EDITS ====\n"; 419 for (const auto& r : replacements) { 420 std::string replacement_text = r.getReplacementText().str(); 421 std::replace(replacement_text.begin(), replacement_text.end(), '\n', '\0'); 422 llvm::outs() << "r:" << r.getFilePath() << ":" << r.getOffset() << ":" 423 << r.getLength() << ":" << replacement_text << "\n"; 424 } 425 llvm::outs() << "==== END EDITS ====\n"; 426 427 return 0; 428 } 429