Home | History | Annotate | Download | only in rewrite_scoped_refptr
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
// This implements a Clang tool to rewrite all instances of
// scoped_refptr<T>'s implicit cast to T* (operator T*) to an explicit call to
// the .get() method.
      8 
      9 #include <assert.h>
     10 #include <algorithm>
     11 #include <memory>
     12 #include <string>
     13 
     14 #include "clang/AST/ASTContext.h"
     15 #include "clang/ASTMatchers/ASTMatchers.h"
     16 #include "clang/ASTMatchers/ASTMatchersMacros.h"
     17 #include "clang/ASTMatchers/ASTMatchFinder.h"
     18 #include "clang/Basic/SourceManager.h"
     19 #include "clang/Frontend/FrontendActions.h"
     20 #include "clang/Lex/Lexer.h"
     21 #include "clang/Tooling/CommonOptionsParser.h"
     22 #include "clang/Tooling/Refactoring.h"
     23 #include "clang/Tooling/Tooling.h"
     24 #include "llvm/Support/CommandLine.h"
     25 
     26 using namespace clang::ast_matchers;
     27 using clang::tooling::CommonOptionsParser;
     28 using clang::tooling::Replacement;
     29 using clang::tooling::Replacements;
     30 using llvm::StringRef;
     31 
// Additional AST matchers for this tool, defined inside clang::ast_matchers.
namespace clang {
namespace ast_matchers {

// Matches declarations that are CXXConversionDecl nodes, i.e. C++ conversion
// functions such as "operator T*()" or "operator bool()".
const internal::VariadicDynCastAllOfMatcher<Decl, CXXConversionDecl>
    conversionDecl;

// Matches a QualType for which isBooleanType() returns true.
AST_MATCHER(QualType, isBoolean) {
  return Node->isBooleanType();
}

}  // namespace ast_matchers
}  // namespace clang
     44 
     45 namespace {
     46 
     47 // Returns true if expr needs to be put in parens (eg: when it is an operator
     48 // syntactically).
     49 bool NeedsParens(const clang::Expr* expr) {
     50   if (llvm::dyn_cast<clang::UnaryOperator>(expr) ||
     51       llvm::dyn_cast<clang::BinaryOperator>(expr) ||
     52       llvm::dyn_cast<clang::ConditionalOperator>(expr)) {
     53     return true;
     54   }
     55   // Calls to an overloaded operator also need parens, except for foo(...) and
     56   // foo[...] expressions.
     57   if (const clang::CXXOperatorCallExpr* op =
     58           llvm::dyn_cast<clang::CXXOperatorCallExpr>(expr)) {
     59     return op->getOperator() != clang::OO_Call &&
     60            op->getOperator() != clang::OO_Subscript;
     61   }
     62   return false;
     63 }
     64 
     65 Replacement RewriteImplicitToExplicitConversion(
     66     const MatchFinder::MatchResult& result,
     67     const clang::Expr* expr) {
     68   clang::CharSourceRange range = clang::CharSourceRange::getTokenRange(
     69       result.SourceManager->getSpellingLoc(expr->getLocStart()),
     70       result.SourceManager->getSpellingLoc(expr->getLocEnd()));
     71   assert(range.isValid() && "Invalid range!");
     72 
     73   // Handle cases where an implicit cast is being done by dereferencing a
     74   // pointer to a scoped_refptr<> (sadly, it happens...)
     75   //
     76   // This rewrites both "*foo" and "*(foo)" as "foo->get()".
     77   if (const clang::UnaryOperator* op =
     78           llvm::dyn_cast<clang::UnaryOperator>(expr)) {
     79     if (op->getOpcode() == clang::UO_Deref) {
     80       const clang::Expr* const sub_expr =
     81           op->getSubExpr()->IgnoreParenImpCasts();
     82       clang::CharSourceRange sub_expr_range =
     83           clang::CharSourceRange::getTokenRange(
     84               result.SourceManager->getSpellingLoc(sub_expr->getLocStart()),
     85               result.SourceManager->getSpellingLoc(sub_expr->getLocEnd()));
     86       assert(sub_expr_range.isValid() && "Invalid subexpression range!");
     87 
     88       std::string inner_text = clang::Lexer::getSourceText(
     89           sub_expr_range, *result.SourceManager, result.Context->getLangOpts());
     90       assert(!inner_text.empty() && "No text for subexpression!");
     91       if (NeedsParens(sub_expr)) {
     92         inner_text.insert(0, "(");
     93         inner_text.append(")");
     94       }
     95       inner_text.append("->get()");
     96       return Replacement(*result.SourceManager, range, inner_text);
     97     }
     98   }
     99 
    100   std::string text = clang::Lexer::getSourceText(
    101       range, *result.SourceManager, result.Context->getLangOpts());
    102   assert(!text.empty() && "No text for expression!");
    103 
    104   // Unwrap any temporaries - for example, custom iterators that return
    105   // scoped_refptr<T> as part of operator*. Any such iterators should also
    106   // be declaring a scoped_refptr<T>* operator->, per C++03 24.4.1.1 (Table 72)
    107   if (const clang::CXXBindTemporaryExpr* op =
    108           llvm::dyn_cast<clang::CXXBindTemporaryExpr>(expr)) {
    109     expr = op->getSubExpr();
    110   }
    111 
    112   // Handle iterators (which are operator* calls, followed by implicit
    113   // conversions) by rewriting *it as it->get()
    114   if (const clang::CXXOperatorCallExpr* op =
    115           llvm::dyn_cast<clang::CXXOperatorCallExpr>(expr)) {
    116     if (op->getOperator() == clang::OO_Star) {
    117       // Note that this doesn't rewrite **it correctly, since it should be
    118       // rewritten using parens, e.g. (*it)->get(). However, this shouldn't
    119       // happen frequently, if at all, since it would likely indicate code is
    120       // storing pointers to a scoped_refptr in a container.
    121       text.erase(0, 1);
    122       text.append("->get()");
    123       return Replacement(*result.SourceManager, range, text);
    124     }
    125   }
    126 
    127   // The only remaining calls should be non-dereferencing calls (eg: member
    128   // calls), so a simple ".get()" appending should suffice.
    129   if (NeedsParens(expr)) {
    130     text.insert(0, "(");
    131     text.append(")");
    132   }
    133   text.append(".get()");
    134   return Replacement(*result.SourceManager, range, text);
    135 }
    136 
    137 Replacement RewriteRawPtrToScopedRefptr(const MatchFinder::MatchResult& result,
    138                                         clang::SourceLocation begin,
    139                                         clang::SourceLocation end) {
    140   clang::CharSourceRange range = clang::CharSourceRange::getTokenRange(
    141       result.SourceManager->getSpellingLoc(begin),
    142       result.SourceManager->getSpellingLoc(end));
    143   assert(range.isValid() && "Invalid range!");
    144 
    145   std::string text = clang::Lexer::getSourceText(
    146       range, *result.SourceManager, result.Context->getLangOpts());
    147   text.erase(text.rfind('*'));
    148 
    149   std::string replacement_text("scoped_refptr<");
    150   replacement_text += text;
    151   replacement_text += ">";
    152 
    153   return Replacement(*result.SourceManager, range, replacement_text);
    154 }
    155 
    156 class GetRewriterCallback : public MatchFinder::MatchCallback {
    157  public:
    158   explicit GetRewriterCallback(Replacements* replacements)
    159       : replacements_(replacements) {}
    160   virtual void run(const MatchFinder::MatchResult& result) override;
    161 
    162  private:
    163   Replacements* const replacements_;
    164 };
    165 
    166 void GetRewriterCallback::run(const MatchFinder::MatchResult& result) {
    167   const clang::Expr* arg = result.Nodes.getNodeAs<clang::Expr>("arg");
    168   assert(arg && "Unexpected match! No Expr captured!");
    169   replacements_->insert(RewriteImplicitToExplicitConversion(result, arg));
    170 }
    171 
    172 class VarRewriterCallback : public MatchFinder::MatchCallback {
    173  public:
    174   explicit VarRewriterCallback(Replacements* replacements)
    175       : replacements_(replacements) {}
    176   virtual void run(const MatchFinder::MatchResult& result) override;
    177 
    178  private:
    179   Replacements* const replacements_;
    180 };
    181 
    182 void VarRewriterCallback::run(const MatchFinder::MatchResult& result) {
    183   const clang::DeclaratorDecl* const var_decl =
    184       result.Nodes.getNodeAs<clang::DeclaratorDecl>("var");
    185   assert(var_decl && "Unexpected match! No VarDecl captured!");
    186 
    187   const clang::TypeSourceInfo* tsi = var_decl->getTypeSourceInfo();
    188 
    189   // TODO(dcheng): This mishandles a case where a variable has multiple
    190   // declarations, e.g.:
    191   //
    192   // in .h:
    193   // Foo* my_global_magical_foo;
    194   //
    195   // in .cc:
    196   // Foo* my_global_magical_foo = CreateFoo();
    197   //
    198   // In this case, it will only rewrite the .cc definition. Oh well. This should
    199   // be rare enough that these cases can be manually handled, since the style
    200   // guide prohibits globals of non-POD type.
    201   replacements_->insert(RewriteRawPtrToScopedRefptr(
    202       result, tsi->getTypeLoc().getBeginLoc(), tsi->getTypeLoc().getEndLoc()));
    203 }
    204 
    205 class FunctionRewriterCallback : public MatchFinder::MatchCallback {
    206  public:
    207   explicit FunctionRewriterCallback(Replacements* replacements)
    208       : replacements_(replacements) {}
    209   virtual void run(const MatchFinder::MatchResult& result) override;
    210 
    211  private:
    212   Replacements* const replacements_;
    213 };
    214 
    215 void FunctionRewriterCallback::run(const MatchFinder::MatchResult& result) {
    216   const clang::FunctionDecl* const function_decl =
    217       result.Nodes.getNodeAs<clang::FunctionDecl>("fn");
    218   assert(function_decl && "Unexpected match! No FunctionDecl captured!");
    219 
    220   // If matched against an implicit conversion to a DeclRefExpr, make sure the
    221   // referenced declaration is of class type, e.g. the tool skips trying to
    222   // chase pointers/references to determine if the pointee is a scoped_refptr<T>
    223   // with local storage. Instead, let a human manually handle those cases.
    224   const clang::VarDecl* const var_decl =
    225       result.Nodes.getNodeAs<clang::VarDecl>("var");
    226   if (var_decl && !var_decl->getTypeSourceInfo()->getType()->isClassType()) {
    227     return;
    228   }
    229 
    230   for (clang::FunctionDecl* f : function_decl->redecls()) {
    231     clang::SourceRange range = f->getReturnTypeSourceRange();
    232     replacements_->insert(
    233         RewriteRawPtrToScopedRefptr(result, range.getBegin(), range.getEnd()));
    234   }
    235 }
    236 
    237 class MacroRewriterCallback : public MatchFinder::MatchCallback {
    238  public:
    239   explicit MacroRewriterCallback(Replacements* replacements)
    240       : replacements_(replacements) {}
    241   virtual void run(const MatchFinder::MatchResult& result) override;
    242 
    243  private:
    244   Replacements* const replacements_;
    245 };
    246 
    247 void MacroRewriterCallback::run(const MatchFinder::MatchResult& result) {
    248   const clang::Expr* const expr = result.Nodes.getNodeAs<clang::Expr>("expr");
    249   assert(expr && "Unexpected match! No Expr captured!");
    250   replacements_->insert(RewriteImplicitToExplicitConversion(result, expr));
    251 }
    252 
    253 }  // namespace
    254 
// Registers CommonOptionsParser's shared help message as extra help text for
// this tool's command line.
static llvm::cl::extrahelp common_help(CommonOptionsParser::HelpMessage);
    256 
    257 int main(int argc, const char* argv[]) {
    258   llvm::cl::OptionCategory category("Remove scoped_refptr conversions");
    259   CommonOptionsParser options(argc, argv, category);
    260   clang::tooling::ClangTool tool(options.getCompilations(),
    261                                  options.getSourcePathList());
    262 
    263   MatchFinder match_finder;
    264   Replacements replacements;
    265 
    266   auto is_scoped_refptr = recordDecl(isSameOrDerivedFrom("::scoped_refptr"),
    267                                      isTemplateInstantiation());
    268 
    269   // Finds all calls to conversion operator member function. This catches calls
    270   // to "operator T*", "operator Testable", and "operator bool" equally.
    271   auto base_matcher = memberCallExpr(thisPointerType(is_scoped_refptr),
    272                                      callee(conversionDecl()),
    273                                      on(id("arg", expr())));
    274 
    275   // The heuristic for whether or not converting a temporary is 'unsafe'. An
    276   // unsafe conversion is one where a temporary scoped_refptr<T> is converted to
    277   // another type. The matcher provides an exception for a temporary
    278   // scoped_refptr that is the result of an operator call. In this case, assume
    279   // that it's the result of an iterator dereference, and the container itself
    280   // retains the necessary reference, since this is a common idiom to see in
    281   // loop bodies.
    282   auto is_unsafe_temporary_conversion =
    283       on(bindTemporaryExpr(unless(has(operatorCallExpr()))));
    284 
    285   // Returning a scoped_refptr<T> as a T* is considered unsafe if either are
    286   // true:
    287   // - The scoped_refptr<T> is a temporary.
    288   // - The scoped_refptr<T> has local lifetime.
    289   auto returned_as_raw_ptr = hasParent(
    290       returnStmt(hasAncestor(id("fn", functionDecl(returns(pointerType()))))));
    291   // This matcher intentionally matches more than it should. For example, this
    292   // will match:
    293   //   scoped_refptr<Foo>& foo = some_other_foo;
    294   //   return foo;
    295   // The matcher callback filters out VarDecls that aren't a scoped_refptr<T>,
    296   // so those cases can be manually handled.
    297   auto is_local_variable =
    298       on(declRefExpr(to(id("var", varDecl(hasLocalStorage())))));
    299   auto is_unsafe_return =
    300       anyOf(allOf(hasParent(implicitCastExpr(returned_as_raw_ptr)),
    301                   is_local_variable),
    302             allOf(hasParent(implicitCastExpr(
    303                       hasParent(exprWithCleanups(returned_as_raw_ptr)))),
    304                   is_unsafe_temporary_conversion));
    305 
    306   // This catches both user-defined conversions (eg: "operator bool") and
    307   // standard conversion sequence (C++03 13.3.3.1.1), such as converting a
    308   // pointer to a bool.
    309   auto implicit_to_bool =
    310       implicitCastExpr(hasImplicitDestinationType(isBoolean()));
    311 
    312   // Avoid converting calls to of "operator Testable" -> "bool" and calls of
    313   // "operator T*" -> "bool".
    314   auto bool_conversion_matcher = hasParent(
    315       expr(anyOf(implicit_to_bool, expr(hasParent(implicit_to_bool)))));
    316 
    317   auto is_logging_helper =
    318       functionDecl(anyOf(hasName("CheckEQImpl"), hasName("CheckNEImpl")));
    319   auto is_gtest_helper = functionDecl(
    320       anyOf(methodDecl(ofClass(recordDecl(isSameOrDerivedFrom(
    321                            hasName("::testing::internal::EqHelper")))),
    322                        hasName("Compare")),
    323             hasName("::testing::internal::CmpHelperNE")));
    324   auto is_gtest_assertion_result_ctor = constructorDecl(ofClass(
    325       recordDecl(isSameOrDerivedFrom(hasName("::testing::AssertionResult")))));
    326 
    327   // Find all calls to an operator overload that are 'safe'.
    328   //
    329   // All bool conversions will be handled with the Testable trick, but that
    330   // can only be used once "operator T*" is removed, since otherwise it leaves
    331   // the call ambiguous.
    332   GetRewriterCallback get_callback(&replacements);
    333   match_finder.addMatcher(
    334       memberCallExpr(
    335           base_matcher,
    336           // Excluded since the conversion may be unsafe.
    337           unless(anyOf(is_unsafe_temporary_conversion, is_unsafe_return)),
    338           // Excluded since the conversion occurs inside a helper function that
    339           // the macro wraps. Letting this callback handle the rewrite would
    340           // result in an incorrect replacement that changes the helper function
    341           // itself. Instead, the right replacement is to rewrite the macro's
    342           // arguments.
    343           unless(hasAncestor(decl(anyOf(is_logging_helper,
    344                                         is_gtest_helper,
    345                                         is_gtest_assertion_result_ctor))))),
    346       &get_callback);
    347 
    348   // Find temporary scoped_refptr<T>'s being unsafely assigned to a T*.
    349   VarRewriterCallback var_callback(&replacements);
    350   auto initialized_with_temporary = ignoringImpCasts(exprWithCleanups(
    351       has(memberCallExpr(base_matcher, is_unsafe_temporary_conversion))));
    352   match_finder.addMatcher(id("var",
    353                              varDecl(hasInitializer(initialized_with_temporary),
    354                                      hasType(pointerType()))),
    355                           &var_callback);
    356   match_finder.addMatcher(
    357       constructorDecl(forEachConstructorInitializer(
    358           allOf(withInitializer(initialized_with_temporary),
    359                 forField(id("var", fieldDecl(hasType(pointerType()))))))),
    360       &var_callback);
    361 
    362   // Rewrite functions that unsafely turn a scoped_refptr<T> into a T* when
    363   // returning a value.
    364   FunctionRewriterCallback fn_callback(&replacements);
    365   match_finder.addMatcher(memberCallExpr(base_matcher, is_unsafe_return),
    366                           &fn_callback);
    367 
    368   // Rewrite logging / gtest expressions that result in an implicit conversion.
    369   // Luckily, the matchers don't need to handle the case where one of the macro
    370   // arguments is NULL, such as:
    371   // CHECK_EQ(my_scoped_refptr, NULL)
    372   // because it simply doesn't compile--since NULL is actually of integral type,
    373   // this doesn't trigger scoped_refptr<T>'s implicit conversion. Since there is
    374   // no comparison overload for scoped_refptr<T> and int, this fails to compile.
    375   MacroRewriterCallback macro_callback(&replacements);
    376   // CHECK_EQ/CHECK_NE helpers.
    377   match_finder.addMatcher(
    378       callExpr(callee(is_logging_helper),
    379                argumentCountIs(3),
    380                hasAnyArgument(id("expr", expr(hasType(is_scoped_refptr)))),
    381                hasAnyArgument(hasType(pointerType())),
    382                hasArgument(2, stringLiteral())),
    383       &macro_callback);
    384   // ASSERT_EQ/ASSERT_NE/EXPECT_EQ/EXPECT_EQ, which use the same underlying
    385   // helper functions. Even though gtest has special handling for pointer to
    386   // NULL comparisons, it doesn't trigger in this case, so no special handling
    387   // is needed for the replacements.
    388   match_finder.addMatcher(
    389       callExpr(callee(is_gtest_helper),
    390                argumentCountIs(4),
    391                hasArgument(0, stringLiteral()),
    392                hasArgument(1, stringLiteral()),
    393                hasAnyArgument(id("expr", expr(hasType(is_scoped_refptr)))),
    394                hasAnyArgument(hasType(pointerType()))),
    395       &macro_callback);
    396   // ASSERT_TRUE/EXPECT_TRUE helpers. Note that this matcher doesn't need to
    397   // handle ASSERT_FALSE/EXPECT_FALSE, because it gets coerced to bool before
    398   // being passed as an argument to AssertionResult's constructor. As a result,
    399   // GetRewriterCallback handles this case properly since the conversion isn't
    400   // hidden inside AssertionResult, and the generated replacement properly
    401   // rewrites the macro argument.
    402   // However, the tool does need to handle the _TRUE counterparts, since the
    403   // conversion occurs inside the constructor in those cases.
    404   match_finder.addMatcher(
    405       constructExpr(
    406           argumentCountIs(2),
    407           hasArgument(0, id("expr", expr(hasType(is_scoped_refptr)))),
    408           hasDeclaration(is_gtest_assertion_result_ctor)),
    409       &macro_callback);
    410 
    411   std::unique_ptr<clang::tooling::FrontendActionFactory> factory =
    412       clang::tooling::newFrontendActionFactory(&match_finder);
    413   int result = tool.run(factory.get());
    414   if (result != 0)
    415     return result;
    416 
    417   // Serialization format is documented in tools/clang/scripts/run_tool.py
    418   llvm::outs() << "==== BEGIN EDITS ====\n";
    419   for (const auto& r : replacements) {
    420     std::string replacement_text = r.getReplacementText().str();
    421     std::replace(replacement_text.begin(), replacement_text.end(), '\n', '\0');
    422     llvm::outs() << "r:" << r.getFilePath() << ":" << r.getOffset() << ":"
    423                  << r.getLength() << ":" << replacement_text << "\n";
    424   }
    425   llvm::outs() << "==== END EDITS ====\n";
    426 
    427   return 0;
    428 }
    429