Home | History | Annotate | Download | only in rewrite_scoped_refptr
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // This implements a Clang tool to rewrite all instances of
      6 // scoped_refptr<T>'s implicit cast to T (operator T*) to an explicit call to
      7 // the .get() method.
      8 
      9 #include <assert.h>
     10 #include <algorithm>
     11 #include <memory>
     12 #include <string>
     13 
     14 #include "clang/AST/ASTContext.h"
     15 #include "clang/ASTMatchers/ASTMatchers.h"
     16 #include "clang/ASTMatchers/ASTMatchersMacros.h"
     17 #include "clang/ASTMatchers/ASTMatchFinder.h"
     18 #include "clang/Basic/SourceManager.h"
     19 #include "clang/Frontend/FrontendActions.h"
     20 #include "clang/Lex/Lexer.h"
     21 #include "clang/Tooling/CommonOptionsParser.h"
     22 #include "clang/Tooling/Refactoring.h"
     23 #include "clang/Tooling/Tooling.h"
     24 #include "llvm/Support/CommandLine.h"
     25 #include "llvm/Support/TargetSelect.h"
     26 
     27 using namespace clang::ast_matchers;
     28 using clang::tooling::CommonOptionsParser;
     29 using clang::tooling::Replacement;
     30 using clang::tooling::Replacements;
     31 using llvm::StringRef;
     32 
     33 namespace clang {
     34 namespace ast_matchers {
     35 
// Matcher for C++ conversion function declarations (CXXConversionDecl nodes),
// e.g. "operator T*()" or "operator bool()". Defined here because this file
// needs it in the clang::ast_matchers namespace alongside the stock matchers.
const internal::VariadicDynCastAllOfMatcher<Decl, CXXConversionDecl>
    conversionDecl;
     38 
// Matches a QualType for which isBooleanType() is true.
AST_MATCHER(QualType, isBoolean) {
  return Node->isBooleanType();
}
     42 
     43 }  // namespace ast_matchers
     44 }  // namespace clang
     45 
     46 namespace {
     47 
     48 // Returns true if expr needs to be put in parens (eg: when it is an operator
     49 // syntactically).
     50 bool NeedsParens(const clang::Expr* expr) {
     51   if (llvm::dyn_cast<clang::UnaryOperator>(expr) ||
     52       llvm::dyn_cast<clang::BinaryOperator>(expr) ||
     53       llvm::dyn_cast<clang::ConditionalOperator>(expr)) {
     54     return true;
     55   }
     56   // Calls to an overloaded operator also need parens, except for foo(...) and
     57   // foo[...] expressions.
     58   if (const clang::CXXOperatorCallExpr* op =
     59           llvm::dyn_cast<clang::CXXOperatorCallExpr>(expr)) {
     60     return op->getOperator() != clang::OO_Call &&
     61            op->getOperator() != clang::OO_Subscript;
     62   }
     63   return false;
     64 }
     65 
     66 Replacement RewriteImplicitToExplicitConversion(
     67     const MatchFinder::MatchResult& result,
     68     const clang::Expr* expr) {
     69   clang::CharSourceRange range = clang::CharSourceRange::getTokenRange(
     70       result.SourceManager->getSpellingLoc(expr->getLocStart()),
     71       result.SourceManager->getSpellingLoc(expr->getLocEnd()));
     72   assert(range.isValid() && "Invalid range!");
     73 
     74   // Handle cases where an implicit cast is being done by dereferencing a
     75   // pointer to a scoped_refptr<> (sadly, it happens...)
     76   //
     77   // This rewrites both "*foo" and "*(foo)" as "foo->get()".
     78   if (const clang::UnaryOperator* op =
     79           llvm::dyn_cast<clang::UnaryOperator>(expr)) {
     80     if (op->getOpcode() == clang::UO_Deref) {
     81       const clang::Expr* const sub_expr =
     82           op->getSubExpr()->IgnoreParenImpCasts();
     83       clang::CharSourceRange sub_expr_range =
     84           clang::CharSourceRange::getTokenRange(
     85               result.SourceManager->getSpellingLoc(sub_expr->getLocStart()),
     86               result.SourceManager->getSpellingLoc(sub_expr->getLocEnd()));
     87       assert(sub_expr_range.isValid() && "Invalid subexpression range!");
     88 
     89       std::string inner_text = clang::Lexer::getSourceText(
     90           sub_expr_range, *result.SourceManager, result.Context->getLangOpts());
     91       assert(!inner_text.empty() && "No text for subexpression!");
     92       if (NeedsParens(sub_expr)) {
     93         inner_text.insert(0, "(");
     94         inner_text.append(")");
     95       }
     96       inner_text.append("->get()");
     97       return Replacement(*result.SourceManager, range, inner_text);
     98     }
     99   }
    100 
    101   std::string text = clang::Lexer::getSourceText(
    102       range, *result.SourceManager, result.Context->getLangOpts());
    103   assert(!text.empty() && "No text for expression!");
    104 
    105   // Unwrap any temporaries - for example, custom iterators that return
    106   // scoped_refptr<T> as part of operator*. Any such iterators should also
    107   // be declaring a scoped_refptr<T>* operator->, per C++03 24.4.1.1 (Table 72)
    108   if (const clang::CXXBindTemporaryExpr* op =
    109           llvm::dyn_cast<clang::CXXBindTemporaryExpr>(expr)) {
    110     expr = op->getSubExpr();
    111   }
    112 
    113   // Handle iterators (which are operator* calls, followed by implicit
    114   // conversions) by rewriting *it as it->get()
    115   if (const clang::CXXOperatorCallExpr* op =
    116           llvm::dyn_cast<clang::CXXOperatorCallExpr>(expr)) {
    117     if (op->getOperator() == clang::OO_Star) {
    118       // Note that this doesn't rewrite **it correctly, since it should be
    119       // rewritten using parens, e.g. (*it)->get(). However, this shouldn't
    120       // happen frequently, if at all, since it would likely indicate code is
    121       // storing pointers to a scoped_refptr in a container.
    122       text.erase(0, 1);
    123       text.append("->get()");
    124       return Replacement(*result.SourceManager, range, text);
    125     }
    126   }
    127 
    128   // The only remaining calls should be non-dereferencing calls (eg: member
    129   // calls), so a simple ".get()" appending should suffice.
    130   if (NeedsParens(expr)) {
    131     text.insert(0, "(");
    132     text.append(")");
    133   }
    134   text.append(".get()");
    135   return Replacement(*result.SourceManager, range, text);
    136 }
    137 
    138 Replacement RewriteRawPtrToScopedRefptr(const MatchFinder::MatchResult& result,
    139                                         clang::SourceLocation begin,
    140                                         clang::SourceLocation end) {
    141   clang::CharSourceRange range = clang::CharSourceRange::getTokenRange(
    142       result.SourceManager->getSpellingLoc(begin),
    143       result.SourceManager->getSpellingLoc(end));
    144   assert(range.isValid() && "Invalid range!");
    145 
    146   std::string text = clang::Lexer::getSourceText(
    147       range, *result.SourceManager, result.Context->getLangOpts());
    148   text.erase(text.rfind('*'));
    149 
    150   std::string replacement_text("scoped_refptr<");
    151   replacement_text += text;
    152   replacement_text += ">";
    153 
    154   return Replacement(*result.SourceManager, range, replacement_text);
    155 }
    156 
    157 class GetRewriterCallback : public MatchFinder::MatchCallback {
    158  public:
    159   explicit GetRewriterCallback(Replacements* replacements)
    160       : replacements_(replacements) {}
    161   virtual void run(const MatchFinder::MatchResult& result) override;
    162 
    163  private:
    164   Replacements* const replacements_;
    165 };
    166 
    167 void GetRewriterCallback::run(const MatchFinder::MatchResult& result) {
    168   const clang::Expr* arg = result.Nodes.getNodeAs<clang::Expr>("arg");
    169   assert(arg && "Unexpected match! No Expr captured!");
    170   auto err =
    171       replacements_->add(RewriteImplicitToExplicitConversion(result, arg));
    172   assert(!err);
    173 }
    174 
    175 class VarRewriterCallback : public MatchFinder::MatchCallback {
    176  public:
    177   explicit VarRewriterCallback(Replacements* replacements)
    178       : replacements_(replacements) {}
    179   virtual void run(const MatchFinder::MatchResult& result) override;
    180 
    181  private:
    182   Replacements* const replacements_;
    183 };
    184 
    185 void VarRewriterCallback::run(const MatchFinder::MatchResult& result) {
    186   const clang::DeclaratorDecl* const var_decl =
    187       result.Nodes.getNodeAs<clang::DeclaratorDecl>("var");
    188   assert(var_decl && "Unexpected match! No VarDecl captured!");
    189 
    190   const clang::TypeSourceInfo* tsi = var_decl->getTypeSourceInfo();
    191 
    192   // TODO(dcheng): This mishandles a case where a variable has multiple
    193   // declarations, e.g.:
    194   //
    195   // in .h:
    196   // Foo* my_global_magical_foo;
    197   //
    198   // in .cc:
    199   // Foo* my_global_magical_foo = CreateFoo();
    200   //
    201   // In this case, it will only rewrite the .cc definition. Oh well. This should
    202   // be rare enough that these cases can be manually handled, since the style
    203   // guide prohibits globals of non-POD type.
    204   auto err = replacements_->add(RewriteRawPtrToScopedRefptr(
    205       result, tsi->getTypeLoc().getBeginLoc(), tsi->getTypeLoc().getEndLoc()));
    206   assert(!err);
    207 }
    208 
    209 class FunctionRewriterCallback : public MatchFinder::MatchCallback {
    210  public:
    211   explicit FunctionRewriterCallback(Replacements* replacements)
    212       : replacements_(replacements) {}
    213   virtual void run(const MatchFinder::MatchResult& result) override;
    214 
    215  private:
    216   Replacements* const replacements_;
    217 };
    218 
    219 void FunctionRewriterCallback::run(const MatchFinder::MatchResult& result) {
    220   const clang::FunctionDecl* const function_decl =
    221       result.Nodes.getNodeAs<clang::FunctionDecl>("fn");
    222   assert(function_decl && "Unexpected match! No FunctionDecl captured!");
    223 
    224   // If matched against an implicit conversion to a DeclRefExpr, make sure the
    225   // referenced declaration is of class type, e.g. the tool skips trying to
    226   // chase pointers/references to determine if the pointee is a scoped_refptr<T>
    227   // with local storage. Instead, let a human manually handle those cases.
    228   const clang::VarDecl* const var_decl =
    229       result.Nodes.getNodeAs<clang::VarDecl>("var");
    230   if (var_decl && !var_decl->getTypeSourceInfo()->getType()->isClassType()) {
    231     return;
    232   }
    233 
    234   for (clang::FunctionDecl* f : function_decl->redecls()) {
    235     clang::SourceRange range = f->getReturnTypeSourceRange();
    236     auto err = replacements_->add(
    237         RewriteRawPtrToScopedRefptr(result, range.getBegin(), range.getEnd()));
    238     assert(!err);
    239   }
    240 }
    241 
    242 class MacroRewriterCallback : public MatchFinder::MatchCallback {
    243  public:
    244   explicit MacroRewriterCallback(Replacements* replacements)
    245       : replacements_(replacements) {}
    246   virtual void run(const MatchFinder::MatchResult& result) override;
    247 
    248  private:
    249   Replacements* const replacements_;
    250 };
    251 
    252 void MacroRewriterCallback::run(const MatchFinder::MatchResult& result) {
    253   const clang::Expr* const expr = result.Nodes.getNodeAs<clang::Expr>("expr");
    254   assert(expr && "Unexpected match! No Expr captured!");
    255   auto err =
    256       replacements_->add(RewriteImplicitToExplicitConversion(result, expr));
    257   assert(!err);
    258 }
    259 
    260 }  // namespace
    261 
// Adds CommonOptionsParser's shared help message to this tool's help output.
static llvm::cl::extrahelp common_help(CommonOptionsParser::HelpMessage);
    263 
// Tool entry point. Parses the common Clang tooling options, registers the AST
// matchers that locate implicit scoped_refptr<T> conversions, runs them over
// the requested sources, and prints the collected replacements to stdout.
// Returns the tool's exit status if it is non-zero, and 0 otherwise.
int main(int argc, const char* argv[]) {
  // TODO(dcheng): Clang tooling should do this itself.
  // http://llvm.org/bugs/show_bug.cgi?id=21627
  llvm::InitializeNativeTarget();
  llvm::InitializeNativeTargetAsmParser();
  llvm::cl::OptionCategory category("Remove scoped_refptr conversions");
  CommonOptionsParser options(argc, argv, category);
  clang::tooling::ClangTool tool(options.getCompilations(),
                                 options.getSourcePathList());

  MatchFinder match_finder;
  Replacements replacements;

  auto is_scoped_refptr = cxxRecordDecl(isSameOrDerivedFrom("::scoped_refptr"),
                                        isTemplateInstantiation());

  // Finds all calls to conversion operator member function. This catches calls
  // to "operator T*", "operator Testable", and "operator bool" equally.
  auto base_matcher =
      cxxMemberCallExpr(thisPointerType(is_scoped_refptr),
                        callee(conversionDecl()), on(id("arg", expr())));

  // The heuristic for whether or not converting a temporary is 'unsafe'. An
  // unsafe conversion is one where a temporary scoped_refptr<T> is converted to
  // another type. The matcher provides an exception for a temporary
  // scoped_refptr that is the result of an operator call. In this case, assume
  // that it's the result of an iterator dereference, and the container itself
  // retains the necessary reference, since this is a common idiom to see in
  // loop bodies.
  auto is_unsafe_temporary_conversion =
      on(cxxBindTemporaryExpr(unless(has(cxxOperatorCallExpr()))));

  // Returning a scoped_refptr<T> as a T* is considered unsafe if either are
  // true:
  // - The scoped_refptr<T> is a temporary.
  // - The scoped_refptr<T> has local lifetime.
  auto returned_as_raw_ptr = hasParent(
      returnStmt(hasAncestor(id("fn", functionDecl(returns(pointerType()))))));
  // This matcher intentionally matches more than it should. For example, this
  // will match:
  //   scoped_refptr<Foo>& foo = some_other_foo;
  //   return foo;
  // The matcher callback filters out VarDecls that aren't a scoped_refptr<T>,
  // so those cases can be manually handled.
  auto is_local_variable =
      on(declRefExpr(to(id("var", varDecl(hasLocalStorage())))));
  auto is_unsafe_return =
      anyOf(allOf(hasParent(implicitCastExpr(returned_as_raw_ptr)),
                  is_local_variable),
            allOf(hasParent(implicitCastExpr(
                      hasParent(exprWithCleanups(returned_as_raw_ptr)))),
                  is_unsafe_temporary_conversion));

  // This catches both user-defined conversions (eg: "operator bool") and
  // standard conversion sequence (C++03 13.3.3.1.1), such as converting a
  // pointer to a bool.
  auto implicit_to_bool =
      implicitCastExpr(hasImplicitDestinationType(isBoolean()));

  // Avoid converting calls of "operator Testable" -> "bool" and calls of
  // "operator T*" -> "bool".
  auto bool_conversion_matcher = hasParent(
      expr(anyOf(implicit_to_bool, expr(hasParent(implicit_to_bool)))));

  auto is_logging_helper =
      functionDecl(anyOf(hasName("CheckEQImpl"), hasName("CheckNEImpl")));
  auto is_gtest_helper = functionDecl(
      anyOf(cxxMethodDecl(ofClass(cxxRecordDecl(isSameOrDerivedFrom(
                              hasName("::testing::internal::EqHelper")))),
                          hasName("Compare")),
            hasName("::testing::internal::CmpHelperNE")));
  auto is_gtest_assertion_result_ctor =
      cxxConstructorDecl(ofClass(cxxRecordDecl(
          isSameOrDerivedFrom(hasName("::testing::AssertionResult")))));

  // Find all calls to an operator overload that are 'safe'.
  //
  // All bool conversions will be handled with the Testable trick, but that
  // can only be used once "operator T*" is removed, since otherwise it leaves
  // the call ambiguous.
  GetRewriterCallback get_callback(&replacements);
  match_finder.addMatcher(
      cxxMemberCallExpr(
          base_matcher,
          // Excluded since the conversion may be unsafe.
          unless(anyOf(is_unsafe_temporary_conversion, is_unsafe_return)),
          // Excluded since the conversion occurs inside a helper function that
          // the macro wraps. Letting this callback handle the rewrite would
          // result in an incorrect replacement that changes the helper function
          // itself. Instead, the right replacement is to rewrite the macro's
          // arguments.
          unless(hasAncestor(decl(anyOf(is_logging_helper, is_gtest_helper,
                                        is_gtest_assertion_result_ctor))))),
      &get_callback);

  // Find temporary scoped_refptr<T>'s being unsafely assigned to a T*.
  VarRewriterCallback var_callback(&replacements);
  auto initialized_with_temporary = has(ignoringImpCasts(
      cxxMemberCallExpr(base_matcher, is_unsafe_temporary_conversion)));
  match_finder.addMatcher(
      id("var", varDecl(hasInitializer(initialized_with_temporary),
                        hasType(pointerType()))),
      &var_callback);
  match_finder.addMatcher(
      cxxConstructorDecl(forEachConstructorInitializer(
          allOf(withInitializer(initialized_with_temporary),
                forField(id("var", fieldDecl(hasType(pointerType()))))))),
      &var_callback);

  // Rewrite functions that unsafely turn a scoped_refptr<T> into a T* when
  // returning a value.
  FunctionRewriterCallback fn_callback(&replacements);
  match_finder.addMatcher(cxxMemberCallExpr(base_matcher, is_unsafe_return),
                          &fn_callback);

  // Rewrite logging / gtest expressions that result in an implicit conversion.
  // Luckily, the matchers don't need to handle the case where one of the macro
  // arguments is NULL, such as:
  // CHECK_EQ(my_scoped_refptr, NULL)
  // because it simply doesn't compile--since NULL is actually of integral type,
  // this doesn't trigger scoped_refptr<T>'s implicit conversion. Since there is
  // no comparison overload for scoped_refptr<T> and int, this fails to compile.
  MacroRewriterCallback macro_callback(&replacements);
  // CHECK_EQ/CHECK_NE helpers.
  match_finder.addMatcher(
      callExpr(callee(is_logging_helper), argumentCountIs(3),
               hasAnyArgument(ignoringParenImpCasts(
                   id("expr", expr(hasType(is_scoped_refptr))))),
               hasAnyArgument(ignoringParenImpCasts(hasType(pointerType()))),
               hasArgument(2, stringLiteral())),
      &macro_callback);
  // ASSERT_EQ/ASSERT_NE/EXPECT_EQ/EXPECT_NE, which use the same underlying
  // helper functions. Even though gtest has special handling for pointer to
  // NULL comparisons, it doesn't trigger in this case, so no special handling
  // is needed for the replacements.
  match_finder.addMatcher(
      callExpr(callee(is_gtest_helper),
               argumentCountIs(4),
               hasArgument(0, stringLiteral()),
               hasArgument(1, stringLiteral()),
               hasAnyArgument(id("expr", expr(hasType(is_scoped_refptr)))),
               hasAnyArgument(hasType(pointerType()))),
      &macro_callback);
  // ASSERT_TRUE/EXPECT_TRUE helpers. Note that this matcher doesn't need to
  // handle ASSERT_FALSE/EXPECT_FALSE, because it gets coerced to bool before
  // being passed as an argument to AssertionResult's constructor. As a result,
  // GetRewriterCallback handles this case properly since the conversion isn't
  // hidden inside AssertionResult, and the generated replacement properly
  // rewrites the macro argument.
  // However, the tool does need to handle the _TRUE counterparts, since the
  // conversion occurs inside the constructor in those cases.
  match_finder.addMatcher(
      cxxConstructExpr(
          argumentCountIs(2),
          hasArgument(0, id("expr", expr(hasType(is_scoped_refptr)))),
          hasDeclaration(is_gtest_assertion_result_ctor)),
      &macro_callback);

  std::unique_ptr<clang::tooling::FrontendActionFactory> factory =
      clang::tooling::newFrontendActionFactory(&match_finder);
  int result = tool.run(factory.get());
  if (result != 0)
    return result;

  // Serialization format is documented in tools/clang/scripts/run_tool.py
  // Each edit is written on one line as
  //   r:::<file path>:::<offset>:::<length>:::<replacement text>
  // so newlines inside the replacement text are swapped for NUL characters.
  llvm::outs() << "==== BEGIN EDITS ====\n";
  for (const auto& r : replacements) {
    std::string replacement_text = r.getReplacementText().str();
    std::replace(replacement_text.begin(), replacement_text.end(), '\n', '\0');
    llvm::outs() << "r:::" << r.getFilePath() << ":::" << r.getOffset() << ":::"
                 << r.getLength() << ":::" << replacement_text << "\n";
  }
  llvm::outs() << "==== END EDITS ====\n";

  return 0;
}
    440