Home | History | Annotate | Download | only in CodeGen
      1 //===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Instrumentation-based profile-guided optimization
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "CodeGenPGO.h"
     15 #include "CodeGenFunction.h"
     16 #include "CoverageMappingGen.h"
     17 #include "clang/AST/RecursiveASTVisitor.h"
     18 #include "clang/AST/StmtVisitor.h"
     19 #include "llvm/IR/Intrinsics.h"
     20 #include "llvm/IR/MDBuilder.h"
     21 #include "llvm/ProfileData/InstrProfReader.h"
     22 #include "llvm/Support/Endian.h"
     23 #include "llvm/Support/FileSystem.h"
     24 #include "llvm/Support/MD5.h"
     25 
     26 using namespace clang;
     27 using namespace CodeGen;
     28 
     29 void CodeGenPGO::setFuncName(StringRef Name,
     30                              llvm::GlobalValue::LinkageTypes Linkage) {
     31   llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
     32   FuncName = llvm::getPGOFuncName(
     33       Name, Linkage, CGM.getCodeGenOpts().MainFileName,
     34       PGOReader ? PGOReader->getVersion() : llvm::IndexedInstrProf::Version);
     35 
     36   // If we're generating a profile, create a variable for the name.
     37   if (CGM.getCodeGenOpts().ProfileInstrGenerate)
     38     FuncNameVar = llvm::createPGOFuncNameVar(CGM.getModule(), Linkage, FuncName);
     39 }
     40 
     41 void CodeGenPGO::setFuncName(llvm::Function *Fn) {
     42   setFuncName(Fn->getName(), Fn->getLinkage());
     43 }
     44 
     45 namespace {
     46 /// \brief Stable hasher for PGO region counters.
     47 ///
     48 /// PGOHash produces a stable hash of a given function's control flow.
     49 ///
     50 /// Changing the output of this hash will invalidate all previously generated
     51 /// profiles -- i.e., don't do it.
     52 ///
     53 /// \note  When this hash does eventually change (years?), we still need to
     54 /// support old hashes.  We'll need to pull in the version number from the
     55 /// profile data format and use the matching hash function.
     56 class PGOHash {
     57   uint64_t Working;
     58   unsigned Count;
     59   llvm::MD5 MD5;
     60 
     61   static const int NumBitsPerType = 6;
     62   static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType;
     63   static const unsigned TooBig = 1u << NumBitsPerType;
     64 
     65 public:
     66   /// \brief Hash values for AST nodes.
     67   ///
     68   /// Distinct values for AST nodes that have region counters attached.
     69   ///
     70   /// These values must be stable.  All new members must be added at the end,
     71   /// and no members should be removed.  Changing the enumeration value for an
     72   /// AST node will affect the hash of every function that contains that node.
     73   enum HashType : unsigned char {
     74     None = 0,
     75     LabelStmt = 1,
     76     WhileStmt,
     77     DoStmt,
     78     ForStmt,
     79     CXXForRangeStmt,
     80     ObjCForCollectionStmt,
     81     SwitchStmt,
     82     CaseStmt,
     83     DefaultStmt,
     84     IfStmt,
     85     CXXTryStmt,
     86     CXXCatchStmt,
     87     ConditionalOperator,
     88     BinaryOperatorLAnd,
     89     BinaryOperatorLOr,
     90     BinaryConditionalOperator,
     91 
     92     // Keep this last.  It's for the static assert that follows.
     93     LastHashType
     94   };
     95   static_assert(LastHashType <= TooBig, "Too many types in HashType");
     96 
     97   // TODO: When this format changes, take in a version number here, and use the
     98   // old hash calculation for file formats that used the old hash.
     99   PGOHash() : Working(0), Count(0) {}
    100   void combine(HashType Type);
    101   uint64_t finalize();
    102 };
    103 const int PGOHash::NumBitsPerType;
    104 const unsigned PGOHash::NumTypesPerWord;
    105 const unsigned PGOHash::TooBig;
    106 
    107 /// A RecursiveASTVisitor that fills a map of statements to PGO counters.
    108 struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
    109   /// The next counter value to assign.
    110   unsigned NextCounter;
    111   /// The function hash.
    112   PGOHash Hash;
    113   /// The map of statements to counters.
    114   llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
    115 
    116   MapRegionCounters(llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
    117       : NextCounter(0), CounterMap(CounterMap) {}
    118 
    119   // Blocks and lambdas are handled as separate functions, so we need not
    120   // traverse them in the parent context.
    121   bool TraverseBlockExpr(BlockExpr *BE) { return true; }
    122   bool TraverseLambdaBody(LambdaExpr *LE) { return true; }
    123   bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }
    124 
    125   bool VisitDecl(const Decl *D) {
    126     switch (D->getKind()) {
    127     default:
    128       break;
    129     case Decl::Function:
    130     case Decl::CXXMethod:
    131     case Decl::CXXConstructor:
    132     case Decl::CXXDestructor:
    133     case Decl::CXXConversion:
    134     case Decl::ObjCMethod:
    135     case Decl::Block:
    136     case Decl::Captured:
    137       CounterMap[D->getBody()] = NextCounter++;
    138       break;
    139     }
    140     return true;
    141   }
    142 
    143   bool VisitStmt(const Stmt *S) {
    144     auto Type = getHashType(S);
    145     if (Type == PGOHash::None)
    146       return true;
    147 
    148     CounterMap[S] = NextCounter++;
    149     Hash.combine(Type);
    150     return true;
    151   }
    152   PGOHash::HashType getHashType(const Stmt *S) {
    153     switch (S->getStmtClass()) {
    154     default:
    155       break;
    156     case Stmt::LabelStmtClass:
    157       return PGOHash::LabelStmt;
    158     case Stmt::WhileStmtClass:
    159       return PGOHash::WhileStmt;
    160     case Stmt::DoStmtClass:
    161       return PGOHash::DoStmt;
    162     case Stmt::ForStmtClass:
    163       return PGOHash::ForStmt;
    164     case Stmt::CXXForRangeStmtClass:
    165       return PGOHash::CXXForRangeStmt;
    166     case Stmt::ObjCForCollectionStmtClass:
    167       return PGOHash::ObjCForCollectionStmt;
    168     case Stmt::SwitchStmtClass:
    169       return PGOHash::SwitchStmt;
    170     case Stmt::CaseStmtClass:
    171       return PGOHash::CaseStmt;
    172     case Stmt::DefaultStmtClass:
    173       return PGOHash::DefaultStmt;
    174     case Stmt::IfStmtClass:
    175       return PGOHash::IfStmt;
    176     case Stmt::CXXTryStmtClass:
    177       return PGOHash::CXXTryStmt;
    178     case Stmt::CXXCatchStmtClass:
    179       return PGOHash::CXXCatchStmt;
    180     case Stmt::ConditionalOperatorClass:
    181       return PGOHash::ConditionalOperator;
    182     case Stmt::BinaryConditionalOperatorClass:
    183       return PGOHash::BinaryConditionalOperator;
    184     case Stmt::BinaryOperatorClass: {
    185       const BinaryOperator *BO = cast<BinaryOperator>(S);
    186       if (BO->getOpcode() == BO_LAnd)
    187         return PGOHash::BinaryOperatorLAnd;
    188       if (BO->getOpcode() == BO_LOr)
    189         return PGOHash::BinaryOperatorLOr;
    190       break;
    191     }
    192     }
    193     return PGOHash::None;
    194   }
    195 };
    196 
    197 /// A StmtVisitor that propagates the raw counts through the AST and
    198 /// records the count at statements where the value may change.
    199 struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
    200   /// PGO state.
    201   CodeGenPGO &PGO;
    202 
    203   /// A flag that is set when the current count should be recorded on the
    204   /// next statement, such as at the exit of a loop.
    205   bool RecordNextStmtCount;
    206 
    207   /// The count at the current location in the traversal.
    208   uint64_t CurrentCount;
    209 
    210   /// The map of statements to count values.
    211   llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
    212 
    213   /// BreakContinueStack - Keep counts of breaks and continues inside loops.
    214   struct BreakContinue {
    215     uint64_t BreakCount;
    216     uint64_t ContinueCount;
    217     BreakContinue() : BreakCount(0), ContinueCount(0) {}
    218   };
    219   SmallVector<BreakContinue, 8> BreakContinueStack;
    220 
    221   ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
    222                       CodeGenPGO &PGO)
    223       : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}
    224 
    225   void RecordStmtCount(const Stmt *S) {
    226     if (RecordNextStmtCount) {
    227       CountMap[S] = CurrentCount;
    228       RecordNextStmtCount = false;
    229     }
    230   }
    231 
    232   /// Set and return the current count.
    233   uint64_t setCount(uint64_t Count) {
    234     CurrentCount = Count;
    235     return Count;
    236   }
    237 
    238   void VisitStmt(const Stmt *S) {
    239     RecordStmtCount(S);
    240     for (const Stmt *Child : S->children())
    241       if (Child)
    242         this->Visit(Child);
    243   }
    244 
    245   void VisitFunctionDecl(const FunctionDecl *D) {
    246     // Counter tracks entry to the function body.
    247     uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
    248     CountMap[D->getBody()] = BodyCount;
    249     Visit(D->getBody());
    250   }
    251 
    252   // Skip lambda expressions. We visit these as FunctionDecls when we're
    253   // generating them and aren't interested in the body when generating a
    254   // parent context.
    255   void VisitLambdaExpr(const LambdaExpr *LE) {}
    256 
    257   void VisitCapturedDecl(const CapturedDecl *D) {
    258     // Counter tracks entry to the capture body.
    259     uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
    260     CountMap[D->getBody()] = BodyCount;
    261     Visit(D->getBody());
    262   }
    263 
    264   void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
    265     // Counter tracks entry to the method body.
    266     uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
    267     CountMap[D->getBody()] = BodyCount;
    268     Visit(D->getBody());
    269   }
    270 
    271   void VisitBlockDecl(const BlockDecl *D) {
    272     // Counter tracks entry to the block body.
    273     uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
    274     CountMap[D->getBody()] = BodyCount;
    275     Visit(D->getBody());
    276   }
    277 
    278   void VisitReturnStmt(const ReturnStmt *S) {
    279     RecordStmtCount(S);
    280     if (S->getRetValue())
    281       Visit(S->getRetValue());
    282     CurrentCount = 0;
    283     RecordNextStmtCount = true;
    284   }
    285 
    286   void VisitCXXThrowExpr(const CXXThrowExpr *E) {
    287     RecordStmtCount(E);
    288     if (E->getSubExpr())
    289       Visit(E->getSubExpr());
    290     CurrentCount = 0;
    291     RecordNextStmtCount = true;
    292   }
    293 
    294   void VisitGotoStmt(const GotoStmt *S) {
    295     RecordStmtCount(S);
    296     CurrentCount = 0;
    297     RecordNextStmtCount = true;
    298   }
    299 
    300   void VisitLabelStmt(const LabelStmt *S) {
    301     RecordNextStmtCount = false;
    302     // Counter tracks the block following the label.
    303     uint64_t BlockCount = setCount(PGO.getRegionCount(S));
    304     CountMap[S] = BlockCount;
    305     Visit(S->getSubStmt());
    306   }
    307 
    308   void VisitBreakStmt(const BreakStmt *S) {
    309     RecordStmtCount(S);
    310     assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
    311     BreakContinueStack.back().BreakCount += CurrentCount;
    312     CurrentCount = 0;
    313     RecordNextStmtCount = true;
    314   }
    315 
    316   void VisitContinueStmt(const ContinueStmt *S) {
    317     RecordStmtCount(S);
    318     assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
    319     BreakContinueStack.back().ContinueCount += CurrentCount;
    320     CurrentCount = 0;
    321     RecordNextStmtCount = true;
    322   }
    323 
    324   void VisitWhileStmt(const WhileStmt *S) {
    325     RecordStmtCount(S);
    326     uint64_t ParentCount = CurrentCount;
    327 
    328     BreakContinueStack.push_back(BreakContinue());
    329     // Visit the body region first so the break/continue adjustments can be
    330     // included when visiting the condition.
    331     uint64_t BodyCount = setCount(PGO.getRegionCount(S));
    332     CountMap[S->getBody()] = CurrentCount;
    333     Visit(S->getBody());
    334     uint64_t BackedgeCount = CurrentCount;
    335 
    336     // ...then go back and propagate counts through the condition. The count
    337     // at the start of the condition is the sum of the incoming edges,
    338     // the backedge from the end of the loop body, and the edges from
    339     // continue statements.
    340     BreakContinue BC = BreakContinueStack.pop_back_val();
    341     uint64_t CondCount =
    342         setCount(ParentCount + BackedgeCount + BC.ContinueCount);
    343     CountMap[S->getCond()] = CondCount;
    344     Visit(S->getCond());
    345     setCount(BC.BreakCount + CondCount - BodyCount);
    346     RecordNextStmtCount = true;
    347   }
    348 
    349   void VisitDoStmt(const DoStmt *S) {
    350     RecordStmtCount(S);
    351     uint64_t LoopCount = PGO.getRegionCount(S);
    352 
    353     BreakContinueStack.push_back(BreakContinue());
    354     // The count doesn't include the fallthrough from the parent scope. Add it.
    355     uint64_t BodyCount = setCount(LoopCount + CurrentCount);
    356     CountMap[S->getBody()] = BodyCount;
    357     Visit(S->getBody());
    358     uint64_t BackedgeCount = CurrentCount;
    359 
    360     BreakContinue BC = BreakContinueStack.pop_back_val();
    361     // The count at the start of the condition is equal to the count at the
    362     // end of the body, plus any continues.
    363     uint64_t CondCount = setCount(BackedgeCount + BC.ContinueCount);
    364     CountMap[S->getCond()] = CondCount;
    365     Visit(S->getCond());
    366     setCount(BC.BreakCount + CondCount - LoopCount);
    367     RecordNextStmtCount = true;
    368   }
    369 
    370   void VisitForStmt(const ForStmt *S) {
    371     RecordStmtCount(S);
    372     if (S->getInit())
    373       Visit(S->getInit());
    374 
    375     uint64_t ParentCount = CurrentCount;
    376 
    377     BreakContinueStack.push_back(BreakContinue());
    378     // Visit the body region first. (This is basically the same as a while
    379     // loop; see further comments in VisitWhileStmt.)
    380     uint64_t BodyCount = setCount(PGO.getRegionCount(S));
    381     CountMap[S->getBody()] = BodyCount;
    382     Visit(S->getBody());
    383     uint64_t BackedgeCount = CurrentCount;
    384     BreakContinue BC = BreakContinueStack.pop_back_val();
    385 
    386     // The increment is essentially part of the body but it needs to include
    387     // the count for all the continue statements.
    388     if (S->getInc()) {
    389       uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
    390       CountMap[S->getInc()] = IncCount;
    391       Visit(S->getInc());
    392     }
    393 
    394     // ...then go back and propagate counts through the condition.
    395     uint64_t CondCount =
    396         setCount(ParentCount + BackedgeCount + BC.ContinueCount);
    397     if (S->getCond()) {
    398       CountMap[S->getCond()] = CondCount;
    399       Visit(S->getCond());
    400     }
    401     setCount(BC.BreakCount + CondCount - BodyCount);
    402     RecordNextStmtCount = true;
    403   }
    404 
    405   void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
    406     RecordStmtCount(S);
    407     Visit(S->getLoopVarStmt());
    408     Visit(S->getRangeStmt());
    409     Visit(S->getBeginEndStmt());
    410 
    411     uint64_t ParentCount = CurrentCount;
    412     BreakContinueStack.push_back(BreakContinue());
    413     // Visit the body region first. (This is basically the same as a while
    414     // loop; see further comments in VisitWhileStmt.)
    415     uint64_t BodyCount = setCount(PGO.getRegionCount(S));
    416     CountMap[S->getBody()] = BodyCount;
    417     Visit(S->getBody());
    418     uint64_t BackedgeCount = CurrentCount;
    419     BreakContinue BC = BreakContinueStack.pop_back_val();
    420 
    421     // The increment is essentially part of the body but it needs to include
    422     // the count for all the continue statements.
    423     uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
    424     CountMap[S->getInc()] = IncCount;
    425     Visit(S->getInc());
    426 
    427     // ...then go back and propagate counts through the condition.
    428     uint64_t CondCount =
    429         setCount(ParentCount + BackedgeCount + BC.ContinueCount);
    430     CountMap[S->getCond()] = CondCount;
    431     Visit(S->getCond());
    432     setCount(BC.BreakCount + CondCount - BodyCount);
    433     RecordNextStmtCount = true;
    434   }
    435 
    436   void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
    437     RecordStmtCount(S);
    438     Visit(S->getElement());
    439     uint64_t ParentCount = CurrentCount;
    440     BreakContinueStack.push_back(BreakContinue());
    441     // Counter tracks the body of the loop.
    442     uint64_t BodyCount = setCount(PGO.getRegionCount(S));
    443     CountMap[S->getBody()] = BodyCount;
    444     Visit(S->getBody());
    445     uint64_t BackedgeCount = CurrentCount;
    446     BreakContinue BC = BreakContinueStack.pop_back_val();
    447 
    448     setCount(BC.BreakCount + ParentCount + BackedgeCount + BC.ContinueCount -
    449              BodyCount);
    450     RecordNextStmtCount = true;
    451   }
    452 
    453   void VisitSwitchStmt(const SwitchStmt *S) {
    454     RecordStmtCount(S);
    455     Visit(S->getCond());
    456     CurrentCount = 0;
    457     BreakContinueStack.push_back(BreakContinue());
    458     Visit(S->getBody());
    459     // If the switch is inside a loop, add the continue counts.
    460     BreakContinue BC = BreakContinueStack.pop_back_val();
    461     if (!BreakContinueStack.empty())
    462       BreakContinueStack.back().ContinueCount += BC.ContinueCount;
    463     // Counter tracks the exit block of the switch.
    464     setCount(PGO.getRegionCount(S));
    465     RecordNextStmtCount = true;
    466   }
    467 
    468   void VisitSwitchCase(const SwitchCase *S) {
    469     RecordNextStmtCount = false;
    470     // Counter for this particular case. This counts only jumps from the
    471     // switch header and does not include fallthrough from the case before
    472     // this one.
    473     uint64_t CaseCount = PGO.getRegionCount(S);
    474     setCount(CurrentCount + CaseCount);
    475     // We need the count without fallthrough in the mapping, so it's more useful
    476     // for branch probabilities.
    477     CountMap[S] = CaseCount;
    478     RecordNextStmtCount = true;
    479     Visit(S->getSubStmt());
    480   }
    481 
    482   void VisitIfStmt(const IfStmt *S) {
    483     RecordStmtCount(S);
    484     uint64_t ParentCount = CurrentCount;
    485     Visit(S->getCond());
    486 
    487     // Counter tracks the "then" part of an if statement. The count for
    488     // the "else" part, if it exists, will be calculated from this counter.
    489     uint64_t ThenCount = setCount(PGO.getRegionCount(S));
    490     CountMap[S->getThen()] = ThenCount;
    491     Visit(S->getThen());
    492     uint64_t OutCount = CurrentCount;
    493 
    494     uint64_t ElseCount = ParentCount - ThenCount;
    495     if (S->getElse()) {
    496       setCount(ElseCount);
    497       CountMap[S->getElse()] = ElseCount;
    498       Visit(S->getElse());
    499       OutCount += CurrentCount;
    500     } else
    501       OutCount += ElseCount;
    502     setCount(OutCount);
    503     RecordNextStmtCount = true;
    504   }
    505 
    506   void VisitCXXTryStmt(const CXXTryStmt *S) {
    507     RecordStmtCount(S);
    508     Visit(S->getTryBlock());
    509     for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
    510       Visit(S->getHandler(I));
    511     // Counter tracks the continuation block of the try statement.
    512     setCount(PGO.getRegionCount(S));
    513     RecordNextStmtCount = true;
    514   }
    515 
    516   void VisitCXXCatchStmt(const CXXCatchStmt *S) {
    517     RecordNextStmtCount = false;
    518     // Counter tracks the catch statement's handler block.
    519     uint64_t CatchCount = setCount(PGO.getRegionCount(S));
    520     CountMap[S] = CatchCount;
    521     Visit(S->getHandlerBlock());
    522   }
    523 
    524   void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
    525     RecordStmtCount(E);
    526     uint64_t ParentCount = CurrentCount;
    527     Visit(E->getCond());
    528 
    529     // Counter tracks the "true" part of a conditional operator. The
    530     // count in the "false" part will be calculated from this counter.
    531     uint64_t TrueCount = setCount(PGO.getRegionCount(E));
    532     CountMap[E->getTrueExpr()] = TrueCount;
    533     Visit(E->getTrueExpr());
    534     uint64_t OutCount = CurrentCount;
    535 
    536     uint64_t FalseCount = setCount(ParentCount - TrueCount);
    537     CountMap[E->getFalseExpr()] = FalseCount;
    538     Visit(E->getFalseExpr());
    539     OutCount += CurrentCount;
    540 
    541     setCount(OutCount);
    542     RecordNextStmtCount = true;
    543   }
    544 
    545   void VisitBinLAnd(const BinaryOperator *E) {
    546     RecordStmtCount(E);
    547     uint64_t ParentCount = CurrentCount;
    548     Visit(E->getLHS());
    549     // Counter tracks the right hand side of a logical and operator.
    550     uint64_t RHSCount = setCount(PGO.getRegionCount(E));
    551     CountMap[E->getRHS()] = RHSCount;
    552     Visit(E->getRHS());
    553     setCount(ParentCount + RHSCount - CurrentCount);
    554     RecordNextStmtCount = true;
    555   }
    556 
    557   void VisitBinLOr(const BinaryOperator *E) {
    558     RecordStmtCount(E);
    559     uint64_t ParentCount = CurrentCount;
    560     Visit(E->getLHS());
    561     // Counter tracks the right hand side of a logical or operator.
    562     uint64_t RHSCount = setCount(PGO.getRegionCount(E));
    563     CountMap[E->getRHS()] = RHSCount;
    564     Visit(E->getRHS());
    565     setCount(ParentCount + RHSCount - CurrentCount);
    566     RecordNextStmtCount = true;
    567   }
    568 };
    569 } // end anonymous namespace
    570 
    571 void PGOHash::combine(HashType Type) {
    572   // Check that we never combine 0 and only have six bits.
    573   assert(Type && "Hash is invalid: unexpected type 0");
    574   assert(unsigned(Type) < TooBig && "Hash is invalid: too many types");
    575 
    576   // Pass through MD5 if enough work has built up.
    577   if (Count && Count % NumTypesPerWord == 0) {
    578     using namespace llvm::support;
    579     uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
    580     MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
    581     Working = 0;
    582   }
    583 
    584   // Accumulate the current type.
    585   ++Count;
    586   Working = Working << NumBitsPerType | Type;
    587 }
    588 
    589 uint64_t PGOHash::finalize() {
    590   // Use Working as the hash directly if we never used MD5.
    591   if (Count <= NumTypesPerWord)
    592     // No need to byte swap here, since none of the math was endian-dependent.
    593     // This number will be byte-swapped as required on endianness transitions,
    594     // so we will see the same value on the other side.
    595     return Working;
    596 
    597   // Check for remaining work in Working.
    598   if (Working)
    599     MD5.update(Working);
    600 
    601   // Finalize the MD5 and return the hash.
    602   llvm::MD5::MD5Result Result;
    603   MD5.final(Result);
    604   using namespace llvm::support;
    605   return endian::read<uint64_t, little, unaligned>(Result);
    606 }
    607 
    608 void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
    609   const Decl *D = GD.getDecl();
    610   bool InstrumentRegions = CGM.getCodeGenOpts().ProfileInstrGenerate;
    611   llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
    612   if (!InstrumentRegions && !PGOReader)
    613     return;
    614   if (D->isImplicit())
    615     return;
    616   // Constructors and destructors may be represented by several functions in IR.
    617   // If so, instrument only base variant, others are implemented by delegation
    618   // to the base one, it would be counted twice otherwise.
    619   if (CGM.getTarget().getCXXABI().hasConstructorVariants() &&
    620       ((isa<CXXConstructorDecl>(GD.getDecl()) &&
    621         GD.getCtorType() != Ctor_Base) ||
    622        (isa<CXXDestructorDecl>(GD.getDecl()) &&
    623         GD.getDtorType() != Dtor_Base))) {
    624       return;
    625   }
    626   CGM.ClearUnusedCoverageMapping(D);
    627   setFuncName(Fn);
    628 
    629   mapRegionCounters(D);
    630   if (CGM.getCodeGenOpts().CoverageMapping)
    631     emitCounterRegionMapping(D);
    632   if (PGOReader) {
    633     SourceManager &SM = CGM.getContext().getSourceManager();
    634     loadRegionCounts(PGOReader, SM.isInMainFile(D->getLocation()));
    635     computeRegionCounts(D);
    636     applyFunctionAttributes(PGOReader, Fn);
    637   }
    638 }
    639 
    640 void CodeGenPGO::mapRegionCounters(const Decl *D) {
    641   RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>);
    642   MapRegionCounters Walker(*RegionCounterMap);
    643   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
    644     Walker.TraverseDecl(const_cast<FunctionDecl *>(FD));
    645   else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
    646     Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD));
    647   else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
    648     Walker.TraverseDecl(const_cast<BlockDecl *>(BD));
    649   else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
    650     Walker.TraverseDecl(const_cast<CapturedDecl *>(CD));
    651   assert(Walker.NextCounter > 0 && "no entry counter mapped for decl");
    652   NumRegionCounters = Walker.NextCounter;
    653   FunctionHash = Walker.Hash.finalize();
    654 }
    655 
    656 void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
    657   if (SkipCoverageMapping)
    658     return;
    659   // Don't map the functions inside the system headers
    660   auto Loc = D->getBody()->getLocStart();
    661   if (CGM.getContext().getSourceManager().isInSystemHeader(Loc))
    662     return;
    663 
    664   std::string CoverageMapping;
    665   llvm::raw_string_ostream OS(CoverageMapping);
    666   CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
    667                                 CGM.getContext().getSourceManager(),
    668                                 CGM.getLangOpts(), RegionCounterMap.get());
    669   MappingGen.emitCounterMapping(D, OS);
    670   OS.flush();
    671 
    672   if (CoverageMapping.empty())
    673     return;
    674 
    675   CGM.getCoverageMapping()->addFunctionMappingRecord(
    676       FuncNameVar, FuncName, FunctionHash, CoverageMapping);
    677 }
    678 
    679 void
    680 CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name,
    681                                     llvm::GlobalValue::LinkageTypes Linkage) {
    682   if (SkipCoverageMapping)
    683     return;
    684   // Don't map the functions inside the system headers
    685   auto Loc = D->getBody()->getLocStart();
    686   if (CGM.getContext().getSourceManager().isInSystemHeader(Loc))
    687     return;
    688 
    689   std::string CoverageMapping;
    690   llvm::raw_string_ostream OS(CoverageMapping);
    691   CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
    692                                 CGM.getContext().getSourceManager(),
    693                                 CGM.getLangOpts());
    694   MappingGen.emitEmptyMapping(D, OS);
    695   OS.flush();
    696 
    697   if (CoverageMapping.empty())
    698     return;
    699 
    700   setFuncName(Name, Linkage);
    701   CGM.getCoverageMapping()->addFunctionMappingRecord(
    702       FuncNameVar, FuncName, FunctionHash, CoverageMapping);
    703 }
    704 
    705 void CodeGenPGO::computeRegionCounts(const Decl *D) {
    706   StmtCountMap.reset(new llvm::DenseMap<const Stmt *, uint64_t>);
    707   ComputeRegionCounts Walker(*StmtCountMap, *this);
    708   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
    709     Walker.VisitFunctionDecl(FD);
    710   else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
    711     Walker.VisitObjCMethodDecl(MD);
    712   else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
    713     Walker.VisitBlockDecl(BD);
    714   else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
    715     Walker.VisitCapturedDecl(const_cast<CapturedDecl *>(CD));
    716 }
    717 
    718 void
    719 CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
    720                                     llvm::Function *Fn) {
    721   if (!haveRegionCounts())
    722     return;
    723 
    724   uint64_t MaxFunctionCount = PGOReader->getMaximumFunctionCount();
    725   uint64_t FunctionCount = getRegionCount(nullptr);
    726   if (FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount))
    727     // Turn on InlineHint attribute for hot functions.
    728     // FIXME: 30% is from preliminary tuning on SPEC, it may not be optimal.
    729     Fn->addFnAttr(llvm::Attribute::InlineHint);
    730   else if (FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount))
    731     // Turn on Cold attribute for cold functions.
    732     // FIXME: 1% is from preliminary tuning on SPEC, it may not be optimal.
    733     Fn->addFnAttr(llvm::Attribute::Cold);
    734 
    735   Fn->setEntryCount(FunctionCount);
    736 }
    737 
    738 void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) {
    739   if (!CGM.getCodeGenOpts().ProfileInstrGenerate || !RegionCounterMap)
    740     return;
    741   if (!Builder.GetInsertBlock())
    742     return;
    743 
    744   unsigned Counter = (*RegionCounterMap)[S];
    745   auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
    746   Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
    747                      {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
    748                       Builder.getInt64(FunctionHash),
    749                       Builder.getInt32(NumRegionCounters),
    750                       Builder.getInt32(Counter)});
    751 }
    752 
    753 void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
    754                                   bool IsInMainFile) {
    755   CGM.getPGOStats().addVisited(IsInMainFile);
    756   RegionCounts.clear();
    757   if (std::error_code EC =
    758           PGOReader->getFunctionCounts(FuncName, FunctionHash, RegionCounts)) {
    759     if (EC == llvm::instrprof_error::unknown_function)
    760       CGM.getPGOStats().addMissing(IsInMainFile);
    761     else if (EC == llvm::instrprof_error::hash_mismatch)
    762       CGM.getPGOStats().addMismatched(IsInMainFile);
    763     else if (EC == llvm::instrprof_error::malformed)
    764       // TODO: Consider a more specific warning for this case.
    765       CGM.getPGOStats().addMismatched(IsInMainFile);
    766     RegionCounts.clear();
    767   }
    768 }
    769 
    770 /// \brief Calculate what to divide by to scale weights.
    771 ///
    772 /// Given the maximum weight, calculate a divisor that will scale all the
    773 /// weights to strictly less than UINT32_MAX.
    774 static uint64_t calculateWeightScale(uint64_t MaxWeight) {
    775   return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1;
    776 }
    777 
    778 /// \brief Scale an individual branch weight (and add 1).
    779 ///
    780 /// Scale a 64-bit weight down to 32-bits using \c Scale.
    781 ///
    782 /// According to Laplace's Rule of Succession, it is better to compute the
    783 /// weight based on the count plus 1, so universally add 1 to the value.
    784 ///
    785 /// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no
    786 /// greater than \c Weight.
    787 static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
    788   assert(Scale && "scale by 0?");
    789   uint64_t Scaled = Weight / Scale + 1;
    790   assert(Scaled <= UINT32_MAX && "overflow 32-bits");
    791   return Scaled;
    792 }
    793 
    794 llvm::MDNode *CodeGenFunction::createProfileWeights(uint64_t TrueCount,
    795                                                     uint64_t FalseCount) {
    796   // Check for empty weights.
    797   if (!TrueCount && !FalseCount)
    798     return nullptr;
    799 
    800   // Calculate how to scale down to 32-bits.
    801   uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount));
    802 
    803   llvm::MDBuilder MDHelper(CGM.getLLVMContext());
    804   return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale),
    805                                       scaleBranchWeight(FalseCount, Scale));
    806 }
    807 
    808 llvm::MDNode *
    809 CodeGenFunction::createProfileWeights(ArrayRef<uint64_t> Weights) {
    810   // We need at least two elements to create meaningful weights.
    811   if (Weights.size() < 2)
    812     return nullptr;
    813 
    814   // Check for empty weights.
    815   uint64_t MaxWeight = *std::max_element(Weights.begin(), Weights.end());
    816   if (MaxWeight == 0)
    817     return nullptr;
    818 
    819   // Calculate how to scale down to 32-bits.
    820   uint64_t Scale = calculateWeightScale(MaxWeight);
    821 
    822   SmallVector<uint32_t, 16> ScaledWeights;
    823   ScaledWeights.reserve(Weights.size());
    824   for (uint64_t W : Weights)
    825     ScaledWeights.push_back(scaleBranchWeight(W, Scale));
    826 
    827   llvm::MDBuilder MDHelper(CGM.getLLVMContext());
    828   return MDHelper.createBranchWeights(ScaledWeights);
    829 }
    830 
    831 llvm::MDNode *CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond,
    832                                                            uint64_t LoopCount) {
    833   if (!PGO.haveRegionCounts())
    834     return nullptr;
    835   Optional<uint64_t> CondCount = PGO.getStmtCount(Cond);
    836   assert(CondCount.hasValue() && "missing expected loop condition count");
    837   if (*CondCount == 0)
    838     return nullptr;
    839   return createProfileWeights(LoopCount,
    840                               std::max(*CondCount, LoopCount) - LoopCount);
    841 }
    842