Home | History | Annotate | Download | only in Analysis
      1 //===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Loops should be simplified before this analysis.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
     15 #include "llvm/ADT/SCCIterator.h"
     16 #include "llvm/IR/Function.h"
     17 #include "llvm/Support/raw_ostream.h"
     18 #include <numeric>
     19 
     20 using namespace llvm;
     21 using namespace llvm::bfi_detail;
     22 
     23 #define DEBUG_TYPE "block-freq"
     24 
     25 ScaledNumber<uint64_t> BlockMass::toScaled() const {
     26   if (isFull())
     27     return ScaledNumber<uint64_t>(1, 0);
     28   return ScaledNumber<uint64_t>(getMass() + 1, -64);
     29 }
     30 
     31 LLVM_DUMP_METHOD void BlockMass::dump() const { print(dbgs()); }
     32 
     33 static char getHexDigit(int N) {
     34   assert(N < 16);
     35   if (N < 10)
     36     return '0' + N;
     37   return 'a' + N - 10;
     38 }
     39 
     40 raw_ostream &BlockMass::print(raw_ostream &OS) const {
     41   for (int Digits = 0; Digits < 16; ++Digits)
     42     OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
     43   return OS;
     44 }
     45 
     46 namespace {
     47 
     48 typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
     49 typedef BlockFrequencyInfoImplBase::Distribution Distribution;
     50 typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
     51 typedef BlockFrequencyInfoImplBase::Scaled64 Scaled64;
     52 typedef BlockFrequencyInfoImplBase::LoopData LoopData;
     53 typedef BlockFrequencyInfoImplBase::Weight Weight;
     54 typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
     55 
     56 /// \brief Dithering mass distributer.
     57 ///
     58 /// This class splits up a single mass into portions by weight, dithering to
     59 /// spread out error.  No mass is lost.  The dithering precision depends on the
     60 /// precision of the product of \a BlockMass and \a BranchProbability.
     61 ///
     62 /// The distribution algorithm follows.
     63 ///
     64 ///  1. Initialize by saving the sum of the weights in \a RemWeight and the
     65 ///     mass to distribute in \a RemMass.
     66 ///
     67 ///  2. For each portion:
     68 ///
     69 ///      1. Construct a branch probability, P, as the portion's weight divided
     70 ///         by the current value of \a RemWeight.
     71 ///      2. Calculate the portion's mass as \a RemMass times P.
     72 ///      3. Update \a RemWeight and \a RemMass at each portion by subtracting
     73 ///         the current portion's weight and mass.
     74 struct DitheringDistributer {
     75   uint32_t RemWeight;
     76   BlockMass RemMass;
     77 
     78   DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
     79 
     80   BlockMass takeMass(uint32_t Weight);
     81 };
     82 
     83 } // end anonymous namespace
     84 
     85 DitheringDistributer::DitheringDistributer(Distribution &Dist,
     86                                            const BlockMass &Mass) {
     87   Dist.normalize();
     88   RemWeight = Dist.Total;
     89   RemMass = Mass;
     90 }
     91 
     92 BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
     93   assert(Weight && "invalid weight");
     94   assert(Weight <= RemWeight);
     95   BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
     96 
     97   // Decrement totals (dither).
     98   RemWeight -= Weight;
     99   RemMass -= Mass;
    100   return Mass;
    101 }
    102 
    103 void Distribution::add(const BlockNode &Node, uint64_t Amount,
    104                        Weight::DistType Type) {
    105   assert(Amount && "invalid weight of 0");
    106   uint64_t NewTotal = Total + Amount;
    107 
    108   // Check for overflow.  It should be impossible to overflow twice.
    109   bool IsOverflow = NewTotal < Total;
    110   assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
    111   DidOverflow |= IsOverflow;
    112 
    113   // Update the total.
    114   Total = NewTotal;
    115 
    116   // Save the weight.
    117   Weights.push_back(Weight(Type, Node, Amount));
    118 }
    119 
    120 static void combineWeight(Weight &W, const Weight &OtherW) {
    121   assert(OtherW.TargetNode.isValid());
    122   if (!W.Amount) {
    123     W = OtherW;
    124     return;
    125   }
    126   assert(W.Type == OtherW.Type);
    127   assert(W.TargetNode == OtherW.TargetNode);
    128   assert(OtherW.Amount && "Expected non-zero weight");
    129   if (W.Amount > W.Amount + OtherW.Amount)
    130     // Saturate on overflow.
    131     W.Amount = UINT64_MAX;
    132   else
    133     W.Amount += OtherW.Amount;
    134 }
    135 
    136 static void combineWeightsBySorting(WeightList &Weights) {
    137   // Sort so edges to the same node are adjacent.
    138   std::sort(Weights.begin(), Weights.end(),
    139             [](const Weight &L,
    140                const Weight &R) { return L.TargetNode < R.TargetNode; });
    141 
    142   // Combine adjacent edges.
    143   WeightList::iterator O = Weights.begin();
    144   for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
    145        ++O, (I = L)) {
    146     *O = *I;
    147 
    148     // Find the adjacent weights to the same node.
    149     for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
    150       combineWeight(*O, *L);
    151   }
    152 
    153   // Erase extra entries.
    154   Weights.erase(O, Weights.end());
    155 }
    156 
    157 static void combineWeightsByHashing(WeightList &Weights) {
    158   // Collect weights into a DenseMap.
    159   typedef DenseMap<BlockNode::IndexType, Weight> HashTable;
    160   HashTable Combined(NextPowerOf2(2 * Weights.size()));
    161   for (const Weight &W : Weights)
    162     combineWeight(Combined[W.TargetNode.Index], W);
    163 
    164   // Check whether anything changed.
    165   if (Weights.size() == Combined.size())
    166     return;
    167 
    168   // Fill in the new weights.
    169   Weights.clear();
    170   Weights.reserve(Combined.size());
    171   for (const auto &I : Combined)
    172     Weights.push_back(I.second);
    173 }
    174 
    175 static void combineWeights(WeightList &Weights) {
    176   // Use a hash table for many successors to keep this linear.
    177   if (Weights.size() > 128) {
    178     combineWeightsByHashing(Weights);
    179     return;
    180   }
    181 
    182   combineWeightsBySorting(Weights);
    183 }
    184 
    185 static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
    186   assert(Shift >= 0);
    187   assert(Shift < 64);
    188   if (!Shift)
    189     return N;
    190   return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
    191 }
    192 
    193 void Distribution::normalize() {
    194   // Early exit for termination nodes.
    195   if (Weights.empty())
    196     return;
    197 
    198   // Only bother if there are multiple successors.
    199   if (Weights.size() > 1)
    200     combineWeights(Weights);
    201 
    202   // Early exit when combined into a single successor.
    203   if (Weights.size() == 1) {
    204     Total = 1;
    205     Weights.front().Amount = 1;
    206     return;
    207   }
    208 
    209   // Determine how much to shift right so that the total fits into 32-bits.
    210   //
    211   // If we shift at all, shift by 1 extra.  Otherwise, the lower limit of 1
    212   // for each weight can cause a 32-bit overflow.
    213   int Shift = 0;
    214   if (DidOverflow)
    215     Shift = 33;
    216   else if (Total > UINT32_MAX)
    217     Shift = 33 - countLeadingZeros(Total);
    218 
    219   // Early exit if nothing needs to be scaled.
    220   if (!Shift) {
    221     // If we didn't overflow then combineWeights() shouldn't have changed the
    222     // sum of the weights, but let's double-check.
    223     assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
    224                                     [](uint64_t Sum, const Weight &W) {
    225                       return Sum + W.Amount;
    226                     }) &&
    227            "Expected total to be correct");
    228     return;
    229   }
    230 
    231   // Recompute the total through accumulation (rather than shifting it) so that
    232   // it's accurate after shifting and any changes combineWeights() made above.
    233   Total = 0;
    234 
    235   // Sum the weights to each node and shift right if necessary.
    236   for (Weight &W : Weights) {
    237     // Scale down below UINT32_MAX.  Since Shift is larger than necessary, we
    238     // can round here without concern about overflow.
    239     assert(W.TargetNode.isValid());
    240     W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
    241     assert(W.Amount <= UINT32_MAX);
    242 
    243     // Update the total.
    244     Total += W.Amount;
    245   }
    246   assert(Total <= UINT32_MAX);
    247 }
    248 
    249 void BlockFrequencyInfoImplBase::clear() {
    250   // Swap with a default-constructed std::vector, since std::vector<>::clear()
    251   // does not actually clear heap storage.
    252   std::vector<FrequencyData>().swap(Freqs);
    253   std::vector<WorkingData>().swap(Working);
    254   Loops.clear();
    255 }
    256 
    257 /// \brief Clear all memory not needed downstream.
    258 ///
    259 /// Releases all memory not used downstream.  In particular, saves Freqs.
    260 static void cleanup(BlockFrequencyInfoImplBase &BFI) {
    261   std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
    262   BFI.clear();
    263   BFI.Freqs = std::move(SavedFreqs);
    264 }
    265 
    266 bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
    267                                            const LoopData *OuterLoop,
    268                                            const BlockNode &Pred,
    269                                            const BlockNode &Succ,
    270                                            uint64_t Weight) {
    271   if (!Weight)
    272     Weight = 1;
    273 
    274   auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
    275     return OuterLoop && OuterLoop->isHeader(Node);
    276   };
    277 
    278   BlockNode Resolved = Working[Succ.Index].getResolvedNode();
    279 
    280 #ifndef NDEBUG
    281   auto debugSuccessor = [&](const char *Type) {
    282     dbgs() << "  =>"
    283            << " [" << Type << "] weight = " << Weight;
    284     if (!isLoopHeader(Resolved))
    285       dbgs() << ", succ = " << getBlockName(Succ);
    286     if (Resolved != Succ)
    287       dbgs() << ", resolved = " << getBlockName(Resolved);
    288     dbgs() << "\n";
    289   };
    290   (void)debugSuccessor;
    291 #endif
    292 
    293   if (isLoopHeader(Resolved)) {
    294     DEBUG(debugSuccessor("backedge"));
    295     Dist.addBackedge(Resolved, Weight);
    296     return true;
    297   }
    298 
    299   if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
    300     DEBUG(debugSuccessor("  exit  "));
    301     Dist.addExit(Resolved, Weight);
    302     return true;
    303   }
    304 
    305   if (Resolved < Pred) {
    306     if (!isLoopHeader(Pred)) {
    307       // If OuterLoop is an irreducible loop, we can't actually handle this.
    308       assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
    309              "unhandled irreducible control flow");
    310 
    311       // Irreducible backedge.  Abort.
    312       DEBUG(debugSuccessor("abort!!!"));
    313       return false;
    314     }
    315 
    316     // If "Pred" is a loop header, then this isn't really a backedge; rather,
    317     // OuterLoop must be irreducible.  These false backedges can come only from
    318     // secondary loop headers.
    319     assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
    320            "unhandled irreducible control flow");
    321   }
    322 
    323   DEBUG(debugSuccessor(" local  "));
    324   Dist.addLocal(Resolved, Weight);
    325   return true;
    326 }
    327 
    328 bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
    329     const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
    330   // Copy the exit map into Dist.
    331   for (const auto &I : Loop.Exits)
    332     if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
    333                    I.second.getMass()))
    334       // Irreducible backedge.
    335       return false;
    336 
    337   return true;
    338 }
    339 
    340 /// \brief Compute the loop scale for a loop.
    341 void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
    342   // Compute loop scale.
    343   DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
    344 
    345   // Infinite loops need special handling. If we give the back edge an infinite
    346   // mass, they may saturate all the other scales in the function down to 1,
    347   // making all the other region temperatures look exactly the same. Choose an
    348   // arbitrary scale to avoid these issues.
    349   //
    350   // FIXME: An alternate way would be to select a symbolic scale which is later
    351   // replaced to be the maximum of all computed scales plus 1. This would
    352   // appropriately describe the loop as having a large scale, without skewing
    353   // the final frequency computation.
    354   const Scaled64 InfiniteLoopScale(1, 12);
    355 
    356   // LoopScale == 1 / ExitMass
    357   // ExitMass == HeadMass - BackedgeMass
    358   BlockMass TotalBackedgeMass;
    359   for (auto &Mass : Loop.BackedgeMass)
    360     TotalBackedgeMass += Mass;
    361   BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;
    362 
    363   // Block scale stores the inverse of the scale. If this is an infinite loop,
    364   // its exit mass will be zero. In this case, use an arbitrary scale for the
    365   // loop scale.
    366   Loop.Scale =
    367       ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();
    368 
    369   DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
    370                << " - " << TotalBackedgeMass << ")\n"
    371                << " - scale = " << Loop.Scale << "\n");
    372 }
    373 
    374 /// \brief Package up a loop.
    375 void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
    376   DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
    377 
    378   // Clear the subloop exits to prevent quadratic memory usage.
    379   for (const BlockNode &M : Loop.Nodes) {
    380     if (auto *Loop = Working[M.Index].getPackagedLoop())
    381       Loop->Exits.clear();
    382     DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
    383   }
    384   Loop.IsPackaged = true;
    385 }
    386 
    387 #ifndef NDEBUG
    388 static void debugAssign(const BlockFrequencyInfoImplBase &BFI,
    389                         const DitheringDistributer &D, const BlockNode &T,
    390                         const BlockMass &M, const char *Desc) {
    391   dbgs() << "  => assign " << M << " (" << D.RemMass << ")";
    392   if (Desc)
    393     dbgs() << " [" << Desc << "]";
    394   if (T.isValid())
    395     dbgs() << " to " << BFI.getBlockName(T);
    396   dbgs() << "\n";
    397 }
    398 #endif
    399 
    400 void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
    401                                                 LoopData *OuterLoop,
    402                                                 Distribution &Dist) {
    403   BlockMass Mass = Working[Source.Index].getMass();
    404   DEBUG(dbgs() << "  => mass:  " << Mass << "\n");
    405 
    406   // Distribute mass to successors as laid out in Dist.
    407   DitheringDistributer D(Dist, Mass);
    408 
    409   for (const Weight &W : Dist.Weights) {
    410     // Check for a local edge (non-backedge and non-exit).
    411     BlockMass Taken = D.takeMass(W.Amount);
    412     if (W.Type == Weight::Local) {
    413       Working[W.TargetNode.Index].getMass() += Taken;
    414       DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
    415       continue;
    416     }
    417 
    418     // Backedges and exits only make sense if we're processing a loop.
    419     assert(OuterLoop && "backedge or exit outside of loop");
    420 
    421     // Check for a backedge.
    422     if (W.Type == Weight::Backedge) {
    423       OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
    424       DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
    425       continue;
    426     }
    427 
    428     // This must be an exit.
    429     assert(W.Type == Weight::Exit);
    430     OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
    431     DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
    432   }
    433 }
    434 
    435 static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
    436                                      const Scaled64 &Min, const Scaled64 &Max) {
    437   // Scale the Factor to a size that creates integers.  Ideally, integers would
    438   // be scaled so that Max == UINT64_MAX so that they can be best
    439   // differentiated.  However, in the presence of large frequency values, small
    440   // frequencies are scaled down to 1, making it impossible to differentiate
    441   // small, unequal numbers. When the spread between Min and Max frequencies
    442   // fits well within MaxBits, we make the scale be at least 8.
    443   const unsigned MaxBits = 64;
    444   const unsigned SpreadBits = (Max / Min).lg();
    445   Scaled64 ScalingFactor;
    446   if (SpreadBits <= MaxBits - 3) {
    447     // If the values are small enough, make the scaling factor at least 8 to
    448     // allow distinguishing small values.
    449     ScalingFactor = Min.inverse();
    450     ScalingFactor <<= 3;
    451   } else {
    452     // If the values need more than MaxBits to be represented, saturate small
    453     // frequency values down to 1 by using a scaling factor that benefits large
    454     // frequency values.
    455     ScalingFactor = Scaled64(1, MaxBits) / Max;
    456   }
    457 
    458   // Translate the floats to integers.
    459   DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
    460                << ", factor = " << ScalingFactor << "\n");
    461   for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
    462     Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
    463     BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
    464     DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
    465                  << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
    466                  << ", int = " << BFI.Freqs[Index].Integer << "\n");
    467   }
    468 }
    469 
    470 /// \brief Unwrap a loop package.
    471 ///
    472 /// Visits all the members of a loop, adjusting their BlockData according to
    473 /// the loop's pseudo-node.
    474 static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
    475   DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
    476                << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
    477                << "\n");
    478   Loop.Scale *= Loop.Mass.toScaled();
    479   Loop.IsPackaged = false;
    480   DEBUG(dbgs() << "  => combined-scale = " << Loop.Scale << "\n");
    481 
    482   // Propagate the head scale through the loop.  Since members are visited in
    483   // RPO, the head scale will be updated by the loop scale first, and then the
    484   // final head scale will be used for updated the rest of the members.
    485   for (const BlockNode &N : Loop.Nodes) {
    486     const auto &Working = BFI.Working[N.Index];
    487     Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
    488                                        : BFI.Freqs[N.Index].Scaled;
    489     Scaled64 New = Loop.Scale * F;
    490     DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
    491                  << "\n");
    492     F = New;
    493   }
    494 }
    495 
    496 void BlockFrequencyInfoImplBase::unwrapLoops() {
    497   // Set initial frequencies from loop-local masses.
    498   for (size_t Index = 0; Index < Working.size(); ++Index)
    499     Freqs[Index].Scaled = Working[Index].Mass.toScaled();
    500 
    501   for (LoopData &Loop : Loops)
    502     unwrapLoop(*this, Loop);
    503 }
    504 
    505 void BlockFrequencyInfoImplBase::finalizeMetrics() {
    506   // Unwrap loop packages in reverse post-order, tracking min and max
    507   // frequencies.
    508   auto Min = Scaled64::getLargest();
    509   auto Max = Scaled64::getZero();
    510   for (size_t Index = 0; Index < Working.size(); ++Index) {
    511     // Update min/max scale.
    512     Min = std::min(Min, Freqs[Index].Scaled);
    513     Max = std::max(Max, Freqs[Index].Scaled);
    514   }
    515 
    516   // Convert to integers.
    517   convertFloatingToInteger(*this, Min, Max);
    518 
    519   // Clean up data structures.
    520   cleanup(*this);
    521 
    522   // Print out the final stats.
    523   DEBUG(dump());
    524 }
    525 
    526 BlockFrequency
    527 BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
    528   if (!Node.isValid())
    529     return 0;
    530   return Freqs[Node.Index].Integer;
    531 }
    532 
    533 Optional<uint64_t>
    534 BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F,
    535                                                  const BlockNode &Node) const {
    536   auto EntryCount = F.getEntryCount();
    537   if (!EntryCount)
    538     return None;
    539   // Use 128 bit APInt to do the arithmetic to avoid overflow.
    540   APInt BlockCount(128, EntryCount.getValue());
    541   APInt BlockFreq(128, getBlockFreq(Node).getFrequency());
    542   APInt EntryFreq(128, getEntryFreq());
    543   BlockCount *= BlockFreq;
    544   BlockCount = BlockCount.udiv(EntryFreq);
    545   return BlockCount.getLimitedValue();
    546 }
    547 
    548 Scaled64
    549 BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
    550   if (!Node.isValid())
    551     return Scaled64::getZero();
    552   return Freqs[Node.Index].Scaled;
    553 }
    554 
    555 void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node,
    556                                               uint64_t Freq) {
    557   assert(Node.isValid() && "Expected valid node");
    558   assert(Node.Index < Freqs.size() && "Expected legal index");
    559   Freqs[Node.Index].Integer = Freq;
    560 }
    561 
    562 std::string
    563 BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
    564   return std::string();
    565 }
    566 
    567 std::string
    568 BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
    569   return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
    570 }
    571 
    572 raw_ostream &
    573 BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
    574                                            const BlockNode &Node) const {
    575   return OS << getFloatingBlockFreq(Node);
    576 }
    577 
    578 raw_ostream &
    579 BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
    580                                            const BlockFrequency &Freq) const {
    581   Scaled64 Block(Freq.getFrequency(), 0);
    582   Scaled64 Entry(getEntryFreq(), 0);
    583 
    584   return OS << Block / Entry;
    585 }
    586 
    587 void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
    588   Start = OuterLoop.getHeader();
    589   Nodes.reserve(OuterLoop.Nodes.size());
    590   for (auto N : OuterLoop.Nodes)
    591     addNode(N);
    592   indexNodes();
    593 }
    594 
    595 void IrreducibleGraph::addNodesInFunction() {
    596   Start = 0;
    597   for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
    598     if (!BFI.Working[Index].isPackaged())
    599       addNode(Index);
    600   indexNodes();
    601 }
    602 
    603 void IrreducibleGraph::indexNodes() {
    604   for (auto &I : Nodes)
    605     Lookup[I.Node.Index] = &I;
    606 }
    607 
    608 void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
    609                                const BFIBase::LoopData *OuterLoop) {
    610   if (OuterLoop && OuterLoop->isHeader(Succ))
    611     return;
    612   auto L = Lookup.find(Succ.Index);
    613   if (L == Lookup.end())
    614     return;
    615   IrrNode &SuccIrr = *L->second;
    616   Irr.Edges.push_back(&SuccIrr);
    617   SuccIrr.Edges.push_front(&Irr);
    618   ++SuccIrr.NumIn;
    619 }
    620 
    621 namespace llvm {
    622 template <> struct GraphTraits<IrreducibleGraph> {
    623   typedef bfi_detail::IrreducibleGraph GraphT;
    624 
    625   typedef const GraphT::IrrNode NodeType;
    626   typedef GraphT::IrrNode::iterator ChildIteratorType;
    627 
    628   static const NodeType *getEntryNode(const GraphT &G) {
    629     return G.StartIrr;
    630   }
    631   static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); }
    632   static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); }
    633 };
    634 } // end namespace llvm
    635 
    636 /// \brief Find extra irreducible headers.
    637 ///
    638 /// Find entry blocks and other blocks with backedges, which exist when \c G
    639 /// contains irreducible sub-SCCs.
    640 static void findIrreducibleHeaders(
    641     const BlockFrequencyInfoImplBase &BFI,
    642     const IrreducibleGraph &G,
    643     const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
    644     LoopData::NodeList &Headers, LoopData::NodeList &Others) {
    645   // Map from nodes in the SCC to whether it's an entry block.
    646   SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC;
    647 
    648   // InSCC also acts the set of nodes in the graph.  Seed it.
    649   for (const auto *I : SCC)
    650     InSCC[I] = false;
    651 
    652   for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
    653     auto &Irr = *I->first;
    654     for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
    655       if (InSCC.count(P))
    656         continue;
    657 
    658       // This is an entry block.
    659       I->second = true;
    660       Headers.push_back(Irr.Node);
    661       DEBUG(dbgs() << "  => entry = " << BFI.getBlockName(Irr.Node) << "\n");
    662       break;
    663     }
    664   }
    665   assert(Headers.size() >= 2 &&
    666          "Expected irreducible CFG; -loop-info is likely invalid");
    667   if (Headers.size() == InSCC.size()) {
    668     // Every block is a header.
    669     std::sort(Headers.begin(), Headers.end());
    670     return;
    671   }
    672 
    673   // Look for extra headers from irreducible sub-SCCs.
    674   for (const auto &I : InSCC) {
    675     // Entry blocks are already headers.
    676     if (I.second)
    677       continue;
    678 
    679     auto &Irr = *I.first;
    680     for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
    681       // Skip forward edges.
    682       if (P->Node < Irr.Node)
    683         continue;
    684 
    685       // Skip predecessors from entry blocks.  These can have inverted
    686       // ordering.
    687       if (InSCC.lookup(P))
    688         continue;
    689 
    690       // Store the extra header.
    691       Headers.push_back(Irr.Node);
    692       DEBUG(dbgs() << "  => extra = " << BFI.getBlockName(Irr.Node) << "\n");
    693       break;
    694     }
    695     if (Headers.back() == Irr.Node)
    696       // Added this as a header.
    697       continue;
    698 
    699     // This is not a header.
    700     Others.push_back(Irr.Node);
    701     DEBUG(dbgs() << "  => other = " << BFI.getBlockName(Irr.Node) << "\n");
    702   }
    703   std::sort(Headers.begin(), Headers.end());
    704   std::sort(Others.begin(), Others.end());
    705 }
    706 
    707 static void createIrreducibleLoop(
    708     BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G,
    709     LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
    710     const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
    711   // Translate the SCC into RPO.
    712   DEBUG(dbgs() << " - found-scc\n");
    713 
    714   LoopData::NodeList Headers;
    715   LoopData::NodeList Others;
    716   findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
    717 
    718   auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
    719                                 Headers.end(), Others.begin(), Others.end());
    720 
    721   // Update loop hierarchy.
    722   for (const auto &N : Loop->Nodes)
    723     if (BFI.Working[N.Index].isLoopHeader())
    724       BFI.Working[N.Index].Loop->Parent = &*Loop;
    725     else
    726       BFI.Working[N.Index].Loop = &*Loop;
    727 }
    728 
    729 iterator_range<std::list<LoopData>::iterator>
    730 BlockFrequencyInfoImplBase::analyzeIrreducible(
    731     const IrreducibleGraph &G, LoopData *OuterLoop,
    732     std::list<LoopData>::iterator Insert) {
    733   assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
    734   auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
    735 
    736   for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
    737     if (I->size() < 2)
    738       continue;
    739 
    740     // Translate the SCC into RPO.
    741     createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
    742   }
    743 
    744   if (OuterLoop)
    745     return make_range(std::next(Prev), Insert);
    746   return make_range(Loops.begin(), Insert);
    747 }
    748 
    749 void
    750 BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
    751   OuterLoop.Exits.clear();
    752   for (auto &Mass : OuterLoop.BackedgeMass)
    753     Mass = BlockMass::getEmpty();
    754   auto O = OuterLoop.Nodes.begin() + 1;
    755   for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
    756     if (!Working[I->Index].isPackaged())
    757       *O++ = *I;
    758   OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
    759 }
    760 
    761 void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
    762   assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");
    763 
    764   // Since the loop has more than one header block, the mass flowing back into
    765   // each header will be different. Adjust the mass in each header loop to
    766   // reflect the masses flowing through back edges.
    767   //
    768   // To do this, we distribute the initial mass using the backedge masses
    769   // as weights for the distribution.
    770   BlockMass LoopMass = BlockMass::getFull();
    771   Distribution Dist;
    772 
    773   DEBUG(dbgs() << "adjust-loop-header-mass:\n");
    774   for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
    775     auto &HeaderNode = Loop.Nodes[H];
    776     auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
    777     DEBUG(dbgs() << " - Add back edge mass for node "
    778                  << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n");
    779     if (BackedgeMass.getMass() > 0)
    780       Dist.addLocal(HeaderNode, BackedgeMass.getMass());
    781     else
    782       DEBUG(dbgs() << "   Nothing added. Back edge mass is zero\n");
    783   }
    784 
    785   DitheringDistributer D(Dist, LoopMass);
    786 
    787   DEBUG(dbgs() << " Distribute loop mass " << LoopMass
    788                << " to headers using above weights\n");
    789   for (const Weight &W : Dist.Weights) {
    790     BlockMass Taken = D.takeMass(W.Amount);
    791     assert(W.Type == Weight::Local && "all weights should be local");
    792     Working[W.TargetNode.Index].getMass() = Taken;
    793     DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
    794   }
    795 }
    796