Home | History | Annotate | Download | only in WebAssembly
      1 //===-- Relooper.cpp - Top-level interface for WebAssembly  ----*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===---------------------------------------------------------------------===//
      9 ///
     10 /// \file
     11 /// \brief This implements the Relooper algorithm. This implementation includes
     12 /// optimizations added since the original academic paper [1] was published.
     13 ///
     14 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
     15 /// Proceedings of the ACM international conference companion on Object
     16 /// oriented programming systems languages and applications companion
     17 /// (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
     18 /// http://doi.acm.org/10.1145/2048147.2048224
     19 ///
     20 //===-------------------------------------------------------------------===//
     21 
     22 #include "Relooper.h"
     23 #include "WebAssembly.h"
     24 
     25 #include "llvm/ADT/STLExtras.h"
     26 #include "llvm/IR/CFG.h"
     27 #include "llvm/IR/Function.h"
     28 #include "llvm/Pass.h"
     29 #include "llvm/Support/CommandLine.h"
     30 #include "llvm/Support/Debug.h"
     31 #include "llvm/Support/raw_ostream.h"
     32 
     33 #include <cstring>
     34 #include <cstdlib>
     35 #include <functional>
     36 #include <list>
     37 #include <stack>
     38 #include <string>
     39 
     40 #define DEBUG_TYPE "relooper"
     41 
     42 using namespace llvm;
     43 using namespace Relooper;
     44 
     45 static cl::opt<int> RelooperSplittingFactor(
     46     "relooper-splitting-factor",
     47     cl::desc(
     48         "How much to discount code size when deciding whether to split a node"),
     49     cl::init(5));
     50 
     51 static cl::opt<unsigned> RelooperMultipleSwitchThreshold(
     52     "relooper-multiple-switch-threshold",
     53     cl::desc(
     54         "How many entries to allow in a multiple before we use a switch"),
     55     cl::init(10));
     56 
     57 static cl::opt<unsigned> RelooperNestingLimit(
     58     "relooper-nesting-limit",
     59     cl::desc(
     60         "How much nesting is acceptable"),
     61     cl::init(20));
     62 
     63 
     64 namespace {
     65 ///
     66 /// Implements the relooper algorithm for a function's blocks.
     67 ///
     68 /// Implementation details: The Relooper instance has
     69 /// ownership of the blocks and shapes, and frees them when done.
     70 ///
     71 struct RelooperAlgorithm {
     72   std::deque<Block *> Blocks;
     73   std::deque<Shape *> Shapes;
     74   Shape *Root;
     75   bool MinSize;
     76   int BlockIdCounter;
     77   int ShapeIdCounter;
     78 
     79   RelooperAlgorithm();
     80   ~RelooperAlgorithm();
     81 
     82   void AddBlock(Block *New, int Id = -1);
     83 
     84   // Calculates the shapes
     85   void Calculate(Block *Entry);
     86 
     87   // Sets us to try to minimize size
     88   void SetMinSize(bool MinSize_) { MinSize = MinSize_; }
     89 };
     90 
     91 struct RelooperAnalysis final : public FunctionPass {
     92   static char ID;
     93   RelooperAnalysis() : FunctionPass(ID) {}
     94   const char *getPassName() const override { return "relooper"; }
     95   void getAnalysisUsage(AnalysisUsage &AU) const override {
     96     AU.setPreservesAll();
     97   }
     98   bool runOnFunction(Function &F) override;
     99 };
    100 }
    101 
    102 // RelooperAnalysis
    103 
    104 char RelooperAnalysis::ID = 0;
    105 FunctionPass *llvm::createWebAssemblyRelooper() {
    106   return new RelooperAnalysis();
    107 }
    108 
    109 bool RelooperAnalysis::runOnFunction(Function &F) {
    110   DEBUG(dbgs() << "Relooping function '" << F.getName() << "'\n");
    111   RelooperAlgorithm R;
    112   // FIXME: remove duplication between relooper's and LLVM's BBs.
    113   std::map<const BasicBlock *, Block *> BB2B;
    114   std::map<const Block *, const BasicBlock *> B2BB;
    115   for (const BasicBlock &BB : F) {
    116     // FIXME: getName is wrong here, Code is meant to represent amount of code.
    117     // FIXME: use BranchVarInit for switch.
    118     Block *B = new Block(BB.getName().str().data(), /*BranchVarInit=*/nullptr);
    119     R.AddBlock(B);
    120     assert(BB2B.find(&BB) == BB2B.end() && "Inserting the same block twice");
    121     assert(B2BB.find(B) == B2BB.end() && "Inserting the same block twice");
    122     BB2B[&BB] = B;
    123     B2BB[B] = &BB;
    124   }
    125   for (Block *B : R.Blocks) {
    126     const BasicBlock *BB = B2BB[B];
    127     for (const BasicBlock *Successor : successors(BB))
    128       // FIXME: add branch's Condition and Code below.
    129       B->AddBranchTo(BB2B[Successor], /*Condition=*/nullptr, /*Code=*/nullptr);
    130   }
    131   R.Calculate(BB2B[&F.getEntryBlock()]);
    132   return false; // Analysis passes don't modify anything.
    133 }
    134 
    135 // Helpers
    136 
    137 typedef MapVector<Block *, BlockSet> BlockBlockSetMap;
    138 typedef std::list<Block *> BlockList;
    139 
    140 template <class T, class U>
    141 static bool contains(const T &container, const U &contained) {
    142   return container.count(contained);
    143 }
    144 
    145 
    146 // Branch
    147 
    148 Branch::Branch(const char *ConditionInit, const char *CodeInit)
    149     : Ancestor(nullptr), Labeled(true) {
    150   // FIXME: move from char* to LLVM data structures
    151   Condition = ConditionInit ? strdup(ConditionInit) : nullptr;
    152   Code = CodeInit ? strdup(CodeInit) : nullptr;
    153 }
    154 
    155 Branch::~Branch() {
    156   // FIXME: move from char* to LLVM data structures
    157   free(static_cast<void *>(const_cast<char *>(Condition)));
    158   free(static_cast<void *>(const_cast<char *>(Code)));
    159 }
    160 
    161 // Block
    162 
    163 Block::Block(const char *CodeInit, const char *BranchVarInit)
    164     : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) {
    165   // FIXME: move from char* to LLVM data structures
    166   Code = strdup(CodeInit);
    167   BranchVar = BranchVarInit ? strdup(BranchVarInit) : nullptr;
    168 }
    169 
    170 Block::~Block() {
    171   // FIXME: move from char* to LLVM data structures
    172   free(static_cast<void *>(const_cast<char *>(Code)));
    173   free(static_cast<void *>(const_cast<char *>(BranchVar)));
    174 }
    175 
    176 void Block::AddBranchTo(Block *Target, const char *Condition,
    177                         const char *Code) {
    178   assert(!contains(BranchesOut, Target) &&
    179          "cannot add more than one branch to the same target");
    180   BranchesOut[Target] = make_unique<Branch>(Condition, Code);
    181 }
    182 
    183 // Relooper
    184 
    185 RelooperAlgorithm::RelooperAlgorithm()
    186     : Root(nullptr), MinSize(false), BlockIdCounter(1),
    187       ShapeIdCounter(0) { // block ID 0 is reserved for clearings
    188 }
    189 
    190 RelooperAlgorithm::~RelooperAlgorithm() {
    191   for (auto Curr : Blocks)
    192     delete Curr;
    193   for (auto Curr : Shapes)
    194     delete Curr;
    195 }
    196 
    197 void RelooperAlgorithm::AddBlock(Block *New, int Id) {
    198   New->Id = Id == -1 ? BlockIdCounter++ : Id;
    199   Blocks.push_back(New);
    200 }
    201 
    202 struct RelooperRecursor {
    203   RelooperAlgorithm *Parent;
    204   RelooperRecursor(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {}
    205 };
    206 
    207 void RelooperAlgorithm::Calculate(Block *Entry) {
    208   // Scan and optimize the input
    209   struct PreOptimizer : public RelooperRecursor {
    210     PreOptimizer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {}
    211     BlockSet Live;
    212 
    213     void FindLive(Block *Root) {
    214       BlockList ToInvestigate;
    215       ToInvestigate.push_back(Root);
    216       while (!ToInvestigate.empty()) {
    217         Block *Curr = ToInvestigate.front();
    218         ToInvestigate.pop_front();
    219         if (contains(Live, Curr))
    220           continue;
    221         Live.insert(Curr);
    222         for (const auto &iter : Curr->BranchesOut)
    223           ToInvestigate.push_back(iter.first);
    224       }
    225     }
    226 
    227     // If a block has multiple entries but no exits, and it is small enough, it
    228     // is useful to split it. A common example is a C++ function where
    229     // everything ends up at a final exit block and does some RAII cleanup.
    230     // Without splitting, we will be forced to introduce labelled loops to
    231     // allow reaching the final block
    232     void SplitDeadEnds() {
    233       unsigned TotalCodeSize = 0;
    234       for (const auto &Curr : Live) {
    235         TotalCodeSize += strlen(Curr->Code);
    236       }
    237       BlockSet Splits;
    238       BlockSet Removed;
    239       for (const auto &Original : Live) {
    240         if (Original->BranchesIn.size() <= 1 ||
    241             !Original->BranchesOut.empty())
    242           continue; // only dead ends, for now
    243         if (contains(Original->BranchesOut, Original))
    244           continue; // cannot split a looping node
    245         if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) >
    246             TotalCodeSize / RelooperSplittingFactor)
    247           continue; // if splitting increases raw code size by a significant
    248                     // amount, abort
    249         // Split the node (for simplicity, we replace all the blocks, even
    250         // though we could have reused the original)
    251         DEBUG(dbgs() << "  Splitting '" << Original->Code << "'\n");
    252         for (const auto &Prior : Original->BranchesIn) {
    253           Block *Split = new Block(Original->Code, Original->BranchVar);
    254           Parent->AddBlock(Split, Original->Id);
    255           Split->BranchesIn.insert(Prior);
    256           std::unique_ptr<Branch> Details;
    257           Details.swap(Prior->BranchesOut[Original]);
    258           Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition,
    259                                                           Details->Code);
    260           for (const auto &iter : Original->BranchesOut) {
    261             Block *Post = iter.first;
    262             Branch *Details = iter.second.get();
    263             Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition,
    264                                                            Details->Code);
    265             Post->BranchesIn.insert(Split);
    266           }
    267           Splits.insert(Split);
    268           Removed.insert(Original);
    269         }
    270         for (const auto &iter : Original->BranchesOut) {
    271           Block *Post = iter.first;
    272           Post->BranchesIn.remove(Original);
    273         }
    274       }
    275       for (const auto &iter : Splits)
    276         Live.insert(iter);
    277       for (const auto &iter : Removed)
    278         Live.remove(iter);
    279     }
    280   };
    281   PreOptimizer Pre(this);
    282   Pre.FindLive(Entry);
    283 
    284   // Add incoming branches from live blocks, ignoring dead code
    285   for (unsigned i = 0; i < Blocks.size(); i++) {
    286     Block *Curr = Blocks[i];
    287     if (!contains(Pre.Live, Curr))
    288       continue;
    289     for (const auto &iter : Curr->BranchesOut)
    290       iter.first->BranchesIn.insert(Curr);
    291   }
    292 
    293   if (!MinSize)
    294     Pre.SplitDeadEnds();
    295 
    296   // Recursively process the graph
    297 
    298   struct Analyzer : public RelooperRecursor {
    299     Analyzer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {}
    300 
    301     // Add a shape to the list of shapes in this Relooper calculation
    302     void Notice(Shape *New) {
    303       New->Id = Parent->ShapeIdCounter++;
    304       Parent->Shapes.push_back(New);
    305     }
    306 
    307     // Create a list of entries from a block. If LimitTo is provided, only
    308     // results in that set will appear
    309     void GetBlocksOut(Block *Source, BlockSet &Entries,
    310                       BlockSet *LimitTo = nullptr) {
    311       for (const auto &iter : Source->BranchesOut)
    312         if (!LimitTo || contains(*LimitTo, iter.first))
    313           Entries.insert(iter.first);
    314     }
    315 
    316     // Converts/processes all branchings to a specific target
    317     void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor,
    318                    BlockSet &From) {
    319       DEBUG(dbgs() << "  Solipsize '" << Target->Code << "' type " << Type
    320                    << "\n");
    321       for (auto iter = Target->BranchesIn.begin();
    322            iter != Target->BranchesIn.end();) {
    323         Block *Prior = *iter;
    324         if (!contains(From, Prior)) {
    325           iter++;
    326           continue;
    327         }
    328         std::unique_ptr<Branch> PriorOut;
    329         PriorOut.swap(Prior->BranchesOut[Target]);
    330         PriorOut->Ancestor = Ancestor;
    331         PriorOut->Type = Type;
    332         if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor))
    333           Multiple->Breaks++; // We are breaking out of this Multiple, so need a
    334                               // loop
    335         iter++; // carefully increment iter before erasing
    336         Target->BranchesIn.remove(Prior);
    337         Target->ProcessedBranchesIn.insert(Prior);
    338         Prior->ProcessedBranchesOut[Target].swap(PriorOut);
    339       }
    340     }
    341 
    342     Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) {
    343       DEBUG(dbgs() << "  MakeSimple inner block '" << Inner->Code << "'\n");
    344       SimpleShape *Simple = new SimpleShape;
    345       Notice(Simple);
    346       Simple->Inner = Inner;
    347       Inner->Parent = Simple;
    348       if (Blocks.size() > 1) {
    349         Blocks.remove(Inner);
    350         GetBlocksOut(Inner, NextEntries, &Blocks);
    351         BlockSet JustInner;
    352         JustInner.insert(Inner);
    353         for (const auto &iter : NextEntries)
    354           Solipsize(iter, Branch::Direct, Simple, JustInner);
    355       }
    356       return Simple;
    357     }
    358 
    359     Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries,
    360                     BlockSet &NextEntries) {
    361       // Find the inner blocks in this loop. Proceed backwards from the entries
    362       // until
    363       // you reach a seen block, collecting as you go.
    364       BlockSet InnerBlocks;
    365       BlockSet Queue = Entries;
    366       while (!Queue.empty()) {
    367         Block *Curr = *(Queue.begin());
    368         Queue.remove(*Queue.begin());
    369         if (!contains(InnerBlocks, Curr)) {
    370           // This element is new, mark it as inner and remove from outer
    371           InnerBlocks.insert(Curr);
    372           Blocks.remove(Curr);
    373           // Add the elements prior to it
    374           for (const auto &iter : Curr->BranchesIn)
    375             Queue.insert(iter);
    376         }
    377       }
    378       assert(!InnerBlocks.empty());
    379 
    380       for (const auto &Curr : InnerBlocks) {
    381         for (const auto &iter : Curr->BranchesOut) {
    382           Block *Possible = iter.first;
    383           if (!contains(InnerBlocks, Possible))
    384             NextEntries.insert(Possible);
    385         }
    386       }
    387 
    388       LoopShape *Loop = new LoopShape();
    389       Notice(Loop);
    390 
    391       // Solipsize the loop, replacing with break/continue and marking branches
    392       // as Processed (will not affect later calculations)
    393       // A. Branches to the loop entries become a continue to this shape
    394       for (const auto &iter : Entries)
    395         Solipsize(iter, Branch::Continue, Loop, InnerBlocks);
    396       // B. Branches to outside the loop (a next entry) become breaks on this
    397       // shape
    398       for (const auto &iter : NextEntries)
    399         Solipsize(iter, Branch::Break, Loop, InnerBlocks);
    400       // Finish up
    401       Shape *Inner = Process(InnerBlocks, Entries, nullptr);
    402       Loop->Inner = Inner;
    403       return Loop;
    404     }
    405 
    406     // For each entry, find the independent group reachable by it. The
    407     // independent group is the entry itself, plus all the blocks it can
    408     // reach that cannot be directly reached by another entry. Note that we
    409     // ignore directly reaching the entry itself by another entry.
    410     //   @param Ignore - previous blocks that are irrelevant
    411     void FindIndependentGroups(BlockSet &Entries,
    412                                BlockBlockSetMap &IndependentGroups,
    413                                BlockSet *Ignore = nullptr) {
    414       typedef std::map<Block *, Block *> BlockBlockMap;
    415 
    416       struct HelperClass {
    417         BlockBlockSetMap &IndependentGroups;
    418         BlockBlockMap Ownership; // For each block, which entry it belongs to.
    419                                  // We have reached it from there.
    420 
    421         HelperClass(BlockBlockSetMap &IndependentGroupsInit)
    422             : IndependentGroups(IndependentGroupsInit) {}
    423         void InvalidateWithChildren(Block *New) {
    424           // Being in the list means you need to be invalidated
    425           BlockList ToInvalidate;
    426           ToInvalidate.push_back(New);
    427           while (!ToInvalidate.empty()) {
    428             Block *Invalidatee = ToInvalidate.front();
    429             ToInvalidate.pop_front();
    430             Block *Owner = Ownership[Invalidatee];
    431             // Owner may have been invalidated, do not add to
    432             // IndependentGroups!
    433             if (contains(IndependentGroups, Owner))
    434               IndependentGroups[Owner].remove(Invalidatee);
    435             if (Ownership[Invalidatee]) { // may have been seen before and
    436                                           // invalidated already
    437               Ownership[Invalidatee] = nullptr;
    438               for (const auto &iter : Invalidatee->BranchesOut) {
    439                 Block *Target = iter.first;
    440                 BlockBlockMap::iterator Known = Ownership.find(Target);
    441                 if (Known != Ownership.end()) {
    442                   Block *TargetOwner = Known->second;
    443                   if (TargetOwner)
    444                     ToInvalidate.push_back(Target);
    445                 }
    446               }
    447             }
    448           }
    449         }
    450       };
    451       HelperClass Helper(IndependentGroups);
    452 
    453       // We flow out from each of the entries, simultaneously.
    454       // When we reach a new block, we add it as belonging to the one we got to
    455       // it from.
    456       // If we reach a new block that is already marked as belonging to someone,
    457       // it is reachable by two entries and is not valid for any of them.
    458       // Remove it and all it can reach that have been visited.
    459 
    460       // Being in the queue means we just added this item, and
    461       // we need to add its children
    462       BlockList Queue;
    463       for (const auto &Entry : Entries) {
    464         Helper.Ownership[Entry] = Entry;
    465         IndependentGroups[Entry].insert(Entry);
    466         Queue.push_back(Entry);
    467       }
    468       while (!Queue.empty()) {
    469         Block *Curr = Queue.front();
    470         Queue.pop_front();
    471         Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership
    472                                                // map if we are in the queue
    473         if (!Owner)
    474           continue; // we have been invalidated meanwhile after being reached
    475                     // from two entries
    476         // Add all children
    477         for (const auto &iter : Curr->BranchesOut) {
    478           Block *New = iter.first;
    479           BlockBlockMap::iterator Known = Helper.Ownership.find(New);
    480           if (Known == Helper.Ownership.end()) {
    481             // New node. Add it, and put it in the queue
    482             Helper.Ownership[New] = Owner;
    483             IndependentGroups[Owner].insert(New);
    484             Queue.push_back(New);
    485             continue;
    486           }
    487           Block *NewOwner = Known->second;
    488           if (!NewOwner)
    489             continue; // We reached an invalidated node
    490           if (NewOwner != Owner)
    491             // Invalidate this and all reachable that we have seen - we reached
    492             // this from two locations
    493             Helper.InvalidateWithChildren(New);
    494           // otherwise, we have the same owner, so do nothing
    495         }
    496       }
    497 
    498       // Having processed all the interesting blocks, we remain with just one
    499       // potential issue:
    500       // If a->b, and a was invalidated, but then b was later reached by
    501       // someone else, we must invalidate b. To check for this, we go over all
    502       // elements in the independent groups, if an element has a parent which
    503       // does *not* have the same owner, we/ must remove it and all its
    504       // children.
    505 
    506       for (const auto &iter : Entries) {
    507         BlockSet &CurrGroup = IndependentGroups[iter];
    508         BlockList ToInvalidate;
    509         for (const auto &iter : CurrGroup) {
    510           Block *Child = iter;
    511           for (const auto &iter : Child->BranchesIn) {
    512             Block *Parent = iter;
    513             if (Ignore && contains(*Ignore, Parent))
    514               continue;
    515             if (Helper.Ownership[Parent] != Helper.Ownership[Child])
    516               ToInvalidate.push_back(Child);
    517           }
    518         }
    519         while (!ToInvalidate.empty()) {
    520           Block *Invalidatee = ToInvalidate.front();
    521           ToInvalidate.pop_front();
    522           Helper.InvalidateWithChildren(Invalidatee);
    523         }
    524       }
    525 
    526       // Remove empty groups
    527       for (const auto &iter : Entries)
    528         if (IndependentGroups[iter].empty())
    529           IndependentGroups.erase(iter);
    530     }
    531 
    532     Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries,
    533                         BlockBlockSetMap &IndependentGroups, Shape *Prev,
    534                         BlockSet &NextEntries) {
    535       bool Fused = isa<SimpleShape>(Prev);
    536       MultipleShape *Multiple = new MultipleShape();
    537       Notice(Multiple);
    538       BlockSet CurrEntries;
    539       for (auto &iter : IndependentGroups) {
    540         Block *CurrEntry = iter.first;
    541         BlockSet &CurrBlocks = iter.second;
    542         // Create inner block
    543         CurrEntries.clear();
    544         CurrEntries.insert(CurrEntry);
    545         for (const auto &CurrInner : CurrBlocks) {
    546           // Remove the block from the remaining blocks
    547           Blocks.remove(CurrInner);
    548           // Find new next entries and fix branches to them
    549           for (auto iter = CurrInner->BranchesOut.begin();
    550                iter != CurrInner->BranchesOut.end();) {
    551             Block *CurrTarget = iter->first;
    552             auto Next = iter;
    553             Next++;
    554             if (!contains(CurrBlocks, CurrTarget)) {
    555               NextEntries.insert(CurrTarget);
    556               Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks);
    557             }
    558             iter = Next; // increment carefully because Solipsize can remove us
    559           }
    560         }
    561         Multiple->InnerMap[CurrEntry->Id] =
    562             Process(CurrBlocks, CurrEntries, nullptr);
    563         // If we are not fused, then our entries will actually be checked
    564         if (!Fused)
    565           CurrEntry->IsCheckedMultipleEntry = true;
    566       }
    567       // Add entries not handled as next entries, they are deferred
    568       for (const auto &Entry : Entries)
    569         if (!contains(IndependentGroups, Entry))
    570           NextEntries.insert(Entry);
    571       // The multiple has been created, we can decide how to implement it
    572       if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) {
    573         Multiple->UseSwitch = true;
    574         Multiple->Breaks++; // switch captures breaks
    575       }
    576       return Multiple;
    577     }
    578 
    579     // Main function.
    580     // Process a set of blocks with specified entries, returns a shape
    581     // The Make* functions receive a NextEntries. If they fill it with data,
    582     // those are the entries for the ->Next block on them, and the blocks
    583     // are what remains in Blocks (which Make* modify). In this way
    584     // we avoid recursing on Next (imagine a long chain of Simples, if we
    585     // recursed we could blow the stack).
    586     Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) {
    587       BlockSet *Entries = &InitialEntries;
    588       BlockSet TempEntries[2];
    589       int CurrTempIndex = 0;
    590       BlockSet *NextEntries;
    591       Shape *Ret = nullptr;
    592 
    593       auto Make = [&](Shape *Temp) {
    594         if (Prev)
    595           Prev->Next = Temp;
    596         if (!Ret)
    597           Ret = Temp;
    598         Prev = Temp;
    599         Entries = NextEntries;
    600       };
    601 
    602       while (1) {
    603         CurrTempIndex = 1 - CurrTempIndex;
    604         NextEntries = &TempEntries[CurrTempIndex];
    605         NextEntries->clear();
    606 
    607         if (Entries->empty())
    608           return Ret;
    609         if (Entries->size() == 1) {
    610           Block *Curr = *(Entries->begin());
    611           if (Curr->BranchesIn.empty()) {
    612             // One entry, no looping ==> Simple
    613             Make(MakeSimple(Blocks, Curr, *NextEntries));
    614             if (NextEntries->empty())
    615               return Ret;
    616             continue;
    617           }
    618           // One entry, looping ==> Loop
    619           Make(MakeLoop(Blocks, *Entries, *NextEntries));
    620           if (NextEntries->empty())
    621             return Ret;
    622           continue;
    623         }
    624 
    625         // More than one entry, try to eliminate through a Multiple groups of
    626         // independent blocks from an entry/ies. It is important to remove
    627         // through multiples as opposed to looping since the former is more
    628         // performant.
    629         BlockBlockSetMap IndependentGroups;
    630         FindIndependentGroups(*Entries, IndependentGroups);
    631 
    632         if (!IndependentGroups.empty()) {
    633           // We can handle a group in a multiple if its entry cannot be reached
    634           // by another group.
    635           // Note that it might be reachable by itself - a loop. But that is
    636           // fine, we will create a loop inside the multiple block (which
    637           // is the performant order to do it).
    638           for (auto iter = IndependentGroups.begin();
    639                iter != IndependentGroups.end();) {
    640             Block *Entry = iter->first;
    641             BlockSet &Group = iter->second;
    642             auto curr = iter++; // iterate carefully, we may delete
    643             for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin();
    644                  iterBranch != Entry->BranchesIn.end(); iterBranch++) {
    645               Block *Origin = *iterBranch;
    646               if (!contains(Group, Origin)) {
    647                 // Reached from outside the group, so we cannot handle this
    648                 IndependentGroups.erase(curr);
    649                 break;
    650               }
    651             }
    652           }
    653 
    654           // As an optimization, if we have 2 independent groups, and one is a
    655           // small dead end, we can handle only that dead end.
    656           // The other then becomes a Next - without nesting in the code and
    657           // recursion in the analysis.
    658           // TODO: if the larger is the only dead end, handle that too
    659           // TODO: handle >2 groups
    660           // TODO: handle not just dead ends, but also that do not branch to the
    661           // NextEntries. However, must be careful there since we create a
    662           // Next, and that Next can prevent eliminating a break (since we no
    663           // longer naturally reach the same place), which may necessitate a
    664           // one-time loop, which makes the unnesting pointless.
    665           if (IndependentGroups.size() == 2) {
    666             // Find the smaller one
    667             auto iter = IndependentGroups.begin();
    668             Block *SmallEntry = iter->first;
    669             auto SmallSize = iter->second.size();
    670             iter++;
    671             Block *LargeEntry = iter->first;
    672             auto LargeSize = iter->second.size();
    673             if (SmallSize != LargeSize) { // ignore the case where they are
    674                                           // identical - keep things symmetrical
    675                                           // there
    676               if (SmallSize > LargeSize) {
    677                 Block *Temp = SmallEntry;
    678                 SmallEntry = LargeEntry;
    679                 LargeEntry = Temp; // Note: we did not flip the Sizes too, they
    680                                    // are now invalid. TODO: use the smaller
    681                                    // size as a limit?
    682               }
    683               // Check if dead end
    684               bool DeadEnd = true;
    685               BlockSet &SmallGroup = IndependentGroups[SmallEntry];
    686               for (const auto &Curr : SmallGroup) {
    687                 for (const auto &iter : Curr->BranchesOut) {
    688                   Block *Target = iter.first;
    689                   if (!contains(SmallGroup, Target)) {
    690                     DeadEnd = false;
    691                     break;
    692                   }
    693                 }
    694                 if (!DeadEnd)
    695                   break;
    696               }
    697               if (DeadEnd)
    698                 IndependentGroups.erase(LargeEntry);
    699             }
    700           }
    701 
    702           if (!IndependentGroups.empty())
    703             // Some groups removable ==> Multiple
    704             Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev,
    705                               *NextEntries));
    706             if (NextEntries->empty())
    707               return Ret;
    708             continue;
    709         }
    710         // No independent groups, must be loopable ==> Loop
    711         Make(MakeLoop(Blocks, *Entries, *NextEntries));
    712         if (NextEntries->empty())
    713           return Ret;
    714         continue;
    715       }
    716     }
    717   };
    718 
    719   // Main
    720 
    721   BlockSet AllBlocks;
    722   for (const auto &Curr : Pre.Live) {
    723     AllBlocks.insert(Curr);
    724   }
    725 
    726   BlockSet Entries;
    727   Entries.insert(Entry);
    728   Root = Analyzer(this).Process(AllBlocks, Entries, nullptr);
    729   assert(Root);
    730 
    731   ///
    732   /// Relooper post-optimizer
    733   ///
    734   struct PostOptimizer {
    735     RelooperAlgorithm *Parent;
    736     std::stack<Shape *> LoopStack;
    737 
    738     PostOptimizer(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {}
    739 
    740     void ShapeSwitch(Shape* var,
    741                      std::function<void (SimpleShape*)> simple,
    742                      std::function<void (MultipleShape*)> multiple,
    743                      std::function<void (LoopShape*)> loop) {
    744       switch (var->getKind()) {
    745         case Shape::SK_Simple: {
    746           simple(cast<SimpleShape>(var));
    747           break;
    748         }
    749         case Shape::SK_Multiple: {
    750           multiple(cast<MultipleShape>(var));
    751           break;
    752         }
    753         case Shape::SK_Loop: {
    754           loop(cast<LoopShape>(var));
    755           break;
    756         }
    757       }
    758     }
    759 
    760     // Find the blocks that natural control flow can get us directly to, or
    761     // through a multiple that we ignore
    762     void FollowNaturalFlow(Shape *S, BlockSet &Out) {
    763       ShapeSwitch(S, [&](SimpleShape* Simple) {
    764         Out.insert(Simple->Inner);
    765       }, [&](MultipleShape* Multiple) {
    766         for (const auto &iter : Multiple->InnerMap) {
    767           FollowNaturalFlow(iter.second, Out);
    768         }
    769         FollowNaturalFlow(Multiple->Next, Out);
    770       }, [&](LoopShape* Loop) {
    771         FollowNaturalFlow(Loop->Inner, Out);
    772       });
    773     }
    774 
    775     void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) {
    776       if (Root->Next) {
    777         Root->Natural = Root->Next;
    778         FindNaturals(Root->Next, Otherwise);
    779       } else {
    780         Root->Natural = Otherwise;
    781       }
    782 
    783       ShapeSwitch(Root, [](SimpleShape* Simple) {
    784       }, [&](MultipleShape* Multiple) {
    785         for (const auto &iter : Multiple->InnerMap) {
    786           FindNaturals(iter.second, Root->Natural);
    787         }
    788       }, [&](LoopShape* Loop){
    789         FindNaturals(Loop->Inner, Loop->Inner);
    790       });
    791     }
    792 
    793     // Remove unneeded breaks and continues.
    794     // A flow operation is trivially unneeded if the shape we naturally get to
    795     // by normal code execution is the same as the flow forces us to.
    796     void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr,
    797                              LoopShape *LastLoop = nullptr,
    798                              unsigned Depth = 0) {
    799       BlockSet NaturalBlocks;
    800       FollowNaturalFlow(Natural, NaturalBlocks);
    801       Shape *Next = Root;
    802       while (Next) {
    803         Root = Next;
    804         Next = nullptr;
    805         ShapeSwitch(
    806             Root,
    807             [&](SimpleShape* Simple) {
    808               if (Simple->Inner->BranchVar)
    809                 LastLoop =
    810                     nullptr; // a switch clears out the loop (TODO: only for
    811                              // breaks, not continue)
    812 
    813               if (Simple->Next) {
    814                 if (!Simple->Inner->BranchVar &&
    815                     Simple->Inner->ProcessedBranchesOut.size() == 2 &&
    816                     Depth < RelooperNestingLimit) {
    817                   // If there is a next block, we already know at Simple
    818                   // creation time to make direct branches, and we can do
    819                   // nothing more in general. But, we try to optimize the
    820                   // case of a break and a direct: This would normally be
    821                   //   if (break?) { break; } ..
    822                   // but if we make sure to nest the else, we can save the
    823                   // break,
    824                   //   if (!break?) { .. }
    825                   // This is also better because the more canonical nested
    826                   // form is easier to further optimize later. The
    827                   // downside is more nesting, which adds to size in builds with
    828                   // whitespace.
    829                   // Note that we avoid switches, as it complicates control flow
    830                   // and is not relevant for the common case we optimize here.
    831                   bool Found = false;
    832                   bool Abort = false;
    833                   for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
    834                     Block *Target = iter.first;
    835                     Branch *Details = iter.second.get();
    836                     if (Details->Type == Branch::Break) {
    837                       Found = true;
    838                       if (!contains(NaturalBlocks, Target))
    839                         Abort = true;
    840                     } else if (Details->Type != Branch::Direct)
    841                       Abort = true;
    842                   }
    843                   if (Found && !Abort) {
    844                     for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
    845                       Branch *Details = iter.second.get();
    846                       if (Details->Type == Branch::Break) {
    847                         Details->Type = Branch::Direct;
    848                         if (MultipleShape *Multiple =
    849                                 dyn_cast<MultipleShape>(Details->Ancestor))
    850                           Multiple->Breaks--;
    851                       } else {
    852                         assert(Details->Type == Branch::Direct);
    853                         Details->Type = Branch::Nested;
    854                       }
    855                     }
    856                   }
    857                   Depth++; // this optimization increases depth, for us and all
    858                            // our next chain (i.e., until this call returns)
    859                 }
    860                 Next = Simple->Next;
    861               } else {
    862                 // If there is no next then Natural is where we will
    863                 // go to by doing nothing, so we can potentially optimize some
    864                 // branches to direct.
    865                 for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
    866                   Block *Target = iter.first;
    867                   Branch *Details = iter.second.get();
    868                   if (Details->Type != Branch::Direct &&
    869                       contains(NaturalBlocks,
    870                                Target)) { // note: cannot handle split blocks
    871                     Details->Type = Branch::Direct;
    872                     if (MultipleShape *Multiple =
    873                             dyn_cast<MultipleShape>(Details->Ancestor))
    874                       Multiple->Breaks--;
    875                   } else if (Details->Type == Branch::Break && LastLoop &&
    876                              LastLoop->Natural == Details->Ancestor->Natural) {
    877                     // it is important to simplify breaks, as simpler breaks
    878                     // enable other optimizations
    879                     Details->Labeled = false;
    880                     if (MultipleShape *Multiple =
    881                             dyn_cast<MultipleShape>(Details->Ancestor))
    882                       Multiple->Breaks--;
    883                   }
    884                 }
    885               }
    886             }, [&](MultipleShape* Multiple)
    887             {
    888               for (const auto &iter : Multiple->InnerMap) {
    889                 RemoveUnneededFlows(iter.second, Multiple->Next,
    890                                     Multiple->Breaks ? nullptr : LastLoop,
    891                                     Depth + 1);
    892               }
    893               Next = Multiple->Next;
    894             }, [&](LoopShape* Loop)
    895             {
    896               RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1);
    897               Next = Loop->Next;
    898             });
    899       }
    900     }
    901 
    902     // After we know which loops exist, we can calculate which need to be
    903     // labeled
    904     void FindLabeledLoops(Shape *Root) {
    905       Shape *Next = Root;
    906       while (Next) {
    907         Root = Next;
    908         Next = nullptr;
    909 
    910         ShapeSwitch(
    911             Root,
    912             [&](SimpleShape *Simple) {
    913           MultipleShape *Fused = dyn_cast<MultipleShape>(Root->Next);
    914           // If we are fusing a Multiple with a loop into this Simple, then
    915           // visit it now
    916           if (Fused && Fused->Breaks)
    917             LoopStack.push(Fused);
    918           if (Simple->Inner->BranchVar)
    919             LoopStack.push(nullptr); // a switch means breaks are now useless,
    920                                      // push a dummy
    921           if (Fused) {
    922             if (Fused->UseSwitch)
    923               LoopStack.push(nullptr); // a switch means breaks are now
    924                                        // useless, push a dummy
    925             for (const auto &iter : Fused->InnerMap) {
    926               FindLabeledLoops(iter.second);
    927             }
    928           }
    929           for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
    930             Branch *Details = iter.second.get();
    931             if (Details->Type == Branch::Break ||
    932                 Details->Type == Branch::Continue) {
    933               assert(!LoopStack.empty());
    934               if (Details->Ancestor != LoopStack.top() && Details->Labeled) {
    935                 if (MultipleShape *Multiple =
    936                         dyn_cast<MultipleShape>(Details->Ancestor)) {
    937                   Multiple->Labeled = true;
    938                 } else {
    939                   LoopShape *Loop = cast<LoopShape>(Details->Ancestor);
    940                   Loop->Labeled = true;
    941                 }
    942               } else {
    943                 Details->Labeled = false;
    944               }
    945             }
    946             if (Fused && Fused->UseSwitch)
    947               LoopStack.pop();
    948             if (Simple->Inner->BranchVar)
    949               LoopStack.pop();
    950             if (Fused && Fused->Breaks)
    951               LoopStack.pop();
    952             if (Fused)
    953               Next = Fused->Next;
    954             else
    955               Next = Root->Next;
    956           }
    957           }
    958           , [&](MultipleShape* Multiple) {
    959             if (Multiple->Breaks)
    960               LoopStack.push(Multiple);
    961             for (const auto &iter : Multiple->InnerMap)
    962               FindLabeledLoops(iter.second);
    963             if (Multiple->Breaks)
    964               LoopStack.pop();
    965             Next = Root->Next;
    966           }
    967           , [&](LoopShape* Loop) {
    968             LoopStack.push(Loop);
    969             FindLabeledLoops(Loop->Inner);
    970             LoopStack.pop();
    971             Next = Root->Next;
    972           });
    973       }
    974     }
    975 
    976     void Process(Shape * Root) {
    977       FindNaturals(Root);
    978       RemoveUnneededFlows(Root);
    979       FindLabeledLoops(Root);
    980     }
    981   };
    982 
    983   PostOptimizer(this).Process(Root);
    984 }
    985