1 //===-- Relooper.cpp - Top-level interface for WebAssembly ----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===---------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This implements the Relooper algorithm. This implementation includes 12 /// optimizations added since the original academic paper [1] was published. 13 /// 14 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In 15 /// Proceedings of the ACM international conference companion on Object 16 /// oriented programming systems languages and applications companion 17 /// (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224 18 /// http://doi.acm.org/10.1145/2048147.2048224 19 /// 20 //===-------------------------------------------------------------------===// 21 22 #include "Relooper.h" 23 #include "WebAssembly.h" 24 25 #include "llvm/ADT/STLExtras.h" 26 #include "llvm/IR/CFG.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/Pass.h" 29 #include "llvm/Support/CommandLine.h" 30 #include "llvm/Support/Debug.h" 31 #include "llvm/Support/raw_ostream.h" 32 33 #include <cstring> 34 #include <cstdlib> 35 #include <functional> 36 #include <list> 37 #include <stack> 38 #include <string> 39 40 #define DEBUG_TYPE "relooper" 41 42 using namespace llvm; 43 using namespace Relooper; 44 45 static cl::opt<int> RelooperSplittingFactor( 46 "relooper-splitting-factor", 47 cl::desc( 48 "How much to discount code size when deciding whether to split a node"), 49 cl::init(5)); 50 51 static cl::opt<unsigned> RelooperMultipleSwitchThreshold( 52 "relooper-multiple-switch-threshold", 53 cl::desc( 54 "How many entries to allow in a multiple before we use a switch"), 55 cl::init(10)); 56 57 static cl::opt<unsigned> RelooperNestingLimit( 58 "relooper-nesting-limit", 59 cl::desc( 60 "How much nesting is acceptable"), 61 cl::init(20)); 62 63 64 namespace { 65 /// 66 /// Implements the relooper algorithm for a function's blocks. 67 /// 68 /// Implementation details: The Relooper instance has 69 /// ownership of the blocks and shapes, and frees them when done. 70 /// 71 struct RelooperAlgorithm { 72 std::deque<Block *> Blocks; 73 std::deque<Shape *> Shapes; 74 Shape *Root; 75 bool MinSize; 76 int BlockIdCounter; 77 int ShapeIdCounter; 78 79 RelooperAlgorithm(); 80 ~RelooperAlgorithm(); 81 82 void AddBlock(Block *New, int Id = -1); 83 84 // Calculates the shapes 85 void Calculate(Block *Entry); 86 87 // Sets us to try to minimize size 88 void SetMinSize(bool MinSize_) { MinSize = MinSize_; } 89 }; 90 91 struct RelooperAnalysis final : public FunctionPass { 92 static char ID; 93 RelooperAnalysis() : FunctionPass(ID) {} 94 const char *getPassName() const override { return "relooper"; } 95 void getAnalysisUsage(AnalysisUsage &AU) const override { 96 AU.setPreservesAll(); 97 } 98 bool runOnFunction(Function &F) override; 99 }; 100 } 101 102 // RelooperAnalysis 103 104 char RelooperAnalysis::ID = 0; 105 FunctionPass *llvm::createWebAssemblyRelooper() { 106 return new RelooperAnalysis(); 107 } 108 109 bool RelooperAnalysis::runOnFunction(Function &F) { 110 DEBUG(dbgs() << "Relooping function '" << F.getName() << "'\n"); 111 RelooperAlgorithm R; 112 // FIXME: remove duplication between relooper's and LLVM's BBs. 113 std::map<const BasicBlock *, Block *> BB2B; 114 std::map<const Block *, const BasicBlock *> B2BB; 115 for (const BasicBlock &BB : F) { 116 // FIXME: getName is wrong here, Code is meant to represent amount of code. 117 // FIXME: use BranchVarInit for switch. 118 Block *B = new Block(BB.getName().str().data(), /*BranchVarInit=*/nullptr); 119 R.AddBlock(B); 120 assert(BB2B.find(&BB) == BB2B.end() && "Inserting the same block twice"); 121 assert(B2BB.find(B) == B2BB.end() && "Inserting the same block twice"); 122 BB2B[&BB] = B; 123 B2BB[B] = &BB; 124 } 125 for (Block *B : R.Blocks) { 126 const BasicBlock *BB = B2BB[B]; 127 for (const BasicBlock *Successor : successors(BB)) 128 // FIXME: add branch's Condition and Code below. 129 B->AddBranchTo(BB2B[Successor], /*Condition=*/nullptr, /*Code=*/nullptr); 130 } 131 R.Calculate(BB2B[&F.getEntryBlock()]); 132 return false; // Analysis passes don't modify anything. 133 } 134 135 // Helpers 136 137 typedef MapVector<Block *, BlockSet> BlockBlockSetMap; 138 typedef std::list<Block *> BlockList; 139 140 template <class T, class U> 141 static bool contains(const T &container, const U &contained) { 142 return container.count(contained); 143 } 144 145 146 // Branch 147 148 Branch::Branch(const char *ConditionInit, const char *CodeInit) 149 : Ancestor(nullptr), Labeled(true) { 150 // FIXME: move from char* to LLVM data structures 151 Condition = ConditionInit ? strdup(ConditionInit) : nullptr; 152 Code = CodeInit ? strdup(CodeInit) : nullptr; 153 } 154 155 Branch::~Branch() { 156 // FIXME: move from char* to LLVM data structures 157 free(static_cast<void *>(const_cast<char *>(Condition))); 158 free(static_cast<void *>(const_cast<char *>(Code))); 159 } 160 161 // Block 162 163 Block::Block(const char *CodeInit, const char *BranchVarInit) 164 : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) { 165 // FIXME: move from char* to LLVM data structures 166 Code = strdup(CodeInit); 167 BranchVar = BranchVarInit ? strdup(BranchVarInit) : nullptr; 168 } 169 170 Block::~Block() { 171 // FIXME: move from char* to LLVM data structures 172 free(static_cast<void *>(const_cast<char *>(Code))); 173 free(static_cast<void *>(const_cast<char *>(BranchVar))); 174 } 175 176 void Block::AddBranchTo(Block *Target, const char *Condition, 177 const char *Code) { 178 assert(!contains(BranchesOut, Target) && 179 "cannot add more than one branch to the same target"); 180 BranchesOut[Target] = make_unique<Branch>(Condition, Code); 181 } 182 183 // Relooper 184 185 RelooperAlgorithm::RelooperAlgorithm() 186 : Root(nullptr), MinSize(false), BlockIdCounter(1), 187 ShapeIdCounter(0) { // block ID 0 is reserved for clearings 188 } 189 190 RelooperAlgorithm::~RelooperAlgorithm() { 191 for (auto Curr : Blocks) 192 delete Curr; 193 for (auto Curr : Shapes) 194 delete Curr; 195 } 196 197 void RelooperAlgorithm::AddBlock(Block *New, int Id) { 198 New->Id = Id == -1 ? BlockIdCounter++ : Id; 199 Blocks.push_back(New); 200 } 201 202 struct RelooperRecursor { 203 RelooperAlgorithm *Parent; 204 RelooperRecursor(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {} 205 }; 206 207 void RelooperAlgorithm::Calculate(Block *Entry) { 208 // Scan and optimize the input 209 struct PreOptimizer : public RelooperRecursor { 210 PreOptimizer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {} 211 BlockSet Live; 212 213 void FindLive(Block *Root) { 214 BlockList ToInvestigate; 215 ToInvestigate.push_back(Root); 216 while (!ToInvestigate.empty()) { 217 Block *Curr = ToInvestigate.front(); 218 ToInvestigate.pop_front(); 219 if (contains(Live, Curr)) 220 continue; 221 Live.insert(Curr); 222 for (const auto &iter : Curr->BranchesOut) 223 ToInvestigate.push_back(iter.first); 224 } 225 } 226 227 // If a block has multiple entries but no exits, and it is small enough, it 228 // is useful to split it. A common example is a C++ function where 229 // everything ends up at a final exit block and does some RAII cleanup. 230 // Without splitting, we will be forced to introduce labelled loops to 231 // allow reaching the final block 232 void SplitDeadEnds() { 233 unsigned TotalCodeSize = 0; 234 for (const auto &Curr : Live) { 235 TotalCodeSize += strlen(Curr->Code); 236 } 237 BlockSet Splits; 238 BlockSet Removed; 239 for (const auto &Original : Live) { 240 if (Original->BranchesIn.size() <= 1 || 241 !Original->BranchesOut.empty()) 242 continue; // only dead ends, for now 243 if (contains(Original->BranchesOut, Original)) 244 continue; // cannot split a looping node 245 if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) > 246 TotalCodeSize / RelooperSplittingFactor) 247 continue; // if splitting increases raw code size by a significant 248 // amount, abort 249 // Split the node (for simplicity, we replace all the blocks, even 250 // though we could have reused the original) 251 DEBUG(dbgs() << " Splitting '" << Original->Code << "'\n"); 252 for (const auto &Prior : Original->BranchesIn) { 253 Block *Split = new Block(Original->Code, Original->BranchVar); 254 Parent->AddBlock(Split, Original->Id); 255 Split->BranchesIn.insert(Prior); 256 std::unique_ptr<Branch> Details; 257 Details.swap(Prior->BranchesOut[Original]); 258 Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition, 259 Details->Code); 260 for (const auto &iter : Original->BranchesOut) { 261 Block *Post = iter.first; 262 Branch *Details = iter.second.get(); 263 Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition, 264 Details->Code); 265 Post->BranchesIn.insert(Split); 266 } 267 Splits.insert(Split); 268 Removed.insert(Original); 269 } 270 for (const auto &iter : Original->BranchesOut) { 271 Block *Post = iter.first; 272 Post->BranchesIn.remove(Original); 273 } 274 } 275 for (const auto &iter : Splits) 276 Live.insert(iter); 277 for (const auto &iter : Removed) 278 Live.remove(iter); 279 } 280 }; 281 PreOptimizer Pre(this); 282 Pre.FindLive(Entry); 283 284 // Add incoming branches from live blocks, ignoring dead code 285 for (unsigned i = 0; i < Blocks.size(); i++) { 286 Block *Curr = Blocks[i]; 287 if (!contains(Pre.Live, Curr)) 288 continue; 289 for (const auto &iter : Curr->BranchesOut) 290 iter.first->BranchesIn.insert(Curr); 291 } 292 293 if (!MinSize) 294 Pre.SplitDeadEnds(); 295 296 // Recursively process the graph 297 298 struct Analyzer : public RelooperRecursor { 299 Analyzer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {} 300 301 // Add a shape to the list of shapes in this Relooper calculation 302 void Notice(Shape *New) { 303 New->Id = Parent->ShapeIdCounter++; 304 Parent->Shapes.push_back(New); 305 } 306 307 // Create a list of entries from a block. If LimitTo is provided, only 308 // results in that set will appear 309 void GetBlocksOut(Block *Source, BlockSet &Entries, 310 BlockSet *LimitTo = nullptr) { 311 for (const auto &iter : Source->BranchesOut) 312 if (!LimitTo || contains(*LimitTo, iter.first)) 313 Entries.insert(iter.first); 314 } 315 316 // Converts/processes all branchings to a specific target 317 void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor, 318 BlockSet &From) { 319 DEBUG(dbgs() << " Solipsize '" << Target->Code << "' type " << Type 320 << "\n"); 321 for (auto iter = Target->BranchesIn.begin(); 322 iter != Target->BranchesIn.end();) { 323 Block *Prior = *iter; 324 if (!contains(From, Prior)) { 325 iter++; 326 continue; 327 } 328 std::unique_ptr<Branch> PriorOut; 329 PriorOut.swap(Prior->BranchesOut[Target]); 330 PriorOut->Ancestor = Ancestor; 331 PriorOut->Type = Type; 332 if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor)) 333 Multiple->Breaks++; // We are breaking out of this Multiple, so need a 334 // loop 335 iter++; // carefully increment iter before erasing 336 Target->BranchesIn.remove(Prior); 337 Target->ProcessedBranchesIn.insert(Prior); 338 Prior->ProcessedBranchesOut[Target].swap(PriorOut); 339 } 340 } 341 342 Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) { 343 DEBUG(dbgs() << " MakeSimple inner block '" << Inner->Code << "'\n"); 344 SimpleShape *Simple = new SimpleShape; 345 Notice(Simple); 346 Simple->Inner = Inner; 347 Inner->Parent = Simple; 348 if (Blocks.size() > 1) { 349 Blocks.remove(Inner); 350 GetBlocksOut(Inner, NextEntries, &Blocks); 351 BlockSet JustInner; 352 JustInner.insert(Inner); 353 for (const auto &iter : NextEntries) 354 Solipsize(iter, Branch::Direct, Simple, JustInner); 355 } 356 return Simple; 357 } 358 359 Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries, 360 BlockSet &NextEntries) { 361 // Find the inner blocks in this loop. Proceed backwards from the entries 362 // until 363 // you reach a seen block, collecting as you go. 364 BlockSet InnerBlocks; 365 BlockSet Queue = Entries; 366 while (!Queue.empty()) { 367 Block *Curr = *(Queue.begin()); 368 Queue.remove(*Queue.begin()); 369 if (!contains(InnerBlocks, Curr)) { 370 // This element is new, mark it as inner and remove from outer 371 InnerBlocks.insert(Curr); 372 Blocks.remove(Curr); 373 // Add the elements prior to it 374 for (const auto &iter : Curr->BranchesIn) 375 Queue.insert(iter); 376 } 377 } 378 assert(!InnerBlocks.empty()); 379 380 for (const auto &Curr : InnerBlocks) { 381 for (const auto &iter : Curr->BranchesOut) { 382 Block *Possible = iter.first; 383 if (!contains(InnerBlocks, Possible)) 384 NextEntries.insert(Possible); 385 } 386 } 387 388 LoopShape *Loop = new LoopShape(); 389 Notice(Loop); 390 391 // Solipsize the loop, replacing with break/continue and marking branches 392 // as Processed (will not affect later calculations) 393 // A. Branches to the loop entries become a continue to this shape 394 for (const auto &iter : Entries) 395 Solipsize(iter, Branch::Continue, Loop, InnerBlocks); 396 // B. Branches to outside the loop (a next entry) become breaks on this 397 // shape 398 for (const auto &iter : NextEntries) 399 Solipsize(iter, Branch::Break, Loop, InnerBlocks); 400 // Finish up 401 Shape *Inner = Process(InnerBlocks, Entries, nullptr); 402 Loop->Inner = Inner; 403 return Loop; 404 } 405 406 // For each entry, find the independent group reachable by it. The 407 // independent group is the entry itself, plus all the blocks it can 408 // reach that cannot be directly reached by another entry. Note that we 409 // ignore directly reaching the entry itself by another entry. 410 // @param Ignore - previous blocks that are irrelevant 411 void FindIndependentGroups(BlockSet &Entries, 412 BlockBlockSetMap &IndependentGroups, 413 BlockSet *Ignore = nullptr) { 414 typedef std::map<Block *, Block *> BlockBlockMap; 415 416 struct HelperClass { 417 BlockBlockSetMap &IndependentGroups; 418 BlockBlockMap Ownership; // For each block, which entry it belongs to. 419 // We have reached it from there. 420 421 HelperClass(BlockBlockSetMap &IndependentGroupsInit) 422 : IndependentGroups(IndependentGroupsInit) {} 423 void InvalidateWithChildren(Block *New) { 424 // Being in the list means you need to be invalidated 425 BlockList ToInvalidate; 426 ToInvalidate.push_back(New); 427 while (!ToInvalidate.empty()) { 428 Block *Invalidatee = ToInvalidate.front(); 429 ToInvalidate.pop_front(); 430 Block *Owner = Ownership[Invalidatee]; 431 // Owner may have been invalidated, do not add to 432 // IndependentGroups! 433 if (contains(IndependentGroups, Owner)) 434 IndependentGroups[Owner].remove(Invalidatee); 435 if (Ownership[Invalidatee]) { // may have been seen before and 436 // invalidated already 437 Ownership[Invalidatee] = nullptr; 438 for (const auto &iter : Invalidatee->BranchesOut) { 439 Block *Target = iter.first; 440 BlockBlockMap::iterator Known = Ownership.find(Target); 441 if (Known != Ownership.end()) { 442 Block *TargetOwner = Known->second; 443 if (TargetOwner) 444 ToInvalidate.push_back(Target); 445 } 446 } 447 } 448 } 449 } 450 }; 451 HelperClass Helper(IndependentGroups); 452 453 // We flow out from each of the entries, simultaneously. 454 // When we reach a new block, we add it as belonging to the one we got to 455 // it from. 456 // If we reach a new block that is already marked as belonging to someone, 457 // it is reachable by two entries and is not valid for any of them. 458 // Remove it and all it can reach that have been visited. 459 460 // Being in the queue means we just added this item, and 461 // we need to add its children 462 BlockList Queue; 463 for (const auto &Entry : Entries) { 464 Helper.Ownership[Entry] = Entry; 465 IndependentGroups[Entry].insert(Entry); 466 Queue.push_back(Entry); 467 } 468 while (!Queue.empty()) { 469 Block *Curr = Queue.front(); 470 Queue.pop_front(); 471 Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership 472 // map if we are in the queue 473 if (!Owner) 474 continue; // we have been invalidated meanwhile after being reached 475 // from two entries 476 // Add all children 477 for (const auto &iter : Curr->BranchesOut) { 478 Block *New = iter.first; 479 BlockBlockMap::iterator Known = Helper.Ownership.find(New); 480 if (Known == Helper.Ownership.end()) { 481 // New node. Add it, and put it in the queue 482 Helper.Ownership[New] = Owner; 483 IndependentGroups[Owner].insert(New); 484 Queue.push_back(New); 485 continue; 486 } 487 Block *NewOwner = Known->second; 488 if (!NewOwner) 489 continue; // We reached an invalidated node 490 if (NewOwner != Owner) 491 // Invalidate this and all reachable that we have seen - we reached 492 // this from two locations 493 Helper.InvalidateWithChildren(New); 494 // otherwise, we have the same owner, so do nothing 495 } 496 } 497 498 // Having processed all the interesting blocks, we remain with just one 499 // potential issue: 500 // If a->b, and a was invalidated, but then b was later reached by 501 // someone else, we must invalidate b. To check for this, we go over all 502 // elements in the independent groups, if an element has a parent which 503 // does *not* have the same owner, we/ must remove it and all its 504 // children. 505 506 for (const auto &iter : Entries) { 507 BlockSet &CurrGroup = IndependentGroups[iter]; 508 BlockList ToInvalidate; 509 for (const auto &iter : CurrGroup) { 510 Block *Child = iter; 511 for (const auto &iter : Child->BranchesIn) { 512 Block *Parent = iter; 513 if (Ignore && contains(*Ignore, Parent)) 514 continue; 515 if (Helper.Ownership[Parent] != Helper.Ownership[Child]) 516 ToInvalidate.push_back(Child); 517 } 518 } 519 while (!ToInvalidate.empty()) { 520 Block *Invalidatee = ToInvalidate.front(); 521 ToInvalidate.pop_front(); 522 Helper.InvalidateWithChildren(Invalidatee); 523 } 524 } 525 526 // Remove empty groups 527 for (const auto &iter : Entries) 528 if (IndependentGroups[iter].empty()) 529 IndependentGroups.erase(iter); 530 } 531 532 Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries, 533 BlockBlockSetMap &IndependentGroups, Shape *Prev, 534 BlockSet &NextEntries) { 535 bool Fused = isa<SimpleShape>(Prev); 536 MultipleShape *Multiple = new MultipleShape(); 537 Notice(Multiple); 538 BlockSet CurrEntries; 539 for (auto &iter : IndependentGroups) { 540 Block *CurrEntry = iter.first; 541 BlockSet &CurrBlocks = iter.second; 542 // Create inner block 543 CurrEntries.clear(); 544 CurrEntries.insert(CurrEntry); 545 for (const auto &CurrInner : CurrBlocks) { 546 // Remove the block from the remaining blocks 547 Blocks.remove(CurrInner); 548 // Find new next entries and fix branches to them 549 for (auto iter = CurrInner->BranchesOut.begin(); 550 iter != CurrInner->BranchesOut.end();) { 551 Block *CurrTarget = iter->first; 552 auto Next = iter; 553 Next++; 554 if (!contains(CurrBlocks, CurrTarget)) { 555 NextEntries.insert(CurrTarget); 556 Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks); 557 } 558 iter = Next; // increment carefully because Solipsize can remove us 559 } 560 } 561 Multiple->InnerMap[CurrEntry->Id] = 562 Process(CurrBlocks, CurrEntries, nullptr); 563 // If we are not fused, then our entries will actually be checked 564 if (!Fused) 565 CurrEntry->IsCheckedMultipleEntry = true; 566 } 567 // Add entries not handled as next entries, they are deferred 568 for (const auto &Entry : Entries) 569 if (!contains(IndependentGroups, Entry)) 570 NextEntries.insert(Entry); 571 // The multiple has been created, we can decide how to implement it 572 if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) { 573 Multiple->UseSwitch = true; 574 Multiple->Breaks++; // switch captures breaks 575 } 576 return Multiple; 577 } 578 579 // Main function. 580 // Process a set of blocks with specified entries, returns a shape 581 // The Make* functions receive a NextEntries. If they fill it with data, 582 // those are the entries for the ->Next block on them, and the blocks 583 // are what remains in Blocks (which Make* modify). In this way 584 // we avoid recursing on Next (imagine a long chain of Simples, if we 585 // recursed we could blow the stack). 586 Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) { 587 BlockSet *Entries = &InitialEntries; 588 BlockSet TempEntries[2]; 589 int CurrTempIndex = 0; 590 BlockSet *NextEntries; 591 Shape *Ret = nullptr; 592 593 auto Make = [&](Shape *Temp) { 594 if (Prev) 595 Prev->Next = Temp; 596 if (!Ret) 597 Ret = Temp; 598 Prev = Temp; 599 Entries = NextEntries; 600 }; 601 602 while (1) { 603 CurrTempIndex = 1 - CurrTempIndex; 604 NextEntries = &TempEntries[CurrTempIndex]; 605 NextEntries->clear(); 606 607 if (Entries->empty()) 608 return Ret; 609 if (Entries->size() == 1) { 610 Block *Curr = *(Entries->begin()); 611 if (Curr->BranchesIn.empty()) { 612 // One entry, no looping ==> Simple 613 Make(MakeSimple(Blocks, Curr, *NextEntries)); 614 if (NextEntries->empty()) 615 return Ret; 616 continue; 617 } 618 // One entry, looping ==> Loop 619 Make(MakeLoop(Blocks, *Entries, *NextEntries)); 620 if (NextEntries->empty()) 621 return Ret; 622 continue; 623 } 624 625 // More than one entry, try to eliminate through a Multiple groups of 626 // independent blocks from an entry/ies. It is important to remove 627 // through multiples as opposed to looping since the former is more 628 // performant. 629 BlockBlockSetMap IndependentGroups; 630 FindIndependentGroups(*Entries, IndependentGroups); 631 632 if (!IndependentGroups.empty()) { 633 // We can handle a group in a multiple if its entry cannot be reached 634 // by another group. 635 // Note that it might be reachable by itself - a loop. But that is 636 // fine, we will create a loop inside the multiple block (which 637 // is the performant order to do it). 638 for (auto iter = IndependentGroups.begin(); 639 iter != IndependentGroups.end();) { 640 Block *Entry = iter->first; 641 BlockSet &Group = iter->second; 642 auto curr = iter++; // iterate carefully, we may delete 643 for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin(); 644 iterBranch != Entry->BranchesIn.end(); iterBranch++) { 645 Block *Origin = *iterBranch; 646 if (!contains(Group, Origin)) { 647 // Reached from outside the group, so we cannot handle this 648 IndependentGroups.erase(curr); 649 break; 650 } 651 } 652 } 653 654 // As an optimization, if we have 2 independent groups, and one is a 655 // small dead end, we can handle only that dead end. 656 // The other then becomes a Next - without nesting in the code and 657 // recursion in the analysis. 658 // TODO: if the larger is the only dead end, handle that too 659 // TODO: handle >2 groups 660 // TODO: handle not just dead ends, but also that do not branch to the 661 // NextEntries. However, must be careful there since we create a 662 // Next, and that Next can prevent eliminating a break (since we no 663 // longer naturally reach the same place), which may necessitate a 664 // one-time loop, which makes the unnesting pointless. 665 if (IndependentGroups.size() == 2) { 666 // Find the smaller one 667 auto iter = IndependentGroups.begin(); 668 Block *SmallEntry = iter->first; 669 auto SmallSize = iter->second.size(); 670 iter++; 671 Block *LargeEntry = iter->first; 672 auto LargeSize = iter->second.size(); 673 if (SmallSize != LargeSize) { // ignore the case where they are 674 // identical - keep things symmetrical 675 // there 676 if (SmallSize > LargeSize) { 677 Block *Temp = SmallEntry; 678 SmallEntry = LargeEntry; 679 LargeEntry = Temp; // Note: we did not flip the Sizes too, they 680 // are now invalid. TODO: use the smaller 681 // size as a limit? 682 } 683 // Check if dead end 684 bool DeadEnd = true; 685 BlockSet &SmallGroup = IndependentGroups[SmallEntry]; 686 for (const auto &Curr : SmallGroup) { 687 for (const auto &iter : Curr->BranchesOut) { 688 Block *Target = iter.first; 689 if (!contains(SmallGroup, Target)) { 690 DeadEnd = false; 691 break; 692 } 693 } 694 if (!DeadEnd) 695 break; 696 } 697 if (DeadEnd) 698 IndependentGroups.erase(LargeEntry); 699 } 700 } 701 702 if (!IndependentGroups.empty()) 703 // Some groups removable ==> Multiple 704 Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev, 705 *NextEntries)); 706 if (NextEntries->empty()) 707 return Ret; 708 continue; 709 } 710 // No independent groups, must be loopable ==> Loop 711 Make(MakeLoop(Blocks, *Entries, *NextEntries)); 712 if (NextEntries->empty()) 713 return Ret; 714 continue; 715 } 716 } 717 }; 718 719 // Main 720 721 BlockSet AllBlocks; 722 for (const auto &Curr : Pre.Live) { 723 AllBlocks.insert(Curr); 724 } 725 726 BlockSet Entries; 727 Entries.insert(Entry); 728 Root = Analyzer(this).Process(AllBlocks, Entries, nullptr); 729 assert(Root); 730 731 /// 732 /// Relooper post-optimizer 733 /// 734 struct PostOptimizer { 735 RelooperAlgorithm *Parent; 736 std::stack<Shape *> LoopStack; 737 738 PostOptimizer(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {} 739 740 void ShapeSwitch(Shape* var, 741 std::function<void (SimpleShape*)> simple, 742 std::function<void (MultipleShape*)> multiple, 743 std::function<void (LoopShape*)> loop) { 744 switch (var->getKind()) { 745 case Shape::SK_Simple: { 746 simple(cast<SimpleShape>(var)); 747 break; 748 } 749 case Shape::SK_Multiple: { 750 multiple(cast<MultipleShape>(var)); 751 break; 752 } 753 case Shape::SK_Loop: { 754 loop(cast<LoopShape>(var)); 755 break; 756 } 757 } 758 } 759 760 // Find the blocks that natural control flow can get us directly to, or 761 // through a multiple that we ignore 762 void FollowNaturalFlow(Shape *S, BlockSet &Out) { 763 ShapeSwitch(S, [&](SimpleShape* Simple) { 764 Out.insert(Simple->Inner); 765 }, [&](MultipleShape* Multiple) { 766 for (const auto &iter : Multiple->InnerMap) { 767 FollowNaturalFlow(iter.second, Out); 768 } 769 FollowNaturalFlow(Multiple->Next, Out); 770 }, [&](LoopShape* Loop) { 771 FollowNaturalFlow(Loop->Inner, Out); 772 }); 773 } 774 775 void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) { 776 if (Root->Next) { 777 Root->Natural = Root->Next; 778 FindNaturals(Root->Next, Otherwise); 779 } else { 780 Root->Natural = Otherwise; 781 } 782 783 ShapeSwitch(Root, [](SimpleShape* Simple) { 784 }, [&](MultipleShape* Multiple) { 785 for (const auto &iter : Multiple->InnerMap) { 786 FindNaturals(iter.second, Root->Natural); 787 } 788 }, [&](LoopShape* Loop){ 789 FindNaturals(Loop->Inner, Loop->Inner); 790 }); 791 } 792 793 // Remove unneeded breaks and continues. 794 // A flow operation is trivially unneeded if the shape we naturally get to 795 // by normal code execution is the same as the flow forces us to. 796 void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr, 797 LoopShape *LastLoop = nullptr, 798 unsigned Depth = 0) { 799 BlockSet NaturalBlocks; 800 FollowNaturalFlow(Natural, NaturalBlocks); 801 Shape *Next = Root; 802 while (Next) { 803 Root = Next; 804 Next = nullptr; 805 ShapeSwitch( 806 Root, 807 [&](SimpleShape* Simple) { 808 if (Simple->Inner->BranchVar) 809 LastLoop = 810 nullptr; // a switch clears out the loop (TODO: only for 811 // breaks, not continue) 812 813 if (Simple->Next) { 814 if (!Simple->Inner->BranchVar && 815 Simple->Inner->ProcessedBranchesOut.size() == 2 && 816 Depth < RelooperNestingLimit) { 817 // If there is a next block, we already know at Simple 818 // creation time to make direct branches, and we can do 819 // nothing more in general. But, we try to optimize the 820 // case of a break and a direct: This would normally be 821 // if (break?) { break; } .. 822 // but if we make sure to nest the else, we can save the 823 // break, 824 // if (!break?) { .. } 825 // This is also better because the more canonical nested 826 // form is easier to further optimize later. The 827 // downside is more nesting, which adds to size in builds with 828 // whitespace. 829 // Note that we avoid switches, as it complicates control flow 830 // and is not relevant for the common case we optimize here. 831 bool Found = false; 832 bool Abort = false; 833 for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { 834 Block *Target = iter.first; 835 Branch *Details = iter.second.get(); 836 if (Details->Type == Branch::Break) { 837 Found = true; 838 if (!contains(NaturalBlocks, Target)) 839 Abort = true; 840 } else if (Details->Type != Branch::Direct) 841 Abort = true; 842 } 843 if (Found && !Abort) { 844 for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { 845 Branch *Details = iter.second.get(); 846 if (Details->Type == Branch::Break) { 847 Details->Type = Branch::Direct; 848 if (MultipleShape *Multiple = 849 dyn_cast<MultipleShape>(Details->Ancestor)) 850 Multiple->Breaks--; 851 } else { 852 assert(Details->Type == Branch::Direct); 853 Details->Type = Branch::Nested; 854 } 855 } 856 } 857 Depth++; // this optimization increases depth, for us and all 858 // our next chain (i.e., until this call returns) 859 } 860 Next = Simple->Next; 861 } else { 862 // If there is no next then Natural is where we will 863 // go to by doing nothing, so we can potentially optimize some 864 // branches to direct. 865 for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { 866 Block *Target = iter.first; 867 Branch *Details = iter.second.get(); 868 if (Details->Type != Branch::Direct && 869 contains(NaturalBlocks, 870 Target)) { // note: cannot handle split blocks 871 Details->Type = Branch::Direct; 872 if (MultipleShape *Multiple = 873 dyn_cast<MultipleShape>(Details->Ancestor)) 874 Multiple->Breaks--; 875 } else if (Details->Type == Branch::Break && LastLoop && 876 LastLoop->Natural == Details->Ancestor->Natural) { 877 // it is important to simplify breaks, as simpler breaks 878 // enable other optimizations 879 Details->Labeled = false; 880 if (MultipleShape *Multiple = 881 dyn_cast<MultipleShape>(Details->Ancestor)) 882 Multiple->Breaks--; 883 } 884 } 885 } 886 }, [&](MultipleShape* Multiple) 887 { 888 for (const auto &iter : Multiple->InnerMap) { 889 RemoveUnneededFlows(iter.second, Multiple->Next, 890 Multiple->Breaks ? nullptr : LastLoop, 891 Depth + 1); 892 } 893 Next = Multiple->Next; 894 }, [&](LoopShape* Loop) 895 { 896 RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1); 897 Next = Loop->Next; 898 }); 899 } 900 } 901 902 // After we know which loops exist, we can calculate which need to be 903 // labeled 904 void FindLabeledLoops(Shape *Root) { 905 Shape *Next = Root; 906 while (Next) { 907 Root = Next; 908 Next = nullptr; 909 910 ShapeSwitch( 911 Root, 912 [&](SimpleShape *Simple) { 913 MultipleShape *Fused = dyn_cast<MultipleShape>(Root->Next); 914 // If we are fusing a Multiple with a loop into this Simple, then 915 // visit it now 916 if (Fused && Fused->Breaks) 917 LoopStack.push(Fused); 918 if (Simple->Inner->BranchVar) 919 LoopStack.push(nullptr); // a switch means breaks are now useless, 920 // push a dummy 921 if (Fused) { 922 if (Fused->UseSwitch) 923 LoopStack.push(nullptr); // a switch means breaks are now 924 // useless, push a dummy 925 for (const auto &iter : Fused->InnerMap) { 926 FindLabeledLoops(iter.second); 927 } 928 } 929 for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { 930 Branch *Details = iter.second.get(); 931 if (Details->Type == Branch::Break || 932 Details->Type == Branch::Continue) { 933 assert(!LoopStack.empty()); 934 if (Details->Ancestor != LoopStack.top() && Details->Labeled) { 935 if (MultipleShape *Multiple = 936 dyn_cast<MultipleShape>(Details->Ancestor)) { 937 Multiple->Labeled = true; 938 } else { 939 LoopShape *Loop = cast<LoopShape>(Details->Ancestor); 940 Loop->Labeled = true; 941 } 942 } else { 943 Details->Labeled = false; 944 } 945 } 946 if (Fused && Fused->UseSwitch) 947 LoopStack.pop(); 948 if (Simple->Inner->BranchVar) 949 LoopStack.pop(); 950 if (Fused && Fused->Breaks) 951 LoopStack.pop(); 952 if (Fused) 953 Next = Fused->Next; 954 else 955 Next = Root->Next; 956 } 957 } 958 , [&](MultipleShape* Multiple) { 959 if (Multiple->Breaks) 960 LoopStack.push(Multiple); 961 for (const auto &iter : Multiple->InnerMap) 962 FindLabeledLoops(iter.second); 963 if (Multiple->Breaks) 964 LoopStack.pop(); 965 Next = Root->Next; 966 } 967 , [&](LoopShape* Loop) { 968 LoopStack.push(Loop); 969 FindLabeledLoops(Loop->Inner); 970 LoopStack.pop(); 971 Next = Root->Next; 972 }); 973 } 974 } 975 976 void Process(Shape * Root) { 977 FindNaturals(Root); 978 RemoveUnneededFlows(Root); 979 FindLabeledLoops(Root); 980 } 981 }; 982 983 PostOptimizer(this).Process(Root); 984 } 985