//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file promotes memory references to be register references. It promotes
// alloca instructions which only have loads and stores as uses. An alloca is
// transformed by using iterated dominator frontiers to place PHI nodes, then
// traversing the function in depth-first order to rewrite loads and stores as
// appropriate.
//
// The algorithm used here is based on:
//
//   Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
//   In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
//   Programming Languages
//   POPL '95. ACM, New York, NY, 62-73.
//
// It has been modified to not explicitly use the DJ graph data structure and
// to directly compute pruned SSA using per-variable liveness information.
//
//===----------------------------------------------------------------------===//
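
// For illustration only (a hypothetical example, not taken from the test
// suite), a promotable alloca such as:
//
//   entry:
//     %x = alloca i32
//     br i1 %c, label %then, label %merge
//   then:
//     store i32 1, i32* %x
//     br label %merge
//   merge:
//     %v = load i32* %x
//
// is rewritten by this pass into pure SSA form, with no memory traffic left:
//
//   entry:
//     br i1 %c, label %then, label %merge
//   then:
//     br label %merge
//   merge:
//     %v = phi i32 [ 1, %then ], [ undef, %entry ]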

#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/DIBuilder.h"
#include "llvm/DebugInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/InstVisitor.h"
#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <queue>
using namespace llvm;

STATISTIC(NumLocalPromoted, "Number of allocas promoted within one block");
STATISTIC(NumSingleStore, "Number of allocas promoted with a single store");
STATISTIC(NumDeadAlloca, "Number of dead allocas removed");
STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");

namespace {

struct AllocaInfo : private InstVisitor<AllocaInfo, bool> {
  const DataLayout *DL;

  SmallVector<BasicBlock *, 32> DefiningBlocks;
  SmallVector<BasicBlock *, 32> UsingBlocks;
  SmallVector<Instruction *, 8> DeadInsts;

  Type *AllocaTy;
  StoreInst *OnlyStore;
  BasicBlock *OnlyBlock;
  bool OnlyUsedInOneBlock;

  Value *AllocaPointerVal;
  DbgDeclareInst *DbgDeclare;

  AllocaInfo(const DataLayout *DL) : DL(DL) {}

  void clear() {
    DefiningBlocks.clear();
    UsingBlocks.clear();
    DeadInsts.clear();
    AllocaTy = 0;
    OnlyStore = 0;
    OnlyBlock = 0;
    OnlyUsedInOneBlock = true;
    AllocaPointerVal = 0;
    DbgDeclare = 0;
  }

  /// Scan the uses of the specified alloca, filling in the AllocaInfo used
  /// by the rest of the pass to reason about the uses of this alloca.
  bool analyzeAlloca(AllocaInst &AI) {
    clear();

    AllocaTy = AI.getAllocatedType();
    enqueueUsers(AI);

    // Walk queued up uses in the worklist to handle nested uses.
    while (!UseWorklist.empty()) {
      U = UseWorklist.pop_back_val();
      Instruction &I = *cast<Instruction>(U->getUser());
      if (!visit(I))
        return false; // Propagate failure to promote up.

      if (OnlyUsedInOneBlock) {
        if (OnlyBlock == 0)
          OnlyBlock = I.getParent();
        else if (OnlyBlock != I.getParent())
          OnlyUsedInOneBlock = false;
      }
    }

    DbgDeclare = FindAllocaDbgDeclare(&AI);
    return true;
  }

private:
  // Befriend the base class so it can call through private visitor methods.
  friend class InstVisitor<AllocaInfo, bool>;

  /// \brief A use pointer that is non-null when visiting uses.
  Use *U;

  /// \brief A worklist for recursively visiting all uses of an alloca.
  SmallVector<Use *, 8> UseWorklist;

  /// \brief A set for preventing cyclic visitation.
  SmallPtrSet<Use *, 8> VisitedUses;

  void enqueueUsers(Instruction &I) {
    for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
         ++UI)
      if (VisitedUses.insert(&UI.getUse()))
        UseWorklist.push_back(&UI.getUse());
  }

  bool visitLoadInst(LoadInst &LI) {
    if (LI.isVolatile() || LI.getType() != AllocaTy)
      return false;

    // Keep track of variable reads.
    UsingBlocks.push_back(LI.getParent());
    AllocaPointerVal = &LI;
    return true;
  }

  bool visitStoreInst(StoreInst &SI) {
    if (SI.isVolatile() || SI.getValueOperand() == U->get() ||
        SI.getValueOperand()->getType() != AllocaTy)
      return false;

    // Remember the basic blocks which define new values for the alloca.
    DefiningBlocks.push_back(SI.getParent());
    AllocaPointerVal = SI.getOperand(0);
    OnlyStore = &SI;
    return true;
  }
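
  // Pointer casts and GEPs are looked through rather than rejected outright:
  // a bitcast or an all-zero-index GEP yields a pointer to the same memory,
  // so its users can be analyzed as if they used the alloca directly, while
  // dead casts and GEPs are merely queued for deletion. A GEP with non-zero
  // indices, by contrast, defeats promotion.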
  bool visitBitCastInst(BitCastInst &BC) {
    if (BC.use_empty())
      DeadInsts.push_back(&BC);
    else
      enqueueUsers(BC);
    return true;
  }

  bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    if (GEPI.use_empty()) {
      DeadInsts.push_back(&GEPI);
      return true;
    }

    enqueueUsers(GEPI);

    return GEPI.hasAllZeroIndices();
  }

  // We can promote through debug info intrinsics as they don't alter the
  // value stored in memory.
  bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) {
    DeadInsts.push_back(&I);
    return true;
  }

  bool visitIntrinsicInst(IntrinsicInst &II) {
    switch (II.getIntrinsicID()) {
    default:
      return false;

    // Lifetime intrinsics don't preclude promoting the memory to a register.
    // FIXME: We should use these to promote to undef when outside of a valid
    // lifetime.
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      DeadInsts.push_back(&II);
      return true;
    }
  }

  // The fallback is that the alloca cannot be promoted.
  bool visitInstruction(Instruction &I) { return false; }
};

// Data package used by RenamePass().
class RenamePassData {
public:
  typedef std::vector<Value *> ValVector;

  RenamePassData() : BB(NULL), Pred(NULL), Values() {}
  RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V)
      : BB(B), Pred(P), Values(V) {}
  BasicBlock *BB;
  BasicBlock *Pred;
  ValVector Values;

  void swap(RenamePassData &RHS) {
    std::swap(BB, RHS.BB);
    std::swap(Pred, RHS.Pred);
    Values.swap(RHS.Values);
  }
};

/// \brief This assigns and keeps a per-bb relative ordering of load/store
/// instructions in the block that directly load or store an alloca.
///
/// This functionality is important because it avoids scanning large basic
/// blocks multiple times when promoting many allocas in the same block.
class LargeBlockInfo {
  /// \brief For each instruction that we track, keep the index of the
  /// instruction.
  ///
  /// The index starts out as the number of the instruction from the start of
  /// the block.
  DenseMap<const Instruction *, unsigned> InstNumbers;

public:

  /// This code only looks at accesses to allocas.
  static bool isInterestingInstruction(const Instruction *I) {
    return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
           (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
  }

  /// Get or calculate the index of the specified instruction.
  unsigned getInstructionIndex(const Instruction *I) {
    assert(isInterestingInstruction(I) &&
           "Not a load/store to/from an alloca?");

    // If we already have this instruction number, return it.
    DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
    if (It != InstNumbers.end())
      return It->second;

    // Scan the whole block to get the instruction. This accumulates
    // information for every interesting instruction in the block, in order to
    // avoid gratuitous rescans.
    const BasicBlock *BB = I->getParent();
    unsigned InstNo = 0;
    for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); BBI != E;
         ++BBI)
      if (isInterestingInstruction(BBI))
        InstNumbers[BBI] = InstNo++;
    It = InstNumbers.find(I);

    assert(It != InstNumbers.end() && "Didn't insert instruction?");
    return It->second;
  }

  void deleteValue(const Instruction *I) { InstNumbers.erase(I); }

  void clear() { InstNumbers.clear(); }
};

struct PromoteMem2Reg {
  /// The alloca instructions being promoted.
  std::vector<AllocaInst *> Allocas;
  DominatorTree &DT;
  DIBuilder DIB;
  const DataLayout *DL;

  /// An AliasSetTracker object to update. If null, don't update it.
  AliasSetTracker *AST;

  /// Reverse mapping of Allocas.
  DenseMap<AllocaInst *, unsigned> AllocaLookup;

  /// \brief The PHI nodes we're adding.
  ///
  /// This map is used to simplify some PHI nodes as we iterate over it, so
  /// it should have deterministic iterators. We could use a MapVector, but
  /// since we already maintain a map from BasicBlock* to a stable numbering
  /// (BBNumbers), the DenseMap is more efficient (also supports removal).
  DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes;

  /// For each PHI node, keep track of which entry in Allocas it corresponds
  /// to.
  DenseMap<PHINode *, unsigned> PhiToAllocaMap;

  /// If we are updating an AliasSetTracker, then for each alloca that is of
  /// pointer type, we keep track of what to copyValue to the inserted PHI
  /// nodes here.
  std::vector<Value *> PointerAllocaValues;

  /// For each alloca, we keep track of the dbg.declare intrinsic that
  /// describes it, if any, so that we can convert it to a dbg.value
  /// intrinsic if the alloca gets promoted.
  SmallVector<DbgDeclareInst *, 8> AllocaDbgDeclares;

  /// The set of basic blocks the renamer has already visited.
  SmallPtrSet<BasicBlock *, 16> Visited;

  /// Contains a stable numbering of basic blocks to avoid non-deterministic
  /// behavior.
  DenseMap<BasicBlock *, unsigned> BBNumbers;

  /// Maps DomTreeNodes to their level in the dominator tree.
  DenseMap<DomTreeNode *, unsigned> DomLevels;

  /// Lazily compute the number of predecessors a block has.
  DenseMap<const BasicBlock *, unsigned> BBNumPreds;

public:
  PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
                 const DataLayout *DL, AliasSetTracker *AST)
      : Allocas(Allocas.begin(), Allocas.end()), DT(DT),
        DIB(*DT.getRoot()->getParent()->getParent()), DL(DL), AST(AST) {}

  void run();

private:
  void RemoveFromAllocasList(unsigned &AllocaIdx) {
    Allocas[AllocaIdx] = Allocas.back();
    Allocas.pop_back();
    --AllocaIdx;
  }
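
  /// Compute the number of predecessors of the given block, memoizing the
  /// answer in BBNumPreds.
  ///
  /// Note that the cached count is stored biased by one so that the
  /// DenseMap's default-constructed value of zero can be read as "not
  /// computed yet".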
  unsigned getNumPreds(const BasicBlock *BB) {
    unsigned &NP = BBNumPreds[BB];
    if (NP == 0)
      NP = std::distance(pred_begin(BB), pred_end(BB)) + 1;
    return NP - 1;
  }

  void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                               AllocaInfo &Info);
  void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                           const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
                           SmallPtrSet<BasicBlock *, 32> &LiveInBlocks);
  void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                  RenamePassData::ValVector &IncVals,
                  std::vector<RenamePassData> &Worklist);
  bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
};

} // end of anonymous namespace

/// \brief Walk a small vector of dead instructions and recursively remove them
/// and subsequently dead instructions.
///
/// This is only valid to call on dead instructions using an alloca which is
/// promotable, as we leverage that assumption to delete them faster.
static void removeDeadInstructions(AllocaInst *AI,
                                   SmallVectorImpl<Instruction *> &DeadInsts) {
  while (!DeadInsts.empty()) {
    Instruction *I = DeadInsts.pop_back_val();

    // Don't delete the alloca itself.
    if (I == AI)
      continue;

    // Note that we open code the deletion algorithm here because we know
    // a priori that all of the instructions using an alloca that reaches here
    // are trivially dead when their use list becomes empty (the only risk is
    // lifetime markers, which we specifically want to nuke). By coding it here
    // we can skip the triviality test and be more efficient.
    //
    // Null out all of the instruction's operands to see if any operand becomes
    // dead as we go.
    for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE;
         ++OI) {
      Instruction *Op = dyn_cast<Instruction>(*OI);
      if (!Op)
        continue;

      OI->set(0);
      if (!Op->use_empty())
        continue;

      DeadInsts.push_back(Op);
    }
    I->eraseFromParent();
  }
}

/// \brief Rewrite as many loads as possible given a single store.
///
/// When there is only a single store, we can use the domtree to trivially
/// replace all of the dominated loads with the stored value. Do so, and return
/// true if this has successfully promoted the alloca entirely. If this returns
/// false there were some loads which were not dominated by the single store
/// and thus must be phi-ed with undef. We fall back to the standard alloca
/// promotion algorithm in that case.
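///
/// For illustration (a hypothetical fragment, not from the test suite): given
///
///   store i32 %v, i32* %a
///   ...
///   %x = load i32* %a     ; dominated by the store
///
/// the load is simply replaced with %v; only loads that are not dominated by
/// the store force the fall-back path.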
static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
                                     LargeBlockInfo &LBI,
                                     DominatorTree &DT,
                                     AliasSetTracker *AST) {
  StoreInst *OnlyStore = Info.OnlyStore;
  bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
  BasicBlock *StoreBB = OnlyStore->getParent();
  int StoreIndex = -1;

  // Clear out UsingBlocks. We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
    Instruction *UserInst = cast<Instruction>(*UI++);
    if (!isa<LoadInst>(UserInst)) {
      assert(UserInst == OnlyStore && "Should only have load/stores");
      continue;
    }
    LoadInst *LI = cast<LoadInst>(UserInst);

    // Okay, if we have a load from the alloca, we want to replace it with the
    // only value stored to the alloca. We can do this if the value is
    // dominated by the store. If not, we use the rest of the mem2reg machinery
    // to insert the phi nodes as needed.
    if (!StoringGlobalVal) { // Non-instructions are always dominated.
      if (LI->getParent() == StoreBB) {
        // If we have a use that is in the same block as the store, compare the
        // indices of the two instructions to see which one came first. If the
        // load came before the store, we can't handle it.
        if (StoreIndex == -1)
          StoreIndex = LBI.getInstructionIndex(OnlyStore);

        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
          // Can't handle this load, bail out.
          Info.UsingBlocks.push_back(StoreBB);
          continue;
        }

      } else if (LI->getParent() != StoreBB &&
                 !DT.dominates(StoreBB, LI->getParent())) {
        // If the load and store are in different blocks, use BB dominance to
        // check their relationships. If the store doesn't dom the use, bail
        // out.
        Info.UsingBlocks.push_back(LI->getParent());
        continue;
      }
    }

    // Otherwise, we *can* safely rewrite this load.
    Value *ReplVal = OnlyStore->getOperand(0);
    // If the replacement value is the load, this must occur in unreachable
    // code.
    if (ReplVal == LI)
      ReplVal = UndefValue::get(LI->getType());
    LI->replaceAllUsesWith(ReplVal);
    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Finally, after the scan, check to see if the store is all that is left.
  if (!Info.UsingBlocks.empty())
    return false; // If not, we'll have to fall back for the remainder.

  // Record debuginfo for the store and remove the declaration's
  // debuginfo.
  if (DbgDeclareInst *DDI = Info.DbgDeclare) {
    DIBuilder DIB(*AI->getParent()->getParent()->getParent());
    ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
    DDI->eraseFromParent();
  }
  // Remove the (now dead) store and alloca.
  Info.OnlyStore->eraseFromParent();
  LBI.deleteValue(Info.OnlyStore);

  if (AST)
    AST->deleteValue(AI);
  AI->eraseFromParent();
  LBI.deleteValue(AI);
  return true;
}

namespace {
/// This is a helper predicate used to search by the first element of a pair.
struct StoreIndexSearchPredicate {
  bool operator()(const std::pair<unsigned, StoreInst *> &LHS,
                  const std::pair<unsigned, StoreInst *> &RHS) {
    return LHS.first < RHS.first;
  }
};
}

/// Many allocas are only used within a single basic block. If this is the
/// case, avoid traversing the CFG and inserting a lot of potentially useless
/// PHI nodes by just performing a single linear pass over the basic block
/// using the alloca.
///
/// Loads which are not preceded by a store to the alloca within the block
/// simply take an undef value. This is correct in cases where, due to control
/// flow, the alloca is potentially undefined on some control flow paths, e.g.
/// code like this is potentially correct:
///
///   for (...) { if (c) { A = undef; undef = B; } }
///
/// ... so long as A is not used before undef is set.
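///
/// For illustration (a hypothetical single-block fragment):
///
///   store i32 1, i32* %a
///   %x = load i32* %a     ; rewritten to use the value 1
///   store i32 2, i32* %a
///   %y = load i32* %a     ; rewritten to use the value 2
///
/// Each load takes the value of the nearest store above it, found by a
/// binary search over the sorted store indices.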
static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
                                     LargeBlockInfo &LBI,
                                     AliasSetTracker *AST) {
  // The trickiest case to handle is when we have large blocks. Because of
  // this, this code is optimized assuming that large blocks happen. This does
  // not significantly pessimize the small block case. This uses LargeBlockInfo
  // to make it efficient to get the index of various operations in the block.

  // Walk the uses of the alloca, getting the locations of all stores.
  typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy;
  StoresByIndexTy StoresByIndex;

  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;
       ++UI)
    if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));

  // Sort the stores by their index, making it efficient to do a lookup with a
  // binary search.
  std::sort(StoresByIndex.begin(), StoresByIndex.end(),
            StoreIndexSearchPredicate());

  // Walk all of the loads from this alloca, replacing them with the nearest
  // store above them, if any.
  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
    LoadInst *LI = dyn_cast<LoadInst>(*UI++);
    if (!LI)
      continue;

    unsigned LoadIdx = LBI.getInstructionIndex(LI);

    // Find the nearest store that has a lower index than this load.
    StoresByIndexTy::iterator I =
        std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
                         std::make_pair(LoadIdx, static_cast<StoreInst *>(0)),
                         StoreIndexSearchPredicate());

    if (I == StoresByIndex.begin())
      // If there is no store before this load, the load takes the undef value.
      LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
    else
      // Otherwise, there was a store before this load; the load takes its
      // value.
      LI->replaceAllUsesWith(llvm::prior(I)->second->getOperand(0));

    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Remove the (now dead) stores and alloca.
  while (!AI->use_empty()) {
    StoreInst *SI = cast<StoreInst>(AI->use_back());
    // Record debuginfo for the store before removing it.
    if (DbgDeclareInst *DDI = Info.DbgDeclare) {
      DIBuilder DIB(*AI->getParent()->getParent()->getParent());
      ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
    }
    SI->eraseFromParent();
    LBI.deleteValue(SI);
  }

  if (AST)
    AST->deleteValue(AI);
  AI->eraseFromParent();
  LBI.deleteValue(AI);

  // The alloca's debuginfo can be removed as well.
  if (DbgDeclareInst *DDI = Info.DbgDeclare)
    DDI->eraseFromParent();

  ++NumLocalPromoted;
}
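
/// \brief The driver of alloca promotion.
///
/// Allocas are handled by a series of increasingly expensive strategies:
/// trivially dead allocas are simply deleted, single-store and single-block
/// allocas take the fast paths above, and only the remainder goes through
/// full PHI placement and renaming.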
void PromoteMem2Reg::run() {
  Function &F = *DT.getRoot()->getParent();

  if (AST)
    PointerAllocaValues.resize(Allocas.size());
  AllocaDbgDeclares.resize(Allocas.size());

  AllocaInfo Info(DL);
  LargeBlockInfo LBI;

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function as the dominator tree!");

    // Calculate the set of read and write-locations for each alloca. This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    bool Good = Info.analyzeAlloca(*AI);
    (void)Good;
    assert(Good && "Cannot promote non-promotable alloca!");

    // Nuke all of the dead instructions.
    removeDeadInstructions(AI, Info.DeadInsts);

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST)
        AST->deleteValue(AI);
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed.
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);
        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform
    // a linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock) {
      promoteSingleBlockAlloca(AI, Info, LBI, AST);

      // The alloca has been processed, move on.
      RemoveFromAllocasList(AllocaNum);
      continue;
    }

    // If we haven't computed dominator tree levels, do so now.
    if (DomLevels.empty()) {
      SmallVector<DomTreeNode *, 32> Worklist;

      DomTreeNode *Root = DT.getRootNode();
      DomLevels[Root] = 0;
      Worklist.push_back(Root);

      while (!Worklist.empty()) {
        DomTreeNode *Node = Worklist.pop_back_val();
        unsigned ChildLevel = DomLevels[Node] + 1;
        for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
             CI != CE; ++CI) {
          DomLevels[*CI] = ChildLevel;
          Worklist.push_back(*CI);
        }
      }
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        BBNumbers[I] = ID++;
    }

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (Info.DbgDeclare)
      AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm. Determine which blocks need PHI
    // nodes and see if we can optimize out some work by avoiding insertion of
    // dead phi nodes.
    DetermineInsertionPoint(AI, AllocaNum, Info);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();

  // Set the incoming values for the basic block to be undef values for all of
  // the allocas. We do this in case there is a load of a value that has not
  // been stored yet. In this case, it will get this undef value.
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());
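
  // Note that RenamePass is written iteratively, with an explicit worklist
  // plus a goto-based tail loop rather than true recursion, presumably so
  // that very deep CFGs cannot overflow the call stack.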
  // Walk all basic blocks in the function, performing the SSA rename
  // algorithm and inserting the phi nodes we marked as necessary.
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
  do {
    RenamePassData RPD;
    RPD.swap(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());

  // The renamer uses the Visited set to avoid infinite loops. Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // unreachable basic blocks that were not processed by walking the CFG
    // from the entry block. Just delete the users now.
    if (!A->use_empty())
      A->replaceAllUsesWith(UndefValue::get(A->getType()));
    if (AST)
      AST->deleteValue(A);
    A->eraseFromParent();
  }

  // Remove the allocas' dbg.declare intrinsics from the function.
  for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
    if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
      DDI->eraseFromParent();

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values. This can
  // happen due to undef values coming into the PHI nodes. This process is
  // iterative, because eliminating one PHI node can cause others to be
  // removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    // Iterating over NewPhiNodes is deterministic, so it is safe to try to
    // simplify and RAUW them as we go. If it was not, we could add uses to
    // the values we replace with in a non-deterministic order, thus creating
    // non-deterministic def->use chains.
    for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
             I = NewPhiNodes.begin(),
             E = NewPhiNodes.end();
         I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
        if (AST && PN->getType()->isPointerTy())
          AST->deleteValue(PN);
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        NewPhiNodes.erase(I++);
        EliminatedAPHI = true;
        continue;
      }
      ++I;
    }
  }
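
  // For illustration (a hypothetical CFG): if an unreachable block %dead ends
  // in "br label %merge", the renamer never visits %dead, so any PHI nodes we
  // created in %merge lack an entry for it; the fix-up below fills each such
  // missing slot with undef.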
  // At this point, the renamer has added entries to PHI nodes for all
  // reachable code. Unfortunately, there may be unreachable blocks which the
  // renamer hasn't traversed. If this is the case, the PHI nodes may not have
  // incoming values for all predecessors. Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
           I = NewPhiNodes.begin(),
           E = NewPhiNodes.end();
       I != E; ++I) {
    // We want to do this once per basic block. As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if the PHI nodes are missing incoming values. We know
    // that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks. Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
          Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry.
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI node for the block. Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}

/// \brief Determine which blocks the value is live in.
///
/// These are blocks which lead to uses. Knowing this allows us to avoid
/// inserting PHI nodes into blocks which don't lead to uses (thus, the
/// inserted phi nodes would be dead).
void PromoteMem2Reg::ComputeLiveInBlocks(
    AllocaInst *AI, AllocaInfo &Info,
    const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
    SmallPtrSet<BasicBlock *, 32> &LiveInBlocks) {

  // To determine liveness, we must iterate through the predecessors of blocks
  // where the def is live. Blocks are added to the worklist if we need to
  // check their predecessors. Start with all the using blocks.
  SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
                                                    Info.UsingBlocks.end());

  // If any of the using blocks is also a definition block, check to see if the
  // definition occurs before or after the use. If it happens before the use,
  // the value isn't really live-in.
  for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
    BasicBlock *BB = LiveInBlockWorklist[i];
    if (!DefBlocks.count(BB))
      continue;

    // Okay, this is a block that both uses and defines the value. If the first
    // reference to the alloca is a def (store), then we know it isn't live-in.
    for (BasicBlock::iterator I = BB->begin();; ++I) {
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        if (SI->getOperand(1) != AI)
          continue;

        // We found a store to the alloca before a load. The alloca is not
        // actually live-in here.
        LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
        LiveInBlockWorklist.pop_back();
        --i, --e;
        break;
      }

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        if (LI->getOperand(0) != AI)
          continue;

        // Okay, we found a load before a store to the alloca. It is actually
        // live into this block.
        break;
      }
    }
  }

  // Now that we have a set of blocks where the phi is live-in, recursively add
  // their predecessors until we find the full region where the value is live.
  while (!LiveInBlockWorklist.empty()) {
    BasicBlock *BB = LiveInBlockWorklist.pop_back_val();

    // The block really is live in here, insert it into the set. If already in
    // the set, then it has already been processed.
    if (!LiveInBlocks.insert(BB))
      continue;

    // Since the value is live into BB, it is either defined in a predecessor
    // or live into it too. Add the preds to the worklist unless they are
    // defining blocks.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *P = *PI;

      // The value is not live into a predecessor if it defines the value.
      if (DefBlocks.count(P))
        continue;

      // Otherwise it is, add to the worklist.
      LiveInBlockWorklist.push_back(P);
    }
  }
}

namespace {
typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;

struct DomTreeNodeCompare {
  bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
    return LHS.second < RHS.second;
  }
};
} // end anonymous namespace

/// At this point, we're committed to promoting the alloca using IDF's, and the
/// standard SSA construction algorithm. Determine which blocks need phi nodes
/// and see if we can optimize out some work by avoiding insertion of dead phi
/// nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                             AllocaInfo &Info) {
  // Unique the set of defining blocks for efficient lookup.
  SmallPtrSet<BasicBlock *, 32> DefBlocks;
  DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());

  // Determine which blocks the value is live in. These are blocks which lead
  // to uses.
  SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
  ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

  // Use a priority queue keyed on dominator tree level so that inserted nodes
  // are handled from the bottom of the dominator tree upwards.
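  // A standard DJ-graph property guarantees that a block in the dominance
  // frontier of a node never has a greater dominator-tree level than the node
  // itself; this is what lets the walk below skip CFG successors whose level
  // exceeds the current root's.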
  typedef std::priority_queue<DomTreeNodePair,
                              SmallVector<DomTreeNodePair, 32>,
                              DomTreeNodeCompare> IDFPriorityQueue;
  IDFPriorityQueue PQ;

  for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(),
                                                     E = DefBlocks.end();
       I != E; ++I) {
    if (DomTreeNode *Node = DT.getNode(*I))
      PQ.push(std::make_pair(Node, DomLevels[Node]));
  }

  SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks;
  SmallPtrSet<DomTreeNode *, 32> Visited;
  SmallVector<DomTreeNode *, 32> Worklist;
  while (!PQ.empty()) {
    DomTreeNodePair RootPair = PQ.top();
    PQ.pop();
    DomTreeNode *Root = RootPair.first;
    unsigned RootLevel = RootPair.second;

    // Walk all dominator tree children of Root, inspecting their CFG edges
    // with targets elsewhere on the dominator tree. Only targets whose level
    // is at most Root's level are added to the iterated dominance frontier of
    // the definition set.

    Worklist.clear();
    Worklist.push_back(Root);

    while (!Worklist.empty()) {
      DomTreeNode *Node = Worklist.pop_back_val();
      BasicBlock *BB = Node->getBlock();

      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
           ++SI) {
        DomTreeNode *SuccNode = DT.getNode(*SI);

        // Quickly skip all CFG edges that are also dominator tree edges
        // instead of catching them below.
        if (SuccNode->getIDom() == Node)
          continue;

        unsigned SuccLevel = DomLevels[SuccNode];
        if (SuccLevel > RootLevel)
          continue;

        if (!Visited.insert(SuccNode))
          continue;

        BasicBlock *SuccBB = SuccNode->getBlock();
        if (!LiveInBlocks.count(SuccBB))
          continue;

        DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
        if (!DefBlocks.count(SuccBB))
          PQ.push(std::make_pair(SuccNode, SuccLevel));
      }

      for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
           CI != CE; ++CI) {
        if (!Visited.count(*CI))
          Worklist.push_back(*CI);
      }
    }
  }

  if (DFBlocks.size() > 1)
    std::sort(DFBlocks.begin(), DFBlocks.end());

  unsigned CurrentVersion = 0;
  for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
    QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
}

/// \brief Queue a phi-node to be added to a basic-block for a specific Alloca.
///
/// Returns true if there wasn't already a phi-node for that variable.
bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
                                  unsigned &Version) {
  // Look up the basic-block in question.
  PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];

  // If the BB already has a phi node added for the i'th alloca then we're
  // done!
  if (PN)
    return false;

  // Create a PHI node of the alloca's allocated type and insert it at the
  // head of the BasicBlock.
  PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
                       Allocas[AllocaNo]->getName() + "." + Twine(Version++),
                       BB->begin());
  ++NumPHIInsert;
  PhiToAllocaMap[PN] = AllocaNo;

  if (AST && PN->getType()->isPointerTy())
    AST->copyValue(PointerAllocaValues[AllocaNo], PN);

  return true;
}

/// \brief Recursively traverse the CFG of the function, renaming loads and
/// stores to the allocas which we are promoting.
///
/// IncomingVals indicates what value each Alloca contains on exit from the
/// predecessor block Pred.
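///
/// Only the first successor of each block is followed directly (via the
/// goto-based loop at the bottom); the remaining successors are pushed onto
/// Worklist for later visits, which avoids worklist churn for straight-line
/// regions.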
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
                                RenamePassData::ValVector &IncomingVals,
                                std::vector<RenamePassData> &Worklist) {
NextIteration:
  // If we are inserting any phi nodes into this BB, they will already be in
  // the block.
  if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
    // If we have PHI nodes to update, compute the number of edges from Pred to
    // BB.
    if (PhiToAllocaMap.count(APN)) {
      // We want to be able to distinguish the PHI nodes inserted by this
      // invocation of mem2reg from the phi nodes that already existed in the
      // IR before mem2reg was run. We determine that APN is being inserted
      // because it is missing incoming edges. All other PHI nodes being
      // inserted by this pass of mem2reg will have the same number of incoming
      // operands so far. Remember this count.
      unsigned NewPHINumOperands = APN->getNumOperands();

      unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB);
      assert(NumEdges && "Must be at least one edge from Pred to BB!");

      // Add entries for all the phis.
      BasicBlock::iterator PNI = BB->begin();
      do {
        unsigned AllocaNo = PhiToAllocaMap[APN];

        // Add N incoming values to the PHI node.
        for (unsigned i = 0; i != NumEdges; ++i)
          APN->addIncoming(IncomingVals[AllocaNo], Pred);

        // The currently active variable for this block is now the PHI.
        IncomingVals[AllocaNo] = APN;

        // Get the next phi node.
        ++PNI;
        APN = dyn_cast<PHINode>(PNI);
        if (APN == 0)
          break;

        // Verify that it is missing entries. If not, it is not being inserted
        // by this mem2reg invocation so we want to ignore it.
      } while (APN->getNumOperands() == NewPHINumOperands);
    }
  }

  // Don't revisit blocks.
  if (!Visited.insert(BB))
    return;

  for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
    Instruction *I = II++; // get the instruction, increment iterator

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
      if (!Src)
        continue;

      DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src);
      if (AI == AllocaLookup.end())
        continue;

      Value *V = IncomingVals[AI->second];

      // Anything using the load now uses the current value.
      LI->replaceAllUsesWith(V);
      if (AST && LI->getType()->isPointerTy())
        AST->deleteValue(LI);
      BB->getInstList().erase(LI);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Delete this instruction and mark the name as the current holder of the
      // value.
      AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
      if (!Dest)
        continue;

      DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
      if (ai == AllocaLookup.end())
        continue;

      // What value were we writing?
      IncomingVals[ai->second] = SI->getOperand(0);
      // Record debuginfo for the store before removing it.
      if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second])
        ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
      BB->getInstList().erase(SI);
    }
  }

  // 'Recurse' to our successors.
  succ_iterator I = succ_begin(BB), E = succ_end(BB);
  if (I == E)
    return;

  // Keep track of the successors so we don't visit the same successor twice.
  SmallPtrSet<BasicBlock *, 8> VisitedSuccs;

  // Handle the first successor without using the worklist.
  VisitedSuccs.insert(*I);
  Pred = BB;
  BB = *I;
  ++I;

  for (; I != E; ++I)
    if (VisitedSuccs.insert(*I))
      Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));

  goto NextIteration;
}

bool llvm::isAllocaPromotable(const AllocaInst *AI, const DataLayout *DL) {
  // We cast away constness because we re-use the non-const analysis that the
  // actual promotion routine uses. While it is non-const, it doesn't actually
  // mutate anything at this phase, and we discard the non-const results that
  // promotion uses to mutate the alloca.
  return AllocaInfo(DL).analyzeAlloca(*const_cast<AllocaInst *>(AI));
}

void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
                           const DataLayout *DL, AliasSetTracker *AST) {
  // If there is nothing to do, bail out...
  if (Allocas.empty())
    return;

  PromoteMem2Reg(Allocas, DT, DL, AST).run();
}