1 //===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file promotes memory references to be register references. It promotes 11 // alloca instructions which only have loads and stores as uses. An alloca is 12 // transformed by using iterated dominator frontiers to place PHI nodes, then 13 // traversing the function in depth-first order to rewrite loads and stores as 14 // appropriate. 15 // 16 // The algorithm used here is based on: 17 // 18 // Sreedhar and Gao. A linear time algorithm for placing phi-nodes. 19 // In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of 20 // Programming Languages 21 // POPL '95. ACM, New York, NY, 62-73. 22 // 23 // It has been modified to not explicitly use the DJ graph data structure and to 24 // directly compute pruned SSA using per-variable liveness information. 
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/DIBuilder.h"
#include "llvm/DebugInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <queue>
using namespace llvm;

STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
STATISTIC(NumSingleStore,   "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca,    "Number of dead alloca's removed");
STATISTIC(NumPHIInsert,     "Number of PHI nodes inserted");

namespace llvm {
// DenseMapInfo specialization so that (BasicBlock*, unsigned) pairs can be
// used as DenseMap keys (see the NewPhiNodes map below, which is keyed on
// (block number, alloca number) pairs).
template<>
struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
  typedef std::pair<BasicBlock*, unsigned> EltTy;
  static inline EltTy getEmptyKey() {
    return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
  }
  static inline EltTy getTombstoneKey() {
    return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
  }
  static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
    using llvm::hash_value;
    return static_cast<unsigned>(hash_value(Val));
  }
  static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
    return LHS == RHS;
  }
};
}

/// isAllocaPromotable - Return true if this alloca is legal for promotion.
/// This is true if there are only loads and stores to the alloca.
///
bool llvm::isAllocaPromotable(const AllocaInst *AI) {
  // FIXME: If the memory unit is of pointer or integer type, we can permit
  // assignments to subsections of the memory unit.

  // Only allow direct and non-volatile loads and stores...
  for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
       UI != UE; ++UI) {   // Loop over all of the uses of the alloca
    const User *U = *UI;
    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      // Note that atomic loads can be transformed; atomic semantics do
      // not have any meaning for a local alloca.
      if (LI->isVolatile())
        return false;
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == AI)
        return false;   // Don't allow a store OF the AI, only INTO the AI.
      // Note that atomic stores can be transformed; atomic semantics do
      // not have any meaning for a local alloca.
      if (SI->isVolatile())
        return false;
    } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
      // Lifetime markers are the only intrinsic uses tolerated; they are
      // stripped off before promotion (see removeLifetimeIntrinsicUsers).
      if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
          II->getIntrinsicID() != Intrinsic::lifetime_end)
        return false;
    } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      // An i8* bitcast is allowed only if it exists solely to feed lifetime
      // markers.
      if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
        return false;
      if (!onlyUsedByLifetimeMarkers(BCI))
        return false;
    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      // Likewise, an all-zero-index (no-op) GEP to i8* feeding lifetime
      // markers is acceptable.
      if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
        return false;
      if (!GEPI->hasAllZeroIndices())
        return false;
      if (!onlyUsedByLifetimeMarkers(GEPI))
        return false;
    } else {
      return false;
    }
  }

  return true;
}

namespace {

struct AllocaInfo;

// Data package used by RenamePass()
class RenamePassData {
public:
  typedef std::vector<Value *> ValVector;

  RenamePassData() : BB(NULL), Pred(NULL), Values() {}
  RenamePassData(BasicBlock *B, BasicBlock *P,
                 const ValVector &V) : BB(B), Pred(P), Values(V) {}
  BasicBlock *BB;      // Block to rename.
  BasicBlock *Pred;    // CFG predecessor the renamer arrived from.
  ValVector Values;    // Incoming value of each alloca, indexed by alloca #.

  void swap(RenamePassData &RHS) {
    std::swap(BB, RHS.BB);
    std::swap(Pred, RHS.Pred);
    Values.swap(RHS.Values);
  }
};

/// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of
/// load/store instructions in the block that directly load or store an alloca.
///
/// This functionality is important because it avoids scanning large basic
/// blocks multiple times when promoting many allocas in the same block.
class LargeBlockInfo {
  /// InstNumbers - For each instruction that we track, keep the index of the
  /// instruction.  The index starts out as the number of the instruction from
  /// the start of the block.
  DenseMap<const Instruction *, unsigned> InstNumbers;
public:

  /// isInterestingInstruction - This code only looks at accesses to allocas.
  static bool isInterestingInstruction(const Instruction *I) {
    return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
           (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
  }

  /// getInstructionIndex - Get or calculate the index of the specified
  /// instruction.
  unsigned getInstructionIndex(const Instruction *I) {
    assert(isInterestingInstruction(I) &&
           "Not a load/store to/from an alloca?");

    // If we already have this instruction number, return it.
    DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
    if (It != InstNumbers.end()) return It->second;

    // Scan the whole block to get the instruction.  This accumulates
    // information for every interesting instruction in the block, in order to
    // avoid gratuitous rescans.
    const BasicBlock *BB = I->getParent();
    unsigned InstNo = 0;
    for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
         BBI != E; ++BBI)
      if (isInterestingInstruction(BBI))
        InstNumbers[BBI] = InstNo++;
    It = InstNumbers.find(I);

    assert(It != InstNumbers.end() && "Didn't insert instruction?");
    return It->second;
  }

  // Forget a (deleted) instruction.  Remaining indices keep their relative
  // order, which is all the comparisons below need.
  void deleteValue(const Instruction *I) {
    InstNumbers.erase(I);
  }

  void clear() {
    InstNumbers.clear();
  }
};

struct PromoteMem2Reg {
  /// Allocas - The alloca instructions being promoted.
  ///
  std::vector<AllocaInst*> Allocas;
  DominatorTree &DT;
  DIBuilder *DIB;   // Lazily created the first time debug info is updated.

  /// AST - An AliasSetTracker object to update.  If null, don't update it.
  ///
  AliasSetTracker *AST;

  /// AllocaLookup - Reverse mapping of Allocas.
  ///
  DenseMap<AllocaInst*, unsigned>  AllocaLookup;

  /// NewPhiNodes - The PhiNodes we're adding.  That map is used to simplify
  /// some Phi nodes as we iterate over it, so it should have deterministic
  /// iterators.  We could use a MapVector, but since we already maintain a
  /// map from BasicBlock* to a stable numbering (BBNumbers), the DenseMap is
  /// more efficient (also supports removal).
  ///
  DenseMap<std::pair<unsigned, unsigned>, PHINode*> NewPhiNodes;

  /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
  /// it corresponds to.
  DenseMap<PHINode*, unsigned> PhiToAllocaMap;

  /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
  /// each alloca that is of pointer type, we keep track of what to copyValue
  /// to the inserted PHI nodes here.
  ///
  std::vector<Value*> PointerAllocaValues;

  /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare
  /// intrinsic that describes it, if any, so that we can convert it to a
  /// dbg.value intrinsic if the alloca gets promoted.
  SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares;

  /// Visited - The set of basic blocks the renamer has already visited.
  ///
  SmallPtrSet<BasicBlock*, 16> Visited;

  /// BBNumbers - Contains a stable numbering of basic blocks to avoid
  /// non-deterministic behavior.
  DenseMap<BasicBlock*, unsigned> BBNumbers;

  /// DomLevels - Maps DomTreeNodes to their level in the dominator tree.
  DenseMap<DomTreeNode*, unsigned> DomLevels;

  /// BBNumPreds - Lazily compute the number of predecessors a block has.
  DenseMap<const BasicBlock*, unsigned> BBNumPreds;
public:
  PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
                 AliasSetTracker *ast)
    : Allocas(A), DT(dt), DIB(0), AST(ast) {}
  ~PromoteMem2Reg() {
    delete DIB;
  }

  void run();

  /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
  ///
  bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
    return DT.dominates(BB1, BB2);
  }

private:
  // Swap-and-pop removal; the index is decremented so the caller's loop
  // revisits the slot that just received the moved-in element.
  void RemoveFromAllocasList(unsigned &AllocaIdx) {
    Allocas[AllocaIdx] = Allocas.back();
    Allocas.pop_back();
    --AllocaIdx;
  }

  // Cached with a +1 bias so that a default-constructed entry (0) means
  // "not computed yet".
  unsigned getNumPreds(const BasicBlock *BB) {
    unsigned &NP = BBNumPreds[BB];
    if (NP == 0)
      NP = std::distance(pred_begin(BB), pred_end(BB))+1;
    return NP-1;
  }

  void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                               AllocaInfo &Info);
  void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                           const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
                           SmallPtrSet<BasicBlock*, 32> &LiveInBlocks);

  void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
                                LargeBlockInfo &LBI);
  void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
                                LargeBlockInfo &LBI);

  void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                  RenamePassData::ValVector &IncVals,
                  std::vector<RenamePassData> &Worklist);
  bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
};

struct AllocaInfo {
  SmallVector<BasicBlock*, 32> DefiningBlocks;
  SmallVector<BasicBlock*, 32> UsingBlocks;

  StoreInst  *OnlyStore;        // Last store seen; *the* store when
                                // DefiningBlocks.size() == 1.
  BasicBlock *OnlyBlock;
  bool OnlyUsedInOneBlock;

  Value *AllocaPointerVal;      // Some value stored to / loaded from the
                                // alloca, used for AliasSetTracker updates.
  DbgDeclareInst *DbgDeclare;

  void clear() {
    DefiningBlocks.clear();
    UsingBlocks.clear();
    OnlyStore = 0;
    OnlyBlock = 0;
    OnlyUsedInOneBlock = true;
    AllocaPointerVal = 0;
    DbgDeclare = 0;
  }

  /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our
  /// ivars.
  void AnalyzeAlloca(AllocaInst *AI) {
    clear();

    // As we scan the uses of the alloca instruction, keep track of stores,
    // and decide whether all of the loads and stores to the alloca are within
    // the same basic block.
    for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
         UI != E;)  {
      Instruction *User = cast<Instruction>(*UI++);

      if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
        // Remember the basic blocks which define new values for the alloca
        DefiningBlocks.push_back(SI->getParent());
        AllocaPointerVal = SI->getOperand(0);
        OnlyStore = SI;
      } else {
        LoadInst *LI = cast<LoadInst>(User);
        // Otherwise it must be a load instruction, keep track of variable
        // reads.
        UsingBlocks.push_back(LI->getParent());
        AllocaPointerVal = LI;
      }

      if (OnlyUsedInOneBlock) {
        if (OnlyBlock == 0)
          OnlyBlock = User->getParent();
        else if (OnlyBlock != User->getParent())
          OnlyUsedInOneBlock = false;
      }
    }

    DbgDeclare = FindAllocaDbgDeclare(AI);
  }
};

typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair;

// Orders (node, level) pairs by dominator-tree level; used by the priority
// queue that drives iterated-dominance-frontier computation in
// DetermineInsertionPoint.
struct DomTreeNodeCompare {
  bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
    return LHS.second < RHS.second;
  }
};
}  // end of anonymous namespace

static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
  // Knowing that this alloca is promotable, we know that it's safe to kill all
  // instructions except for load and store.

  for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
       UI != UE;) {
    Instruction *I = cast<Instruction>(*UI);
    ++UI;   // Advance before any erasure below invalidates the use.
    if (isa<LoadInst>(I) || isa<StoreInst>(I))
      continue;

    if (!I->getType()->isVoidTy()) {
      // The only users of this bitcast/GEP instruction are lifetime intrinsics.
      // Follow the use/def chain to erase them now instead of leaving it for
      // dead code elimination later.
      for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
           UI != UE;) {
        Instruction *Inst = cast<Instruction>(*UI);
        ++UI;   // Advance before erasing the user.
        Inst->eraseFromParent();
      }
    }
    I->eraseFromParent();
  }
}

// run - Promote every alloca in the Allocas list, cheapest strategy first:
// dead allocas are deleted, single-store and single-block allocas are handled
// by linear scans, and everything else goes through PHI placement on the
// iterated dominance frontier followed by the renaming pass.
void PromoteMem2Reg::run() {
  Function &F = *DT.getRoot()->getParent();

  if (AST) PointerAllocaValues.resize(Allocas.size());
  AllocaDbgDeclares.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) &&
           "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is same as DF!");

    removeLifetimeIntrinsicUsers(AI);

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST) AST->deleteValue(AI);
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write-locations for each alloca.  This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      RewriteSingleStoreAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the store is all that is left.
      if (Info.UsingBlocks.empty()) {
        // Record debuginfo for the store and remove the declaration's
        // debuginfo.
        if (DbgDeclareInst *DDI = Info.DbgDeclare) {
          if (!DIB)
            DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent());
          ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB);
          DDI->eraseFromParent();
        }
        // Remove the (now dead) store and alloca.
        Info.OnlyStore->eraseFromParent();
        LBI.deleteValue(Info.OnlyStore);

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock) {
      PromoteSingleBlockAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the stores are all that is
      // left.
      if (Info.UsingBlocks.empty()) {

        // Remove the (now dead) stores and alloca.
        while (!AI->use_empty()) {
          StoreInst *SI = cast<StoreInst>(AI->use_back());
          // Record debuginfo for the store before removing it.
          if (DbgDeclareInst *DDI = Info.DbgDeclare) {
            if (!DIB)
              DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
            ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
          }
          SI->eraseFromParent();
          LBI.deleteValue(SI);
        }

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        // The alloca's debuginfo can be removed as well.
        if (DbgDeclareInst *DDI = Info.DbgDeclare)
          DDI->eraseFromParent();

        ++NumLocalPromoted;
        continue;
      }
    }

    // If we haven't computed dominator tree levels, do so now (lazily, so the
    // fast paths above never pay for it).
    if (DomLevels.empty()) {
      SmallVector<DomTreeNode*, 32> Worklist;

      DomTreeNode *Root = DT.getRootNode();
      DomLevels[Root] = 0;
      Worklist.push_back(Root);

      while (!Worklist.empty()) {
        DomTreeNode *Node = Worklist.pop_back_val();
        unsigned ChildLevel = DomLevels[Node] + 1;
        for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
             CI != CE; ++CI) {
          DomLevels[*CI] = ChildLevel;
          Worklist.push_back(*CI);
        }
      }
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        BBNumbers[I] = ID++;
    }

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm.  Determine which blocks need PHI
    // nodes and see if we can optimize out some work by avoiding insertion of
    // dead phi nodes.
    DetermineInsertionPoint(AI, AllocaNum, Info);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();


  // Set the incoming values for the basic block to be null values for all of
  // the alloca's.  We do this in case there is a load of a value that has not
  // been stored yet.  In this case, it will get this null value.
  //
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // Walks all basic blocks in the function performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary
  //
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
  do {
    RenamePassData RPD;
    RPD.swap(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());

  // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // unreachable basic blocks that were not processed by walking the dominator
    // tree.  Just delete the users now.
    if (!A->use_empty())
      A->replaceAllUsesWith(UndefValue::get(A->getType()));
    if (AST) AST->deleteValue(A);
    A->eraseFromParent();
  }

  // Remove alloca's dbg.declare intrinsics from the function.
  for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
    if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
      DDI->eraseFromParent();

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values.  This can
  // happen due to undef values coming into the PHI nodes.  This process is
  // iterative, because eliminating one PHI node can cause others to be removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    // Iterating over NewPhiNodes is deterministic, so it is safe to try to
    // simplify and RAUW them as we go.  If it was not, we could add uses to
    // the values we replace with in a non-deterministic order, thus creating
    // non-deterministic def->use chains.
    for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I =
           NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
        if (AST && PN->getType()->isPointerTy())
          AST->deleteValue(PN);
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        NewPhiNodes.erase(I++);
        EliminatedAPHI = true;
        continue;
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all reachable
  // code.  Unfortunately, there may be unreachable blocks which the renamer
  // hasn't traversed.  If this is the case, the PHI nodes may not
  // have incoming values for all predecessors.  Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  //
  for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I =
         NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
    // We want to do this once per basic block.  As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if the PHI nodes are missing incoming values.  We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks.  Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVector<BasicBlock*, 16>::iterator EntIt =
        std::lower_bound(Preds.begin(), Preds.end(),
                         SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI nodes for the block.  Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}


/// ComputeLiveInBlocks - Determine which blocks the value is live in.  These
/// are blocks which lead to uses.  Knowing this allows us to avoid inserting
/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
/// would be dead).
void PromoteMem2Reg::
ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                    const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
                    SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {

  // To determine liveness, we must iterate through the predecessors of blocks
  // where the def is live.  Blocks are added to the worklist if we need to
  // check their predecessors.  Start with all the using blocks.
  SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
                                                   Info.UsingBlocks.end());

  // If any of the using blocks is also a definition block, check to see if the
  // definition occurs before or after the use.  If it happens before the use,
  // the value isn't really live-in.
  for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
    BasicBlock *BB = LiveInBlockWorklist[i];
    if (!DefBlocks.count(BB)) continue;

    // Okay, this is a block that both uses and defines the value.  If the first
    // reference to the alloca is a def (store), then we know it isn't live-in.
    // The loop always terminates: BB is a using block, so it contains either a
    // load or a store of AI.
    for (BasicBlock::iterator I = BB->begin(); ; ++I) {
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        if (SI->getOperand(1) != AI) continue;

        // We found a store to the alloca before a load.  The alloca is not
        // actually live-in here.  Remove BB with swap-and-pop.
        LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
        LiveInBlockWorklist.pop_back();
        --i, --e;
        break;
      }

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        if (LI->getOperand(0) != AI) continue;

        // Okay, we found a load before a store to the alloca.  It is actually
        // live into this block.
        break;
      }
    }
  }

  // Now that we have a set of blocks where the phi is live-in, recursively add
  // their predecessors until we find the full region the value is live.
  while (!LiveInBlockWorklist.empty()) {
    BasicBlock *BB = LiveInBlockWorklist.pop_back_val();

    // The block really is live in here, insert it into the set.  If already in
    // the set, then it has already been processed.
    if (!LiveInBlocks.insert(BB))
      continue;

    // Since the value is live into BB, it is either defined in a predecessor or
    // live into it too.  Add the preds to the worklist unless they are a
    // defining block.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *P = *PI;

      // The value is not live into a predecessor if it defines the value.
      if (DefBlocks.count(P))
        continue;

      // Otherwise it is, add to the worklist.
      LiveInBlockWorklist.push_back(P);
    }
  }
}

/// DetermineInsertionPoint - At this point, we're committed to promoting the
/// alloca using IDF's, and the standard SSA construction algorithm.  Determine
/// which blocks need phi nodes and see if we can optimize out some work by
/// avoiding insertion of dead phi nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                             AllocaInfo &Info) {
  // Unique the set of defining blocks for efficient lookup.
  SmallPtrSet<BasicBlock*, 32> DefBlocks;
  DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());

  // Determine which blocks the value is live in.  These are blocks which lead
  // to uses.
  SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
  ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

  // Use a priority queue keyed on dominator tree level so that inserted nodes
  // are handled from the bottom of the dominator tree upwards.
  typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
                              DomTreeNodeCompare> IDFPriorityQueue;
  IDFPriorityQueue PQ;

  // Seed the queue with the (reachable) blocks that contain definitions.
  for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(),
       E = DefBlocks.end(); I != E; ++I) {
    if (DomTreeNode *Node = DT.getNode(*I))
      PQ.push(std::make_pair(Node, DomLevels[Node]));
  }

  SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks;
  SmallPtrSet<DomTreeNode*, 32> Visited;
  SmallVector<DomTreeNode*, 32> Worklist;
  while (!PQ.empty()) {
    DomTreeNodePair RootPair = PQ.top();
    PQ.pop();
    DomTreeNode *Root = RootPair.first;
    unsigned RootLevel = RootPair.second;

    // Walk all dominator tree children of Root, inspecting their CFG edges with
    // targets elsewhere on the dominator tree.  Only targets whose level is at
    // most Root's level are added to the iterated dominance frontier of the
    // definition set.

    Worklist.clear();
    Worklist.push_back(Root);

    while (!Worklist.empty()) {
      DomTreeNode *Node = Worklist.pop_back_val();
      BasicBlock *BB = Node->getBlock();

      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
           ++SI) {
        DomTreeNode *SuccNode = DT.getNode(*SI);

        // Quickly skip all CFG edges that are also dominator tree edges instead
        // of catching them below.
        if (SuccNode->getIDom() == Node)
          continue;

        unsigned SuccLevel = DomLevels[SuccNode];
        if (SuccLevel > RootLevel)
          continue;

        if (!Visited.insert(SuccNode))
          continue;

        // Skip blocks the value is not live into; a PHI there would be dead.
        BasicBlock *SuccBB = SuccNode->getBlock();
        if (!LiveInBlocks.count(SuccBB))
          continue;

        DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
        if (!DefBlocks.count(SuccBB))
          PQ.push(std::make_pair(SuccNode, SuccLevel));
      }

      for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
           ++CI) {
        if (!Visited.count(*CI))
          Worklist.push_back(*CI);
      }
    }
  }

  // Sort by the stable BB numbering so PHI insertion order is deterministic.
  if (DFBlocks.size() > 1)
    std::sort(DFBlocks.begin(), DFBlocks.end());

  unsigned CurrentVersion = 0;
  for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
    QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
}

/// RewriteSingleStoreAlloca - If there is only a single store to this value,
/// replace any loads of it that are directly dominated by the definition with
/// the value stored.
void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
                                              AllocaInfo &Info,
                                              LargeBlockInfo &LBI) {
  StoreInst *OnlyStore = Info.OnlyStore;
  bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
  BasicBlock *StoreBB = OnlyStore->getParent();
  int StoreIndex = -1;   // Lazily computed index of the store in its block.

  // Clear out UsingBlocks.  We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) {
    Instruction *UserInst = cast<Instruction>(*UI++);
    if (!isa<LoadInst>(UserInst)) {
      assert(UserInst == OnlyStore && "Should only have load/stores");
      continue;
    }
    LoadInst *LI = cast<LoadInst>(UserInst);

    // Okay, if we have a load from the alloca, we want to replace it with the
    // only value stored to the alloca.  We can do this if the value is
    // dominated by the store.
    // If not, we use the rest of the mem2reg machinery
    // to insert the phi nodes as needed.
    if (!StoringGlobalVal) {  // Non-instructions are always dominated.
      if (LI->getParent() == StoreBB) {
        // If we have a use that is in the same block as the store, compare the
        // indices of the two instructions to see which one came first.  If the
        // load came before the store, we can't handle it.
        if (StoreIndex == -1)
          StoreIndex = LBI.getInstructionIndex(OnlyStore);

        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
          // Can't handle this load, bail out.
          Info.UsingBlocks.push_back(StoreBB);
          continue;
        }

      } else if (LI->getParent() != StoreBB &&
                 !dominates(StoreBB, LI->getParent())) {
        // If the load and store are in different blocks, use BB dominance to
        // check their relationships.  If the store doesn't dom the use, bail
        // out.
        Info.UsingBlocks.push_back(LI->getParent());
        continue;
      }
    }

    // Otherwise, we *can* safely rewrite this load.
    Value *ReplVal = OnlyStore->getOperand(0);
    // If the replacement value is the load, this must occur in unreachable
    // code.
    if (ReplVal == LI)
      ReplVal = UndefValue::get(LI->getType());
    LI->replaceAllUsesWith(ReplVal);
    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }
}

namespace {

/// StoreIndexSearchPredicate - This is a helper predicate used to search by the
/// first element of a pair.
struct StoreIndexSearchPredicate {
  bool operator()(const std::pair<unsigned, StoreInst*> &LHS,
                  const std::pair<unsigned, StoreInst*> &RHS) {
    return LHS.first < RHS.first;
  }
};

}

/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
/// block.  If this is the case, avoid traversing the CFG and inserting a lot of
/// potentially useless PHI nodes by just performing a single linear pass over
/// the basic block using the Alloca.
///
/// If we cannot promote this alloca (because it is read before it is written),
/// the offending blocks are left in Info.UsingBlocks so the caller can fall
/// back to the full SSA-construction path.  This is necessary in cases where,
/// due to control flow, the alloca is potentially undefined on some control
/// flow paths.  e.g. code like this is potentially correct:
///
///   for (...) { if (c) { A = undef; undef = B; } }
///
/// ... so long as A is not used before undef is set.
///
void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
                                              LargeBlockInfo &LBI) {
  // The trickiest case to handle is when we have large blocks.  Because of
  // this, this code is optimized assuming that large blocks happen.  This does
  // not significantly pessimize the small block case.  This uses LargeBlockInfo
  // to make it efficient to get the index of various operations in the block.

  // Clear out UsingBlocks.  We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  // Walk the use-def list of the alloca, getting the locations of all stores.
  typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy;
  StoresByIndexTy StoresByIndex;

  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
       UI != E; ++UI)
    if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));

  // If there are no stores to the alloca, just replace any loads with undef.
  if (StoresByIndex.empty()) {
    for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;)
      if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) {
        LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
        if (AST && LI->getType()->isPointerTy())
          AST->deleteValue(LI);
        LBI.deleteValue(LI);
        LI->eraseFromParent();
      }
    return;
  }

  // Sort the stores by their index, making it efficient to do a lookup with a
  // binary search.
  std::sort(StoresByIndex.begin(), StoresByIndex.end());

  // Walk all of the loads from this alloca, replacing them with the nearest
  // store above them, if any.
  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
    LoadInst *LI = dyn_cast<LoadInst>(*UI++);
    if (!LI) continue;

    unsigned LoadIdx = LBI.getInstructionIndex(LI);

    // Find the nearest store that has a lower index than this load.
    StoresByIndexTy::iterator I =
      std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
                       std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)),
                       StoreIndexSearchPredicate());

    // If there is no store before this load, then we can't promote this load.
    if (I == StoresByIndex.begin()) {
      // Can't handle this load, bail out.
      Info.UsingBlocks.push_back(LI->getParent());
      continue;
    }

    // Otherwise, there was a store before this load, the load takes its value.
    --I;
    LI->replaceAllUsesWith(I->second->getOperand(0));
    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }
}

// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific
// Alloca.  Returns true if there wasn't already a phi-node for that variable.
//
bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
                                  unsigned &Version) {
  // Look up the basic-block in question.
1003 PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)]; 1004 1005 // If the BB already has a phi node added for the i'th alloca then we're done! 1006 if (PN) return false; 1007 1008 // Create a PhiNode using the dereferenced type... and add the phi-node to the 1009 // BasicBlock. 1010 PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), 1011 Allocas[AllocaNo]->getName() + "." + Twine(Version++), 1012 BB->begin()); 1013 ++NumPHIInsert; 1014 PhiToAllocaMap[PN] = AllocaNo; 1015 1016 if (AST && PN->getType()->isPointerTy()) 1017 AST->copyValue(PointerAllocaValues[AllocaNo], PN); 1018 1019 return true; 1020 } 1021 1022 // RenamePass - Recursively traverse the CFG of the function, renaming loads and 1023 // stores to the allocas which we are promoting. IncomingVals indicates what 1024 // value each Alloca contains on exit from the predecessor block Pred. 1025 // 1026 void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, 1027 RenamePassData::ValVector &IncomingVals, 1028 std::vector<RenamePassData> &Worklist) { 1029 NextIteration: 1030 // If we are inserting any phi nodes into this BB, they will already be in the 1031 // block. 1032 if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) { 1033 // If we have PHI nodes to update, compute the number of edges from Pred to 1034 // BB. 1035 if (PhiToAllocaMap.count(APN)) { 1036 // We want to be able to distinguish between PHI nodes being inserted by 1037 // this invocation of mem2reg from those phi nodes that already existed in 1038 // the IR before mem2reg was run. We determine that APN is being inserted 1039 // because it is missing incoming edges. All other PHI nodes being 1040 // inserted by this pass of mem2reg will have the same number of incoming 1041 // operands so far. Remember this count. 
1042 unsigned NewPHINumOperands = APN->getNumOperands(); 1043 1044 unsigned NumEdges = 0; 1045 for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I) 1046 if (*I == BB) 1047 ++NumEdges; 1048 assert(NumEdges && "Must be at least one edge from Pred to BB!"); 1049 1050 // Add entries for all the phis. 1051 BasicBlock::iterator PNI = BB->begin(); 1052 do { 1053 unsigned AllocaNo = PhiToAllocaMap[APN]; 1054 1055 // Add N incoming values to the PHI node. 1056 for (unsigned i = 0; i != NumEdges; ++i) 1057 APN->addIncoming(IncomingVals[AllocaNo], Pred); 1058 1059 // The currently active variable for this block is now the PHI. 1060 IncomingVals[AllocaNo] = APN; 1061 1062 // Get the next phi node. 1063 ++PNI; 1064 APN = dyn_cast<PHINode>(PNI); 1065 if (APN == 0) break; 1066 1067 // Verify that it is missing entries. If not, it is not being inserted 1068 // by this mem2reg invocation so we want to ignore it. 1069 } while (APN->getNumOperands() == NewPHINumOperands); 1070 } 1071 } 1072 1073 // Don't revisit blocks. 1074 if (!Visited.insert(BB)) return; 1075 1076 for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) { 1077 Instruction *I = II++; // get the instruction, increment iterator 1078 1079 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 1080 AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); 1081 if (!Src) continue; 1082 1083 DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src); 1084 if (AI == AllocaLookup.end()) continue; 1085 1086 Value *V = IncomingVals[AI->second]; 1087 1088 // Anything using the load now uses the current value. 
1089 LI->replaceAllUsesWith(V); 1090 if (AST && LI->getType()->isPointerTy()) 1091 AST->deleteValue(LI); 1092 BB->getInstList().erase(LI); 1093 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 1094 // Delete this instruction and mark the name as the current holder of the 1095 // value 1096 AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand()); 1097 if (!Dest) continue; 1098 1099 DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); 1100 if (ai == AllocaLookup.end()) 1101 continue; 1102 1103 // what value were we writing? 1104 IncomingVals[ai->second] = SI->getOperand(0); 1105 // Record debuginfo for the store before removing it. 1106 if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) { 1107 if (!DIB) 1108 DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); 1109 ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); 1110 } 1111 BB->getInstList().erase(SI); 1112 } 1113 } 1114 1115 // 'Recurse' to our successors. 1116 succ_iterator I = succ_begin(BB), E = succ_end(BB); 1117 if (I == E) return; 1118 1119 // Keep track of the successors so we don't visit the same successor twice 1120 SmallPtrSet<BasicBlock*, 8> VisitedSuccs; 1121 1122 // Handle the first successor without using the worklist. 1123 VisitedSuccs.insert(*I); 1124 Pred = BB; 1125 BB = *I; 1126 ++I; 1127 1128 for (; I != E; ++I) 1129 if (VisitedSuccs.insert(*I)) 1130 Worklist.push_back(RenamePassData(*I, Pred, IncomingVals)); 1131 1132 goto NextIteration; 1133 } 1134 1135 /// PromoteMemToReg - Promote the specified list of alloca instructions into 1136 /// scalar registers, inserting PHI nodes as appropriate. This function does 1137 /// not modify the CFG of the function at all. All allocas must be from the 1138 /// same function. 1139 /// 1140 /// If AST is specified, the specified tracker is updated to reflect changes 1141 /// made to the IR. 
1142 /// 1143 void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, 1144 DominatorTree &DT, AliasSetTracker *AST) { 1145 // If there is nothing to do, bail out... 1146 if (Allocas.empty()) return; 1147 1148 PromoteMem2Reg(Allocas, DT, AST).run(); 1149 } 1150