//===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass reassociates n-ary add expressions and eliminates the redundancy
// exposed by the reassociation.
//
// A motivating example:
//
//   void foo(int a, int b) {
//     bar(a + b);
//     bar((a + 2) + b);
//   }
//
// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify
// the above code to
//
//   int t = a + b;
//   bar(t);
//   bar(t + 2);
//
// However, the Reassociate pass is unable to do that because it processes each
// instruction individually and believes (a + 2) + b is the best form according
// to its rank system.
//
// To address this limitation, NaryReassociate reassociates an expression into
// a form that reuses existing instructions. As a result, NaryReassociate can
// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects
// that (a + b) has already been computed.
//
// NaryReassociate works as follows. For every instruction of the form
// (a + b) + c, it checks whether a + c or b + c is already computed by a
// dominating instruction. If so, it reassociates (a + b) + c into (a + c) + b
// or (b + c) + a and removes the redundancy accordingly. To efficiently look
// up whether an expression has been computed before, we store each instruction
// seen and its SCEV in an SCEV-to-instruction map.
//
// Although the algorithm pattern-matches only ternary additions, it
// automatically handles many >3-ary expressions by walking through the
// function in depth-first order. For example, given
//
//   (a + c) + d
//   ((a + b) + c) + d
//
// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites
// ((a + c) + b) + d into ((a + c) + d) + b.
//
// Finally, the above dominator-based algorithm may need to run for multiple
// iterations before emitting optimal code. One reason is that we only split an
// operand when it has a single use. The algorithm can eliminate an instruction
// and decrease the use count of its operands; as a result, an instruction that
// previously had multiple uses may become single-use and thus eligible for
// splitting. For example,
//
//   ac = a + c
//   ab = a + b
//   abc = ab + c
//   ab2 = ab + b
//   ab2c = ab2 + c
//
// In the first iteration, we cannot reassociate abc to ac + b because ab is
// used twice. However, we can reassociate ab2c to abc + b in the first
// iteration. As a result, ab2 becomes dead and ab is used only once in the
// second iteration.
//
// Limitations and TODO items:
//
// 1) We only consider n-ary adds, muls, and GEPs for now. This should be
//    extended and generalized.
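//
// The same reuse idea applies to GEPs (handled by tryReassociateGEP below). As
// a rough, illustrative sketch with made-up names, given
//
//   p = &a[i + j];
//   q = &a[(i + j) + 5];
//
// if the definition of p dominates q, and q is not already foldable into an
// addressing mode, NaryReassociate can rewrite q to reuse the dominating
// address computation:
//
//   q = &p[5];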
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "nary-reassociate"

namespace {
class NaryReassociate : public FunctionPass {
public:
  static char ID;

  NaryReassociate() : FunctionPass(ID) {
    initializeNaryReassociatePass(*PassRegistry::getPassRegistry());
  }

  bool doInitialization(Module &M) override {
    DL = &M.getDataLayout();
    return false;
  }
  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<ScalarEvolutionWrapperPass>();
    AU.addPreserved<TargetLibraryInfoWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
  }

private:
  // Runs only one iteration of the dominator-based algorithm. See the header
  // comments for why we need multiple iterations.
  bool doOneIteration(Function &F);

  // Reassociates I for better CSE.
  Instruction *tryReassociate(Instruction *I);

  // Reassociates GEP for better CSE.
  Instruction *tryReassociateGEP(GetElementPtrInst *GEP);
  // Tries splitting GEP at the I-th index and sees whether either part can be
  // CSE'ed. This is a helper function for tryReassociateGEP.
  //
  // \p IndexedType The element type indexed by GEP's I-th index. This is
  //    equivalent to
  //      GEP->getIndexedType(GEP->getPointerOperand(), 0-th index,
  //                          ..., I-th index).
  GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
                                              unsigned I, Type *IndexedType);
  // Given GEP's I-th index = LHS + RHS, sees whether &Base[..][LHS][..] or
  // &Base[..][RHS][..] can be CSE'ed, and rewrites GEP accordingly.
  GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
                                              unsigned I, Value *LHS,
                                              Value *RHS, Type *IndexedType);

  // Reassociates binary operators for better CSE.
  Instruction *tryReassociateBinaryOp(BinaryOperator *I);

  // A helper function for tryReassociateBinaryOp. LHS and RHS are explicitly
  // passed.
  Instruction *tryReassociateBinaryOp(Value *LHS, Value *RHS,
                                      BinaryOperator *I);
  // Rewrites I to (LHS op RHS) if LHS is already computed.
  Instruction *tryReassociatedBinaryOp(const SCEV *LHS, Value *RHS,
                                       BinaryOperator *I);

  // Returns whether V matches (Op1 op Op2), where op is I's opcode, and sets
  // Op1 and Op2 on success.
  bool matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1, Value *&Op2);

  // Gets the SCEV for (LHS op RHS), where op is I's opcode.
  const SCEV *getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
                            const SCEV *RHS);

  // Returns the closest dominator of \c Dominatee that computes
  // \c CandidateExpr. Returns null if not found.
  Instruction *findClosestMatchingDominator(const SCEV *CandidateExpr,
                                            Instruction *Dominatee);
  // GetElementPtrInst implicitly sign-extends an index if the index is shorter
  // than the pointer size. This function returns whether Index is shorter than
  // GEP's pointer size, i.e., whether Index needs to be sign-extended in order
  // to be an index of GEP.
  bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP);

  AssumptionCache *AC;
  const DataLayout *DL;
  DominatorTree *DT;
  ScalarEvolution *SE;
  TargetLibraryInfo *TLI;
  TargetTransformInfo *TTI;
  // A lookup table quickly telling which instructions compute the given SCEV.
  // Note that there can be multiple instructions at different locations
  // computing the same SCEV, so we map a SCEV to an instruction list. For
  // example,
  //
  //   if (p1)
  //     foo(a + b);
  //   if (p2)
  //     bar(a + b);
  DenseMap<const SCEV *, SmallVector<WeakVH, 2>> SeenExprs;
};
} // anonymous namespace

char NaryReassociate::ID = 0;
INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation",
                    false, false)

FunctionPass *llvm::createNaryReassociatePass() {
  return new NaryReassociate();
}

bool NaryReassociate::runOnFunction(Function &F) {
  if (skipOptnoneFunction(F))
    return false;

  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  bool Changed = false, ChangedInThisIteration;
  do {
    ChangedInThisIteration = doOneIteration(F);
    Changed |= ChangedInThisIteration;
  } while (ChangedInThisIteration);
  return Changed;
}

// Whitelist the instruction types NaryReassociate handles for now.
static bool isPotentiallyNaryReassociable(Instruction *I) {
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::GetElementPtr:
  case Instruction::Mul:
    return true;
  default:
    return false;
  }
}

bool NaryReassociate::doOneIteration(Function &F) {
  bool Changed = false;
  SeenExprs.clear();
  // Process the basic blocks in pre-order of the dominator tree. This order
  // ensures that all potential bases of a candidate have already been recorded
  // in SeenExprs by the time the candidate itself is processed.
  for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT);
       Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
    BasicBlock *BB = Node->getBlock();
    for (auto I = BB->begin(); I != BB->end(); ++I) {
      if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(&*I)) {
        const SCEV *OldSCEV = SE->getSCEV(&*I);
        if (Instruction *NewI = tryReassociate(&*I)) {
          Changed = true;
          SE->forgetValue(&*I);
          I->replaceAllUsesWith(NewI);
          // If SeenExprs contains I's WeakVH, that entry will be replaced with
          // nullptr.
          RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI);
          I = NewI->getIterator();
        }
        // Add the rewritten instruction to SeenExprs; the original instruction
        // is deleted.
        const SCEV *NewSCEV = SE->getSCEV(&*I);
        SeenExprs[NewSCEV].push_back(WeakVH(&*I));
        // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I)
        // is equivalent to I. However, ScalarEvolution::getSCEV may
        // weaken nsw, causing NewSCEV not to equal OldSCEV. For example,
        // suppose we reassociate
        //   I = &a[sext(i +nsw j)] // assuming sizeof(a[0]) = 4
        // to
        //   NewI = &a[sext(i)] + sext(j).
        //
        // ScalarEvolution computes
        //   getSCEV(I)    = a + 4 * sext(i + j)
        //   getSCEV(NewI) = a + 4 * sext(i) + 4 * sext(j)
        // which are different SCEVs.
        //
        // To alleviate this issue of ScalarEvolution not always capturing
        // equivalence, we add I to SeenExprs[OldSCEV] as well, so that both the
        // SCEV before and the SCEV after tryReassociate(I) map to I.
        //
        // This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll.
        if (NewSCEV != OldSCEV)
          SeenExprs[OldSCEV].push_back(WeakVH(&*I));
      }
    }
  }
  return Changed;
}

Instruction *NaryReassociate::tryReassociate(Instruction *I) {
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Mul:
    return tryReassociateBinaryOp(cast<BinaryOperator>(I));
  case Instruction::GetElementPtr:
    return tryReassociateGEP(cast<GetElementPtrInst>(I));
  default:
    llvm_unreachable("should be filtered out by isPotentiallyNaryReassociable");
  }
}

// FIXME: extract this method into TTI->getGEPCost.
static bool isGEPFoldable(GetElementPtrInst *GEP,
                          const TargetTransformInfo *TTI,
                          const DataLayout *DL) {
  GlobalVariable *BaseGV = nullptr;
  int64_t BaseOffset = 0;
  bool HasBaseReg = false;
  int64_t Scale = 0;

  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand()))
    BaseGV = GV;
  else
    HasBaseReg = true;

  gep_type_iterator GTI = gep_type_begin(GEP);
  for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I, ++GTI) {
    if (isa<SequentialType>(*GTI)) {
      int64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
      if (ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I)) {
        BaseOffset += ConstIdx->getSExtValue() * ElementSize;
      } else {
        // Needs a scale register.
        if (Scale != 0) {
          // No addressing mode takes two scale registers.
          return false;
        }
        Scale = ElementSize;
      }
    } else {
      StructType *STy = cast<StructType>(*GTI);
      uint64_t Field = cast<ConstantInt>(*I)->getZExtValue();
      BaseOffset += DL->getStructLayout(STy)->getElementOffset(Field);
    }
  }

  unsigned AddrSpace = GEP->getPointerAddressSpace();
  return TTI->isLegalAddressingMode(GEP->getType()->getElementType(), BaseGV,
                                    BaseOffset, HasBaseReg, Scale, AddrSpace);
}

Instruction *NaryReassociate::tryReassociateGEP(GetElementPtrInst *GEP) {
  // Not worth reassociating GEP if it is foldable.
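  // A foldable GEP can presumably be folded into the addressing modes of its
  // users and is then effectively free, so rewriting it into a chain of GEPs
  // would likely just add instructions.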
  if (isGEPFoldable(GEP, TTI, DL))
    return nullptr;

  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I) {
    if (isa<SequentialType>(*GTI++)) {
      if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I - 1, *GTI)) {
        return NewGEP;
      }
    }
  }
  return nullptr;
}

bool NaryReassociate::requiresSignExtension(Value *Index,
                                            GetElementPtrInst *GEP) {
  unsigned PointerSizeInBits =
      DL->getPointerSizeInBits(GEP->getType()->getPointerAddressSpace());
  return cast<IntegerType>(Index->getType())->getBitWidth() < PointerSizeInBits;
}

GetElementPtrInst *
NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
                                          Type *IndexedType) {
  Value *IndexToSplit = GEP->getOperand(I + 1);
  if (SExtInst *SExt = dyn_cast<SExtInst>(IndexToSplit)) {
    IndexToSplit = SExt->getOperand(0);
  } else if (ZExtInst *ZExt = dyn_cast<ZExtInst>(IndexToSplit)) {
    // zext can be treated as sext if the source is non-negative.
    if (isKnownNonNegative(ZExt->getOperand(0), *DL, 0, AC, GEP, DT))
      IndexToSplit = ZExt->getOperand(0);
  }

  if (AddOperator *AO = dyn_cast<AddOperator>(IndexToSplit)) {
    // If the I-th index needs sext and the underlying add is not equipped with
    // nsw, we cannot split the add because
    //   sext(LHS + RHS) != sext(LHS) + sext(RHS).
    if (requiresSignExtension(IndexToSplit, GEP) &&
        computeOverflowForSignedAdd(AO, *DL, AC, GEP, DT) !=
            OverflowResult::NeverOverflows)
      return nullptr;

    Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
    // IndexToSplit = LHS + RHS.
    if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I, LHS, RHS, IndexedType))
      return NewGEP;
    // Symmetrically, try IndexToSplit = RHS + LHS.
    if (LHS != RHS) {
      if (auto *NewGEP =
              tryReassociateGEPAtIndex(GEP, I, RHS, LHS, IndexedType))
        return NewGEP;
    }
  }
  return nullptr;
}

GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
    GetElementPtrInst *GEP, unsigned I, Value *LHS, Value *RHS,
    Type *IndexedType) {
  // Look for GEP's closest dominator that has the same SCEV as GEP, except
  // that the I-th index is replaced with LHS.
  SmallVector<const SCEV *, 4> IndexExprs;
  for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
    IndexExprs.push_back(SE->getSCEV(*Index));
  // Replace the I-th index with LHS.
  IndexExprs[I] = SE->getSCEV(LHS);
  if (isKnownNonNegative(LHS, *DL, 0, AC, GEP, DT) &&
      DL->getTypeSizeInBits(LHS->getType()) <
          DL->getTypeSizeInBits(GEP->getOperand(I)->getType())) {
    // Zero-extend LHS if it is known non-negative. InstCombine canonicalizes
    // sext to zext if the source operand is proved non-negative, so we do the
    // same here to make CandidateExpr more likely to match an expression that
    // has been computed before. See @reassociate_gep_assume for an example of
    // this canonicalization.
    IndexExprs[I] =
        SE->getZeroExtendExpr(IndexExprs[I], GEP->getOperand(I)->getType());
  }
  const SCEV *CandidateExpr = SE->getGEPExpr(
      GEP->getSourceElementType(), SE->getSCEV(GEP->getPointerOperand()),
      IndexExprs, GEP->isInBounds());

  Value *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
  if (Candidate == nullptr)
    return nullptr;

  IRBuilder<> Builder(GEP);
  // Candidate does not necessarily have the same pointer type as GEP. Use
  // bitcast or pointer cast to make sure they have the same type, so that the
  // later RAUW doesn't complain.
  Candidate = Builder.CreateBitOrPointerCast(Candidate, GEP->getType());
  assert(Candidate->getType() == GEP->getType());

  // NewGEP = (char *)Candidate + RHS * sizeof(IndexedType)
  uint64_t IndexedSize = DL->getTypeAllocSize(IndexedType);
  Type *ElementType = GEP->getType()->getElementType();
  uint64_t ElementSize = DL->getTypeAllocSize(ElementType);
  // Note that because I is not necessarily the last index of the GEP, the size
  // of the type at the I-th index (IndexedSize) is not necessarily divisible by
  // ElementSize. For example,
  //
  //   #pragma pack(1)
  //   struct S {
  //     int a[3];
  //     int64 b[8];
  //   };
  //   #pragma pack()
  //
  // sizeof(S) = 76 is indivisible by sizeof(int64) = 8.
  //
  // We bail out on this case for now. TODO: emit an uglygep instead.
  if (IndexedSize % ElementSize != 0)
    return nullptr;

  // NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(Candidate[0]))]
  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
  if (RHS->getType() != IntPtrTy)
    RHS = Builder.CreateSExtOrTrunc(RHS, IntPtrTy);
  if (IndexedSize != ElementSize) {
    RHS = Builder.CreateMul(
        RHS, ConstantInt::get(IntPtrTy, IndexedSize / ElementSize));
  }
  GetElementPtrInst *NewGEP =
      cast<GetElementPtrInst>(Builder.CreateGEP(Candidate, RHS));
  NewGEP->setIsInBounds(GEP->isInBounds());
  NewGEP->takeName(GEP);
  return NewGEP;
}

Instruction *NaryReassociate::tryReassociateBinaryOp(BinaryOperator *I) {
  Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
  if (auto *NewI = tryReassociateBinaryOp(LHS, RHS, I))
    return NewI;
  if (auto *NewI = tryReassociateBinaryOp(RHS, LHS, I))
    return NewI;
  return nullptr;
}

Instruction *NaryReassociate::tryReassociateBinaryOp(Value *LHS, Value *RHS,
                                                     BinaryOperator *I) {
  Value *A = nullptr, *B = nullptr;
  // To be conservative, we reassociate I only when it is the only user of
  // (A op B).
  if (LHS->hasOneUse() && matchTernaryOp(I, LHS, A, B)) {
    // I = (A op B) op RHS
    //   = (A op RHS) op B or (B op RHS) op A
    const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B);
    const SCEV *RHSExpr = SE->getSCEV(RHS);
    if (BExpr != RHSExpr) {
      if (auto *NewI =
              tryReassociatedBinaryOp(getBinarySCEV(I, AExpr, RHSExpr), B, I))
        return NewI;
    }
    if (AExpr != RHSExpr) {
      if (auto *NewI =
              tryReassociatedBinaryOp(getBinarySCEV(I, BExpr, RHSExpr), A, I))
        return NewI;
    }
  }
  return nullptr;
}

Instruction *NaryReassociate::tryReassociatedBinaryOp(const SCEV *LHSExpr,
                                                      Value *RHS,
                                                      BinaryOperator *I) {
  // Look for the closest dominator LHS of I that computes LHSExpr, and replace
  // I with LHS op RHS.
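  // For example (with illustrative values), if I is (a + b) + c and a
  // dominating instruction X already computes a + c, then LHSExpr is the SCEV
  // of a + c, RHS is b, and we emit X + b as the replacement for I.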
  auto *LHS = findClosestMatchingDominator(LHSExpr, I);
  if (LHS == nullptr)
    return nullptr;

  Instruction *NewI = nullptr;
  switch (I->getOpcode()) {
  case Instruction::Add:
    NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
    break;
  case Instruction::Mul:
    NewI = BinaryOperator::CreateMul(LHS, RHS, "", I);
    break;
  default:
    llvm_unreachable("Unexpected instruction.");
  }
  NewI->takeName(I);
  return NewI;
}

bool NaryReassociate::matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1,
                                     Value *&Op2) {
  switch (I->getOpcode()) {
  case Instruction::Add:
    return match(V, m_Add(m_Value(Op1), m_Value(Op2)));
  case Instruction::Mul:
    return match(V, m_Mul(m_Value(Op1), m_Value(Op2)));
  default:
    llvm_unreachable("Unexpected instruction.");
  }
  return false;
}

const SCEV *NaryReassociate::getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
                                           const SCEV *RHS) {
  switch (I->getOpcode()) {
  case Instruction::Add:
    return SE->getAddExpr(LHS, RHS);
  case Instruction::Mul:
    return SE->getMulExpr(LHS, RHS);
  default:
    llvm_unreachable("Unexpected instruction.");
  }
  return nullptr;
}

Instruction *
NaryReassociate::findClosestMatchingDominator(const SCEV *CandidateExpr,
                                              Instruction *Dominatee) {
  auto Pos = SeenExprs.find(CandidateExpr);
  if (Pos == SeenExprs.end())
    return nullptr;

  auto &Candidates = Pos->second;
  // Because we process the basic blocks in pre-order of the dominator tree, a
  // candidate that doesn't dominate the current instruction won't dominate any
  // future instruction either. Therefore, we pop it off the stack. This
  // optimization makes the algorithm O(n).
  while (!Candidates.empty()) {
    // Candidates stores WeakVHs, so a candidate can be nullptr if it's removed
    // during rewriting.
    if (Value *Candidate = Candidates.back()) {
      Instruction *CandidateInstruction = cast<Instruction>(Candidate);
      if (DT->dominates(CandidateInstruction, Dominatee))
        return CandidateInstruction;
    }
    Candidates.pop_back();
  }
  return nullptr;
}