//===- MergedLoadStoreMotion.cpp - merge and hoist/sink load/stores -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//! \file
//! \brief This pass performs merges of loads and stores on both sides of a
//  diamond (hammock). It hoists the loads and sinks the stores.
//
// The algorithm iteratively hoists two loads to the same address out of a
// diamond (hammock) and merges them into a single load in the header.
// Similarly, it sinks and merges two stores to the tail block (footer). The
// algorithm iterates over the instructions of one side of the diamond and
// attempts to find a matching load/store on the other side. It hoists / sinks
// when it thinks it is safe to do so. This optimization helps with, e.g.,
// hiding load latencies, triggering if-conversion, and reducing static code
// size.
21 // 22 //===----------------------------------------------------------------------===// 23 // 24 // 25 // Example: 26 // Diamond shaped code before merge: 27 // 28 // header: 29 // br %cond, label %if.then, label %if.else 30 // + + 31 // + + 32 // + + 33 // if.then: if.else: 34 // %lt = load %addr_l %le = load %addr_l 35 // <use %lt> <use %le> 36 // <...> <...> 37 // store %st, %addr_s store %se, %addr_s 38 // br label %if.end br label %if.end 39 // + + 40 // + + 41 // + + 42 // if.end ("footer"): 43 // <...> 44 // 45 // Diamond shaped code after merge: 46 // 47 // header: 48 // %l = load %addr_l 49 // br %cond, label %if.then, label %if.else 50 // + + 51 // + + 52 // + + 53 // if.then: if.else: 54 // <use %l> <use %l> 55 // <...> <...> 56 // br label %if.end br label %if.end 57 // + + 58 // + + 59 // + + 60 // if.end ("footer"): 61 // %s.sink = phi [%st, if.then], [%se, if.else] 62 // <...> 63 // store %s.sink, %addr_s 64 // <...> 65 // 66 // 67 //===----------------------- TODO -----------------------------------------===// 68 // 69 // 1) Generalize to regions other than diamonds 70 // 2) Be more aggressive merging memory operations 71 // Note that both changes require register pressure control 72 // 73 //===----------------------------------------------------------------------===// 74 75 #include "llvm/Transforms/Scalar.h" 76 #include "llvm/ADT/SetVector.h" 77 #include "llvm/ADT/SmallPtrSet.h" 78 #include "llvm/ADT/Statistic.h" 79 #include "llvm/Analysis/AliasAnalysis.h" 80 #include "llvm/Analysis/CFG.h" 81 #include "llvm/Analysis/Loads.h" 82 #include "llvm/Analysis/MemoryBuiltins.h" 83 #include "llvm/Analysis/MemoryDependenceAnalysis.h" 84 #include "llvm/Analysis/TargetLibraryInfo.h" 85 #include "llvm/IR/Metadata.h" 86 #include "llvm/IR/PatternMatch.h" 87 #include "llvm/Support/Allocator.h" 88 #include "llvm/Support/CommandLine.h" 89 #include "llvm/Support/Debug.h" 90 #include "llvm/Support/raw_ostream.h" 91 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 
92 #include "llvm/Transforms/Utils/SSAUpdater.h" 93 #include <vector> 94 using namespace llvm; 95 96 #define DEBUG_TYPE "mldst-motion" 97 98 //===----------------------------------------------------------------------===// 99 // MergedLoadStoreMotion Pass 100 //===----------------------------------------------------------------------===// 101 102 namespace { 103 class MergedLoadStoreMotion : public FunctionPass { 104 AliasAnalysis *AA; 105 MemoryDependenceAnalysis *MD; 106 107 public: 108 static char ID; // Pass identification, replacement for typeid 109 explicit MergedLoadStoreMotion(void) 110 : FunctionPass(ID), MD(nullptr), MagicCompileTimeControl(250) { 111 initializeMergedLoadStoreMotionPass(*PassRegistry::getPassRegistry()); 112 } 113 114 bool runOnFunction(Function &F) override; 115 116 private: 117 // This transformation requires dominator postdominator info 118 void getAnalysisUsage(AnalysisUsage &AU) const override { 119 AU.addRequired<TargetLibraryInfoWrapperPass>(); 120 AU.addRequired<MemoryDependenceAnalysis>(); 121 AU.addRequired<AliasAnalysis>(); 122 AU.addPreserved<AliasAnalysis>(); 123 } 124 125 // Helper routines 126 127 /// 128 /// \brief Remove instruction from parent and update memory dependence 129 /// analysis. 
130 /// 131 void removeInstruction(Instruction *Inst); 132 BasicBlock *getDiamondTail(BasicBlock *BB); 133 bool isDiamondHead(BasicBlock *BB); 134 // Routines for hoisting loads 135 bool isLoadHoistBarrierInRange(const Instruction& Start, 136 const Instruction& End, 137 LoadInst* LI); 138 LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI); 139 void hoistInstruction(BasicBlock *BB, Instruction *HoistCand, 140 Instruction *ElseInst); 141 bool isSafeToHoist(Instruction *I) const; 142 bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst); 143 bool mergeLoads(BasicBlock *BB); 144 // Routines for sinking stores 145 StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI); 146 PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1); 147 bool isStoreSinkBarrierInRange(const Instruction& Start, 148 const Instruction& End, 149 AliasAnalysis::Location Loc); 150 bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst); 151 bool mergeStores(BasicBlock *BB); 152 // The mergeLoad/Store algorithms could have Size0 * Size1 complexity, 153 // where Size0 and Size1 are the #instructions on the two sides of 154 // the diamond. The constant chosen here is arbitrary. Compiler Time 155 // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl. 156 const int MagicCompileTimeControl; 157 }; 158 159 char MergedLoadStoreMotion::ID = 0; 160 } 161 162 /// 163 /// \brief createMergedLoadStoreMotionPass - The public interface to this file. 
164 /// 165 FunctionPass *llvm::createMergedLoadStoreMotionPass() { 166 return new MergedLoadStoreMotion(); 167 } 168 169 INITIALIZE_PASS_BEGIN(MergedLoadStoreMotion, "mldst-motion", 170 "MergedLoadStoreMotion", false, false) 171 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) 172 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 173 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 174 INITIALIZE_PASS_END(MergedLoadStoreMotion, "mldst-motion", 175 "MergedLoadStoreMotion", false, false) 176 177 /// 178 /// \brief Remove instruction from parent and update memory dependence analysis. 179 /// 180 void MergedLoadStoreMotion::removeInstruction(Instruction *Inst) { 181 // Notify the memory dependence analysis. 182 if (MD) { 183 MD->removeInstruction(Inst); 184 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) 185 MD->invalidateCachedPointerInfo(LI->getPointerOperand()); 186 if (Inst->getType()->getScalarType()->isPointerTy()) { 187 MD->invalidateCachedPointerInfo(Inst); 188 } 189 } 190 Inst->eraseFromParent(); 191 } 192 193 /// 194 /// \brief Return tail block of a diamond. 
195 /// 196 BasicBlock *MergedLoadStoreMotion::getDiamondTail(BasicBlock *BB) { 197 assert(isDiamondHead(BB) && "Basic block is not head of a diamond"); 198 BranchInst *BI = (BranchInst *)(BB->getTerminator()); 199 BasicBlock *Succ0 = BI->getSuccessor(0); 200 BasicBlock *Tail = Succ0->getTerminator()->getSuccessor(0); 201 return Tail; 202 } 203 204 /// 205 /// \brief True when BB is the head of a diamond (hammock) 206 /// 207 bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) { 208 if (!BB) 209 return false; 210 if (!isa<BranchInst>(BB->getTerminator())) 211 return false; 212 if (BB->getTerminator()->getNumSuccessors() != 2) 213 return false; 214 215 BranchInst *BI = (BranchInst *)(BB->getTerminator()); 216 BasicBlock *Succ0 = BI->getSuccessor(0); 217 BasicBlock *Succ1 = BI->getSuccessor(1); 218 219 if (!Succ0->getSinglePredecessor() || 220 Succ0->getTerminator()->getNumSuccessors() != 1) 221 return false; 222 if (!Succ1->getSinglePredecessor() || 223 Succ1->getTerminator()->getNumSuccessors() != 1) 224 return false; 225 226 BasicBlock *Tail = Succ0->getTerminator()->getSuccessor(0); 227 // Ignore triangles. 228 if (Succ1->getTerminator()->getSuccessor(0) != Tail) 229 return false; 230 return true; 231 } 232 233 /// 234 /// \brief True when instruction is a hoist barrier for a load 235 /// 236 /// Whenever an instruction could possibly modify the value 237 /// being loaded or protect against the load from happening 238 /// it is considered a hoist barrier. 239 /// 240 241 bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start, 242 const Instruction& End, 243 LoadInst* LI) { 244 AliasAnalysis::Location Loc = AA->getLocation(LI); 245 return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Mod); 246 } 247 248 /// 249 /// \brief Decide if a load can be hoisted 250 /// 251 /// When there is a load in \p BB to the same address as \p LI 252 /// and it can be hoisted from \p BB, return that load. 253 /// Otherwise return Null. 
254 /// 255 LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1, 256 LoadInst *Load0) { 257 258 for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); BBI != BBE; 259 ++BBI) { 260 Instruction *Inst = BBI; 261 262 // Only merge and hoist loads when their result in used only in BB 263 if (!isa<LoadInst>(Inst) || Inst->isUsedOutsideOfBlock(BB1)) 264 continue; 265 266 LoadInst *Load1 = dyn_cast<LoadInst>(Inst); 267 BasicBlock *BB0 = Load0->getParent(); 268 269 AliasAnalysis::Location Loc0 = AA->getLocation(Load0); 270 AliasAnalysis::Location Loc1 = AA->getLocation(Load1); 271 if (AA->isMustAlias(Loc0, Loc1) && Load0->isSameOperationAs(Load1) && 272 !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1) && 273 !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0)) { 274 return Load1; 275 } 276 } 277 return nullptr; 278 } 279 280 /// 281 /// \brief Merge two equivalent instructions \p HoistCand and \p ElseInst into 282 /// \p BB 283 /// 284 /// BB is the head of a diamond 285 /// 286 void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB, 287 Instruction *HoistCand, 288 Instruction *ElseInst) { 289 DEBUG(dbgs() << " Hoist Instruction into BB \n"; BB->dump(); 290 dbgs() << "Instruction Left\n"; HoistCand->dump(); dbgs() << "\n"; 291 dbgs() << "Instruction Right\n"; ElseInst->dump(); dbgs() << "\n"); 292 // Hoist the instruction. 293 assert(HoistCand->getParent() != BB); 294 295 // Intersect optional metadata. 296 HoistCand->intersectOptionalDataWith(ElseInst); 297 HoistCand->dropUnknownMetadata(); 298 299 // Prepend point for instruction insert 300 Instruction *HoistPt = BB->getTerminator(); 301 302 // Merged instruction 303 Instruction *HoistedInst = HoistCand->clone(); 304 305 // Notify AA of the new value. 306 if (isa<LoadInst>(HoistCand)) 307 AA->copyValue(HoistCand, HoistedInst); 308 309 // Hoist instruction. 
310 HoistedInst->insertBefore(HoistPt); 311 312 HoistCand->replaceAllUsesWith(HoistedInst); 313 removeInstruction(HoistCand); 314 // Replace the else block instruction. 315 ElseInst->replaceAllUsesWith(HoistedInst); 316 removeInstruction(ElseInst); 317 } 318 319 /// 320 /// \brief Return true if no operand of \p I is defined in I's parent block 321 /// 322 bool MergedLoadStoreMotion::isSafeToHoist(Instruction *I) const { 323 BasicBlock *Parent = I->getParent(); 324 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { 325 Instruction *Instr = dyn_cast<Instruction>(I->getOperand(i)); 326 if (Instr && Instr->getParent() == Parent) 327 return false; 328 } 329 return true; 330 } 331 332 /// 333 /// \brief Merge two equivalent loads and GEPs and hoist into diamond head 334 /// 335 bool MergedLoadStoreMotion::hoistLoad(BasicBlock *BB, LoadInst *L0, 336 LoadInst *L1) { 337 // Only one definition? 338 Instruction *A0 = dyn_cast<Instruction>(L0->getPointerOperand()); 339 Instruction *A1 = dyn_cast<Instruction>(L1->getPointerOperand()); 340 if (A0 && A1 && A0->isIdenticalTo(A1) && isSafeToHoist(A0) && 341 A0->hasOneUse() && (A0->getParent() == L0->getParent()) && 342 A1->hasOneUse() && (A1->getParent() == L1->getParent()) && 343 isa<GetElementPtrInst>(A0)) { 344 DEBUG(dbgs() << "Hoist Instruction into BB \n"; BB->dump(); 345 dbgs() << "Instruction Left\n"; L0->dump(); dbgs() << "\n"; 346 dbgs() << "Instruction Right\n"; L1->dump(); dbgs() << "\n"); 347 hoistInstruction(BB, A0, A1); 348 hoistInstruction(BB, L0, L1); 349 return true; 350 } else 351 return false; 352 } 353 354 /// 355 /// \brief Try to hoist two loads to same address into diamond header 356 /// 357 /// Starting from a diamond head block, iterate over the instructions in one 358 /// successor block and try to match a load in the second successor. 
359 /// 360 bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) { 361 bool MergedLoads = false; 362 assert(isDiamondHead(BB)); 363 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); 364 BasicBlock *Succ0 = BI->getSuccessor(0); 365 BasicBlock *Succ1 = BI->getSuccessor(1); 366 // #Instructions in Succ1 for Compile Time Control 367 int Size1 = Succ1->size(); 368 int NLoads = 0; 369 for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end(); 370 BBI != BBE;) { 371 372 Instruction *I = BBI; 373 ++BBI; 374 375 // Only move non-simple (atomic, volatile) loads. 376 LoadInst *L0 = dyn_cast<LoadInst>(I); 377 if (!L0 || !L0->isSimple() || L0->isUsedOutsideOfBlock(Succ0)) 378 continue; 379 380 ++NLoads; 381 if (NLoads * Size1 >= MagicCompileTimeControl) 382 break; 383 if (LoadInst *L1 = canHoistFromBlock(Succ1, L0)) { 384 bool Res = hoistLoad(BB, L0, L1); 385 MergedLoads |= Res; 386 // Don't attempt to hoist above loads that had not been hoisted. 387 if (!Res) 388 break; 389 } 390 } 391 return MergedLoads; 392 } 393 394 /// 395 /// \brief True when instruction is a sink barrier for a store 396 /// located in Loc 397 /// 398 /// Whenever an instruction could possibly read or modify the 399 /// value being stored or protect against the store from 400 /// happening it is considered a sink barrier. 401 /// 402 403 bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction& Start, 404 const Instruction& End, 405 AliasAnalysis::Location 406 Loc) { 407 return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::ModRef); 408 } 409 410 /// 411 /// \brief Check if \p BB contains a store to the same address as \p SI 412 /// 413 /// \return The store in \p when it is safe to sink. Otherwise return Null. 414 /// 415 StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1, 416 StoreInst *Store0) { 417 DEBUG(dbgs() << "can Sink? 
: "; Store0->dump(); dbgs() << "\n"); 418 BasicBlock *BB0 = Store0->getParent(); 419 for (BasicBlock::reverse_iterator RBI = BB1->rbegin(), RBE = BB1->rend(); 420 RBI != RBE; ++RBI) { 421 Instruction *Inst = &*RBI; 422 423 if (!isa<StoreInst>(Inst)) 424 continue; 425 426 StoreInst *Store1 = cast<StoreInst>(Inst); 427 428 AliasAnalysis::Location Loc0 = AA->getLocation(Store0); 429 AliasAnalysis::Location Loc1 = AA->getLocation(Store1); 430 if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) && 431 !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store1))), 432 BB1->back(), Loc1) && 433 !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store0))), 434 BB0->back(), Loc0)) { 435 return Store1; 436 } 437 } 438 return nullptr; 439 } 440 441 /// 442 /// \brief Create a PHI node in BB for the operands of S0 and S1 443 /// 444 PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0, 445 StoreInst *S1) { 446 // Create a phi if the values mismatch. 447 PHINode *NewPN = 0; 448 Value *Opd1 = S0->getValueOperand(); 449 Value *Opd2 = S1->getValueOperand(); 450 if (Opd1 != Opd2) { 451 NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink", 452 BB->begin()); 453 NewPN->addIncoming(Opd1, S0->getParent()); 454 NewPN->addIncoming(Opd2, S1->getParent()); 455 if (NewPN->getType()->getScalarType()->isPointerTy()) { 456 // Notify AA of the new value. 
457 AA->copyValue(Opd1, NewPN); 458 AA->copyValue(Opd2, NewPN); 459 // AA needs to be informed when a PHI-use of the pointer value is added 460 for (unsigned I = 0, E = NewPN->getNumIncomingValues(); I != E; ++I) { 461 unsigned J = PHINode::getOperandNumForIncomingValue(I); 462 AA->addEscapingUse(NewPN->getOperandUse(J)); 463 } 464 if (MD) 465 MD->invalidateCachedPointerInfo(NewPN); 466 } 467 } 468 return NewPN; 469 } 470 471 /// 472 /// \brief Merge two stores to same address and sink into \p BB 473 /// 474 /// Also sinks GEP instruction computing the store address 475 /// 476 bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0, 477 StoreInst *S1) { 478 // Only one definition? 479 Instruction *A0 = dyn_cast<Instruction>(S0->getPointerOperand()); 480 Instruction *A1 = dyn_cast<Instruction>(S1->getPointerOperand()); 481 if (A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() && 482 (A0->getParent() == S0->getParent()) && A1->hasOneUse() && 483 (A1->getParent() == S1->getParent()) && isa<GetElementPtrInst>(A0)) { 484 DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump(); 485 dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n"; 486 dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n"); 487 // Hoist the instruction. 488 BasicBlock::iterator InsertPt = BB->getFirstInsertionPt(); 489 // Intersect optional metadata. 490 S0->intersectOptionalDataWith(S1); 491 S0->dropUnknownMetadata(); 492 493 // Create the new store to be inserted at the join point. 494 StoreInst *SNew = (StoreInst *)(S0->clone()); 495 Instruction *ANew = A0->clone(); 496 AA->copyValue(S0, SNew); 497 SNew->insertBefore(InsertPt); 498 ANew->insertBefore(SNew); 499 500 assert(S0->getParent() == A0->getParent()); 501 assert(S1->getParent() == A1->getParent()); 502 503 PHINode *NewPN = getPHIOperand(BB, S0, S1); 504 // New PHI operand? Use it. 
505 if (NewPN) 506 SNew->setOperand(0, NewPN); 507 removeInstruction(S0); 508 removeInstruction(S1); 509 A0->replaceAllUsesWith(ANew); 510 removeInstruction(A0); 511 A1->replaceAllUsesWith(ANew); 512 removeInstruction(A1); 513 return true; 514 } 515 return false; 516 } 517 518 /// 519 /// \brief True when two stores are equivalent and can sink into the footer 520 /// 521 /// Starting from a diamond tail block, iterate over the instructions in one 522 /// predecessor block and try to match a store in the second predecessor. 523 /// 524 bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) { 525 526 bool MergedStores = false; 527 assert(T && "Footer of a diamond cannot be empty"); 528 529 pred_iterator PI = pred_begin(T), E = pred_end(T); 530 assert(PI != E); 531 BasicBlock *Pred0 = *PI; 532 ++PI; 533 BasicBlock *Pred1 = *PI; 534 ++PI; 535 // tail block of a diamond/hammock? 536 if (Pred0 == Pred1) 537 return false; // No. 538 if (PI != E) 539 return false; // No. More than 2 predecessors. 540 541 // #Instructions in Succ1 for Compile Time Control 542 int Size1 = Pred1->size(); 543 int NStores = 0; 544 545 for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend(); 546 RBI != RBE;) { 547 548 Instruction *I = &*RBI; 549 ++RBI; 550 551 // Sink move non-simple (atomic, volatile) stores 552 if (!isa<StoreInst>(I)) 553 continue; 554 StoreInst *S0 = (StoreInst *)I; 555 if (!S0->isSimple()) 556 continue; 557 558 ++NStores; 559 if (NStores * Size1 >= MagicCompileTimeControl) 560 break; 561 if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) { 562 bool Res = sinkStore(T, S0, S1); 563 MergedStores |= Res; 564 // Don't attempt to sink below stores that had to stick around 565 // But after removal of a store and some of its feeding 566 // instruction search again from the beginning since the iterator 567 // is likely stale at this point. 
568 if (!Res) 569 break; 570 else { 571 RBI = Pred0->rbegin(); 572 RBE = Pred0->rend(); 573 DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump()); 574 } 575 } 576 } 577 return MergedStores; 578 } 579 /// 580 /// \brief Run the transformation for each function 581 /// 582 bool MergedLoadStoreMotion::runOnFunction(Function &F) { 583 MD = &getAnalysis<MemoryDependenceAnalysis>(); 584 AA = &getAnalysis<AliasAnalysis>(); 585 586 bool Changed = false; 587 DEBUG(dbgs() << "Instruction Merger\n"); 588 589 // Merge unconditional branches, allowing PRE to catch more 590 // optimization opportunities. 591 for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) { 592 BasicBlock *BB = FI++; 593 594 // Hoist equivalent loads and sink stores 595 // outside diamonds when possible 596 if (isDiamondHead(BB)) { 597 Changed |= mergeLoads(BB); 598 Changed |= mergeStores(getDiamondTail(BB)); 599 } 600 } 601 return Changed; 602 } 603