//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "codegenprepare"

STATISTIC(NumBlocksElim, "Number of blocks eliminated");
STATISTIC(NumPHIsElim,   "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim,   "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
                      "sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
                       "of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
                          "computations were sunk");
STATISTIC(NumExtsMoved,  "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses,    "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumRetsDup,    "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");

static cl::opt<bool> DisableBranchOpts(
  "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
  cl::desc("Disable branch optimizations in CodeGenPrepare"));

static cl::opt<bool> DisableSelectToBranch(
  "disable-cgp-select2branch", cl::Hidden, cl::init(false),
  cl::desc("Disable select to branch conversion."));

static cl::opt<bool> AddrSinkUsingGEPs(
  "addr-sink-using-gep", cl::Hidden, cl::init(false),
  cl::desc("Address sinking in CGP using GEPs."));

static cl::opt<bool> EnableAndCmpSinking(
   "enable-andcmp-sinking", cl::Hidden, cl::init(true),
   cl::desc("Enable sinking and/cmp into 
branches.")); 82 83 namespace { 84 typedef SmallPtrSet<Instruction *, 16> SetOfInstrs; 85 typedef DenseMap<Instruction *, Type *> InstrToOrigTy; 86 87 class CodeGenPrepare : public FunctionPass { 88 /// TLI - Keep a pointer of a TargetLowering to consult for determining 89 /// transformation profitability. 90 const TargetMachine *TM; 91 const TargetLowering *TLI; 92 const TargetLibraryInfo *TLInfo; 93 DominatorTree *DT; 94 95 /// CurInstIterator - As we scan instructions optimizing them, this is the 96 /// next instruction to optimize. Xforms that can invalidate this should 97 /// update it. 98 BasicBlock::iterator CurInstIterator; 99 100 /// Keeps track of non-local addresses that have been sunk into a block. 101 /// This allows us to avoid inserting duplicate code for blocks with 102 /// multiple load/stores of the same address. 103 ValueMap<Value*, Value*> SunkAddrs; 104 105 /// Keeps track of all truncates inserted for the current function. 106 SetOfInstrs InsertedTruncsSet; 107 /// Keeps track of the type of the related instruction before their 108 /// promotion for the current function. 109 InstrToOrigTy PromotedInsts; 110 111 /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to 112 /// be updated. 113 bool ModifiedDT; 114 115 /// OptSize - True if optimizing for size. 116 bool OptSize; 117 118 public: 119 static char ID; // Pass identification, replacement for typeid 120 explicit CodeGenPrepare(const TargetMachine *TM = nullptr) 121 : FunctionPass(ID), TM(TM), TLI(nullptr) { 122 initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); 123 } 124 bool runOnFunction(Function &F) override; 125 126 const char *getPassName() const override { return "CodeGen Prepare"; } 127 128 void getAnalysisUsage(AnalysisUsage &AU) const override { 129 AU.addPreserved<DominatorTreeWrapperPass>(); 130 AU.addRequired<TargetLibraryInfo>(); 131 } 132 133 private: 134 bool EliminateFallThrough(Function &F); 135 bool EliminateMostlyEmptyBlocks(Function &F); 136 bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; 137 void EliminateMostlyEmptyBlock(BasicBlock *BB); 138 bool OptimizeBlock(BasicBlock &BB); 139 bool OptimizeInst(Instruction *I); 140 bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy); 141 bool OptimizeInlineAsmInst(CallInst *CS); 142 bool OptimizeCallInst(CallInst *CI); 143 bool MoveExtToFormExtLoad(Instruction *I); 144 bool OptimizeExtUses(Instruction *I); 145 bool OptimizeSelectInst(SelectInst *SI); 146 bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI); 147 bool DupRetToEnableTailCallOpts(BasicBlock *BB); 148 bool PlaceDbgValues(Function &F); 149 bool sinkAndCmp(Function &F); 150 }; 151 } 152 153 char CodeGenPrepare::ID = 0; 154 INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare", 155 "Optimize for code generation", false, false) 156 157 FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { 158 return new CodeGenPrepare(TM); 159 } 160 161 bool CodeGenPrepare::runOnFunction(Function &F) { 162 if (skipOptnoneFunction(F)) 163 return false; 164 165 bool EverMadeChange = false; 166 // Clear per function information. 167 InsertedTruncsSet.clear(); 168 PromotedInsts.clear(); 169 170 ModifiedDT = false; 171 if (TM) TLI = TM->getTargetLowering(); 172 TLInfo = &getAnalysis<TargetLibraryInfo>(); 173 DominatorTreeWrapperPass *DTWP = 174 getAnalysisIfAvailable<DominatorTreeWrapperPass>(); 175 DT = DTWP ? 
&DTWP->getDomTree() : nullptr; 176 OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, 177 Attribute::OptimizeForSize); 178 179 /// This optimization identifies DIV instructions that can be 180 /// profitably bypassed and carried out with a shorter, faster divide. 181 if (!OptSize && TLI && TLI->isSlowDivBypassed()) { 182 const DenseMap<unsigned int, unsigned int> &BypassWidths = 183 TLI->getBypassSlowDivWidths(); 184 for (Function::iterator I = F.begin(); I != F.end(); I++) 185 EverMadeChange |= bypassSlowDivision(F, I, BypassWidths); 186 } 187 188 // Eliminate blocks that contain only PHI nodes and an 189 // unconditional branch. 190 EverMadeChange |= EliminateMostlyEmptyBlocks(F); 191 192 // llvm.dbg.value is far away from the value then iSel may not be able 193 // handle it properly. iSel will drop llvm.dbg.value if it can not 194 // find a node corresponding to the value. 195 EverMadeChange |= PlaceDbgValues(F); 196 197 // If there is a mask, compare against zero, and branch that can be combined 198 // into a single target instruction, push the mask and compare into branch 199 // users. Do this before OptimizeBlock -> OptimizeInst -> 200 // OptimizeCmpExpression, which perturbs the pattern being searched for. 201 if (!DisableBranchOpts) 202 EverMadeChange |= sinkAndCmp(F); 203 204 bool MadeChange = true; 205 while (MadeChange) { 206 MadeChange = false; 207 for (Function::iterator I = F.begin(); I != F.end(); ) { 208 BasicBlock *BB = I++; 209 MadeChange |= OptimizeBlock(*BB); 210 } 211 EverMadeChange |= MadeChange; 212 } 213 214 SunkAddrs.clear(); 215 216 if (!DisableBranchOpts) { 217 MadeChange = false; 218 SmallPtrSet<BasicBlock*, 8> WorkList; 219 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { 220 SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); 221 MadeChange |= ConstantFoldTerminator(BB, true); 222 if (!MadeChange) continue; 223 224 for (SmallVectorImpl<BasicBlock*>::iterator 225 II = Successors.begin(), IE = Successors.end(); II != IE; ++II) 226 if (pred_begin(*II) == pred_end(*II)) 227 WorkList.insert(*II); 228 } 229 230 // Delete the dead blocks and any of their dead successors. 231 MadeChange |= !WorkList.empty(); 232 while (!WorkList.empty()) { 233 BasicBlock *BB = *WorkList.begin(); 234 WorkList.erase(BB); 235 SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); 236 237 DeleteDeadBlock(BB); 238 239 for (SmallVectorImpl<BasicBlock*>::iterator 240 II = Successors.begin(), IE = Successors.end(); II != IE; ++II) 241 if (pred_begin(*II) == pred_end(*II)) 242 WorkList.insert(*II); 243 } 244 245 // Merge pairs of basic blocks with unconditional branches, connected by 246 // a single edge. 247 if (EverMadeChange || MadeChange) 248 MadeChange |= EliminateFallThrough(F); 249 250 if (MadeChange) 251 ModifiedDT = true; 252 EverMadeChange |= MadeChange; 253 } 254 255 if (ModifiedDT && DT) 256 DT->recalculate(F); 257 258 return EverMadeChange; 259 } 260 261 /// EliminateFallThrough - Merge basic blocks which are connected 262 /// by a single edge, where one of the basic blocks has a single successor 263 /// pointing to the other basic block, which has a single predecessor. 264 bool CodeGenPrepare::EliminateFallThrough(Function &F) { 265 bool Changed = false; 266 // Scan all of the blocks in the function, except for the entry block. 
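  // Illustrative sketch (not from the original source) of the shape this loop
  // collapses: a block ending in an unconditional branch into a block that has
  // it as its single predecessor, e.g.
  //   bb1:
  //     br label %bb2
  //   bb2:                                   ; preds = %bb1 (only predecessor)
  //     ...
  // is merged into its predecessor via MergeBasicBlockIntoOnlyPred.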
267 for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { 268 BasicBlock *BB = I++; 269 // If the destination block has a single pred, then this is a trivial 270 // edge, just collapse it. 271 BasicBlock *SinglePred = BB->getSinglePredecessor(); 272 273 // Don't merge if BB's address is taken. 274 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue; 275 276 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); 277 if (Term && !Term->isConditional()) { 278 Changed = true; 279 DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n"); 280 // Remember if SinglePred was the entry block of the function. 281 // If so, we will need to move BB back to the entry position. 282 bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); 283 MergeBasicBlockIntoOnlyPred(BB, this); 284 285 if (isEntry && BB != &BB->getParent()->getEntryBlock()) 286 BB->moveBefore(&BB->getParent()->getEntryBlock()); 287 288 // We have erased a block. Update the iterator. 289 I = BB; 290 } 291 } 292 return Changed; 293 } 294 295 /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes, 296 /// debug info directives, and an unconditional branch. Passes before isel 297 /// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for 298 /// isel. Start by eliminating these blocks so we can split them the way we 299 /// want them. 300 bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) { 301 bool MadeChange = false; 302 // Note that this intentionally skips the entry block. 303 for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { 304 BasicBlock *BB = I++; 305 306 // If this block doesn't end with an uncond branch, ignore it. 307 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); 308 if (!BI || !BI->isUnconditional()) 309 continue; 310 311 // If the instruction before the branch (skipping debug info) isn't a phi 312 // node, then other stuff is happening here. 313 BasicBlock::iterator BBI = BI; 314 if (BBI != BB->begin()) { 315 --BBI; 316 while (isa<DbgInfoIntrinsic>(BBI)) { 317 if (BBI == BB->begin()) 318 break; 319 --BBI; 320 } 321 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI)) 322 continue; 323 } 324 325 // Do not break infinite loops. 326 BasicBlock *DestBB = BI->getSuccessor(0); 327 if (DestBB == BB) 328 continue; 329 330 if (!CanMergeBlocks(BB, DestBB)) 331 continue; 332 333 EliminateMostlyEmptyBlock(BB); 334 MadeChange = true; 335 } 336 return MadeChange; 337 } 338 339 /// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a 340 /// single uncond branch between them, and BB contains no other non-phi 341 /// instructions. 342 bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB, 343 const BasicBlock *DestBB) const { 344 // We only want to eliminate blocks whose phi nodes are used by phi nodes in 345 // the successor. If there are more complex condition (e.g. preheaders), 346 // don't mess around with them. 347 BasicBlock::const_iterator BBI = BB->begin(); 348 while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { 349 for (const User *U : PN->users()) { 350 const Instruction *UI = cast<Instruction>(U); 351 if (UI->getParent() != DestBB || !isa<PHINode>(UI)) 352 return false; 353 // If User is inside DestBB block and it is a PHINode then check 354 // incoming value. If incoming value is not from BB then this is 355 // a complex condition (e.g. preheaders) we want to avoid here. 
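      // Illustrative example (not from the original source): a phi in DestBB
      // such as
      //   %p = phi i32 [ %v, %BB ], [ %v, %other ]
      // where %v is defined in BB but also arrives along the %other edge is
      // exactly the kind of complex condition (e.g. a preheader situation)
      // mentioned above, so the merge is rejected.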
356 if (UI->getParent() == DestBB) { 357 if (const PHINode *UPN = dyn_cast<PHINode>(UI)) 358 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) { 359 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I)); 360 if (Insn && Insn->getParent() == BB && 361 Insn->getParent() != UPN->getIncomingBlock(I)) 362 return false; 363 } 364 } 365 } 366 } 367 368 // If BB and DestBB contain any common predecessors, then the phi nodes in BB 369 // and DestBB may have conflicting incoming values for the block. If so, we 370 // can't merge the block. 371 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin()); 372 if (!DestBBPN) return true; // no conflict. 373 374 // Collect the preds of BB. 375 SmallPtrSet<const BasicBlock*, 16> BBPreds; 376 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { 377 // It is faster to get preds from a PHI than with pred_iterator. 378 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) 379 BBPreds.insert(BBPN->getIncomingBlock(i)); 380 } else { 381 BBPreds.insert(pred_begin(BB), pred_end(BB)); 382 } 383 384 // Walk the preds of DestBB. 385 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) { 386 BasicBlock *Pred = DestBBPN->getIncomingBlock(i); 387 if (BBPreds.count(Pred)) { // Common predecessor? 388 BBI = DestBB->begin(); 389 while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { 390 const Value *V1 = PN->getIncomingValueForBlock(Pred); 391 const Value *V2 = PN->getIncomingValueForBlock(BB); 392 393 // If V2 is a phi node in BB, look up what the mapped value will be. 394 if (const PHINode *V2PN = dyn_cast<PHINode>(V2)) 395 if (V2PN->getParent() == BB) 396 V2 = V2PN->getIncomingValueForBlock(Pred); 397 398 // If there is a conflict, bail out. 399 if (V1 != V2) return false; 400 } 401 } 402 } 403 404 return true; 405 } 406 407 408 /// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and 409 /// an unconditional branch in it. 410 void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { 411 BranchInst *BI = cast<BranchInst>(BB->getTerminator()); 412 BasicBlock *DestBB = BI->getSuccessor(0); 413 414 DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); 415 416 // If the destination block has a single pred, then this is a trivial edge, 417 // just collapse it. 418 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { 419 if (SinglePred != DestBB) { 420 // Remember if SinglePred was the entry block of the function. If so, we 421 // will need to move BB back to the entry position. 422 bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); 423 MergeBasicBlockIntoOnlyPred(DestBB, this); 424 425 if (isEntry && BB != &BB->getParent()->getEntryBlock()) 426 BB->moveBefore(&BB->getParent()->getEntryBlock()); 427 428 DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); 429 return; 430 } 431 } 432 433 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB 434 // to handle the new incoming edges it is about to have. 435 PHINode *PN; 436 for (BasicBlock::iterator BBI = DestBB->begin(); 437 (PN = dyn_cast<PHINode>(BBI)); ++BBI) { 438 // Remove the incoming value for BB, and remember it. 439 Value *InVal = PN->removeIncomingValue(BB, false); 440 441 // Two options: either the InVal is a phi node defined in BB or it is some 442 // value that dominates BB. 
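    // Illustrative sketch (not from the original source): if DestBB has
    //   %p = phi i32 [ %q, %BB ], ...
    // and BB has
    //   %q = phi i32 [ %a, %pred1 ], [ %b, %pred2 ]
    // then the %BB entry of %p is replaced by [ %a, %pred1 ] and [ %b, %pred2 ];
    // otherwise the single dominating value is added once per incoming edge.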
443 PHINode *InValPhi = dyn_cast<PHINode>(InVal); 444 if (InValPhi && InValPhi->getParent() == BB) { 445 // Add all of the input values of the input PHI as inputs of this phi. 446 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) 447 PN->addIncoming(InValPhi->getIncomingValue(i), 448 InValPhi->getIncomingBlock(i)); 449 } else { 450 // Otherwise, add one instance of the dominating value for each edge that 451 // we will be adding. 452 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { 453 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) 454 PN->addIncoming(InVal, BBPN->getIncomingBlock(i)); 455 } else { 456 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) 457 PN->addIncoming(InVal, *PI); 458 } 459 } 460 } 461 462 // The PHIs are now updated, change everything that refers to BB to use 463 // DestBB and remove BB. 464 BB->replaceAllUsesWith(DestBB); 465 if (DT && !ModifiedDT) { 466 BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); 467 BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); 468 BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); 469 DT->changeImmediateDominator(DestBB, NewIDom); 470 DT->eraseNode(BB); 471 } 472 BB->eraseFromParent(); 473 ++NumBlocksElim; 474 475 DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); 476 } 477 478 /// SinkCast - Sink the specified cast instruction into its user blocks 479 static bool SinkCast(CastInst *CI) { 480 BasicBlock *DefBB = CI->getParent(); 481 482 /// InsertedCasts - Only insert a cast in each block once. 483 DenseMap<BasicBlock*, CastInst*> InsertedCasts; 484 485 bool MadeChange = false; 486 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); 487 UI != E; ) { 488 Use &TheUse = UI.getUse(); 489 Instruction *User = cast<Instruction>(*UI); 490 491 // Figure out which BB this cast is used in. For PHI's this is the 492 // appropriate predecessor block. 493 BasicBlock *UserBB = User->getParent(); 494 if (PHINode *PN = dyn_cast<PHINode>(User)) { 495 UserBB = PN->getIncomingBlock(TheUse); 496 } 497 498 // Preincrement use iterator so we don't invalidate it. 499 ++UI; 500 501 // If this user is in the same block as the cast, don't change the cast. 502 if (UserBB == DefBB) continue; 503 504 // If we have already inserted a cast into this block, use it. 505 CastInst *&InsertedCast = InsertedCasts[UserBB]; 506 507 if (!InsertedCast) { 508 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 509 InsertedCast = 510 CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", 511 InsertPt); 512 MadeChange = true; 513 } 514 515 // Replace a use of the cast with a use of the new cast. 516 TheUse = InsertedCast; 517 ++NumCastUses; 518 } 519 520 // If we removed all uses, nuke the cast. 521 if (CI->use_empty()) { 522 CI->eraseFromParent(); 523 MadeChange = true; 524 } 525 526 return MadeChange; 527 } 528 529 /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop 530 /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), 531 /// sink it into user blocks to reduce the number of virtual 532 /// registers that must be created and coalesced. 533 /// 534 /// Return true if any changes are made. 
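/// Illustrative example (not from the original source): on a target where an
/// i32 -> i16 truncate is a noop after type promotion, a pattern such as
///   bb1:  %t = trunc i32 %x to i16
///   bb2:  ... use %t ...
/// is rewritten so the trunc is re-created in bb2, avoiding a cross-block
/// copy of the virtual register.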
535 /// 536 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ 537 // If this is a noop copy, 538 EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); 539 EVT DstVT = TLI.getValueType(CI->getType()); 540 541 // This is an fp<->int conversion? 542 if (SrcVT.isInteger() != DstVT.isInteger()) 543 return false; 544 545 // If this is an extension, it will be a zero or sign extension, which 546 // isn't a noop. 547 if (SrcVT.bitsLT(DstVT)) return false; 548 549 // If these values will be promoted, find out what they will be promoted 550 // to. This helps us consider truncates on PPC as noop copies when they 551 // are. 552 if (TLI.getTypeAction(CI->getContext(), SrcVT) == 553 TargetLowering::TypePromoteInteger) 554 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); 555 if (TLI.getTypeAction(CI->getContext(), DstVT) == 556 TargetLowering::TypePromoteInteger) 557 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); 558 559 // If, after promotion, these are the same types, this is a noop copy. 560 if (SrcVT != DstVT) 561 return false; 562 563 return SinkCast(CI); 564 } 565 566 /// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce 567 /// the number of virtual registers that must be created and coalesced. This is 568 /// a clear win except on targets with multiple condition code registers 569 /// (PowerPC), where it might lose; some adjustment may be wanted there. 570 /// 571 /// Return true if any changes are made. 572 static bool OptimizeCmpExpression(CmpInst *CI) { 573 BasicBlock *DefBB = CI->getParent(); 574 575 /// InsertedCmp - Only insert a cmp in each block once. 576 DenseMap<BasicBlock*, CmpInst*> InsertedCmps; 577 578 bool MadeChange = false; 579 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); 580 UI != E; ) { 581 Use &TheUse = UI.getUse(); 582 Instruction *User = cast<Instruction>(*UI); 583 584 // Preincrement use iterator so we don't invalidate it. 585 ++UI; 586 587 // Don't bother for PHI nodes. 588 if (isa<PHINode>(User)) 589 continue; 590 591 // Figure out which BB this cmp is used in. 592 BasicBlock *UserBB = User->getParent(); 593 594 // If this user is in the same block as the cmp, don't change the cmp. 595 if (UserBB == DefBB) continue; 596 597 // If we have already inserted a cmp into this block, use it. 598 CmpInst *&InsertedCmp = InsertedCmps[UserBB]; 599 600 if (!InsertedCmp) { 601 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 602 InsertedCmp = 603 CmpInst::Create(CI->getOpcode(), 604 CI->getPredicate(), CI->getOperand(0), 605 CI->getOperand(1), "", InsertPt); 606 MadeChange = true; 607 } 608 609 // Replace a use of the cmp with a use of the new cmp. 610 TheUse = InsertedCmp; 611 ++NumCmpUses; 612 } 613 614 // If we removed all uses, nuke the cmp. 615 if (CI->use_empty()) 616 CI->eraseFromParent(); 617 618 return MadeChange; 619 } 620 621 /// isExtractBitsCandidateUse - Check if the candidates could 622 /// be combined with shift instruction, which includes: 623 /// 1. Truncate instruction 624 /// 2. 
And instruction and the imm is a mask of the low bits: 625 /// imm & (imm+1) == 0 626 static bool isExtractBitsCandidateUse(Instruction *User) { 627 if (!isa<TruncInst>(User)) { 628 if (User->getOpcode() != Instruction::And || 629 !isa<ConstantInt>(User->getOperand(1))) 630 return false; 631 632 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue(); 633 634 if ((Cimm & (Cimm + 1)).getBoolValue()) 635 return false; 636 } 637 return true; 638 } 639 640 /// SinkShiftAndTruncate - sink both shift and truncate instruction 641 /// to the use of truncate's BB. 642 static bool 643 SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, 644 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts, 645 const TargetLowering &TLI) { 646 BasicBlock *UserBB = User->getParent(); 647 DenseMap<BasicBlock *, CastInst *> InsertedTruncs; 648 TruncInst *TruncI = dyn_cast<TruncInst>(User); 649 bool MadeChange = false; 650 651 for (Value::user_iterator TruncUI = TruncI->user_begin(), 652 TruncE = TruncI->user_end(); 653 TruncUI != TruncE;) { 654 655 Use &TruncTheUse = TruncUI.getUse(); 656 Instruction *TruncUser = cast<Instruction>(*TruncUI); 657 // Preincrement use iterator so we don't invalidate it. 658 659 ++TruncUI; 660 661 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode()); 662 if (!ISDOpcode) 663 continue; 664 665 // If the use is actually a legal node, there will not be an implicit 666 // truncate. 667 if (TLI.isOperationLegalOrCustom(ISDOpcode, 668 EVT::getEVT(TruncUser->getType()))) 669 continue; 670 671 // Don't bother for PHI nodes. 672 if (isa<PHINode>(TruncUser)) 673 continue; 674 675 BasicBlock *TruncUserBB = TruncUser->getParent(); 676 677 if (UserBB == TruncUserBB) 678 continue; 679 680 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB]; 681 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB]; 682 683 if (!InsertedShift && !InsertedTrunc) { 684 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt(); 685 // Sink the shift 686 if (ShiftI->getOpcode() == Instruction::AShr) 687 InsertedShift = 688 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); 689 else 690 InsertedShift = 691 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); 692 693 // Sink the trunc 694 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt(); 695 TruncInsertPt++; 696 697 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift, 698 TruncI->getType(), "", TruncInsertPt); 699 700 MadeChange = true; 701 702 TruncTheUse = InsertedTrunc; 703 } 704 } 705 return MadeChange; 706 } 707 708 /// OptimizeExtractBits - sink the shift *right* instruction into user blocks if 709 /// the uses could potentially be combined with this shift instruction and 710 /// generate BitExtract instruction. It will only be applied if the architecture 711 /// supports BitExtract instruction. Here is an example: 712 /// BB1: 713 /// %x.extract.shift = lshr i64 %arg1, 32 714 /// BB2: 715 /// %x.extract.trunc = trunc i64 %x.extract.shift to i16 716 /// ==> 717 /// 718 /// BB2: 719 /// %x.extract.shift.1 = lshr i64 %arg1, 32 720 /// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16 721 /// 722 /// CodeGen will recoginze the pattern in BB2 and generate BitExtract 723 /// instruction. 724 /// Return true if any changes are made. 
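/// A worked example of the low-bit-mask test used above (illustrative, not
/// from the original source): for an 'and' with imm = 0x00ff,
/// 0x00ff & 0x0100 == 0, so the use is a candidate; for imm = 0x00f0,
/// 0x00f0 & 0x00f1 != 0, so it is not.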
725 static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, 726 const TargetLowering &TLI) { 727 BasicBlock *DefBB = ShiftI->getParent(); 728 729 /// Only insert instructions in each block once. 730 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts; 731 732 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType())); 733 734 bool MadeChange = false; 735 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end(); 736 UI != E;) { 737 Use &TheUse = UI.getUse(); 738 Instruction *User = cast<Instruction>(*UI); 739 // Preincrement use iterator so we don't invalidate it. 740 ++UI; 741 742 // Don't bother for PHI nodes. 743 if (isa<PHINode>(User)) 744 continue; 745 746 if (!isExtractBitsCandidateUse(User)) 747 continue; 748 749 BasicBlock *UserBB = User->getParent(); 750 751 if (UserBB == DefBB) { 752 // If the shift and truncate instruction are in the same BB. The use of 753 // the truncate(TruncUse) may still introduce another truncate if not 754 // legal. In this case, we would like to sink both shift and truncate 755 // instruction to the BB of TruncUse. 756 // for example: 757 // BB1: 758 // i64 shift.result = lshr i64 opnd, imm 759 // trunc.result = trunc shift.result to i16 760 // 761 // BB2: 762 // ----> We will have an implicit truncate here if the architecture does 763 // not have i16 compare. 764 // cmp i16 trunc.result, opnd2 765 // 766 if (isa<TruncInst>(User) && shiftIsLegal 767 // If the type of the truncate is legal, no trucate will be 768 // introduced in other basic blocks. 769 && (!TLI.isTypeLegal(TLI.getValueType(User->getType())))) 770 MadeChange = 771 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI); 772 773 continue; 774 } 775 // If we have already inserted a shift into this block, use it. 776 BinaryOperator *&InsertedShift = InsertedShifts[UserBB]; 777 778 if (!InsertedShift) { 779 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 780 781 if (ShiftI->getOpcode() == Instruction::AShr) 782 InsertedShift = 783 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); 784 else 785 InsertedShift = 786 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); 787 788 MadeChange = true; 789 } 790 791 // Replace a use of the shift with a use of the new shift. 792 TheUse = InsertedShift; 793 } 794 795 // If we removed all uses, nuke the shift. 796 if (ShiftI->use_empty()) 797 ShiftI->eraseFromParent(); 798 799 return MadeChange; 800 } 801 802 namespace { 803 class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls { 804 protected: 805 void replaceCall(Value *With) override { 806 CI->replaceAllUsesWith(With); 807 CI->eraseFromParent(); 808 } 809 bool isFoldable(unsigned SizeCIOp, unsigned, bool) const override { 810 if (ConstantInt *SizeCI = 811 dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) 812 return SizeCI->isAllOnesValue(); 813 return false; 814 } 815 }; 816 } // end anonymous namespace 817 818 bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { 819 BasicBlock *BB = CI->getParent(); 820 821 // Lower inline assembly if we can. 822 // If we found an inline asm expession, and if the target knows how to 823 // lower it to normal LLVM code, do so now. 824 if (TLI && isa<InlineAsm>(CI->getCalledValue())) { 825 if (TLI->ExpandInlineAsm(CI)) { 826 // Avoid invalidating the iterator. 827 CurInstIterator = BB->begin(); 828 // Avoid processing instructions out of order, which could cause 829 // reuse before a value is defined. 
830 SunkAddrs.clear(); 831 return true; 832 } 833 // Sink address computing for memory operands into the block. 834 if (OptimizeInlineAsmInst(CI)) 835 return true; 836 } 837 838 // Lower all uses of llvm.objectsize.* 839 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); 840 if (II && II->getIntrinsicID() == Intrinsic::objectsize) { 841 bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); 842 Type *ReturnTy = CI->getType(); 843 Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); 844 845 // Substituting this can cause recursive simplifications, which can 846 // invalidate our iterator. Use a WeakVH to hold onto it in case this 847 // happens. 848 WeakVH IterHandle(CurInstIterator); 849 850 replaceAndRecursivelySimplify(CI, RetVal, 851 TLI ? TLI->getDataLayout() : nullptr, 852 TLInfo, ModifiedDT ? nullptr : DT); 853 854 // If the iterator instruction was recursively deleted, start over at the 855 // start of the block. 856 if (IterHandle != CurInstIterator) { 857 CurInstIterator = BB->begin(); 858 SunkAddrs.clear(); 859 } 860 return true; 861 } 862 863 if (II && TLI) { 864 SmallVector<Value*, 2> PtrOps; 865 Type *AccessTy; 866 if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy)) 867 while (!PtrOps.empty()) 868 if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy)) 869 return true; 870 } 871 872 // From here on out we're working with named functions. 873 if (!CI->getCalledFunction()) return false; 874 875 // We'll need DataLayout from here on out. 876 const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; 877 if (!TD) return false; 878 879 // Lower all default uses of _chk calls. This is very similar 880 // to what InstCombineCalls does, but here we are only lowering calls 881 // that have the default "don't know" as the objectsize. Anything else 882 // should be left alone. 883 CodeGenPrepareFortifiedLibCalls Simplifier; 884 return Simplifier.fold(CI, TD, TLInfo); 885 } 886 887 /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return 888 /// instructions to the predecessor to enable tail call optimizations. The 889 /// case it is currently looking for is: 890 /// @code 891 /// bb0: 892 /// %tmp0 = tail call i32 @f0() 893 /// br label %return 894 /// bb1: 895 /// %tmp1 = tail call i32 @f1() 896 /// br label %return 897 /// bb2: 898 /// %tmp2 = tail call i32 @f2() 899 /// br label %return 900 /// return: 901 /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] 902 /// ret i32 %retval 903 /// @endcode 904 /// 905 /// => 906 /// 907 /// @code 908 /// bb0: 909 /// %tmp0 = tail call i32 @f0() 910 /// ret i32 %tmp0 911 /// bb1: 912 /// %tmp1 = tail call i32 @f1() 913 /// ret i32 %tmp1 914 /// bb2: 915 /// %tmp2 = tail call i32 @f2() 916 /// ret i32 %tmp2 917 /// @endcode 918 bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { 919 if (!TLI) 920 return false; 921 922 ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); 923 if (!RI) 924 return false; 925 926 PHINode *PN = nullptr; 927 BitCastInst *BCI = nullptr; 928 Value *V = RI->getReturnValue(); 929 if (V) { 930 BCI = dyn_cast<BitCastInst>(V); 931 if (BCI) 932 V = BCI->getOperand(0); 933 934 PN = dyn_cast<PHINode>(V); 935 if (!PN) 936 return false; 937 } 938 939 if (PN && PN->getParent() != BB) 940 return false; 941 942 // It's not safe to eliminate the sign / zero extension of the return value. 943 // See llvm::isInTailCallPosition(). 
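  // Illustrative example (not from the original source): if the caller is
  // declared 'define zeroext i1 @f(...)', the zero-extension of the return
  // value must still happen on every path, so the return is not duplicated
  // into the predecessors here.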
944 const Function *F = BB->getParent(); 945 AttributeSet CallerAttrs = F->getAttributes(); 946 if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || 947 CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) 948 return false; 949 950 // Make sure there are no instructions between the PHI and return, or that the 951 // return is the first instruction in the block. 952 if (PN) { 953 BasicBlock::iterator BI = BB->begin(); 954 do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); 955 if (&*BI == BCI) 956 // Also skip over the bitcast. 957 ++BI; 958 if (&*BI != RI) 959 return false; 960 } else { 961 BasicBlock::iterator BI = BB->begin(); 962 while (isa<DbgInfoIntrinsic>(BI)) ++BI; 963 if (&*BI != RI) 964 return false; 965 } 966 967 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail 968 /// call. 969 SmallVector<CallInst*, 4> TailCalls; 970 if (PN) { 971 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { 972 CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); 973 // Make sure the phi value is indeed produced by the tail call. 974 if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && 975 TLI->mayBeEmittedAsTailCall(CI)) 976 TailCalls.push_back(CI); 977 } 978 } else { 979 SmallPtrSet<BasicBlock*, 4> VisitedBBs; 980 for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { 981 if (!VisitedBBs.insert(*PI)) 982 continue; 983 984 BasicBlock::InstListType &InstList = (*PI)->getInstList(); 985 BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); 986 BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); 987 do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); 988 if (RI == RE) 989 continue; 990 991 CallInst *CI = dyn_cast<CallInst>(&*RI); 992 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) 993 TailCalls.push_back(CI); 994 } 995 } 996 997 bool Changed = false; 998 for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { 999 CallInst *CI = TailCalls[i]; 1000 CallSite CS(CI); 1001 1002 // Conservatively require the attributes of the call to match those of the 1003 // return. Ignore noalias because it doesn't affect the call sequence. 1004 AttributeSet CalleeAttrs = CS.getAttributes(); 1005 if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). 1006 removeAttribute(Attribute::NoAlias) != 1007 AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). 1008 removeAttribute(Attribute::NoAlias)) 1009 continue; 1010 1011 // Make sure the call instruction is followed by an unconditional branch to 1012 // the return block. 1013 BasicBlock *CallBB = CI->getParent(); 1014 BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); 1015 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) 1016 continue; 1017 1018 // Duplicate the return into CallBB. 1019 (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); 1020 ModifiedDT = Changed = true; 1021 ++NumRetsDup; 1022 } 1023 1024 // If we eliminated all predecessors of the block, delete the block now. 
1025 if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) 1026 BB->eraseFromParent(); 1027 1028 return Changed; 1029 } 1030 1031 //===----------------------------------------------------------------------===// 1032 // Memory Optimization 1033 //===----------------------------------------------------------------------===// 1034 1035 namespace { 1036 1037 /// ExtAddrMode - This is an extended version of TargetLowering::AddrMode 1038 /// which holds actual Value*'s for register values. 1039 struct ExtAddrMode : public TargetLowering::AddrMode { 1040 Value *BaseReg; 1041 Value *ScaledReg; 1042 ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {} 1043 void print(raw_ostream &OS) const; 1044 void dump() const; 1045 1046 bool operator==(const ExtAddrMode& O) const { 1047 return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) && 1048 (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) && 1049 (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale); 1050 } 1051 }; 1052 1053 #ifndef NDEBUG 1054 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { 1055 AM.print(OS); 1056 return OS; 1057 } 1058 #endif 1059 1060 void ExtAddrMode::print(raw_ostream &OS) const { 1061 bool NeedPlus = false; 1062 OS << "["; 1063 if (BaseGV) { 1064 OS << (NeedPlus ? " + " : "") 1065 << "GV:"; 1066 BaseGV->printAsOperand(OS, /*PrintType=*/false); 1067 NeedPlus = true; 1068 } 1069 1070 if (BaseOffs) { 1071 OS << (NeedPlus ? " + " : "") 1072 << BaseOffs; 1073 NeedPlus = true; 1074 } 1075 1076 if (BaseReg) { 1077 OS << (NeedPlus ? " + " : "") 1078 << "Base:"; 1079 BaseReg->printAsOperand(OS, /*PrintType=*/false); 1080 NeedPlus = true; 1081 } 1082 if (Scale) { 1083 OS << (NeedPlus ? " + " : "") 1084 << Scale << "*"; 1085 ScaledReg->printAsOperand(OS, /*PrintType=*/false); 1086 } 1087 1088 OS << ']'; 1089 } 1090 1091 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1092 void ExtAddrMode::dump() const { 1093 print(dbgs()); 1094 dbgs() << '\n'; 1095 } 1096 #endif 1097 1098 /// \brief This class provides transaction based operation on the IR. 1099 /// Every change made through this class is recorded in the internal state and 1100 /// can be undone (rollback) until commit is called. 1101 class TypePromotionTransaction { 1102 1103 /// \brief This represents the common interface of the individual transaction. 1104 /// Each class implements the logic for doing one specific modification on 1105 /// the IR via the TypePromotionTransaction. 1106 class TypePromotionAction { 1107 protected: 1108 /// The Instruction modified. 1109 Instruction *Inst; 1110 1111 public: 1112 /// \brief Constructor of the action. 1113 /// The constructor performs the related action on the IR. 1114 TypePromotionAction(Instruction *Inst) : Inst(Inst) {} 1115 1116 virtual ~TypePromotionAction() {} 1117 1118 /// \brief Undo the modification done by this action. 1119 /// When this method is called, the IR must be in the same state as it was 1120 /// before this action was applied. 1121 /// \pre Undoing the action works if and only if the IR is in the exact same 1122 /// state as it was directly after this action was applied. 1123 virtual void undo() = 0; 1124 1125 /// \brief Advocate every change made by this action. 1126 /// When the results on the IR of the action are to be kept, it is important 1127 /// to call this function, otherwise hidden information may be kept forever. 1128 virtual void commit() { 1129 // Nothing to be done, this action is not doing anything. 
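      // (Illustrative note, not from the original source: subclasses that do
      // need a final step override this; e.g. InstructionRemover::commit()
      // below actually deletes the instruction once the transaction is kept.)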
1130 } 1131 }; 1132 1133 /// \brief Utility to remember the position of an instruction. 1134 class InsertionHandler { 1135 /// Position of an instruction. 1136 /// Either an instruction: 1137 /// - Is the first in a basic block: BB is used. 1138 /// - Has a previous instructon: PrevInst is used. 1139 union { 1140 Instruction *PrevInst; 1141 BasicBlock *BB; 1142 } Point; 1143 /// Remember whether or not the instruction had a previous instruction. 1144 bool HasPrevInstruction; 1145 1146 public: 1147 /// \brief Record the position of \p Inst. 1148 InsertionHandler(Instruction *Inst) { 1149 BasicBlock::iterator It = Inst; 1150 HasPrevInstruction = (It != (Inst->getParent()->begin())); 1151 if (HasPrevInstruction) 1152 Point.PrevInst = --It; 1153 else 1154 Point.BB = Inst->getParent(); 1155 } 1156 1157 /// \brief Insert \p Inst at the recorded position. 1158 void insert(Instruction *Inst) { 1159 if (HasPrevInstruction) { 1160 if (Inst->getParent()) 1161 Inst->removeFromParent(); 1162 Inst->insertAfter(Point.PrevInst); 1163 } else { 1164 Instruction *Position = Point.BB->getFirstInsertionPt(); 1165 if (Inst->getParent()) 1166 Inst->moveBefore(Position); 1167 else 1168 Inst->insertBefore(Position); 1169 } 1170 } 1171 }; 1172 1173 /// \brief Move an instruction before another. 1174 class InstructionMoveBefore : public TypePromotionAction { 1175 /// Original position of the instruction. 1176 InsertionHandler Position; 1177 1178 public: 1179 /// \brief Move \p Inst before \p Before. 1180 InstructionMoveBefore(Instruction *Inst, Instruction *Before) 1181 : TypePromotionAction(Inst), Position(Inst) { 1182 DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n"); 1183 Inst->moveBefore(Before); 1184 } 1185 1186 /// \brief Move the instruction back to its original position. 1187 void undo() override { 1188 DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n"); 1189 Position.insert(Inst); 1190 } 1191 }; 1192 1193 /// \brief Set the operand of an instruction with a new value. 1194 class OperandSetter : public TypePromotionAction { 1195 /// Original operand of the instruction. 1196 Value *Origin; 1197 /// Index of the modified instruction. 1198 unsigned Idx; 1199 1200 public: 1201 /// \brief Set \p Idx operand of \p Inst with \p NewVal. 1202 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal) 1203 : TypePromotionAction(Inst), Idx(Idx) { 1204 DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n" 1205 << "for:" << *Inst << "\n" 1206 << "with:" << *NewVal << "\n"); 1207 Origin = Inst->getOperand(Idx); 1208 Inst->setOperand(Idx, NewVal); 1209 } 1210 1211 /// \brief Restore the original value of the instruction. 1212 void undo() override { 1213 DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n" 1214 << "for: " << *Inst << "\n" 1215 << "with: " << *Origin << "\n"); 1216 Inst->setOperand(Idx, Origin); 1217 } 1218 }; 1219 1220 /// \brief Hide the operands of an instruction. 1221 /// Do as if this instruction was not using any of its operands. 1222 class OperandsHider : public TypePromotionAction { 1223 /// The list of original operands. 1224 SmallVector<Value *, 4> OriginalValues; 1225 1226 public: 1227 /// \brief Remove \p Inst from the uses of the operands of \p Inst. 1228 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) { 1229 DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n"); 1230 unsigned NumOpnds = Inst->getNumOperands(); 1231 OriginalValues.reserve(NumOpnds); 1232 for (unsigned It = 0; It < NumOpnds; ++It) { 1233 // Save the current operand. 
        Value *Val = Inst->getOperand(It);
        OriginalValues.push_back(Val);
        // Set a dummy one.
        // We could use OperandSetter here, but that would imply an overhead
        // that we are not willing to pay.
        Inst->setOperand(It, UndefValue::get(Val->getType()));
      }
    }

    /// \brief Restore the original list of uses.
    void undo() override {
      DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
      for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
        Inst->setOperand(It, OriginalValues[It]);
    }
  };

  /// \brief Build a truncate instruction.
  class TruncBuilder : public TypePromotionAction {
  public:
    /// \brief Build a truncate instruction of \p Opnd producing a \p Ty
    /// result.
    /// trunc Opnd to Ty.
    TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
      IRBuilder<> Builder(Opnd);
      Inst = cast<Instruction>(Builder.CreateTrunc(Opnd, Ty, "promoted"));
      DEBUG(dbgs() << "Do: TruncBuilder: " << *Inst << "\n");
    }

    /// \brief Get the built instruction.
    Instruction *getBuiltInstruction() { return Inst; }

    /// \brief Remove the built instruction.
    void undo() override {
      DEBUG(dbgs() << "Undo: TruncBuilder: " << *Inst << "\n");
      Inst->eraseFromParent();
    }
  };

  /// \brief Build a sign extension instruction.
  class SExtBuilder : public TypePromotionAction {
  public:
    /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty
    /// result.
    /// sext Opnd to Ty.
    SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
        : TypePromotionAction(InsertPt) {
      IRBuilder<> Builder(InsertPt);
      Inst = cast<Instruction>(Builder.CreateSExt(Opnd, Ty, "promoted"));
      DEBUG(dbgs() << "Do: SExtBuilder: " << *Inst << "\n");
    }

    /// \brief Get the built instruction.
    Instruction *getBuiltInstruction() { return Inst; }

    /// \brief Remove the built instruction.
    void undo() override {
      DEBUG(dbgs() << "Undo: SExtBuilder: " << *Inst << "\n");
      Inst->eraseFromParent();
    }
  };

  /// \brief Mutate an instruction to another type.
  class TypeMutator : public TypePromotionAction {
    /// Record the original type.
    Type *OrigTy;

  public:
    /// \brief Mutate the type of \p Inst into \p NewTy.
    TypeMutator(Instruction *Inst, Type *NewTy)
        : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
      DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
                   << "\n");
      Inst->mutateType(NewTy);
    }

    /// \brief Mutate the instruction back to its original type.
    void undo() override {
      DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
                   << "\n");
      Inst->mutateType(OrigTy);
    }
  };

  /// \brief Replace the uses of an instruction by another instruction.
  class UsesReplacer : public TypePromotionAction {
    /// Helper structure to keep track of the replaced uses.
    struct InstructionAndIdx {
      /// The instruction using the replaced instruction.
      Instruction *Inst;
      /// The operand index of Inst at which the replaced instruction is used.
      unsigned Idx;
      InstructionAndIdx(Instruction *Inst, unsigned Idx)
          : Inst(Inst), Idx(Idx) {}
    };

    /// Keep track of the original uses (pair Instruction, Index).
1331 SmallVector<InstructionAndIdx, 4> OriginalUses; 1332 typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator; 1333 1334 public: 1335 /// \brief Replace all the use of \p Inst by \p New. 1336 UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) { 1337 DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New 1338 << "\n"); 1339 // Record the original uses. 1340 for (Use &U : Inst->uses()) { 1341 Instruction *UserI = cast<Instruction>(U.getUser()); 1342 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo())); 1343 } 1344 // Now, we can replace the uses. 1345 Inst->replaceAllUsesWith(New); 1346 } 1347 1348 /// \brief Reassign the original uses of Inst to Inst. 1349 void undo() override { 1350 DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n"); 1351 for (use_iterator UseIt = OriginalUses.begin(), 1352 EndIt = OriginalUses.end(); 1353 UseIt != EndIt; ++UseIt) { 1354 UseIt->Inst->setOperand(UseIt->Idx, Inst); 1355 } 1356 } 1357 }; 1358 1359 /// \brief Remove an instruction from the IR. 1360 class InstructionRemover : public TypePromotionAction { 1361 /// Original position of the instruction. 1362 InsertionHandler Inserter; 1363 /// Helper structure to hide all the link to the instruction. In other 1364 /// words, this helps to do as if the instruction was removed. 1365 OperandsHider Hider; 1366 /// Keep track of the uses replaced, if any. 1367 UsesReplacer *Replacer; 1368 1369 public: 1370 /// \brief Remove all reference of \p Inst and optinally replace all its 1371 /// uses with New. 1372 /// \pre If !Inst->use_empty(), then New != nullptr 1373 InstructionRemover(Instruction *Inst, Value *New = nullptr) 1374 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst), 1375 Replacer(nullptr) { 1376 if (New) 1377 Replacer = new UsesReplacer(Inst, New); 1378 DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n"); 1379 Inst->removeFromParent(); 1380 } 1381 1382 ~InstructionRemover() { delete Replacer; } 1383 1384 /// \brief Really remove the instruction. 1385 void commit() override { delete Inst; } 1386 1387 /// \brief Resurrect the instruction and reassign it to the proper uses if 1388 /// new value was provided when build this action. 1389 void undo() override { 1390 DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n"); 1391 Inserter.insert(Inst); 1392 if (Replacer) 1393 Replacer->undo(); 1394 Hider.undo(); 1395 } 1396 }; 1397 1398 public: 1399 /// Restoration point. 1400 /// The restoration point is a pointer to an action instead of an iterator 1401 /// because the iterator may be invalidated but not the pointer. 1402 typedef const TypePromotionAction *ConstRestorationPt; 1403 /// Advocate every changes made in that transaction. 1404 void commit(); 1405 /// Undo all the changes made after the given point. 1406 void rollback(ConstRestorationPt Point); 1407 /// Get the current restoration point. 1408 ConstRestorationPt getRestorationPoint() const; 1409 1410 /// \name API for IR modification with state keeping to support rollback. 1411 /// @{ 1412 /// Same as Instruction::setOperand. 1413 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal); 1414 /// Same as Instruction::eraseFromParent. 1415 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr); 1416 /// Same as Value::replaceAllUsesWith. 1417 void replaceAllUsesWith(Instruction *Inst, Value *New); 1418 /// Same as Value::mutateType. 1419 void mutateType(Instruction *Inst, Type *NewTy); 1420 /// Same as IRBuilder::createTrunc. 
1421 Instruction *createTrunc(Instruction *Opnd, Type *Ty); 1422 /// Same as IRBuilder::createSExt. 1423 Instruction *createSExt(Instruction *Inst, Value *Opnd, Type *Ty); 1424 /// Same as Instruction::moveBefore. 1425 void moveBefore(Instruction *Inst, Instruction *Before); 1426 /// @} 1427 1428 private: 1429 /// The ordered list of actions made so far. 1430 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions; 1431 typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt; 1432 }; 1433 1434 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, 1435 Value *NewVal) { 1436 Actions.push_back( 1437 make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal)); 1438 } 1439 1440 void TypePromotionTransaction::eraseInstruction(Instruction *Inst, 1441 Value *NewVal) { 1442 Actions.push_back( 1443 make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal)); 1444 } 1445 1446 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, 1447 Value *New) { 1448 Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New)); 1449 } 1450 1451 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { 1452 Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy)); 1453 } 1454 1455 Instruction *TypePromotionTransaction::createTrunc(Instruction *Opnd, 1456 Type *Ty) { 1457 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty)); 1458 Instruction *I = Ptr->getBuiltInstruction(); 1459 Actions.push_back(std::move(Ptr)); 1460 return I; 1461 } 1462 1463 Instruction *TypePromotionTransaction::createSExt(Instruction *Inst, 1464 Value *Opnd, Type *Ty) { 1465 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty)); 1466 Instruction *I = Ptr->getBuiltInstruction(); 1467 Actions.push_back(std::move(Ptr)); 1468 return I; 1469 } 1470 1471 void TypePromotionTransaction::moveBefore(Instruction *Inst, 1472 Instruction *Before) { 1473 Actions.push_back( 1474 make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before)); 1475 } 1476 1477 TypePromotionTransaction::ConstRestorationPt 1478 TypePromotionTransaction::getRestorationPoint() const { 1479 return !Actions.empty() ? Actions.back().get() : nullptr; 1480 } 1481 1482 void TypePromotionTransaction::commit() { 1483 for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; 1484 ++It) 1485 (*It)->commit(); 1486 Actions.clear(); 1487 } 1488 1489 void TypePromotionTransaction::rollback( 1490 TypePromotionTransaction::ConstRestorationPt Point) { 1491 while (!Actions.empty() && Point != Actions.back().get()) { 1492 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val(); 1493 Curr->undo(); 1494 } 1495 } 1496 1497 /// \brief A helper class for matching addressing modes. 1498 /// 1499 /// This encapsulates the logic for matching the target-legal addressing modes. 1500 class AddressingModeMatcher { 1501 SmallVectorImpl<Instruction*> &AddrModeInsts; 1502 const TargetLowering &TLI; 1503 1504 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and 1505 /// the memory instruction that we're computing this address for. 1506 Type *AccessTy; 1507 Instruction *MemoryInst; 1508 1509 /// AddrMode - This is the addressing mode that we're building up. This is 1510 /// part of the return value of this addressing mode matching stuff. 1511 ExtAddrMode &AddrMode; 1512 1513 /// The truncate instruction inserted by other CodeGenPrepare optimizations. 
1514 const SetOfInstrs &InsertedTruncs; 1515 /// A map from the instructions to their type before promotion. 1516 InstrToOrigTy &PromotedInsts; 1517 /// The ongoing transaction where every action should be registered. 1518 TypePromotionTransaction &TPT; 1519 1520 /// IgnoreProfitability - This is set to true when we should not do 1521 /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode 1522 /// always returns true. 1523 bool IgnoreProfitability; 1524 1525 AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI, 1526 const TargetLowering &T, Type *AT, 1527 Instruction *MI, ExtAddrMode &AM, 1528 const SetOfInstrs &InsertedTruncs, 1529 InstrToOrigTy &PromotedInsts, 1530 TypePromotionTransaction &TPT) 1531 : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM), 1532 InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) { 1533 IgnoreProfitability = false; 1534 } 1535 public: 1536 1537 /// Match - Find the maximal addressing mode that a load/store of V can fold, 1538 /// give an access type of AccessTy. This returns a list of involved 1539 /// instructions in AddrModeInsts. 1540 /// \p InsertedTruncs The truncate instruction inserted by other 1541 /// CodeGenPrepare 1542 /// optimizations. 1543 /// \p PromotedInsts maps the instructions to their type before promotion. 1544 /// \p The ongoing transaction where every action should be registered. 1545 static ExtAddrMode Match(Value *V, Type *AccessTy, 1546 Instruction *MemoryInst, 1547 SmallVectorImpl<Instruction*> &AddrModeInsts, 1548 const TargetLowering &TLI, 1549 const SetOfInstrs &InsertedTruncs, 1550 InstrToOrigTy &PromotedInsts, 1551 TypePromotionTransaction &TPT) { 1552 ExtAddrMode Result; 1553 1554 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy, 1555 MemoryInst, Result, InsertedTruncs, 1556 PromotedInsts, TPT).MatchAddr(V, 0); 1557 (void)Success; assert(Success && "Couldn't select *anything*?"); 1558 return Result; 1559 } 1560 private: 1561 bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); 1562 bool MatchAddr(Value *V, unsigned Depth); 1563 bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, 1564 bool *MovedAway = nullptr); 1565 bool IsProfitableToFoldIntoAddressingMode(Instruction *I, 1566 ExtAddrMode &AMBefore, 1567 ExtAddrMode &AMAfter); 1568 bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); 1569 bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion, 1570 Value *PromotedOperand) const; 1571 }; 1572 1573 /// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. 1574 /// Return true and update AddrMode if this addr mode is legal for the target, 1575 /// false if not. 1576 bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, 1577 unsigned Depth) { 1578 // If Scale is 1, then this is the same as adding ScaleReg to the addressing 1579 // mode. Just process that directly. 1580 if (Scale == 1) 1581 return MatchAddr(ScaleReg, Depth); 1582 1583 // If the scale is 0, it takes nothing to add this. 1584 if (Scale == 0) 1585 return true; 1586 1587 // If we already have a scale of this value, we can add to it, otherwise, we 1588 // need an available scale field. 1589 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg) 1590 return false; 1591 1592 ExtAddrMode TestAddrMode = AddrMode; 1593 1594 // Add scale to turn X*4+X*3 -> X*7. This could also do things like 1595 // [A+B + A*7] -> [B+A*8]. 
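  // (Illustrative, not from the original source: if AddrMode is currently
  // [Base + X*4] and we are asked to add X*3, the candidate mode built below
  // becomes [Base + X*7], which is then checked for target legality.)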
1596 TestAddrMode.Scale += Scale; 1597 TestAddrMode.ScaledReg = ScaleReg; 1598 1599 // If the new address isn't legal, bail out. 1600 if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) 1601 return false; 1602 1603 // It was legal, so commit it. 1604 AddrMode = TestAddrMode; 1605 1606 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now 1607 // to see if ScaleReg is actually X+C. If so, we can turn this into adding 1608 // X*Scale + C*Scale to addr mode. 1609 ConstantInt *CI = nullptr; Value *AddLHS = nullptr; 1610 if (isa<Instruction>(ScaleReg) && // not a constant expr. 1611 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) { 1612 TestAddrMode.ScaledReg = AddLHS; 1613 TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale; 1614 1615 // If this addressing mode is legal, commit it and remember that we folded 1616 // this instruction. 1617 if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) { 1618 AddrModeInsts.push_back(cast<Instruction>(ScaleReg)); 1619 AddrMode = TestAddrMode; 1620 return true; 1621 } 1622 } 1623 1624 // Otherwise, not (x+c)*scale, just return what we have. 1625 return true; 1626 } 1627 1628 /// MightBeFoldableInst - This is a little filter, which returns true if an 1629 /// addressing computation involving I might be folded into a load/store 1630 /// accessing it. This doesn't need to be perfect, but needs to accept at least 1631 /// the set of instructions that MatchOperationAddr can. 1632 static bool MightBeFoldableInst(Instruction *I) { 1633 switch (I->getOpcode()) { 1634 case Instruction::BitCast: 1635 case Instruction::AddrSpaceCast: 1636 // Don't touch identity bitcasts. 1637 if (I->getType() == I->getOperand(0)->getType()) 1638 return false; 1639 return I->getType()->isPointerTy() || I->getType()->isIntegerTy(); 1640 case Instruction::PtrToInt: 1641 // PtrToInt is always a noop, as we know that the int type is pointer sized. 1642 return true; 1643 case Instruction::IntToPtr: 1644 // We know the input is intptr_t, so this is foldable. 1645 return true; 1646 case Instruction::Add: 1647 return true; 1648 case Instruction::Mul: 1649 case Instruction::Shl: 1650 // Can only handle X*C and X << C. 1651 return isa<ConstantInt>(I->getOperand(1)); 1652 case Instruction::GetElementPtr: 1653 return true; 1654 default: 1655 return false; 1656 } 1657 } 1658 1659 /// \brief Hepler class to perform type promotion. 1660 class TypePromotionHelper { 1661 /// \brief Utility function to check whether or not a sign extension of 1662 /// \p Inst with \p ConsideredSExtType can be moved through \p Inst by either 1663 /// using the operands of \p Inst or promoting \p Inst. 1664 /// In other words, check if: 1665 /// sext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredSExtType. 1666 /// #1 Promotion applies: 1667 /// ConsideredSExtType Inst (sext opnd1 to ConsideredSExtType, ...). 1668 /// #2 Operand reuses: 1669 /// sext opnd1 to ConsideredSExtType. 1670 /// \p PromotedInsts maps the instructions to their type before promotion. 1671 static bool canGetThrough(const Instruction *Inst, Type *ConsideredSExtType, 1672 const InstrToOrigTy &PromotedInsts); 1673 1674 /// \brief Utility function to determine if \p OpIdx should be promoted when 1675 /// promoting \p Inst. 
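/// For example, the condition operand (operand 0) of a select must keep its /// original type, so it is never sign extended.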
1676 static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) { 1677 if (isa<SelectInst>(Inst) && OpIdx == 0) 1678 return false; 1679 return true; 1680 } 1681 1682 /// \brief Utility function to promote the operand of \p SExt when this 1683 /// operand is a promotable trunc or sext. 1684 /// \p PromotedInsts maps the instructions to their type before promotion. 1685 /// \p CreatedInsts[out] contains how many non-free instructions have been 1686 /// created to promote the operand of SExt. 1687 /// Should never be called directly. 1688 /// \return The promoted value which is used instead of SExt. 1689 static Value *promoteOperandForTruncAndSExt(Instruction *SExt, 1690 TypePromotionTransaction &TPT, 1691 InstrToOrigTy &PromotedInsts, 1692 unsigned &CreatedInsts); 1693 1694 /// \brief Utility function to promote the operand of \p SExt when this 1695 /// operand is promotable and is not a supported trunc or sext. 1696 /// \p PromotedInsts maps the instructions to their type before promotion. 1697 /// \p CreatedInsts[out] contains how many non-free instructions have been 1698 /// created to promote the operand of SExt. 1699 /// Should never be called directly. 1700 /// \return The promoted value which is used instead of SExt. 1701 static Value *promoteOperandForOther(Instruction *SExt, 1702 TypePromotionTransaction &TPT, 1703 InstrToOrigTy &PromotedInsts, 1704 unsigned &CreatedInsts); 1705 1706 public: 1707 /// Type for the utility function that promotes the operand of SExt. 1708 typedef Value *(*Action)(Instruction *SExt, TypePromotionTransaction &TPT, 1709 InstrToOrigTy &PromotedInsts, 1710 unsigned &CreatedInsts); 1711 /// \brief Given a sign extend instruction \p SExt, return the appropriate 1712 /// action to promote the operand of \p SExt instead of using SExt. 1713 /// \return NULL if no promotable action is possible with the current 1714 /// sign extension. 1715 /// \p InsertedTruncs keeps track of all the truncate instructions inserted by 1716 /// the other CodeGenPrepare optimizations. This information is important 1717 /// because we do not want to promote these instructions as CodeGenPrepare 1718 /// will reinsert them later, thus creating an infinite create/remove loop. 1719 /// \p PromotedInsts maps the instructions to their type before promotion. 1720 static Action getAction(Instruction *SExt, const SetOfInstrs &InsertedTruncs, 1721 const TargetLowering &TLI, 1722 const InstrToOrigTy &PromotedInsts); 1723 }; 1724 1725 bool TypePromotionHelper::canGetThrough(const Instruction *Inst, 1726 Type *ConsideredSExtType, 1727 const InstrToOrigTy &PromotedInsts) { 1728 // We can always get through sext. 1729 if (isa<SExtInst>(Inst)) 1730 return true; 1731 1732 // We can get through a binary operator if it is legal. In other words, the 1733 // binary operator must have a nuw or nsw flag. 1734 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); 1735 if (BinOp && isa<OverflowingBinaryOperator>(BinOp) && 1736 (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap())) 1737 return true; 1738 1739 // Check if we can do the following simplification. 1740 // sext(trunc(sext)) --> sext 1741 if (!isa<TruncInst>(Inst)) 1742 return false; 1743 1744 Value *OpndVal = Inst->getOperand(0); 1745 // Check if we can use this operand in the sext. 1746 // If the type is larger than the result type of the sign extension, 1747 // we cannot.
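// E.g., for 'sext (trunc i64 %a to i16) to i32', the truncate's i64 operand is wider than the i32 result of the sext, so it cannot be reused directly. (Illustrative IR only.)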
1748 if (OpndVal->getType()->getIntegerBitWidth() > 1749 ConsideredSExtType->getIntegerBitWidth()) 1750 return false; 1751 1752 // If the operand of the truncate is not an instruction, we will not have 1753 // any information on the dropped bits. 1754 // (Actually we could for constants, but it is not worth the extra logic). 1755 Instruction *Opnd = dyn_cast<Instruction>(OpndVal); 1756 if (!Opnd) 1757 return false; 1758 1759 // Check if the source of the truncate is narrow enough. 1760 // I.e., check that trunc just drops sign extended bits. 1761 // #1 get the type of the operand. 1762 const Type *OpndType; 1763 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd); 1764 if (It != PromotedInsts.end()) 1765 OpndType = It->second; 1766 else if (isa<SExtInst>(Opnd)) 1767 OpndType = cast<Instruction>(Opnd)->getOperand(0)->getType(); 1768 else 1769 return false; 1770 1771 // #2 check that the truncate just drops sign extended bits. 1772 if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth()) 1773 return true; 1774 1775 return false; 1776 } 1777 1778 TypePromotionHelper::Action TypePromotionHelper::getAction( 1779 Instruction *SExt, const SetOfInstrs &InsertedTruncs, 1780 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) { 1781 Instruction *SExtOpnd = dyn_cast<Instruction>(SExt->getOperand(0)); 1782 Type *SExtTy = SExt->getType(); 1783 // If the operand of the sign extension is not an instruction, we cannot 1784 // get through. 1785 // If it is, check whether we can get through it. 1786 if (!SExtOpnd || !canGetThrough(SExtOpnd, SExtTy, PromotedInsts)) 1787 return nullptr; 1788 1789 // Do not promote if the operand has been added by codegenprepare. 1790 // Otherwise, it means we are undoing an optimization that is likely to be 1791 // redone, thus causing a potential infinite loop. 1792 if (isa<TruncInst>(SExtOpnd) && InsertedTruncs.count(SExtOpnd)) 1793 return nullptr; 1794 1795 // SExt or Trunc instructions. 1796 // Return the related handler. 1797 if (isa<SExtInst>(SExtOpnd) || isa<TruncInst>(SExtOpnd)) 1798 return promoteOperandForTruncAndSExt; 1799 1800 // Regular instruction. 1801 // Abort early if we will have to insert non-free instructions. 1802 if (!SExtOpnd->hasOneUse() && 1803 !TLI.isTruncateFree(SExtTy, SExtOpnd->getType())) 1804 return nullptr; 1805 return promoteOperandForOther; 1806 } 1807 1808 Value *TypePromotionHelper::promoteOperandForTruncAndSExt( 1809 llvm::Instruction *SExt, TypePromotionTransaction &TPT, 1810 InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts) { 1811 // By construction, the operand of SExt is an instruction. Otherwise we cannot 1812 // get through it and this method should not be called. 1813 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); 1814 // Replace sext(trunc(opnd)) or sext(sext(opnd)) 1815 // => sext(opnd). 1816 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); 1817 CreatedInsts = 0; 1818 1819 // Remove dead code. 1820 if (SExtOpnd->use_empty()) 1821 TPT.eraseInstruction(SExtOpnd); 1822 1823 // Check if the sext is still needed. 1824 if (SExt->getType() != SExt->getOperand(0)->getType()) 1825 return SExt; 1826 1827 // At this point we have: sext ty opnd to ty. 1828 // Reassign the uses of SExt to the opnd and remove SExt.
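// E.g., if the operand was 'trunc i32 %c to i16', the rewritten sext is now 'sext i32 %c to i32', which is a no-op: forward %c to the uses and drop the sext. (Illustrative IR only.)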
1829 Value *NextVal = SExt->getOperand(0); 1830 TPT.eraseInstruction(SExt, NextVal); 1831 return NextVal; 1832 } 1833 1834 Value * 1835 TypePromotionHelper::promoteOperandForOther(Instruction *SExt, 1836 TypePromotionTransaction &TPT, 1837 InstrToOrigTy &PromotedInsts, 1838 unsigned &CreatedInsts) { 1839 // By construction, the operand of SExt is an instruction. Otherwise we cannot 1840 // get through it and this method should not be called. 1841 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); 1842 CreatedInsts = 0; 1843 if (!SExtOpnd->hasOneUse()) { 1844 // SExtOpnd will be promoted. 1845 // All its uses, except SExt, will need to use a truncated value of the 1846 // promoted version. 1847 // Create the truncate now. 1848 Instruction *Trunc = TPT.createTrunc(SExt, SExtOpnd->getType()); 1849 Trunc->removeFromParent(); 1850 // Insert it just after the definition. 1851 Trunc->insertAfter(SExtOpnd); 1852 1853 TPT.replaceAllUsesWith(SExtOpnd, Trunc); 1854 // Restore the operand of SExt (which has been replaced by the previous call 1855 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext. 1856 TPT.setOperand(SExt, 0, SExtOpnd); 1857 } 1858 1859 // Get through the Instruction: 1860 // 1. Update its type. 1861 // 2. Replace the uses of SExt by Inst. 1862 // 3. Sign extend each operand that needs to be sign extended. 1863 1864 // Remember the original type of the instruction before promotion. 1865 // This is useful to know that the high bits are sign extended bits. 1866 PromotedInsts.insert( 1867 std::pair<Instruction *, Type *>(SExtOpnd, SExtOpnd->getType())); 1868 // Step #1. 1869 TPT.mutateType(SExtOpnd, SExt->getType()); 1870 // Step #2. 1871 TPT.replaceAllUsesWith(SExt, SExtOpnd); 1872 // Step #3. 1873 Instruction *SExtForOpnd = SExt; 1874 1875 DEBUG(dbgs() << "Propagate SExt to operands\n"); 1876 for (int OpIdx = 0, EndOpIdx = SExtOpnd->getNumOperands(); OpIdx != EndOpIdx; 1877 ++OpIdx) { 1878 DEBUG(dbgs() << "Operand:\n" << *(SExtOpnd->getOperand(OpIdx)) << '\n'); 1879 if (SExtOpnd->getOperand(OpIdx)->getType() == SExt->getType() || 1880 !shouldSExtOperand(SExtOpnd, OpIdx)) { 1881 DEBUG(dbgs() << "No need to propagate\n"); 1882 continue; 1883 } 1884 // Check if we can statically sign extend the operand. 1885 Value *Opnd = SExtOpnd->getOperand(OpIdx); 1886 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) { 1887 DEBUG(dbgs() << "Statically sign extend\n"); 1888 TPT.setOperand( 1889 SExtOpnd, OpIdx, 1890 ConstantInt::getSigned(SExt->getType(), Cst->getSExtValue())); 1891 continue; 1892 } 1893 // UndefValues are typed, so we have to statically sign extend them. 1894 if (isa<UndefValue>(Opnd)) { 1895 DEBUG(dbgs() << "Statically sign extend\n"); 1896 TPT.setOperand(SExtOpnd, OpIdx, UndefValue::get(SExt->getType())); 1897 continue; 1898 } 1899 1900 // Otherwise we have to explicitly sign extend the operand. 1901 // Check if SExt was reused to sign extend an operand. 1902 if (!SExtForOpnd) { 1903 // If yes, create a new one. 1904 DEBUG(dbgs() << "More operands to sext\n"); 1905 SExtForOpnd = TPT.createSExt(SExt, Opnd, SExt->getType()); 1906 ++CreatedInsts; 1907 } 1908 1909 TPT.setOperand(SExtForOpnd, 0, Opnd); 1910 1911 // Move the sign extension before the insertion point. 1912 TPT.moveBefore(SExtForOpnd, SExtOpnd); 1913 TPT.setOperand(SExtOpnd, OpIdx, SExtForOpnd); 1914 // If more sexts are required, new instructions will have to be created.
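// Resetting SExtForOpnd below makes the next iteration call createSExt (and count one more created instruction) only if another operand actually needs extending.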
1915 SExtForOpnd = nullptr; 1916 } 1917 if (SExtForOpnd == SExt) { 1918 DEBUG(dbgs() << "Sign extension is useless now\n"); 1919 TPT.eraseInstruction(SExt); 1920 } 1921 return SExtOpnd; 1922 } 1923 1924 /// IsPromotionProfitable - Check whether or not promoting an instruction 1925 /// to a wider type was profitable. 1926 /// \p MatchedSize gives the number of instructions that have been matched 1927 /// in the addressing mode after the promotion was applied. 1928 /// \p SizeWithPromotion gives the number of created instructions for 1929 /// the promotion plus the number of instructions that have been 1930 /// matched in the addressing mode before the promotion. 1931 /// \p PromotedOperand is the value that has been promoted. 1932 /// \return True if the promotion is profitable, false otherwise. 1933 bool 1934 AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize, 1935 unsigned SizeWithPromotion, 1936 Value *PromotedOperand) const { 1937 // We folded less instructions than what we created to promote the operand. 1938 // This is not profitable. 1939 if (MatchedSize < SizeWithPromotion) 1940 return false; 1941 if (MatchedSize > SizeWithPromotion) 1942 return true; 1943 // The promotion is neutral but it may help folding the sign extension in 1944 // loads for instance. 1945 // Check that we did not create an illegal instruction. 1946 Instruction *PromotedInst = dyn_cast<Instruction>(PromotedOperand); 1947 if (!PromotedInst) 1948 return false; 1949 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode()); 1950 // If the ISDOpcode is undefined, it was undefined before the promotion. 1951 if (!ISDOpcode) 1952 return true; 1953 // Otherwise, check if the promoted instruction is legal or not. 1954 return TLI.isOperationLegalOrCustom(ISDOpcode, 1955 EVT::getEVT(PromotedInst->getType())); 1956 } 1957 1958 /// MatchOperationAddr - Given an instruction or constant expr, see if we can 1959 /// fold the operation into the addressing mode. If so, update the addressing 1960 /// mode and return true, otherwise return false without modifying AddrMode. 1961 /// If \p MovedAway is not NULL, it contains the information of whether or 1962 /// not AddrInst has to be folded into the addressing mode on success. 1963 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing 1964 /// because it has been moved away. 1965 /// Thus AddrInst must not be added in the matched instructions. 1966 /// This state can happen when AddrInst is a sext, since it may be moved away. 1967 /// Therefore, AddrInst may not be valid when MovedAway is true and it must 1968 /// not be referenced anymore. 1969 bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, 1970 unsigned Depth, 1971 bool *MovedAway) { 1972 // Avoid exponential behavior on extremely deep expression trees. 1973 if (Depth >= 5) return false; 1974 1975 // By default, all matched instructions stay in place. 1976 if (MovedAway) 1977 *MovedAway = false; 1978 1979 switch (Opcode) { 1980 case Instruction::PtrToInt: 1981 // PtrToInt is always a noop, as we know that the int type is pointer sized. 1982 return MatchAddr(AddrInst->getOperand(0), Depth); 1983 case Instruction::IntToPtr: 1984 // This inttoptr is a no-op if the integer type is pointer sized. 
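// E.g., on a target with 64-bit pointers, 'inttoptr i64 %x to i8*' can be looked through and the match continues on %x. (Illustrative IR only.)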
1985 if (TLI.getValueType(AddrInst->getOperand(0)->getType()) == 1986 TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace())) 1987 return MatchAddr(AddrInst->getOperand(0), Depth); 1988 return false; 1989 case Instruction::BitCast: 1990 case Instruction::AddrSpaceCast: 1991 // BitCast is always a noop, and we can handle it as long as it is 1992 // int->int or pointer->pointer (we don't want int<->fp or something). 1993 if ((AddrInst->getOperand(0)->getType()->isPointerTy() || 1994 AddrInst->getOperand(0)->getType()->isIntegerTy()) && 1995 // Don't touch identity bitcasts. These were probably put here by LSR, 1996 // and we don't want to mess around with them. Assume it knows what it 1997 // is doing. 1998 AddrInst->getOperand(0)->getType() != AddrInst->getType()) 1999 return MatchAddr(AddrInst->getOperand(0), Depth); 2000 return false; 2001 case Instruction::Add: { 2002 // Check to see if we can merge in the RHS then the LHS. If so, we win. 2003 ExtAddrMode BackupAddrMode = AddrMode; 2004 unsigned OldSize = AddrModeInsts.size(); 2005 // Start a transaction at this point. 2006 // The LHS may match but not the RHS. 2007 // Therefore, we need a higher level restoration point to undo partially 2008 // matched operation. 2009 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2010 TPT.getRestorationPoint(); 2011 2012 if (MatchAddr(AddrInst->getOperand(1), Depth+1) && 2013 MatchAddr(AddrInst->getOperand(0), Depth+1)) 2014 return true; 2015 2016 // Restore the old addr mode info. 2017 AddrMode = BackupAddrMode; 2018 AddrModeInsts.resize(OldSize); 2019 TPT.rollback(LastKnownGood); 2020 2021 // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. 2022 if (MatchAddr(AddrInst->getOperand(0), Depth+1) && 2023 MatchAddr(AddrInst->getOperand(1), Depth+1)) 2024 return true; 2025 2026 // Otherwise we definitely can't merge the ADD in. 2027 AddrMode = BackupAddrMode; 2028 AddrModeInsts.resize(OldSize); 2029 TPT.rollback(LastKnownGood); 2030 break; 2031 } 2032 //case Instruction::Or: 2033 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. 2034 //break; 2035 case Instruction::Mul: 2036 case Instruction::Shl: { 2037 // Can only handle X*C and X << C. 2038 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1)); 2039 if (!RHS) return false; 2040 int64_t Scale = RHS->getSExtValue(); 2041 if (Opcode == Instruction::Shl) 2042 Scale = 1LL << Scale; 2043 2044 return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth); 2045 } 2046 case Instruction::GetElementPtr: { 2047 // Scan the GEP. We check it if it contains constant offsets and at most 2048 // one variable offset. 2049 int VariableOperand = -1; 2050 unsigned VariableScale = 0; 2051 2052 int64_t ConstantOffset = 0; 2053 const DataLayout *TD = TLI.getDataLayout(); 2054 gep_type_iterator GTI = gep_type_begin(AddrInst); 2055 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { 2056 if (StructType *STy = dyn_cast<StructType>(*GTI)) { 2057 const StructLayout *SL = TD->getStructLayout(STy); 2058 unsigned Idx = 2059 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); 2060 ConstantOffset += SL->getElementOffset(Idx); 2061 } else { 2062 uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType()); 2063 if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { 2064 ConstantOffset += CI->getSExtValue()*TypeSize; 2065 } else if (TypeSize) { // Scales of zero don't do anything. 2066 // We only allow one variable index at the moment. 
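// E.g., 'gep %base, i64 %i, i64 %j' has two non-constant indices and is rejected just below. (Illustrative IR only.)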
2067 if (VariableOperand != -1) 2068 return false; 2069 2070 // Remember the variable index. 2071 VariableOperand = i; 2072 VariableScale = TypeSize; 2073 } 2074 } 2075 } 2076 2077 // A common case is for the GEP to only do a constant offset. In this case, 2078 // just add it to the disp field and check validity. 2079 if (VariableOperand == -1) { 2080 AddrMode.BaseOffs += ConstantOffset; 2081 if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){ 2082 // Check to see if we can fold the base pointer in too. 2083 if (MatchAddr(AddrInst->getOperand(0), Depth+1)) 2084 return true; 2085 } 2086 AddrMode.BaseOffs -= ConstantOffset; 2087 return false; 2088 } 2089 2090 // Save the valid addressing mode in case we can't match. 2091 ExtAddrMode BackupAddrMode = AddrMode; 2092 unsigned OldSize = AddrModeInsts.size(); 2093 2094 // See if the scale and offset amount is valid for this target. 2095 AddrMode.BaseOffs += ConstantOffset; 2096 2097 // Match the base operand of the GEP. 2098 if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) { 2099 // If it couldn't be matched, just stuff the value in a register. 2100 if (AddrMode.HasBaseReg) { 2101 AddrMode = BackupAddrMode; 2102 AddrModeInsts.resize(OldSize); 2103 return false; 2104 } 2105 AddrMode.HasBaseReg = true; 2106 AddrMode.BaseReg = AddrInst->getOperand(0); 2107 } 2108 2109 // Match the remaining variable portion of the GEP. 2110 if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, 2111 Depth)) { 2112 // If it couldn't be matched, try stuffing the base into a register 2113 // instead of matching it, and retrying the match of the scale. 2114 AddrMode = BackupAddrMode; 2115 AddrModeInsts.resize(OldSize); 2116 if (AddrMode.HasBaseReg) 2117 return false; 2118 AddrMode.HasBaseReg = true; 2119 AddrMode.BaseReg = AddrInst->getOperand(0); 2120 AddrMode.BaseOffs += ConstantOffset; 2121 if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), 2122 VariableScale, Depth)) { 2123 // If even that didn't work, bail. 2124 AddrMode = BackupAddrMode; 2125 AddrModeInsts.resize(OldSize); 2126 return false; 2127 } 2128 } 2129 2130 return true; 2131 } 2132 case Instruction::SExt: { 2133 // Try to move this sext out of the way of the addressing mode. 2134 Instruction *SExt = cast<Instruction>(AddrInst); 2135 // Ask for a method for doing so. 2136 TypePromotionHelper::Action TPH = TypePromotionHelper::getAction( 2137 SExt, InsertedTruncs, TLI, PromotedInsts); 2138 if (!TPH) 2139 return false; 2140 2141 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2142 TPT.getRestorationPoint(); 2143 unsigned CreatedInsts = 0; 2144 Value *PromotedOperand = TPH(SExt, TPT, PromotedInsts, CreatedInsts); 2145 // SExt has been moved away. 2146 // Thus either it will be rematched later in the recursive calls or it is 2147 // gone. Anyway, we must not fold it into the addressing mode at this point. 
2148 // E.g., 2149 // op = add opnd, 1 2150 // idx = sext op 2151 // addr = gep base, idx 2152 // is now: 2153 // promotedOpnd = sext opnd <- no match here 2154 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls) 2155 // addr = gep base, op <- match 2156 if (MovedAway) 2157 *MovedAway = true; 2158 2159 assert(PromotedOperand && 2160 "TypePromotionHelper should have filtered out those cases"); 2161 2162 ExtAddrMode BackupAddrMode = AddrMode; 2163 unsigned OldSize = AddrModeInsts.size(); 2164 2165 if (!MatchAddr(PromotedOperand, Depth) || 2166 !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts, 2167 PromotedOperand)) { 2168 AddrMode = BackupAddrMode; 2169 AddrModeInsts.resize(OldSize); 2170 DEBUG(dbgs() << "Sign extension does not pay off: rollback\n"); 2171 TPT.rollback(LastKnownGood); 2172 return false; 2173 } 2174 return true; 2175 } 2176 } 2177 return false; 2178 } 2179 2180 /// MatchAddr - If we can, try to add the value of 'Addr' into the current 2181 /// addressing mode. If Addr can't be added to AddrMode this returns false and 2182 /// leaves AddrMode unmodified. This assumes that Addr is either a pointer type 2183 /// or intptr_t for the target. 2184 /// 2185 bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { 2186 // Start a transaction at this point that we will rollback if the matching 2187 // fails. 2188 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2189 TPT.getRestorationPoint(); 2190 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) { 2191 // Fold in immediates if legal for the target. 2192 AddrMode.BaseOffs += CI->getSExtValue(); 2193 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2194 return true; 2195 AddrMode.BaseOffs -= CI->getSExtValue(); 2196 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { 2197 // If this is a global variable, try to fold it into the addressing mode. 2198 if (!AddrMode.BaseGV) { 2199 AddrMode.BaseGV = GV; 2200 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2201 return true; 2202 AddrMode.BaseGV = nullptr; 2203 } 2204 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) { 2205 ExtAddrMode BackupAddrMode = AddrMode; 2206 unsigned OldSize = AddrModeInsts.size(); 2207 2208 // Check to see if it is possible to fold this operation. 2209 bool MovedAway = false; 2210 if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) { 2211 // This instruction may have been move away. If so, there is nothing 2212 // to check here. 2213 if (MovedAway) 2214 return true; 2215 // Okay, it's possible to fold this. Check to see if it is actually 2216 // *profitable* to do so. We use a simple cost model to avoid increasing 2217 // register pressure too much. 2218 if (I->hasOneUse() || 2219 IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { 2220 AddrModeInsts.push_back(I); 2221 return true; 2222 } 2223 2224 // It isn't profitable to do this, roll back. 2225 //cerr << "NOT FOLDING: " << *I; 2226 AddrMode = BackupAddrMode; 2227 AddrModeInsts.resize(OldSize); 2228 TPT.rollback(LastKnownGood); 2229 } 2230 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) { 2231 if (MatchOperationAddr(CE, CE->getOpcode(), Depth)) 2232 return true; 2233 TPT.rollback(LastKnownGood); 2234 } else if (isa<ConstantPointerNull>(Addr)) { 2235 // Null pointer gets folded without affecting the addressing mode. 2236 return true; 2237 } 2238 2239 // Worse case, the target should support [reg] addressing modes. 
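// Try Addr first as a plain base register; if that slot is already taken, fall back to using it as a scaled register with scale 1 (i.e. [r + r]).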
2240 if (!AddrMode.HasBaseReg) { 2241 AddrMode.HasBaseReg = true; 2242 AddrMode.BaseReg = Addr; 2243 // Still check for legality in case the target supports [imm] but not [i+r]. 2244 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2245 return true; 2246 AddrMode.HasBaseReg = false; 2247 AddrMode.BaseReg = nullptr; 2248 } 2249 2250 // If the base register is already taken, see if we can do [r+r]. 2251 if (AddrMode.Scale == 0) { 2252 AddrMode.Scale = 1; 2253 AddrMode.ScaledReg = Addr; 2254 if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) 2255 return true; 2256 AddrMode.Scale = 0; 2257 AddrMode.ScaledReg = nullptr; 2258 } 2259 // Couldn't match. 2260 TPT.rollback(LastKnownGood); 2261 return false; 2262 } 2263 2264 /// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified 2265 /// inline asm call are due to memory operands. If so, return true, otherwise 2266 /// return false. 2267 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, 2268 const TargetLowering &TLI) { 2269 TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI)); 2270 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 2271 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; 2272 2273 // Compute the constraint code and ConstraintType to use. 2274 TLI.ComputeConstraintToUse(OpInfo, SDValue()); 2275 2276 // If this asm operand is our Value*, and if it isn't an indirect memory 2277 // operand, we can't fold it! 2278 if (OpInfo.CallOperandVal == OpVal && 2279 (OpInfo.ConstraintType != TargetLowering::C_Memory || 2280 !OpInfo.isIndirect)) 2281 return false; 2282 } 2283 2284 return true; 2285 } 2286 2287 /// FindAllMemoryUses - Recursively walk all the uses of I until we find a 2288 /// memory use. If we find an obviously non-foldable instruction, return true. 2289 /// Add the ultimately found memory instructions to MemoryUses. 2290 static bool FindAllMemoryUses(Instruction *I, 2291 SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses, 2292 SmallPtrSet<Instruction*, 16> &ConsideredInsts, 2293 const TargetLowering &TLI) { 2294 // If we already considered this instruction, we're done. 2295 if (!ConsideredInsts.insert(I)) 2296 return false; 2297 2298 // If this is an obviously unfoldable instruction, bail out. 2299 if (!MightBeFoldableInst(I)) 2300 return true; 2301 2302 // Loop over all the uses, recursively processing them. 2303 for (Use &U : I->uses()) { 2304 Instruction *UserI = cast<Instruction>(U.getUser()); 2305 2306 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) { 2307 MemoryUses.push_back(std::make_pair(LI, U.getOperandNo())); 2308 continue; 2309 } 2310 2311 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) { 2312 unsigned opNo = U.getOperandNo(); 2313 if (opNo == 0) return true; // Storing addr, not into addr. 2314 MemoryUses.push_back(std::make_pair(SI, opNo)); 2315 continue; 2316 } 2317 2318 if (CallInst *CI = dyn_cast<CallInst>(UserI)) { 2319 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); 2320 if (!IA) return true; 2321 2322 // If this is a memory operand, we're cool, otherwise bail out. 2323 if (!IsOperandAMemoryOperand(CI, IA, I, TLI)) 2324 return true; 2325 continue; 2326 } 2327 2328 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI)) 2329 return true; 2330 } 2331 2332 return false; 2333 } 2334 2335 /// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at 2336 /// the use site that we're folding it into.
If so, there is no cost to 2337 /// include it in the addressing mode. KnownLive1 and KnownLive2 are two values 2338 /// that we know are live at the instruction already. 2339 bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, 2340 Value *KnownLive2) { 2341 // If Val is either of the known-live values, we know it is live! 2342 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2) 2343 return true; 2344 2345 // All values other than instructions and arguments (e.g. constants) are live. 2346 if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true; 2347 2348 // If Val is a constant sized alloca in the entry block, it is live, this is 2349 // true because it is just a reference to the stack/frame pointer, which is 2350 // live for the whole function. 2351 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val)) 2352 if (AI->isStaticAlloca()) 2353 return true; 2354 2355 // Check to see if this value is already used in the memory instruction's 2356 // block. If so, it's already live into the block at the very least, so we 2357 // can reasonably fold it. 2358 return Val->isUsedInBasicBlock(MemoryInst->getParent()); 2359 } 2360 2361 /// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing 2362 /// mode of the machine to fold the specified instruction into a load or store 2363 /// that ultimately uses it. However, the specified instruction has multiple 2364 /// uses. Given this, it may actually increase register pressure to fold it 2365 /// into the load. For example, consider this code: 2366 /// 2367 /// X = ... 2368 /// Y = X+1 2369 /// use(Y) -> nonload/store 2370 /// Z = Y+1 2371 /// load Z 2372 /// 2373 /// In this case, Y has multiple uses, and can be folded into the load of Z 2374 /// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to 2375 /// be live at the use(Y) line. If we don't fold Y into load Z, we use one 2376 /// fewer register. Since Y can't be folded into "use(Y)" we don't increase the 2377 /// number of computations either. 2378 /// 2379 /// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If 2380 /// X was live across 'load Z' for other reasons, we actually *would* want to 2381 /// fold the addressing mode in the Z case. This would make Y die earlier. 2382 bool AddressingModeMatcher:: 2383 IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, 2384 ExtAddrMode &AMAfter) { 2385 if (IgnoreProfitability) return true; 2386 2387 // AMBefore is the addressing mode before this instruction was folded into it, 2388 // and AMAfter is the addressing mode after the instruction was folded. Get 2389 // the set of registers referenced by AMAfter and subtract out those 2390 // referenced by AMBefore: this is the set of values which folding in this 2391 // address extends the lifetime of. 2392 // 2393 // Note that there are only two potential values being referenced here, 2394 // BaseReg and ScaleReg (global addresses are always available, as are any 2395 // folded immediates). 2396 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg; 2397 2398 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their 2399 // lifetime wasn't extended by adding this instruction. 
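// Whatever survives the two checks below is a value whose live range would be stretched down to MemoryInst by folding I into the addressing mode.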
2400 if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) 2401 BaseReg = nullptr; 2402 if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) 2403 ScaledReg = nullptr; 2404 2405 // If folding this instruction (and it's subexprs) didn't extend any live 2406 // ranges, we're ok with it. 2407 if (!BaseReg && !ScaledReg) 2408 return true; 2409 2410 // If all uses of this instruction are ultimately load/store/inlineasm's, 2411 // check to see if their addressing modes will include this instruction. If 2412 // so, we can fold it into all uses, so it doesn't matter if it has multiple 2413 // uses. 2414 SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; 2415 SmallPtrSet<Instruction*, 16> ConsideredInsts; 2416 if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI)) 2417 return false; // Has a non-memory, non-foldable use! 2418 2419 // Now that we know that all uses of this instruction are part of a chain of 2420 // computation involving only operations that could theoretically be folded 2421 // into a memory use, loop over each of these uses and see if they could 2422 // *actually* fold the instruction. 2423 SmallVector<Instruction*, 32> MatchedAddrModeInsts; 2424 for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) { 2425 Instruction *User = MemoryUses[i].first; 2426 unsigned OpNo = MemoryUses[i].second; 2427 2428 // Get the access type of this use. If the use isn't a pointer, we don't 2429 // know what it accesses. 2430 Value *Address = User->getOperand(OpNo); 2431 if (!Address->getType()->isPointerTy()) 2432 return false; 2433 Type *AddressAccessTy = Address->getType()->getPointerElementType(); 2434 2435 // Do a match against the root of this address, ignoring profitability. This 2436 // will tell us if the addressing mode for the memory operation will 2437 // *actually* cover the shared instruction. 2438 ExtAddrMode Result; 2439 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2440 TPT.getRestorationPoint(); 2441 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy, 2442 MemoryInst, Result, InsertedTruncs, 2443 PromotedInsts, TPT); 2444 Matcher.IgnoreProfitability = true; 2445 bool Success = Matcher.MatchAddr(Address, 0); 2446 (void)Success; assert(Success && "Couldn't select *anything*?"); 2447 2448 // The match was to check the profitability, the changes made are not 2449 // part of the original matcher. Therefore, they should be dropped 2450 // otherwise the original matcher will not present the right state. 2451 TPT.rollback(LastKnownGood); 2452 2453 // If the match didn't cover I, then it won't be shared by it. 2454 if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(), 2455 I) == MatchedAddrModeInsts.end()) 2456 return false; 2457 2458 MatchedAddrModeInsts.clear(); 2459 } 2460 2461 return true; 2462 } 2463 2464 } // end anonymous namespace 2465 2466 /// IsNonLocalValue - Return true if the specified values are defined in a 2467 /// different basic block than BB. 2468 static bool IsNonLocalValue(Value *V, BasicBlock *BB) { 2469 if (Instruction *I = dyn_cast<Instruction>(V)) 2470 return I->getParent() != BB; 2471 return false; 2472 } 2473 2474 /// OptimizeMemoryInst - Load and Store Instructions often have 2475 /// addressing modes that can do significant amounts of computation. As such, 2476 /// instruction selection will try to get the load or store to do as much 2477 /// computation as possible for the program. 
The problem is that isel can only 2478 /// see within a single block. As such, we sink as much legal addressing mode 2479 /// stuff into the block as possible. 2480 /// 2481 /// This method is used to optimize both load/store and inline asms with memory 2482 /// operands. 2483 bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, 2484 Type *AccessTy) { 2485 Value *Repl = Addr; 2486 2487 // Try to collapse single-value PHI nodes. This is necessary to undo 2488 // unprofitable PRE transformations. 2489 SmallVector<Value*, 8> worklist; 2490 SmallPtrSet<Value*, 16> Visited; 2491 worklist.push_back(Addr); 2492 2493 // Use a worklist to iteratively look through PHI nodes, and ensure that 2494 // the addressing mode obtained from the non-PHI roots of the graph 2495 // are equivalent. 2496 Value *Consensus = nullptr; 2497 unsigned NumUsesConsensus = 0; 2498 bool IsNumUsesConsensusValid = false; 2499 SmallVector<Instruction*, 16> AddrModeInsts; 2500 ExtAddrMode AddrMode; 2501 TypePromotionTransaction TPT; 2502 TypePromotionTransaction::ConstRestorationPt LastKnownGood = 2503 TPT.getRestorationPoint(); 2504 while (!worklist.empty()) { 2505 Value *V = worklist.back(); 2506 worklist.pop_back(); 2507 2508 // Break use-def graph loops. 2509 if (!Visited.insert(V)) { 2510 Consensus = nullptr; 2511 break; 2512 } 2513 2514 // For a PHI node, push all of its incoming values. 2515 if (PHINode *P = dyn_cast<PHINode>(V)) { 2516 for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) 2517 worklist.push_back(P->getIncomingValue(i)); 2518 continue; 2519 } 2520 2521 // For non-PHIs, determine the addressing mode being computed. 2522 SmallVector<Instruction*, 16> NewAddrModeInsts; 2523 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( 2524 V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet, 2525 PromotedInsts, TPT); 2526 2527 // This check is broken into two cases with very similar code to avoid using 2528 // getNumUses() as much as possible. Some values have a lot of uses, so 2529 // calling getNumUses() unconditionally caused a significant compile-time 2530 // regression. 2531 if (!Consensus) { 2532 Consensus = V; 2533 AddrMode = NewAddrMode; 2534 AddrModeInsts = NewAddrModeInsts; 2535 continue; 2536 } else if (NewAddrMode == AddrMode) { 2537 if (!IsNumUsesConsensusValid) { 2538 NumUsesConsensus = Consensus->getNumUses(); 2539 IsNumUsesConsensusValid = true; 2540 } 2541 2542 // Ensure that the obtained addressing mode is equivalent to that obtained 2543 // for all other roots of the PHI traversal. Also, when choosing one 2544 // such root as representative, select the one with the most uses in order 2545 // to keep the cost modeling heuristics in AddressingModeMatcher 2546 // applicable. 2547 unsigned NumUses = V->getNumUses(); 2548 if (NumUses > NumUsesConsensus) { 2549 Consensus = V; 2550 NumUsesConsensus = NumUses; 2551 AddrModeInsts = NewAddrModeInsts; 2552 } 2553 continue; 2554 } 2555 2556 Consensus = nullptr; 2557 break; 2558 } 2559 2560 // If the addressing mode couldn't be determined, or if multiple different 2561 // ones were determined, bail out now. 2562 if (!Consensus) { 2563 TPT.rollback(LastKnownGood); 2564 return false; 2565 } 2566 TPT.commit(); 2567 2568 // Check to see if any of the instructions supersumed by this addr mode are 2569 // non-local to I's BB. 
2570 bool AnyNonLocal = false; 2571 for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) { 2572 if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) { 2573 AnyNonLocal = true; 2574 break; 2575 } 2576 } 2577 2578 // If all the instructions matched are already in this BB, don't do anything. 2579 if (!AnyNonLocal) { 2580 DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); 2581 return false; 2582 } 2583 2584 // Insert this computation right after this user. Since our caller is 2585 // scanning from the top of the BB to the bottom, reuses of the expr are 2586 // guaranteed to happen later. 2587 IRBuilder<> Builder(MemoryInst); 2588 2589 // Now that we have determined the addressing expression we want to use and 2590 // know that we have to sink it into this block, check to see if we have 2591 // already done this for some other load/store instr in this block. If so, 2592 // reuse the computation. 2593 Value *&SunkAddr = SunkAddrs[Addr]; 2594 if (SunkAddr) { 2595 DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " 2596 << *MemoryInst << "\n"); 2597 if (SunkAddr->getType() != Addr->getType()) 2598 SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); 2599 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && 2600 TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) { 2601 // By default, we use the GEP-based method when AA is used later. This 2602 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. 2603 DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " 2604 << *MemoryInst << "\n"); 2605 Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); 2606 Value *ResultPtr = nullptr, *ResultIndex = nullptr; 2607 2608 // First, find the pointer. 2609 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) { 2610 ResultPtr = AddrMode.BaseReg; 2611 AddrMode.BaseReg = nullptr; 2612 } 2613 2614 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) { 2615 // We can't add more than one pointer together, nor can we scale a 2616 // pointer (both of which seem meaningless). 2617 if (ResultPtr || AddrMode.Scale != 1) 2618 return false; 2619 2620 ResultPtr = AddrMode.ScaledReg; 2621 AddrMode.Scale = 0; 2622 } 2623 2624 if (AddrMode.BaseGV) { 2625 if (ResultPtr) 2626 return false; 2627 2628 ResultPtr = AddrMode.BaseGV; 2629 } 2630 2631 // If the real base value actually came from an inttoptr, then the matcher 2632 // will look through it and provide only the integer value. In that case, 2633 // use it here. 2634 if (!ResultPtr && AddrMode.BaseReg) { 2635 ResultPtr = 2636 Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr"); 2637 AddrMode.BaseReg = nullptr; 2638 } else if (!ResultPtr && AddrMode.Scale == 1) { 2639 ResultPtr = 2640 Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr"); 2641 AddrMode.Scale = 0; 2642 } 2643 2644 if (!ResultPtr && 2645 !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) { 2646 SunkAddr = Constant::getNullValue(Addr->getType()); 2647 } else if (!ResultPtr) { 2648 return false; 2649 } else { 2650 Type *I8PtrTy = 2651 Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); 2652 2653 // Start with the base register. Do this first so that subsequent address 2654 // matching finds it last, which will prevent it from trying to match it 2655 // as the scaled value in case it happens to be a mul.
That would be 2656 // problematic if we've sunk a different mul for the scale, because then 2657 // we'd end up sinking both muls. 2658 if (AddrMode.BaseReg) { 2659 Value *V = AddrMode.BaseReg; 2660 if (V->getType() != IntPtrTy) 2661 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); 2662 2663 ResultIndex = V; 2664 } 2665 2666 // Add the scale value. 2667 if (AddrMode.Scale) { 2668 Value *V = AddrMode.ScaledReg; 2669 if (V->getType() == IntPtrTy) { 2670 // done. 2671 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < 2672 cast<IntegerType>(V->getType())->getBitWidth()) { 2673 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); 2674 } else { 2675 // It is only safe to sign extend the BaseReg if we know that the math 2676 // required to create it did not overflow before we extend it. Since 2677 // the original IR value was tossed in favor of a constant back when 2678 // the AddrMode was created we need to bail out gracefully if widths 2679 // do not match instead of extending it. 2680 Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex); 2681 if (I && (ResultIndex != AddrMode.BaseReg)) 2682 I->eraseFromParent(); 2683 return false; 2684 } 2685 2686 if (AddrMode.Scale != 1) 2687 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), 2688 "sunkaddr"); 2689 if (ResultIndex) 2690 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr"); 2691 else 2692 ResultIndex = V; 2693 } 2694 2695 // Add in the Base Offset if present. 2696 if (AddrMode.BaseOffs) { 2697 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); 2698 if (ResultIndex) { 2699 // We need to add this separately from the scale above to help with 2700 // SDAG consecutive load/store merging. 2701 if (ResultPtr->getType() != I8PtrTy) 2702 ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); 2703 ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); 2704 } 2705 2706 ResultIndex = V; 2707 } 2708 2709 if (!ResultIndex) { 2710 SunkAddr = ResultPtr; 2711 } else { 2712 if (ResultPtr->getType() != I8PtrTy) 2713 ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); 2714 SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); 2715 } 2716 2717 if (SunkAddr->getType() != Addr->getType()) 2718 SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); 2719 } 2720 } else { 2721 DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " 2722 << *MemoryInst << "\n"); 2723 Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); 2724 Value *Result = nullptr; 2725 2726 // Start with the base register. Do this first so that subsequent address 2727 // matching finds it last, which will prevent it from trying to match it 2728 // as the scaled value in case it happens to be a mul. That would be 2729 // problematic if we've sunk a different mul for the scale, because then 2730 // we'd end up sinking both muls. 2731 if (AddrMode.BaseReg) { 2732 Value *V = AddrMode.BaseReg; 2733 if (V->getType()->isPointerTy()) 2734 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); 2735 if (V->getType() != IntPtrTy) 2736 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); 2737 Result = V; 2738 } 2739 2740 // Add the scale value. 2741 if (AddrMode.Scale) { 2742 Value *V = AddrMode.ScaledReg; 2743 if (V->getType() == IntPtrTy) { 2744 // done. 
2745 } else if (V->getType()->isPointerTy()) { 2746 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); 2747 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < 2748 cast<IntegerType>(V->getType())->getBitWidth()) { 2749 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); 2750 } else { 2751 // It is only safe to sign extend the BaseReg if we know that the math 2752 // required to create it did not overflow before we extend it. Since 2753 // the original IR value was tossed in favor of a constant back when 2754 // the AddrMode was created we need to bail out gracefully if widths 2755 // do not match instead of extending it. 2756 Instruction *I = dyn_cast_or_null<Instruction>(Result); 2757 if (I && (Result != AddrMode.BaseReg)) 2758 I->eraseFromParent(); 2759 return false; 2760 } 2761 if (AddrMode.Scale != 1) 2762 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), 2763 "sunkaddr"); 2764 if (Result) 2765 Result = Builder.CreateAdd(Result, V, "sunkaddr"); 2766 else 2767 Result = V; 2768 } 2769 2770 // Add in the BaseGV if present. 2771 if (AddrMode.BaseGV) { 2772 Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr"); 2773 if (Result) 2774 Result = Builder.CreateAdd(Result, V, "sunkaddr"); 2775 else 2776 Result = V; 2777 } 2778 2779 // Add in the Base Offset if present. 2780 if (AddrMode.BaseOffs) { 2781 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); 2782 if (Result) 2783 Result = Builder.CreateAdd(Result, V, "sunkaddr"); 2784 else 2785 Result = V; 2786 } 2787 2788 if (!Result) 2789 SunkAddr = Constant::getNullValue(Addr->getType()); 2790 else 2791 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); 2792 } 2793 2794 MemoryInst->replaceUsesOfWith(Repl, SunkAddr); 2795 2796 // If we have no uses, recursively delete the value and all dead instructions 2797 // using it. 2798 if (Repl->use_empty()) { 2799 // This can cause recursive deletion, which can invalidate our iterator. 2800 // Use a WeakVH to hold onto it in case this happens. 2801 WeakVH IterHandle(CurInstIterator); 2802 BasicBlock *BB = CurInstIterator->getParent(); 2803 2804 RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); 2805 2806 if (IterHandle != CurInstIterator) { 2807 // If the iterator instruction was recursively deleted, start over at the 2808 // start of the block. 2809 CurInstIterator = BB->begin(); 2810 SunkAddrs.clear(); 2811 } 2812 } 2813 ++NumMemoryInsts; 2814 return true; 2815 } 2816 2817 /// OptimizeInlineAsmInst - If there are any memory operands, use 2818 /// OptimizeMemoryInst to sink their address computing into the block when 2819 /// possible / profitable. 2820 bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { 2821 bool MadeChange = false; 2822 2823 TargetLowering::AsmOperandInfoVector 2824 TargetConstraints = TLI->ParseConstraints(CS); 2825 unsigned ArgNo = 0; 2826 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 2827 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; 2828 2829 // Compute the constraint code and ConstraintType to use. 
2830 TLI->ComputeConstraintToUse(OpInfo, SDValue()); 2831 2832 if (OpInfo.ConstraintType == TargetLowering::C_Memory && 2833 OpInfo.isIndirect) { 2834 Value *OpVal = CS->getArgOperand(ArgNo++); 2835 MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType()); 2836 } else if (OpInfo.Type == InlineAsm::isInput) 2837 ArgNo++; 2838 } 2839 2840 return MadeChange; 2841 } 2842 2843 /// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same 2844 /// basic block as the load, unless conditions are unfavorable. This allows 2845 /// SelectionDAG to fold the extend into the load. 2846 /// 2847 bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) { 2848 // Look for a load being extended. 2849 LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0)); 2850 if (!LI) return false; 2851 2852 // If they're already in the same block, there's nothing to do. 2853 if (LI->getParent() == I->getParent()) 2854 return false; 2855 2856 // If the load has other users and the truncate is not free, this probably 2857 // isn't worthwhile. 2858 if (!LI->hasOneUse() && 2859 TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) || 2860 !TLI->isTypeLegal(TLI->getValueType(I->getType()))) && 2861 !TLI->isTruncateFree(I->getType(), LI->getType())) 2862 return false; 2863 2864 // Check whether the target supports casts folded into loads. 2865 unsigned LType; 2866 if (isa<ZExtInst>(I)) 2867 LType = ISD::ZEXTLOAD; 2868 else { 2869 assert(isa<SExtInst>(I) && "Unexpected ext type!"); 2870 LType = ISD::SEXTLOAD; 2871 } 2872 if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType()))) 2873 return false; 2874 2875 // Move the extend into the same block as the load, so that SelectionDAG 2876 // can fold it. 2877 I->removeFromParent(); 2878 I->insertAfter(LI); 2879 ++NumExtsMoved; 2880 return true; 2881 } 2882 2883 bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { 2884 BasicBlock *DefBB = I->getParent(); 2885 2886 // If the result of a {s|z}ext and its source are both live out, rewrite all 2887 // other uses of the source with result of extension. 2888 Value *Src = I->getOperand(0); 2889 if (Src->hasOneUse()) 2890 return false; 2891 2892 // Only do this xform if truncating is free. 2893 if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType())) 2894 return false; 2895 2896 // Only safe to perform the optimization if the source is also defined in 2897 // this block. 2898 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent()) 2899 return false; 2900 2901 bool DefIsLiveOut = false; 2902 for (User *U : I->users()) { 2903 Instruction *UI = cast<Instruction>(U); 2904 2905 // Figure out which BB this ext is used in. 2906 BasicBlock *UserBB = UI->getParent(); 2907 if (UserBB == DefBB) continue; 2908 DefIsLiveOut = true; 2909 break; 2910 } 2911 if (!DefIsLiveOut) 2912 return false; 2913 2914 // Make sure none of the uses are PHI nodes. 2915 for (User *U : Src->users()) { 2916 Instruction *UI = cast<Instruction>(U); 2917 BasicBlock *UserBB = UI->getParent(); 2918 if (UserBB == DefBB) continue; 2919 // Be conservative. We don't want this xform to end up introducing 2920 // reloads just before load / store instructions. 2921 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI)) 2922 return false; 2923 } 2924 2925 // InsertedTruncs - Only insert one trunc in each block once. 
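// Maps each user block to the single 'trunc I to Src's type' that all uses of Src in that block are rewritten to use.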
2926 DenseMap<BasicBlock*, Instruction*> InsertedTruncs; 2927 2928 bool MadeChange = false; 2929 for (Use &U : Src->uses()) { 2930 Instruction *User = cast<Instruction>(U.getUser()); 2931 2932 // Figure out which BB this ext is used in. 2933 BasicBlock *UserBB = User->getParent(); 2934 if (UserBB == DefBB) continue; 2935 2936 // Both src and def are live in this block. Rewrite the use. 2937 Instruction *&InsertedTrunc = InsertedTruncs[UserBB]; 2938 2939 if (!InsertedTrunc) { 2940 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 2941 InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt); 2942 InsertedTruncsSet.insert(InsertedTrunc); 2943 } 2944 2945 // Replace a use of the {s|z}ext source with a use of the result. 2946 U = InsertedTrunc; 2947 ++NumExtUses; 2948 MadeChange = true; 2949 } 2950 2951 return MadeChange; 2952 } 2953 2954 /// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be 2955 /// turned into an explicit branch. 2956 static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { 2957 // FIXME: This should use the same heuristics as IfConversion to determine 2958 // whether a select is better represented as a branch. This requires that 2959 // branch probability metadata is preserved for the select, which is not the 2960 // case currently. 2961 2962 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); 2963 2964 // If the branch is predicted right, an out of order CPU can avoid blocking on 2965 // the compare. Emit cmovs on compares with a memory operand as branches to 2966 // avoid stalls on the load from memory. If the compare has more than one use 2967 // there's probably another cmov or setcc around so it's not worth emitting a 2968 // branch. 2969 if (!Cmp) 2970 return false; 2971 2972 Value *CmpOp0 = Cmp->getOperand(0); 2973 Value *CmpOp1 = Cmp->getOperand(1); 2974 2975 // We check that the memory operand has one use to avoid uses of the loaded 2976 // value directly after the compare, making branches unprofitable. 2977 return Cmp->hasOneUse() && 2978 ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) || 2979 (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse())); 2980 } 2981 2982 2983 /// If we have a SelectInst that will likely profit from branch prediction, 2984 /// turn it into a branch. 2985 bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { 2986 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); 2987 2988 // Can we convert the 'select' to CF ? 2989 if (DisableSelectToBranch || OptSize || !TLI || VectorCond) 2990 return false; 2991 2992 TargetLowering::SelectSupportKind SelectKind; 2993 if (VectorCond) 2994 SelectKind = TargetLowering::VectorMaskSelect; 2995 else if (SI->getType()->isVectorTy()) 2996 SelectKind = TargetLowering::ScalarCondVectorVal; 2997 else 2998 SelectKind = TargetLowering::ScalarValSelect; 2999 3000 // Do we have efficient codegen support for this kind of 'selects' ? 3001 if (TLI->isSelectSupported(SelectKind)) { 3002 // We have efficient codegen support for the select instruction. 3003 // Check if it is profitable to keep this 'select'. 3004 if (!TLI->isPredictableSelectExpensive() || 3005 !isFormingBranchFromSelectProfitable(SI)) 3006 return false; 3007 } 3008 3009 ModifiedDT = true; 3010 3011 // First, we split the block containing the select into 2 blocks. 
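// Roughly, '%res = select i1 %c, %t, %f' becomes: // start: ... ; br i1 %c, label %select.end, label %select.mid // select.mid: br label %select.end // select.end: %res = phi [ %t, %start ], [ %f, %select.mid ] // (block names are illustrative).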
3012 BasicBlock *StartBlock = SI->getParent(); 3013 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI)); 3014 BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); 3015 3016 // Create a new block serving as the landing pad for the branch. 3017 BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid", 3018 NextBlock->getParent(), NextBlock); 3019 3020 // Move the unconditional branch from the block with the select in it into our 3021 // landing pad block. 3022 StartBlock->getTerminator()->eraseFromParent(); 3023 BranchInst::Create(NextBlock, SmallBlock); 3024 3025 // Insert the real conditional branch based on the original condition. 3026 BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI); 3027 3028 // The select itself is replaced with a PHI Node. 3029 PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin()); 3030 PN->takeName(SI); 3031 PN->addIncoming(SI->getTrueValue(), StartBlock); 3032 PN->addIncoming(SI->getFalseValue(), SmallBlock); 3033 SI->replaceAllUsesWith(PN); 3034 SI->eraseFromParent(); 3035 3036 // Instruct OptimizeBlock to skip to the next block. 3037 CurInstIterator = StartBlock->end(); 3038 ++NumSelectsExpanded; 3039 return true; 3040 } 3041 3042 static bool isBroadcastShuffle(ShuffleVectorInst *SVI) { 3043 SmallVector<int, 16> Mask(SVI->getShuffleMask()); 3044 int SplatElem = -1; 3045 for (unsigned i = 0; i < Mask.size(); ++i) { 3046 if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem) 3047 return false; 3048 SplatElem = Mask[i]; 3049 } 3050 3051 return true; 3052 } 3053 3054 /// Some targets have expensive vector shifts if the lanes aren't all the same 3055 /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases 3056 /// it's often worth sinking a shufflevector splat down to its use so that 3057 /// codegen can spot all lanes are identical. 3058 bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) { 3059 BasicBlock *DefBB = SVI->getParent(); 3060 3061 // Only do this xform if variable vector shifts are particularly expensive. 3062 if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType())) 3063 return false; 3064 3065 // We only expect better codegen by sinking a shuffle if we can recognise a 3066 // constant splat. 3067 if (!isBroadcastShuffle(SVI)) 3068 return false; 3069 3070 // InsertedShuffles - Only insert a shuffle in each block once. 3071 DenseMap<BasicBlock*, Instruction*> InsertedShuffles; 3072 3073 bool MadeChange = false; 3074 for (User *U : SVI->users()) { 3075 Instruction *UI = cast<Instruction>(U); 3076 3077 // Figure out which BB this ext is used in. 3078 BasicBlock *UserBB = UI->getParent(); 3079 if (UserBB == DefBB) continue; 3080 3081 // For now only apply this when the splat is used by a shift instruction. 3082 if (!UI->isShift()) continue; 3083 3084 // Everything checks out, sink the shuffle if the user's block doesn't 3085 // already have a copy. 3086 Instruction *&InsertedShuffle = InsertedShuffles[UserBB]; 3087 3088 if (!InsertedShuffle) { 3089 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); 3090 InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0), 3091 SVI->getOperand(1), 3092 SVI->getOperand(2), "", InsertPt); 3093 } 3094 3095 UI->replaceUsesOfWith(SVI, InsertedShuffle); 3096 MadeChange = true; 3097 } 3098 3099 // If we removed all uses, nuke the shuffle. 
  // If we removed all uses, nuke the shuffle.
  if (SVI->use_empty()) {
    SVI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}

bool CodeGenPrepare::OptimizeInst(Instruction *I) {
  if (PHINode *P = dyn_cast<PHINode>(I)) {
    // It is possible for very late stage optimizations (such as SimplifyCFG)
    // to introduce PHI nodes too late to be cleaned up. If we detect such a
    // trivial PHI, go ahead and zap it here.
    if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr,
                                       TLInfo, DT)) {
      P->replaceAllUsesWith(V);
      P->eraseFromParent();
      ++NumPHIsElim;
      return true;
    }
    return false;
  }

  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    // If the source of the cast is a constant, then this should have
    // already been constant folded. The only reason NOT to constant fold
    // it is if something (e.g. LSR) was careful to place the constant
    // evaluation in a block other than the one that uses it (e.g. to hoist
    // the address of globals out of a loop). If this is the case, we don't
    // want to forward-subst the cast.
    if (isa<Constant>(CI->getOperand(0)))
      return false;

    if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
      return true;

    if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
      // Sink a zext or sext into its user blocks if the target type doesn't
      // fit in one register.
      if (TLI && TLI->getTypeAction(CI->getContext(),
                                    TLI->getValueType(CI->getType())) ==
                     TargetLowering::TypeExpandInteger) {
        return SinkCast(CI);
      } else {
        bool MadeChange = MoveExtToFormExtLoad(I);
        return MadeChange | OptimizeExtUses(I);
      }
    }
    return false;
  }

  if (CmpInst *CI = dyn_cast<CmpInst>(I))
    if (!TLI || !TLI->hasMultipleConditionRegisters())
      return OptimizeCmpExpression(CI);

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (TLI)
      return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
    return false;
  }

  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (TLI)
      return OptimizeMemoryInst(I, SI->getOperand(1),
                                SI->getOperand(0)->getType());
    return false;
  }

  BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);

  if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
                BinOp->getOpcode() == Instruction::LShr)) {
    ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
    if (TLI && CI && TLI->hasExtractBitsInsn())
      return OptimizeExtractBits(BinOp, CI, *TLI);

    return false;
  }

  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
    if (GEPI->hasAllZeroIndices()) {
      // The GEP operand must be a pointer, so must its result -> BitCast.
      Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                        GEPI->getName(), GEPI);
      GEPI->replaceAllUsesWith(NC);
      GEPI->eraseFromParent();
      ++NumGEPsElim;
      OptimizeInst(NC);
      return true;
    }
    return false;
  }

  if (CallInst *CI = dyn_cast<CallInst>(I))
    return OptimizeCallInst(CI);

  if (SelectInst *SI = dyn_cast<SelectInst>(I))
    return OptimizeSelectInst(SI);

  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
    return OptimizeShuffleVectorInst(SVI);

  return false;
}

// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
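// Note that OptimizeInst may update CurInstIterator itself (for instance,
// OptimizeSelectInst splits the block and skips to its end), which is why the
// loop below re-reads the member iterator on every step.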
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
  SunkAddrs.clear();
  bool MadeChange = false;

  CurInstIterator = BB.begin();
  while (CurInstIterator != BB.end())
    MadeChange |= OptimizeInst(CurInstIterator++);

  MadeChange |= DupRetToEnableTailCallOpts(&BB);

  return MadeChange;
}

// If llvm.dbg.value is far away from the value, then iSel may not be able to
// handle it properly. iSel will drop the llvm.dbg.value if it cannot find a
// node corresponding to the value.
bool CodeGenPrepare::PlaceDbgValues(Function &F) {
  bool MadeChange = false;
  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
    Instruction *PrevNonDbgInst = nullptr;
    for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
      Instruction *Insn = BI; ++BI;
      DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
      // Leave dbg.values that refer to an alloca alone. These
      // intrinsics describe the address of a variable (= the alloca)
      // being taken. They should not be moved next to the alloca
      // (and to the beginning of the scope), but rather stay close to
      // where said address is used.
      if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
        PrevNonDbgInst = Insn;
        continue;
      }

      Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
      if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
        DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
        DVI->removeFromParent();
        if (isa<PHINode>(VI))
          DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
        else
          DVI->insertAfter(VI);
        MadeChange = true;
        ++NumDbgValueMoved;
      }
    }
  }
  return MadeChange;
}

// If there is a sequence that branches based on comparing a single bit
// against zero that can be combined into a single instruction, and the
// target supports folding these into a single instruction, sink the
// mask and compare into the branch uses. Do this before OptimizeBlock ->
// OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being
// searched for.
bool CodeGenPrepare::sinkAndCmp(Function &F) {
  if (!EnableAndCmpSinking)
    return false;
  if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
    return false;
  bool MadeChange = false;
  for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;

    // Does this BB end with the following?
    //   %andVal = and %val, #single-bit-set
    //   %icmpVal = icmp %andVal, 0
    //   br i1 %icmpVal, label %dest1, label %dest2
    BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator());
    if (!Brcc || !Brcc->isConditional())
      continue;
    ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
    if (!Cmp || Cmp->getParent() != BB)
      continue;
    ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
    if (!Zero || !Zero->isZero())
      continue;
    Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
    if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB)
      continue;
    ConstantInt *Mask = dyn_cast<ConstantInt>(And->getOperand(1));
    if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
      continue;
    DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump());

    // Push the "and; icmp" for any users that are conditional branches.
    // Since there can only be one branch use per BB, we don't need to keep
    // track of which BBs we insert into.
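    // Each such user block ends up with its own copy of the pattern right
    // before its branch, e.g. (illustrative values, mask, and type; the new
    // instructions are unnamed in the code below):
    //   %and.sunk = and i32 %val, 4
    //   %cmp.sunk = icmp eq i32 %and.sunk, 0
    //   br i1 %cmp.sunk, label %dest1, label %dest2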
    for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end();
         UI != E; ) {
      Use &TheUse = *UI;
      // Find brcc use.
      BranchInst *BrccUser = dyn_cast<BranchInst>(*UI);
      ++UI;
      if (!BrccUser || !BrccUser->isConditional())
        continue;
      BasicBlock *UserBB = BrccUser->getParent();
      if (UserBB == BB) continue;
      DEBUG(dbgs() << "found Brcc use\n");

      // Sink the "and; icmp" to the use.
      MadeChange = true;
      BinaryOperator *NewAnd =
        BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "",
                                  BrccUser);
      CmpInst *NewCmp =
        CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero,
                        "", BrccUser);
      TheUse = NewCmp;
      ++NumAndCmpsMoved;
      DEBUG(BrccUser->getParent()->dump());
    }
  }
  return MadeChange;
}