//===- CodeGenPrepare.cpp - Prepare a function for code generation -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "codegenprepare"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/ValueHandle.h"
using namespace llvm;
using namespace llvm::PatternMatch;

STATISTIC(NumBlocksElim, "Number of blocks eliminated");
STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
                      "sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
                       "of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
                          "computations were sunk");
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");

static cl::opt<bool> DisableBranchOpts(
  "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
  cl::desc("Disable branch optimizations in CodeGenPrepare"));

namespace {
  class CodeGenPrepare : public FunctionPass {
    /// TLI - Keep a pointer to a TargetLowering to consult for determining
    /// transformation profitability.
    const TargetLowering *TLI;
    DominatorTree *DT;
    ProfileInfo *PFI;

    /// CurInstIterator - As we scan instructions optimizing them, this is the
    /// next instruction to optimize. Xforms that can invalidate this should
    /// update it.
    BasicBlock::iterator CurInstIterator;

    /// Keeps track of non-local addresses that have been sunk into a block.
    /// This allows us to avoid inserting duplicate code for blocks with
    /// multiple load/stores of the same address.
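    /// (The map is cleared at the start of each block -- see OptimizeBlock --
    /// so sunk addresses are only ever reused within a single block.)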
    DenseMap<Value*, Value*> SunkAddrs;

    /// ModifiedDT - If the CFG is modified in any way, the dominator tree may
    /// need to be updated.
    bool ModifiedDT;

  public:
    static char ID; // Pass identification, replacement for typeid
    explicit CodeGenPrepare(const TargetLowering *tli = 0)
      : FunctionPass(ID), TLI(tli) {
        initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
      }
    bool runOnFunction(Function &F);

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addPreserved<DominatorTree>();
      AU.addPreserved<ProfileInfo>();
    }

  private:
    bool EliminateMostlyEmptyBlocks(Function &F);
    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
    void EliminateMostlyEmptyBlock(BasicBlock *BB);
    bool OptimizeBlock(BasicBlock &BB);
    bool OptimizeInst(Instruction *I);
    bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
    bool OptimizeInlineAsmInst(CallInst *CS);
    bool OptimizeCallInst(CallInst *CI);
    bool MoveExtToFormExtLoad(Instruction *I);
    bool OptimizeExtUses(Instruction *I);
    bool DupRetToEnableTailCallOpts(ReturnInst *RI);
  };
}

char CodeGenPrepare::ID = 0;
INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
                "Optimize for code generation", false, false)

FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
  return new CodeGenPrepare(TLI);
}

bool CodeGenPrepare::runOnFunction(Function &F) {
  bool EverMadeChange = false;

  ModifiedDT = false;
  DT = getAnalysisIfAvailable<DominatorTree>();
  PFI = getAnalysisIfAvailable<ProfileInfo>();

  // First pass, eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= EliminateMostlyEmptyBlocks(F);

  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
      BasicBlock *BB = I++;
      MadeChange |= OptimizeBlock(*BB);
    }
    EverMadeChange |= MadeChange;
  }

  SunkAddrs.clear();

  if (!DisableBranchOpts) {
    MadeChange = false;
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
      MadeChange |= ConstantFoldTerminator(BB, true);

    if (MadeChange)
      ModifiedDT = true;
    EverMadeChange |= MadeChange;
  }

  if (ModifiedDT && DT)
    DT->DT->recalculate(F);

  return EverMadeChange;
}

/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
/// debug info directives, and an unconditional branch. Passes before isel
/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
/// isel. Start by eliminating these blocks so we can split them the way we
/// want them.
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
  bool MadeChange = false;
  // Note that this intentionally skips the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;

    // If this block doesn't end with an uncond branch, ignore it.
    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || !BI->isUnconditional())
      continue;

    // If the instruction before the branch (skipping debug info) isn't a phi
    // node, then other stuff is happening here.
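    // (Shape we are looking for, with names invented for illustration:
    //    bb:
    //      %p = phi i32 [ %x, %pred1 ], [ %y, %pred2 ]
    //      br label %dest
    //  i.e. nothing but phis and debug intrinsics before the branch.)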
    BasicBlock::iterator BBI = BI;
    if (BBI != BB->begin()) {
      --BBI;
      while (isa<DbgInfoIntrinsic>(BBI)) {
        if (BBI == BB->begin())
          break;
        --BBI;
      }
      if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
        continue;
    }

    // Do not break infinite loops.
    BasicBlock *DestBB = BI->getSuccessor(0);
    if (DestBB == BB)
      continue;

    if (!CanMergeBlocks(BB, DestBB))
      continue;

    EliminateMostlyEmptyBlock(BB);
    MadeChange = true;
  }
  return MadeChange;
}

/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
/// single uncond branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor.  If there are more complex conditions (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
         UI != E; ++UI) {
      const Instruction *User = cast<Instruction>(*UI);
      if (User->getParent() != DestBB || !isa<PHINode>(User))
        return false;
      // If User is inside DestBB block and it is a PHINode then check
      // incoming value. If incoming value is not from BB then this is
      // a complex condition (e.g. preheaders) we want to avoid here.
      if (User->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(User))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block.  If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {   // Common predecessor?
      BBI = DestBB->begin();
      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
        const Value *V1 = PN->getIncomingValueForBlock(Pred);
        const Value *V2 = PN->getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}


/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi nodes
/// and an unconditional branch in it.
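/// (Sketch of the effect, names invented for illustration: given
///    bb:                                   ; preds = %p1, %p2
///      %v = phi i32 [ %a, %p1 ], [ %b, %p2 ]
///      br label %dest
///  the phi nodes in %dest learn about the edges from %p1 and %p2, all uses
///  of bb are redirected to %dest, and bb is erased.)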
void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
    if (SinglePred != DestBB) {
      // Remember if SinglePred was the entry block of the function.  If so, we
      // will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(DestBB, this);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());

      DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
      return;
    }
  }

  // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  PHINode *PN;
  for (BasicBlock::iterator BBI = DestBB->begin();
       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN->removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN->addIncoming(InValPhi->getIncomingValue(i),
                        InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN->addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  if (DT && !ModifiedDT) {
    BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
    BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
    BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
    DT->changeImmediateDominator(DestBB, NewIDom);
    DT->eraseNode(BB);
  }
  if (PFI) {
    PFI->replaceAllUses(BB, DestBB);
    PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
  }
  BB->eraseFromParent();
  ++NumBlocksElim;

  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}

/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
/// sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
///
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
  // If this is a noop copy,
  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(CI->getType());

  // If this is an fp<->int conversion, it is not a noop copy; don't touch it.
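  // (Such casts typically lower to moves between register banks, so they are
  //  unlikely to be free.)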
  if (SrcVT.isInteger() != DstVT.isInteger())
    return false;

  // If this is an extension, it will be a zero or sign extension, which
  // isn't a noop.
  if (SrcVT.bitsLT(DstVT)) return false;

  // If these values will be promoted, find out what they will be promoted
  // to.  This helps us consider truncates on PPC as noop copies when they
  // are.
  if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
      TargetLowering::TypePromoteInteger)
    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
  if (TLI.getTypeAction(CI->getContext(), DstVT) ==
      TargetLowering::TypePromoteInteger)
    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);

  // If, after promotion, these are the same types, this is a noop copy.
  if (SrcVT != DstVT)
    return false;

  BasicBlock *DefBB = CI->getParent();

  /// InsertedCasts - Only insert a cast in each block once.
  DenseMap<BasicBlock*, CastInst*> InsertedCasts;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this cast is used in.  For PHI nodes this is the
    // appropriate predecessor block.
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User)) {
      UserBB = PN->getIncomingBlock(UI);
    }

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // If this user is in the same block as the cast, don't change the cast.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cast into this block, use it.
    CastInst *&InsertedCast = InsertedCasts[UserBB];

    if (!InsertedCast) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCast =
        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
                         InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cast with a use of the new cast.
    TheUse = InsertedCast;
    ++NumCastUses;
  }

  // If we removed all uses, nuke the cast.
  if (CI->use_empty()) {
    CI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}

/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced.  This
/// is a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
static bool OptimizeCmpExpression(CmpInst *CI) {
  BasicBlock *DefBB = CI->getParent();

  /// InsertedCmp - Only insert a cmp in each block once.
  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(User))
      continue;

    // Figure out which BB this cmp is used in.
    BasicBlock *UserBB = User->getParent();

    // If this user is in the same block as the cmp, don't change the cmp.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cmp into this block, use it.
    CmpInst *&InsertedCmp = InsertedCmps[UserBB];

    if (!InsertedCmp) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCmp =
        CmpInst::Create(CI->getOpcode(),
                        CI->getPredicate(), CI->getOperand(0),
                        CI->getOperand(1), "", InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cmp with a use of the new cmp.
    TheUse = InsertedCmp;
    ++NumCmpUses;
  }

  // If we removed all uses, nuke the cmp.
  if (CI->use_empty())
    CI->eraseFromParent();

  return MadeChange;
}

namespace {
class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
protected:
  void replaceCall(Value *With) {
    CI->replaceAllUsesWith(With);
    CI->eraseFromParent();
  }
  bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
    if (ConstantInt *SizeCI =
            dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
      return SizeCI->isAllOnesValue();
    return false;
  }
};
} // end anonymous namespace

bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
  BasicBlock *BB = CI->getParent();

  // Lower inline assembly if we can.
  // If we found an inline asm expression, and if the target knows how to
  // lower it to normal LLVM code, do so now.
  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
    if (TLI->ExpandInlineAsm(CI)) {
      // Avoid invalidating the iterator.
      CurInstIterator = BB->begin();
      // Avoid processing instructions out of order, which could cause
      // reuse before a value is defined.
      SunkAddrs.clear();
      return true;
    }
    // Sink address computing for memory operands into the block.
    if (OptimizeInlineAsmInst(CI))
      return true;
  }

  // Lower all uses of llvm.objectsize.*
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
    Type *ReturnTy = CI->getType();
    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);

    // Substituting this can cause recursive simplifications, which can
    // invalidate our iterator.  Use a WeakVH to hold onto it in case this
    // happens.
    WeakVH IterHandle(CurInstIterator);

    ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0,
                              ModifiedDT ? 0 : DT);

    // If the iterator instruction was recursively deleted, start over at the
    // start of the block.
    if (IterHandle != CurInstIterator) {
      CurInstIterator = BB->begin();
      SunkAddrs.clear();
    }
    return true;
  }

  // From here on out we're working with named functions.
  if (CI->getCalledFunction() == 0) return false;

  // If llvm.dbg.value is far away from the value, then isel may not be able
  // to handle it properly.  isel will drop llvm.dbg.value if it cannot find a
  // node corresponding to the value.
  if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(CI))
    if (Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()))
      if (!VI->isTerminator() &&
          (DVI->getParent() != VI->getParent() || DT->dominates(DVI, VI))) {
        DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
        DVI->removeFromParent();
        if (isa<PHINode>(VI))
          DVI->insertBefore(VI->getParent()->getFirstNonPHI());
        else
          DVI->insertAfter(VI);
        return true;
      }

  // We'll need TargetData from here on out.
  const TargetData *TD = TLI ?
                           TLI->getTargetData() : 0;
  if (!TD) return false;

  // Lower all default uses of _chk calls.  This is very similar
  // to what InstCombineCalls does, but here we are only lowering calls
  // that have the default "don't know" as the objectsize.  Anything else
  // should be left alone.
  CodeGenPrepareFortifiedLibCalls Simplifier;
  return Simplifier.fold(CI, TD);
}

/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
/// instructions to the predecessor to enable tail call optimizations. The
/// case it is currently looking for is:
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   br label %return
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   br label %return
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   br label %return
/// return:
///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
///   ret i32 %retval
///
/// =>
///
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   ret i32 %tmp0
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   ret i32 %tmp1
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   ret i32 %tmp2
///
bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
  if (!TLI)
    return false;

  Value *V = RI->getReturnValue();
  PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL;
  if (V && !PN)
    return false;

  BasicBlock *BB = RI->getParent();
  if (PN && PN->getParent() != BB)
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  // See llvm::isInTailCallPosition().
  const Function *F = BB->getParent();
  unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
    return false;

  // Make sure there are no instructions between the PHI and return, or that
  // the return is the first instruction in the block.
  if (PN) {
    BasicBlock::iterator BI = BB->begin();
    do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
    if (&*BI != RI)
      return false;
  } else {
    BasicBlock::iterator BI = BB->begin();
    while (isa<DbgInfoIntrinsic>(BI)) ++BI;
    if (&*BI != RI)
      return false;
  }

  /// Only dup the ReturnInst if the CallInst is likely to be emitted as a
  /// tail call.
  SmallVector<CallInst*, 4> TailCalls;
  if (PN) {
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
      CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
      // Make sure the phi value is indeed produced by the tail call.
      if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
          TLI->mayBeEmittedAsTailCall(CI))
        TailCalls.push_back(CI);
    }
  } else {
    SmallPtrSet<BasicBlock*, 4> VisitedBBs;
    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
      if (!VisitedBBs.insert(*PI))
        continue;

      BasicBlock::InstListType &InstList = (*PI)->getInstList();
      BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
      BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
      do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
      if (RI == RE)
        continue;

      CallInst *CI = dyn_cast<CallInst>(&*RI);
      if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
        TailCalls.push_back(CI);
    }
  }

  bool Changed = false;
  for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
    CallInst *CI = TailCalls[i];
    CallSite CS(CI);

    // Conservatively require the attributes of the call to match those of the
    // return. Ignore noalias because it doesn't affect the call sequence.
    unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes();
    if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
      continue;

    // Make sure the call instruction is followed by an unconditional branch to
    // the return block.
    BasicBlock *CallBB = CI->getParent();
    BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
    if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
      continue;

    // Duplicate the return into CallBB.
    (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
    ModifiedDT = Changed = true;
    ++NumRetsDup;
  }

  // If we eliminated all predecessors of the block, delete the block now.
  if (Changed && pred_begin(BB) == pred_end(BB))
    BB->eraseFromParent();

  return Changed;
}

//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//

/// IsNonLocalValue - Return true if the specified value is defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() != BB;
  return false;
}

/// OptimizeMemoryInst - Load and Store Instructions often have
/// addressing modes that can do significant amounts of computation.  As such,
/// instruction selection will try to get the load or store to do as much
/// computation as possible for the program.  The problem is that isel can only
/// see within a single block.  As such, we sink as much legal addressing mode
/// stuff into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                                        Type *AccessTy) {
  Value *Repl = Addr;

  // Try to collapse single-value PHI nodes.  This is necessary to undo
  // unprofitable PRE transformations.
  SmallVector<Value*, 8> worklist;
  SmallPtrSet<Value*, 16> Visited;
  worklist.push_back(Addr);

  // Use a worklist to iteratively look through PHI nodes, and ensure that
  // the addressing modes obtained from the non-PHI roots of the graph
  // are equivalent.
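  // (Illustrative, names invented: if Addr is
  //    %addr = phi i8* [ %gep1, %bb1 ], [ %gep2, %bb2 ]
  //  and %gep1/%gep2 both match the same base+offset addressing mode, we can
  //  treat the phi as computing that one mode; if they disagree, we bail out
  //  below.)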
  Value *Consensus = 0;
  unsigned NumUsesConsensus = 0;
  bool IsNumUsesConsensusValid = false;
  SmallVector<Instruction*, 16> AddrModeInsts;
  ExtAddrMode AddrMode;
  while (!worklist.empty()) {
    Value *V = worklist.back();
    worklist.pop_back();

    // Break use-def graph loops.
    if (Visited.count(V)) {
      Consensus = 0;
      break;
    }

    Visited.insert(V);

    // For a PHI node, push all of its incoming values.
    if (PHINode *P = dyn_cast<PHINode>(V)) {
      for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
        worklist.push_back(P->getIncomingValue(i));
      continue;
    }

    // For non-PHIs, determine the addressing mode being computed.
    SmallVector<Instruction*, 16> NewAddrModeInsts;
    ExtAddrMode NewAddrMode =
      AddressingModeMatcher::Match(V, AccessTy, MemoryInst,
                                   NewAddrModeInsts, *TLI);

    // This check is broken into two cases with very similar code to avoid
    // using getNumUses() as much as possible.  Some values have a lot of uses,
    // so calling getNumUses() unconditionally caused a significant
    // compile-time regression.
    if (!Consensus) {
      Consensus = V;
      AddrMode = NewAddrMode;
      AddrModeInsts = NewAddrModeInsts;
      continue;
    } else if (NewAddrMode == AddrMode) {
      if (!IsNumUsesConsensusValid) {
        NumUsesConsensus = Consensus->getNumUses();
        IsNumUsesConsensusValid = true;
      }

      // Ensure that the obtained addressing mode is equivalent to that
      // obtained for all other roots of the PHI traversal.  Also, when
      // choosing one such root as representative, select the one with the
      // most uses in order to keep the cost modeling heuristics in
      // AddressingModeMatcher applicable.
      unsigned NumUses = V->getNumUses();
      if (NumUses > NumUsesConsensus) {
        Consensus = V;
        NumUsesConsensus = NumUses;
        AddrModeInsts = NewAddrModeInsts;
      }
      continue;
    }

    Consensus = 0;
    break;
  }

  // If the addressing mode couldn't be determined, or if multiple different
  // ones were determined, bail out now.
  if (!Consensus) return false;

  // Check to see if any of the instructions subsumed by this addr mode are
  // non-local to I's BB.
  bool AnyNonLocal = false;
  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
    if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
      AnyNonLocal = true;
      break;
    }
  }

  // If all the instructions matched are already in this BB, don't do anything.
  if (!AnyNonLocal) {
    DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
    return false;
  }

  // Insert this computation right after this user.  Since our caller is
  // scanning from the top of the BB to the bottom, reuse of the expr is
  // guaranteed to happen later.
  BasicBlock::iterator InsertPt = MemoryInst;

  // Now that we've determined the addressing expression we want to use and
  // know that we have to sink it into this block, check to see if we have
  // already done this for some other load/store instr in this block.  If so,
  // reuse the computation.
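  // (For example, a load and a store through the same non-local pointer in
  //  this block end up sharing one sunk "sunkaddr" computation instead of
  //  each materializing its own copy.)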
  Value *&SunkAddr = SunkAddrs[Addr];
  if (SunkAddr) {
    DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
                 << *MemoryInst);
    if (SunkAddr->getType() != Addr->getType())
      SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
  } else {
    DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
                 << *MemoryInst);
    Type *IntPtrTy =
          TLI->getTargetData()->getIntPtrType(AccessTy->getContext());

    Value *Result = 0;

    // Start with the base register. Do this first so that subsequent address
    // matching finds it last, which will prevent it from trying to match it
    // as the scaled value in case it happens to be a mul. That would be
    // problematic if we've sunk a different mul for the scale, because then
    // we'd end up sinking both muls.
    if (AddrMode.BaseReg) {
      Value *V = AddrMode.BaseReg;
      if (V->getType()->isPointerTy())
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      if (V->getType() != IntPtrTy)
        V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true,
                                        "sunkaddr", InsertPt);
      Result = V;
    }

    // Add the scale value.
    if (AddrMode.Scale) {
      Value *V = AddrMode.ScaledReg;
      if (V->getType() == IntPtrTy) {
        // done.
      } else if (V->getType()->isPointerTy()) {
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
                 cast<IntegerType>(V->getType())->getBitWidth()) {
        V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else {
        V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
      }
      if (AddrMode.Scale != 1)
        V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
                                                          AddrMode.Scale),
                                      "sunkaddr", InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the BaseGV if present.
    if (AddrMode.BaseGV) {
      Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
                                  InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the Base Offset if present.
    if (AddrMode.BaseOffs) {
      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    if (Result == 0)
      SunkAddr = Constant::getNullValue(Addr->getType());
    else
      SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
  }

  MemoryInst->replaceUsesOfWith(Repl, SunkAddr);

  // If we have no uses, recursively delete the value and all dead instructions
  // using it.
  if (Repl->use_empty()) {
    // This can cause recursive deletion, which can invalidate our iterator.
    // Use a WeakVH to hold onto it in case this happens.
    WeakVH IterHandle(CurInstIterator);
    BasicBlock *BB = CurInstIterator->getParent();

    RecursivelyDeleteTriviallyDeadInstructions(Repl);

    if (IterHandle != CurInstIterator) {
      // If the iterator instruction was recursively deleted, start over at the
      // start of the block.
      CurInstIterator = BB->begin();
      SunkAddrs.clear();
    } else {
      // This address is now available for reassignment, so erase the table
      // entry; we don't want to match some completely different instruction.
      SunkAddrs[Addr] = 0;
    }
  }
  ++NumMemoryInsts;
  return true;
}

/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computation into the block when
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
  bool MadeChange = false;

  TargetLowering::AsmOperandInfoVector
    TargetConstraints = TLI->ParseConstraints(CS);
  unsigned ArgNo = 0;
  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];

    // Compute the constraint code and ConstraintType to use.
    TLI->ComputeConstraintToUse(OpInfo, SDValue());

    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        OpInfo.isIndirect) {
      Value *OpVal = CS->getArgOperand(ArgNo++);
      MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
    } else if (OpInfo.Type == InlineAsm::isInput)
      ArgNo++;
  }

  return MadeChange;
}

/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
/// basic block as the load, unless conditions are unfavorable. This allows
/// SelectionDAG to fold the extend into the load.
///
bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
  // Look for a load being extended.
  LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
  if (!LI) return false;

  // If they're already in the same block, there's nothing to do.
  if (LI->getParent() == I->getParent())
    return false;

  // If the load has other users and the truncate is not free, this probably
  // isn't worthwhile.
  if (!LI->hasOneUse() &&
      TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
              !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
      !TLI->isTruncateFree(I->getType(), LI->getType()))
    return false;

  // Check whether the target supports casts folded into loads.
  unsigned LType;
  if (isa<ZExtInst>(I))
    LType = ISD::ZEXTLOAD;
  else {
    assert(isa<SExtInst>(I) && "Unexpected ext type!");
    LType = ISD::SEXTLOAD;
  }
  if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
    return false;

  // Move the extend into the same block as the load, so that SelectionDAG
  // can fold it.
  I->removeFromParent();
  I->insertAfter(LI);
  ++NumExtsMoved;
  return true;
}

bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
  BasicBlock *DefBB = I->getParent();

  // If the result of a {s|z}ext and its source are both live out, rewrite all
  // other uses of the source with the result of the extension.
  Value *Src = I->getOperand(0);
  if (Src->hasOneUse())
    return false;

  // Only do this xform if truncating is free.
  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
    return false;

  // Only safe to perform the optimization if the source is also defined in
  // this block.
  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
    return false;

  bool DefIsLiveOut = false;
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    DefIsLiveOut = true;
    break;
  }
  if (!DefIsLiveOut)
    return false;

  // Make sure none of the uses are PHI nodes.
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    // Be conservative. We don't want this xform to end up introducing
    // reloads just before load / store instructions.
    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
      return false;
  }

  // InsertedTruncs - Only insert one trunc in each block.
  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;

  bool MadeChange = false;
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;

    // Both src and def are live in this block. Rewrite the use.
    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];

    if (!InsertedTrunc) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
    }

    // Replace a use of the {s|z}ext source with a use of the result.
    TheUse = InsertedTrunc;
    ++NumExtUses;
    MadeChange = true;
  }

  return MadeChange;
}

bool CodeGenPrepare::OptimizeInst(Instruction *I) {
  if (PHINode *P = dyn_cast<PHINode>(I)) {
    // It is possible for very late stage optimizations (such as SimplifyCFG)
    // to introduce PHI nodes too late to be cleaned up.  If we detect such a
    // trivial PHI, go ahead and zap it here.
    if (Value *V = SimplifyInstruction(P)) {
      P->replaceAllUsesWith(V);
      P->eraseFromParent();
      ++NumPHIsElim;
      return true;
    }
    return false;
  }

  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    // If the source of the cast is a constant, then this should have
    // already been constant folded.  The only reason NOT to constant fold
    // it is if something (e.g. LSR) was careful to place the constant
    // evaluation in a block other than the one that uses it (e.g. to hoist
    // the address of globals out of a loop).  If this is the case, we don't
    // want to forward-subst the cast.
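    // (Illustrative case: a ptrtoint/inttoptr of a global hoisted into a loop
    //  preheader -- sinking it back next to its user would undo that hoist.)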
    if (isa<Constant>(CI->getOperand(0)))
      return false;

    if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
      return true;

    if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
      bool MadeChange = MoveExtToFormExtLoad(I);
      return MadeChange | OptimizeExtUses(I);
    }
    return false;
  }

  if (CmpInst *CI = dyn_cast<CmpInst>(I))
    return OptimizeCmpExpression(CI);

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (TLI)
      return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
    return false;
  }

  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (TLI)
      return OptimizeMemoryInst(I, SI->getOperand(1),
                                SI->getOperand(0)->getType());
    return false;
  }

  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
    if (GEPI->hasAllZeroIndices()) {
      /// The GEP operand must be a pointer, so its result must be too;
      /// replace the GEP with a bitcast.
      Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                        GEPI->getName(), GEPI);
      GEPI->replaceAllUsesWith(NC);
      GEPI->eraseFromParent();
      ++NumGEPsElim;
      OptimizeInst(NC);
      return true;
    }
    return false;
  }

  if (CallInst *CI = dyn_cast<CallInst>(I))
    return OptimizeCallInst(CI);

  if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
    return DupRetToEnableTailCallOpts(RI);

  return false;
}

// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
  SunkAddrs.clear();
  bool MadeChange = false;

  CurInstIterator = BB.begin();
  for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; )
    MadeChange |= OptimizeInst(CurInstIterator++);

  return MadeChange;
}