//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass implements an idiom recognizer that transforms simple loops into a
// non-loop form.  In cases where this kicks in, it can be a significant
// performance win.
//
//===----------------------------------------------------------------------===//
//
// TODO List:
//
// Future loop memory idioms to recognize:
//   memcmp, memmove, strlen, etc.
// Future floating point idioms to recognize in -ffast-math mode:
//   fpowi
// Future integer operation idioms to recognize:
//   ctpop, ctlz, cttz
//
// Beware that isel's default lowering for ctpop is highly inefficient for
// i64 and larger types when i64 is legal and the value has few bits set.  It
// would be good to enhance isel to emit a loop for ctpop in this case.
//
// We should enhance the memset/memcpy recognition to handle multiple stores in
// the loop.  This would handle things like:
//   void foo(_Complex float *P)
//     for (i) { __real__(*P) = 0;  __imag__(*P) = 0; }
//
// We should enhance this to handle negative strides through memory.
// Alternatively (and perhaps better) we could rely on an earlier pass to force
// forward iteration through memory, which is generally better for cache
// behavior.  Negative strides *do* happen for memset/memcpy loops.
//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
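//
// A simple illustration (hypothetical code, not from this file) of the kind
// of loop the pass already handles:
//   void zero(char *P, long N)
//     for (i = 0; i != N; ++i) P[i] = 0;   // becomes memset(P, 0, N)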
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "loop-idiom"
#include "llvm/Transforms/Scalar.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");

namespace {
  class LoopIdiomRecognize : public LoopPass {
    Loop *CurLoop;
    const TargetData *TD;
    DominatorTree *DT;
    ScalarEvolution *SE;
    TargetLibraryInfo *TLI;
  public:
    static char ID;
    explicit LoopIdiomRecognize() : LoopPass(ID) {
      initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
    }

    bool runOnLoop(Loop *L, LPPassManager &LPM);
    bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
                        SmallVectorImpl<BasicBlock*> &ExitBlocks);

    bool processLoopStore(StoreInst *SI, const SCEV *BECount);
    bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);

    bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                                 unsigned StoreAlignment,
                                 Value *SplatValue, Instruction *TheStore,
                                 const SCEVAddRecExpr *Ev,
                                 const SCEV *BECount);
    bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
                                    const SCEVAddRecExpr *StoreEv,
                                    const SCEVAddRecExpr *LoadEv,
                                    const SCEV *BECount);

    /// This transformation requires natural loop information & requires that
    /// loop preheaders be inserted into the CFG.
    ///
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<LoopInfo>();
      AU.addPreserved<LoopInfo>();
      AU.addRequiredID(LoopSimplifyID);
      AU.addPreservedID(LoopSimplifyID);
      AU.addRequiredID(LCSSAID);
      AU.addPreservedID(LCSSAID);
      AU.addRequired<AliasAnalysis>();
      AU.addPreserved<AliasAnalysis>();
      AU.addRequired<ScalarEvolution>();
      AU.addPreserved<ScalarEvolution>();
      AU.addPreserved<DominatorTree>();
      AU.addRequired<DominatorTree>();
      AU.addRequired<TargetLibraryInfo>();
    }
  };
}

char LoopIdiomRecognize::ID = 0;
INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
                    false, false)

Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }

/// deleteDeadInstruction - Delete this instruction.  Before we do, go through
/// and zero out all the operands of this instruction.  If any of them become
/// dead, delete them and the computation tree that feeds them.
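/// For example, deleting a store whose pointer was produced by an
/// otherwise-unused GEP also deletes the GEP, and then anything that only fed
/// the GEP.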
///
static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
  SmallVector<Instruction*, 32> NowDeadInsts;

  NowDeadInsts.push_back(I);

  // Before we touch this instruction, remove it from SE!
  do {
    Instruction *DeadInst = NowDeadInsts.pop_back_val();

    // This instruction is dead, zap it, in stages.  Start by removing it from
    // SCEV.
    SE.forgetValue(DeadInst);

    for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
      Value *Op = DeadInst->getOperand(op);
      DeadInst->setOperand(op, 0);

      // If this operand just became dead, add it to the NowDeadInsts list.
      if (!Op->use_empty()) continue;

      if (Instruction *OpI = dyn_cast<Instruction>(Op))
        if (isInstructionTriviallyDead(OpI))
          NowDeadInsts.push_back(OpI);
    }

    DeadInst->eraseFromParent();

  } while (!NowDeadInsts.empty());
}

/// deleteIfDeadInstruction - If the specified value is a dead instruction,
/// delete it and any recursively used instructions.
static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    if (isInstructionTriviallyDead(I))
      deleteDeadInstruction(I, SE);
}

bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
  CurLoop = L;

  // Disable loop idiom recognition if the function's name is a common idiom.
  StringRef Name = L->getHeader()->getParent()->getName();
  if (Name == "memset" || Name == "memcpy")
    return false;

  // The trip count of the loop must be analyzable.
  SE = &getAnalysis<ScalarEvolution>();
  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
    return false;
  const SCEV *BECount = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BECount)) return false;

  // If this loop executes exactly one time, then it should be peeled, not
  // optimized by this pass.
  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
    if (BECst->getValue()->getValue() == 0)
      return false;

  // We require target data for now.
  TD = getAnalysisIfAvailable<TargetData>();
  if (TD == 0) return false;

  DT = &getAnalysis<DominatorTree>();
  LoopInfo &LI = getAnalysis<LoopInfo>();
  TLI = &getAnalysis<TargetLibraryInfo>();

  SmallVector<BasicBlock*, 8> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);

  DEBUG(dbgs() << "loop-idiom Scanning: F["
               << L->getHeader()->getParent()->getName()
               << "] Loop %" << L->getHeader()->getName() << "\n");

  bool MadeChange = false;
  // Scan all the blocks in the loop that are not in subloops.
  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
       ++BI) {
    // Ignore blocks in subloops.
    if (LI.getLoopFor(*BI) != CurLoop)
      continue;

    MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
  }
  return MadeChange;
}

/// runOnLoopBlock - Process the specified block, which lives in a counted loop
/// with the specified backedge count.  This block is known to be in the current
/// loop and not in any subloops.
bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
                                SmallVectorImpl<BasicBlock*> &ExitBlocks) {
  // We can only promote stores in this block if they are unconditionally
  // executed in the loop.  For a block to be unconditionally executed, it has
  // to dominate all the exit blocks of the loop.  Verify this now.
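  // For instance, a store guarded by an "if" inside the loop body lives in a
  // block that does not dominate the loop exits; promoting it to a memset
  // would unconditionally write memory the original program might never touch.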
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
    if (!DT->dominates(BB, ExitBlocks[i]))
      return false;

  bool MadeChange = false;
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
    Instruction *Inst = I++;
    // Look for store instructions, which may be optimized to memset/memcpy.
    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      WeakVH InstPtr(I);
      if (!processLoopStore(SI, BECount)) continue;
      MadeChange = true;

      // If processing the store invalidated our iterator, start over from the
      // top of the block.
      if (InstPtr == 0)
        I = BB->begin();
      continue;
    }

    // Look for memset instructions, which may be optimized to a larger memset.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
      WeakVH InstPtr(I);
      if (!processLoopMemSet(MSI, BECount)) continue;
      MadeChange = true;

      // If processing the memset invalidated our iterator, start over from the
      // top of the block.
      if (InstPtr == 0)
        I = BB->begin();
      continue;
    }
  }

  return MadeChange;
}


/// processLoopStore - See if this store can be promoted to a memset or memcpy.
bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
  if (SI->isVolatile()) return false;

  Value *StoredVal = SI->getValueOperand();
  Value *StorePtr = SI->getPointerOperand();

  // Reject stores that are so large that they overflow an unsigned.
  uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType());
  if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
    return false;

  // See if the pointer expression is an AddRec like {base,+,1} on the current
  // loop, which indicates a strided store.  If we have something else, it's a
  // random store we can't handle.
  const SCEVAddRecExpr *StoreEv =
    dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
  if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
    return false;

  // Check to see if the stride matches the size of the store.  If so, then we
  // know that every byte is touched in the loop.
  unsigned StoreSize = (unsigned)SizeInBits >> 3;
  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));

  if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) {
    // TODO: Could also handle negative stride here someday, that will require
    // the validity check in mayLoopAccessLocation to be updated though.
    // Enable this to print exact negative strides.
    if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) {
      dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
      dbgs() << "BB: " << *SI->getParent();
    }

    return false;
  }

  // See if we can optimize just this store in isolation.
  if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
                              StoredVal, SI, StoreEv, BECount))
    return true;

  // If the stored value is a strided load in the same loop with the same
  // stride, this may be transformable into a memcpy.  This kicks in for
  // stuff like
  //   for (i) A[i] = B[i];
  if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
    const SCEVAddRecExpr *LoadEv =
      dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
    if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
        StoreEv->getOperand(1) == LoadEv->getOperand(1) && !LI->isVolatile())
      if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
        return true;
  }
  //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";

  return false;
}

/// processLoopMemSet - See if this memset can be promoted to a large memset.
bool LoopIdiomRecognize::
processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
  // We can only handle non-volatile memsets with a constant size.
  if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;

  // If we're not allowed to hack on memset, we fail.
  if (!TLI->has(LibFunc::memset))
    return false;

  Value *Pointer = MSI->getDest();

  // See if the pointer expression is an AddRec like {base,+,1} on the current
  // loop, which indicates a strided store.  If we have something else, it's a
  // random store we can't handle.
  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
  if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
    return false;

  // Reject memsets that are so large that they overflow an unsigned.
  uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
  if ((SizeInBytes >> 32) != 0)
    return false;

  // Check to see if the stride matches the size of the memset.  If so, then we
  // know that every byte is touched in the loop.
  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));

  // TODO: Could also handle negative stride here someday, that will require
  // the validity check in mayLoopAccessLocation to be updated though.
  if (Stride == 0 || MSI->getLength() != Stride->getValue())
    return false;

  return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
                                 MSI->getAlignment(), MSI->getValue(),
                                 MSI, Ev, BECount);
}


/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access.  The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
                                  Loop *L, const SCEV *BECount,
                                  unsigned StoreSize, AliasAnalysis &AA,
                                  Instruction *IgnoredStore) {
  // Get the location that may be stored across the loop.  Since the access is
  // strided positively through memory, we say that the modified location starts
  // at the pointer and has infinite size.
  uint64_t AccessSize = AliasAnalysis::UnknownSize;

  // If the loop iterates a fixed number of times, we can refine the access size
  // to be exactly the size of the memset, which is (BECount+1)*StoreSize.
  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
    AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;

  // TODO: For this to be really effective, we have to dive into the pointer
  // operand of the store.  A store to &A[i] over 100 iterations will always
  // be reported as may-aliasing a store to &A[100]; we need StoreLoc to be
  // "A" with a size of 100, which will then no-alias a store to &A[100].
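
  // Build the location to check, then scan every instruction in the loop
  // (skipping the access being promoted itself) and give up if any of them
  // may access the location in one of the forbidden ways.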
  AliasAnalysis::Location StoreLoc(Ptr, AccessSize);

  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
       ++BI)
    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
      if (&*I != IgnoredStore &&
          (AA.getModRefInfo(I, StoreLoc) & Access))
        return true;

  return false;
}

/// getMemSetPatternValue - If a strided store of the specified value is safe to
/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
/// be passed in.  Otherwise, return null.
///
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
/// just replicate their input array and then pass on to memset_pattern16.
static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
  // If the value isn't a constant, we can't promote it to being in a constant
  // array.  We could theoretically do a store to an alloca or something, but
  // that doesn't seem worthwhile.
  Constant *C = dyn_cast<Constant>(V);
  if (C == 0) return 0;

  // Only handle simple values that are a power of two bytes in size.
  uint64_t Size = TD.getTypeSizeInBits(V->getType());
  if (Size == 0 || (Size & 7) || (Size & (Size-1)))
    return 0;

  // Don't care enough about darwin/ppc to implement this.
  if (TD.isBigEndian())
    return 0;

  // Convert to size in bytes.
  Size /= 8;

  // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
  // if the top and bottom are the same (e.g. for vectors and large integers).
  if (Size > 16) return 0;

  // If the constant is exactly 16 bytes, just use it.
  if (Size == 16) return C;

  // Otherwise, we'll use an array of the constants.
  unsigned ArraySize = 16/Size;
  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
  return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
}


/// processLoopStridedStore - We see a strided store of some value.  If we can
/// transform this into a memset or memset_pattern in the loop preheader, do so.
bool LoopIdiomRecognize::
processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                        unsigned StoreAlignment, Value *StoredVal,
                        Instruction *TheStore, const SCEVAddRecExpr *Ev,
                        const SCEV *BECount) {

  // If the stored value is a byte-wise value (like i32 -1), then it may be
  // turned into a memset of i8 -1, assuming that all the consecutive bytes
  // are stored.  A store of i32 0x01020304 can never be turned into a memset,
  // but it can be turned into memset_pattern if the target supports it.
  Value *SplatValue = isBytewiseValue(StoredVal);
  Constant *PatternValue = 0;

  // If we're allowed to form a memset, and the stored value would be acceptable
  // for memset, use it.
  if (SplatValue && TLI->has(LibFunc::memset) &&
      // Verify that the stored value is loop invariant.  If not, we can't
      // promote the memset.
      CurLoop->isLoopInvariant(SplatValue)) {
    // Keep and use SplatValue.
    PatternValue = 0;
  } else if (TLI->has(LibFunc::memset_pattern16) &&
             (PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
    // It looks like we can use PatternValue!
    SplatValue = 0;
  } else {
    // Otherwise, this isn't an idiom we can transform.  For example, we can't
    // do anything with a 3-byte store.
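    // (A non-splat 3-byte value, for instance, is rejected by
    // getMemSetPatternValue because its size is not a power of two, so it
    // cannot be replicated into a 16-byte pattern.)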
    return false;
  }

  // The trip count of the loop and the base pointer of the addrec SCEV are
  // guaranteed to be loop invariant, which means that they should dominate the
  // header.  This allows us to insert code for them in the preheader.
  BasicBlock *Preheader = CurLoop->getLoopPreheader();
  IRBuilder<> Builder(Preheader->getTerminator());
  SCEVExpander Expander(*SE, "loop-idiom");

  // Okay, we have a strided store "p[i]" of a splattable value.  We can turn
  // this into a memset in the loop preheader now if we want.  However, this
  // would be unsafe to do if there is anything else in the loop that may read
  // or write to the aliased location.  Check for any overlap by generating the
  // base pointer and checking the region.
  unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
  Value *BasePtr =
    Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
                           Preheader->getTerminator());

  if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
                            CurLoop, BECount,
                            StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
    Expander.clear();
    // If we generated new code for the base pointer, clean up.
    deleteIfDeadInstruction(BasePtr, *SE);
    return false;
  }

  // Okay, everything looks good, insert the memset.

  // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
  // pointer size if it isn't already.
  Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
  BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);

  const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
                                         SCEV::FlagNUW);
  if (StoreSize != 1)
    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
                               SCEV::FlagNUW);

  Value *NumBytes =
    Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());

  CallInst *NewCall;
  if (SplatValue)
    NewCall = Builder.CreateMemSet(BasePtr, SplatValue, NumBytes,
                                   StoreAlignment);
  else {
    Module *M = TheStore->getParent()->getParent()->getParent();
    Value *MSP = M->getOrInsertFunction("memset_pattern16",
                                        Builder.getVoidTy(),
                                        Builder.getInt8PtrTy(),
                                        Builder.getInt8PtrTy(), IntPtr,
                                        (void*)0);

    // Otherwise we should form a memset_pattern16.  PatternValue is known to be
    // a constant array of 16 bytes.  Plop the value into a mergable global.
    GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
                                            GlobalValue::InternalLinkage,
                                            PatternValue, ".memset_pattern");
    GV->setUnnamedAddr(true); // Ok to merge these.
    GV->setAlignment(16);
    Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
    NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
  }

  DEBUG(dbgs() << "  Formed memset: " << *NewCall << "\n"
               << "    from store to: " << *Ev << " at: " << *TheStore << "\n");
  NewCall->setDebugLoc(TheStore->getDebugLoc());

  // Okay, the memset has been formed.  Zap the original store and anything that
  // feeds into it.
  deleteDeadInstruction(TheStore, *SE);
  ++NumMemSet;
  return true;
}

/// processLoopStoreOfLoopLoad - We see a strided store whose value is a
/// same-strided load.
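/// If the transform is safe, this forms a memcpy in the loop preheader; the
/// canonical example is:
///   for (i) A[i] = B[i];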
bool LoopIdiomRecognize::
processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
                           const SCEVAddRecExpr *StoreEv,
                           const SCEVAddRecExpr *LoadEv,
                           const SCEV *BECount) {
  // If we're not allowed to form memcpy, we fail.
  if (!TLI->has(LibFunc::memcpy))
    return false;

  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());

  // The trip count of the loop and the base pointer of the addrec SCEV are
  // guaranteed to be loop invariant, which means that they should dominate the
  // header.  This allows us to insert code for them in the preheader.
  BasicBlock *Preheader = CurLoop->getLoopPreheader();
  IRBuilder<> Builder(Preheader->getTerminator());
  SCEVExpander Expander(*SE, "loop-idiom");

  // Okay, we have a strided store "p[i]" of a loaded value.  We can turn
  // this into a memcpy in the loop preheader now if we want.  However, this
  // would be unsafe to do if there is anything else in the loop that may read
  // or write the memory region we're storing to.  This includes the load that
  // feeds the stores.  Check for an alias by generating the base address and
  // checking everything.
  Value *StoreBasePtr =
    Expander.expandCodeFor(StoreEv->getStart(),
                           Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
                           Preheader->getTerminator());

  if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
                            CurLoop, BECount, StoreSize,
                            getAnalysis<AliasAnalysis>(), SI)) {
    Expander.clear();
    // If we generated new code for the base pointer, clean up.
    deleteIfDeadInstruction(StoreBasePtr, *SE);
    return false;
  }

  // For a memcpy, we have to make sure that the input array is not being
  // mutated by the loop.
  Value *LoadBasePtr =
    Expander.expandCodeFor(LoadEv->getStart(),
                           Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
                           Preheader->getTerminator());

  if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,
                            StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
    Expander.clear();
    // If we generated new code for the base pointers, clean up.
    deleteIfDeadInstruction(LoadBasePtr, *SE);
    deleteIfDeadInstruction(StoreBasePtr, *SE);
    return false;
  }

  // Okay, everything is safe, we can transform this!

  // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
  // pointer size if it isn't already.
  Type *IntPtr = TD->getIntPtrType(SI->getContext());
  BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);

  const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
                                         SCEV::FlagNUW);
  if (StoreSize != 1)
    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
                               SCEV::FlagNUW);

  Value *NumBytes =
    Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());

  CallInst *NewCall =
    Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
                         std::min(SI->getAlignment(), LI->getAlignment()));
  NewCall->setDebugLoc(SI->getDebugLoc());

  DEBUG(dbgs() << "  Formed memcpy: " << *NewCall << "\n"
               << "    from load ptr=" << *LoadEv << " at: " << *LI << "\n"
               << "    from store ptr=" << *StoreEv << " at: " << *SI << "\n");

  // Okay, the memcpy has been formed.  Zap the original store and anything that
  // feeds into it.
  deleteDeadInstruction(SI, *SE);
  ++NumMemCpy;
  return true;
}