1 //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a Loop Data Prefetching Pass. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #define DEBUG_TYPE "loop-data-prefetch" 15 #include "llvm/Transforms/Scalar.h" 16 #include "llvm/ADT/DepthFirstIterator.h" 17 #include "llvm/ADT/Statistic.h" 18 #include "llvm/Analysis/AssumptionCache.h" 19 #include "llvm/Analysis/CodeMetrics.h" 20 #include "llvm/Analysis/InstructionSimplify.h" 21 #include "llvm/Analysis/LoopInfo.h" 22 #include "llvm/Analysis/ScalarEvolution.h" 23 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 24 #include "llvm/Analysis/ScalarEvolutionExpander.h" 25 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 26 #include "llvm/Analysis/TargetTransformInfo.h" 27 #include "llvm/Analysis/ValueTracking.h" 28 #include "llvm/IR/CFG.h" 29 #include "llvm/IR/DiagnosticInfo.h" 30 #include "llvm/IR/Dominators.h" 31 #include "llvm/IR/Function.h" 32 #include "llvm/IR/IntrinsicInst.h" 33 #include "llvm/IR/Module.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 37 #include "llvm/Transforms/Utils/Local.h" 38 #include "llvm/Transforms/Utils/ValueMapper.h" 39 using namespace llvm; 40 41 // By default, we limit this to creating 16 PHIs (which is a little over half 42 // of the allocatable register set). 43 static cl::opt<bool> 44 PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), 45 cl::desc("Prefetch write addresses")); 46 47 static cl::opt<unsigned> 48 PrefetchDistance("prefetch-distance", 49 cl::desc("Number of instructions to prefetch ahead"), 50 cl::Hidden); 51 52 static cl::opt<unsigned> 53 MinPrefetchStride("min-prefetch-stride", 54 cl::desc("Min stride to add prefetches"), cl::Hidden); 55 56 static cl::opt<unsigned> MaxPrefetchIterationsAhead( 57 "max-prefetch-iters-ahead", 58 cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden); 59 60 STATISTIC(NumPrefetches, "Number of prefetches inserted"); 61 62 namespace llvm { 63 void initializeLoopDataPrefetchPass(PassRegistry&); 64 } 65 66 namespace { 67 68 class LoopDataPrefetch : public FunctionPass { 69 public: 70 static char ID; // Pass ID, replacement for typeid 71 LoopDataPrefetch() : FunctionPass(ID) { 72 initializeLoopDataPrefetchPass(*PassRegistry::getPassRegistry()); 73 } 74 75 void getAnalysisUsage(AnalysisUsage &AU) const override { 76 AU.addRequired<AssumptionCacheTracker>(); 77 AU.addPreserved<DominatorTreeWrapperPass>(); 78 AU.addRequired<LoopInfoWrapperPass>(); 79 AU.addPreserved<LoopInfoWrapperPass>(); 80 AU.addRequired<ScalarEvolutionWrapperPass>(); 81 // FIXME: For some reason, preserving SE here breaks LSR (even if 82 // this pass changes nothing). 83 // AU.addPreserved<ScalarEvolutionWrapperPass>(); 84 AU.addRequired<TargetTransformInfoWrapperPass>(); 85 } 86 87 bool runOnFunction(Function &F) override; 88 89 private: 90 bool runOnLoop(Loop *L); 91 92 /// \brief Check if the the stride of the accesses is large enough to 93 /// warrant a prefetch. 94 bool isStrideLargeEnough(const SCEVAddRecExpr *AR); 95 96 unsigned getMinPrefetchStride() { 97 if (MinPrefetchStride.getNumOccurrences() > 0) 98 return MinPrefetchStride; 99 return TTI->getMinPrefetchStride(); 100 } 101 102 unsigned getPrefetchDistance() { 103 if (PrefetchDistance.getNumOccurrences() > 0) 104 return PrefetchDistance; 105 return TTI->getPrefetchDistance(); 106 } 107 108 unsigned getMaxPrefetchIterationsAhead() { 109 if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0) 110 return MaxPrefetchIterationsAhead; 111 return TTI->getMaxPrefetchIterationsAhead(); 112 } 113 114 AssumptionCache *AC; 115 LoopInfo *LI; 116 ScalarEvolution *SE; 117 const TargetTransformInfo *TTI; 118 const DataLayout *DL; 119 }; 120 } 121 122 char LoopDataPrefetch::ID = 0; 123 INITIALIZE_PASS_BEGIN(LoopDataPrefetch, "loop-data-prefetch", 124 "Loop Data Prefetch", false, false) 125 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 126 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 127 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 128 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 129 INITIALIZE_PASS_END(LoopDataPrefetch, "loop-data-prefetch", 130 "Loop Data Prefetch", false, false) 131 132 FunctionPass *llvm::createLoopDataPrefetchPass() { return new LoopDataPrefetch(); } 133 134 bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR) { 135 unsigned TargetMinStride = getMinPrefetchStride(); 136 // No need to check if any stride goes. 137 if (TargetMinStride <= 1) 138 return true; 139 140 const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); 141 // If MinStride is set, don't prefetch unless we can ensure that stride is 142 // larger. 143 if (!ConstStride) 144 return false; 145 146 unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue()); 147 return TargetMinStride <= AbsStride; 148 } 149 150 bool LoopDataPrefetch::runOnFunction(Function &F) { 151 if (skipFunction(F)) 152 return false; 153 154 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 155 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 156 DL = &F.getParent()->getDataLayout(); 157 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 158 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 159 160 // If PrefetchDistance is not set, don't run the pass. This gives an 161 // opportunity for targets to run this pass for selected subtargets only 162 // (whose TTI sets PrefetchDistance). 163 if (getPrefetchDistance() == 0) 164 return false; 165 assert(TTI->getCacheLineSize() && "Cache line size is not set for target"); 166 167 bool MadeChange = false; 168 169 for (Loop *I : *LI) 170 for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L) 171 MadeChange |= runOnLoop(*L); 172 173 return MadeChange; 174 } 175 176 bool LoopDataPrefetch::runOnLoop(Loop *L) { 177 bool MadeChange = false; 178 179 // Only prefetch in the inner-most loop 180 if (!L->empty()) 181 return MadeChange; 182 183 SmallPtrSet<const Value *, 32> EphValues; 184 CodeMetrics::collectEphemeralValues(L, AC, EphValues); 185 186 // Calculate the number of iterations ahead to prefetch 187 CodeMetrics Metrics; 188 for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); 189 I != IE; ++I) { 190 191 // If the loop already has prefetches, then assume that the user knows 192 // what they are doing and don't add any more. 193 for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); 194 J != JE; ++J) 195 if (CallInst *CI = dyn_cast<CallInst>(J)) 196 if (Function *F = CI->getCalledFunction()) 197 if (F->getIntrinsicID() == Intrinsic::prefetch) 198 return MadeChange; 199 200 Metrics.analyzeBasicBlock(*I, *TTI, EphValues); 201 } 202 unsigned LoopSize = Metrics.NumInsts; 203 if (!LoopSize) 204 LoopSize = 1; 205 206 unsigned ItersAhead = getPrefetchDistance() / LoopSize; 207 if (!ItersAhead) 208 ItersAhead = 1; 209 210 if (ItersAhead > getMaxPrefetchIterationsAhead()) 211 return MadeChange; 212 213 Function *F = L->getHeader()->getParent(); 214 DEBUG(dbgs() << "Prefetching " << ItersAhead 215 << " iterations ahead (loop size: " << LoopSize << ") in " 216 << F->getName() << ": " << *L); 217 218 SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 16> PrefLoads; 219 for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); 220 I != IE; ++I) { 221 for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); 222 J != JE; ++J) { 223 Value *PtrValue; 224 Instruction *MemI; 225 226 if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) { 227 MemI = LMemI; 228 PtrValue = LMemI->getPointerOperand(); 229 } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) { 230 if (!PrefetchWrites) continue; 231 MemI = SMemI; 232 PtrValue = SMemI->getPointerOperand(); 233 } else continue; 234 235 unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); 236 if (PtrAddrSpace) 237 continue; 238 239 if (L->isLoopInvariant(PtrValue)) 240 continue; 241 242 const SCEV *LSCEV = SE->getSCEV(PtrValue); 243 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV); 244 if (!LSCEVAddRec) 245 continue; 246 247 // Check if the the stride of the accesses is large enough to warrant a 248 // prefetch. 249 if (!isStrideLargeEnough(LSCEVAddRec)) 250 continue; 251 252 // We don't want to double prefetch individual cache lines. If this load 253 // is known to be within one cache line of some other load that has 254 // already been prefetched, then don't prefetch this one as well. 255 bool DupPref = false; 256 for (const auto &PrefLoad : PrefLoads) { 257 const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, PrefLoad.second); 258 if (const SCEVConstant *ConstPtrDiff = 259 dyn_cast<SCEVConstant>(PtrDiff)) { 260 int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue()); 261 if (PD < (int64_t) TTI->getCacheLineSize()) { 262 DupPref = true; 263 break; 264 } 265 } 266 } 267 if (DupPref) 268 continue; 269 270 const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr( 271 SE->getConstant(LSCEVAddRec->getType(), ItersAhead), 272 LSCEVAddRec->getStepRecurrence(*SE))); 273 if (!isSafeToExpand(NextLSCEV, *SE)) 274 continue; 275 276 PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec)); 277 278 Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace); 279 SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr"); 280 Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI); 281 282 IRBuilder<> Builder(MemI); 283 Module *M = (*I)->getParent()->getParent(); 284 Type *I32 = Type::getInt32Ty((*I)->getContext()); 285 Value *PrefetchFunc = Intrinsic::getDeclaration(M, Intrinsic::prefetch); 286 Builder.CreateCall( 287 PrefetchFunc, 288 {PrefPtrValue, 289 ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1), 290 ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)}); 291 ++NumPrefetches; 292 DEBUG(dbgs() << " Access: " << *PtrValue << ", SCEV: " << *LSCEV 293 << "\n"); 294 emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, 295 MemI->getDebugLoc(), "prefetched memory access"); 296 297 298 MadeChange = true; 299 } 300 } 301 302 return MadeChange; 303 } 304 305