//===-- NVPTXFavorNonGenericAddrSpace.cpp - ---------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// FIXME: This pass is deprecated in favor of NVPTXInferAddressSpaces, which
// uses a new algorithm that handles pointer induction variables.
//
// When a load/store accesses the generic address space, this pass checks
// whether the address is cast from a non-generic address space. If so, it
// removes the addrspacecast because accessing non-generic address spaces is
// typically faster. Besides removing addrspacecasts directly used by
// loads/stores, this optimization also recursively traces into a GEP's
// pointer operand and a bitcast's source to find more eliminable
// addrspacecasts.
//
// For instance, the code below loads a float from an array allocated in
// addrspace(3).
//
//   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
//   %1 = gep [10 x float]* %0, i64 0, i64 %i
//   %2 = bitcast float* %1 to i32*
//   %3 = load i32* %2 ; emits ld.u32
//
// First, function hoistAddrSpaceCastFrom reorders the addrspacecast, the GEP,
// and the bitcast to expose more optimization opportunities to function
// optimizeMemoryInstruction. The intermediate code looks like:
//
//   %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
//   %2 = addrspacecast i32 addrspace(3)* %1 to i32*
//   %3 = load i32* %2 ; still emits ld.u32, but will be optimized shortly
//
// Then, function optimizeMemoryInstruction detects a load from an
// addrspacecast'ed generic pointer, and folds the load and the addrspacecast
// into a load from the original address space. The final code looks like:
//
//   %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
//   %3 = load i32 addrspace(3)* %1 ; emits ld.shared.u32
//
// This pass may remove an addrspacecast in a different BB. Therefore, we
// implement it as a FunctionPass.
//
// TODO:
// The current implementation doesn't handle PHINodes. Eliminating
// addrspacecasts used by PHINodes is trickier because PHINodes can introduce
// loops in data flow. For example,
//
//   %generic.input = addrspacecast float addrspace(3)* %input to float*
// loop:
//   %y = phi [ %generic.input, %y2 ]
//   %y2 = getelementptr %y, 1
//   %v = load %y2
//   br ..., label %loop, ...
//
// Marking %y2 shared depends on marking %y shared, but %y also data-flow
// depends on %y2. We probably need an iterative fixed-point algorithm to
// handle this case. A sketch of such an algorithm follows.
//
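// One possible shape of that fixed-point algorithm (a sketch only, not
// implemented here; the step names are illustrative):
//
//   repeat until no marking changes:
//     for each pointer-typed value V in the function:
//       if every pointer input of V (PHI incoming values, GEP pointer
//       operands, bitcast sources) is known to point into addrspace(AS):
//         mark V as pointing into addrspace(AS)
//
// Iterating to a fixed point breaks the circular dependence between %y and
// %y2 above: the first iteration marks %y from %generic.input, and a later
// iteration then marks %y2 from %y. NVPTXInferAddressSpaces (see the FIXME
// above) supersedes this pass with an algorithm along these lines.
//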
//===----------------------------------------------------------------------===//

#include "NVPTX.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

// An option to disable this optimization. The optimization is enabled by
// default.
static cl::opt<bool> DisableFavorNonGeneric(
    "disable-nvptx-favor-non-generic",
    cl::init(false),
    cl::desc("Do not convert generic address space usage "
             "to non-generic address space usage"),
    cl::Hidden);

namespace {
/// \brief NVPTXFavorNonGenericAddrSpaces - a FunctionPass that removes
/// unnecessary non-generic-to-generic addrspacecasts feeding loads and
/// stores.
class NVPTXFavorNonGenericAddrSpaces : public FunctionPass {
public:
  static char ID;
  NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {}
  bool runOnFunction(Function &F) override;

private:
  /// Optimizes load/store instructions. Idx is the index of the pointer
  /// operand (0 for load, and 1 for store). Returns true if it changes
  /// anything.
  bool optimizeMemoryInstruction(Instruction *I, unsigned Idx);
  /// Recursively traces into a GEP's pointer operand or a bitcast's source to
  /// find an eliminable addrspacecast, and hoists that addrspacecast to the
  /// outermost level. For example, this function transforms
  ///   bitcast(gep(gep(addrspacecast(X))))
  /// to
  ///   addrspacecast(bitcast(gep(gep(X)))).
  ///
  /// This reordering exposes to optimizeMemoryInstruction more
  /// optimization opportunities on loads and stores.
  ///
  /// If this function successfully hoists an eliminable addrspacecast or V is
  /// already such an addrspacecast, it returns the transformed value (which is
  /// guaranteed to be an addrspacecast); otherwise, it returns nullptr.
  Value *hoistAddrSpaceCastFrom(Value *V, int Depth = 0);
  /// Helper function for GEPs.
  Value *hoistAddrSpaceCastFromGEP(GEPOperator *GEP, int Depth);
  /// Helper function for bitcasts.
  Value *hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth);
};
} // namespace

char NVPTXFavorNonGenericAddrSpaces::ID = 0;

namespace llvm {
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
}
INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic",
                "Remove unnecessary non-generic-to-generic addrspacecasts",
                false, false)

// Decides whether V is an addrspacecast and whether shortcutting it in a
// load/store is valid and beneficial.
static bool isEliminableAddrSpaceCast(Value *V) {
  // Returns false if V is not even an addrspacecast.
  Operator *Cast = dyn_cast<Operator>(V);
  if (Cast == nullptr || Cast->getOpcode() != Instruction::AddrSpaceCast)
    return false;

  Value *Src = Cast->getOperand(0);
  PointerType *SrcTy = cast<PointerType>(Src->getType());
  PointerType *DestTy = cast<PointerType>(Cast->getType());
  // TODO: For now, we only handle the case where the addrspacecast only
  // changes the address space but not the type. If the type also changes, we
  // could still get rid of the addrspacecast by adding an extra bitcast, but
  // we rarely see such scenarios.
  if (SrcTy->getElementType() != DestTy->getElementType())
    return false;

  // Checks whether the addrspacecast is from a non-generic address space to
  // the generic address space.
  return (SrcTy->getAddressSpace() != AddressSpace::ADDRESS_SPACE_GENERIC &&
          DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC);
}
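
// For illustration, a few hypothetical casts and how this predicate
// classifies them (these IR snippets are examples, not compiler output):
//
//   addrspacecast float addrspace(3)* %p to float*
//       ; eliminable: shared -> generic, same element type
//   addrspacecast float* %p to float addrspace(3)*
//       ; not eliminable: the destination is not the generic address space
//   addrspacecast i32 addrspace(3)* %p to float*
//       ; not eliminable (yet): the element type changes, which the TODO
//       ; above deliberately leaves unhandled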
Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
    GEPOperator *GEP, int Depth) {
  Value *NewOperand =
      hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1);
  if (NewOperand == nullptr)
    return nullptr;

  // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr.
  assert(isEliminableAddrSpaceCast(NewOperand));
  Operator *Cast = cast<Operator>(NewOperand);

  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  Value *NewASC;
  if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) {
    // GEP = gep (addrspacecast X), indices
    // =>
    // NewGEP = gep X, indices
    // NewASC = addrspacecast NewGEP
    GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
        GEP->getSourceElementType(), Cast->getOperand(0), Indices,
        "", GEPI);
    NewGEP->setIsInBounds(GEP->isInBounds());
    NewGEP->takeName(GEP);
    NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI);
    // Without RAUWing GEP, the compiler would visit GEP again and emit
    // redundant instructions. This is exercised in test @rauw in
    // access-non-generic.ll.
    GEP->replaceAllUsesWith(NewASC);
  } else {
    // GEP is a constant expression.
    Constant *NewGEP = ConstantExpr::getGetElementPtr(
        GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)),
        Indices, GEP->isInBounds());
    NewASC = ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType());
  }
  return NewASC;
}

Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast(
    BitCastOperator *BC, int Depth) {
  Value *NewOperand = hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1);
  if (NewOperand == nullptr)
    return nullptr;

  // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr.
  assert(isEliminableAddrSpaceCast(NewOperand));
  Operator *Cast = cast<Operator>(NewOperand);

  // Cast  = addrspacecast Src
  // BC    = bitcast Cast
  //   =>
  // Cast' = bitcast Src
  // BC'   = addrspacecast Cast'
  Value *Src = Cast->getOperand(0);
  Type *TypeOfNewCast =
      PointerType::get(BC->getType()->getPointerElementType(),
                       Src->getType()->getPointerAddressSpace());
  Value *NewBC;
  if (BitCastInst *BCI = dyn_cast<BitCastInst>(BC)) {
    Value *NewCast = new BitCastInst(Src, TypeOfNewCast, "", BCI);
    NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI);
    NewBC->takeName(BC);
    // Without RAUWing BC, the compiler would visit BC again and emit
    // redundant instructions. This is exercised in test @rauw in
    // access-non-generic.ll.
    BC->replaceAllUsesWith(NewBC);
  } else {
    // BC is a constant expression.
    Constant *NewCast =
        ConstantExpr::getBitCast(cast<Constant>(Src), TypeOfNewCast);
    NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType());
  }
  return NewBC;
}

Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V,
                                                              int Depth) {
  // Returns V if V is already an eliminable addrspacecast.
  if (isEliminableAddrSpaceCast(V))
    return V;

  // Limit the depth to prevent this recursive function from running too long.
  const int MaxDepth = 20;
  if (Depth >= MaxDepth)
    return nullptr;

  // If V is a GEP or bitcast, hoist the addrspacecast if any from its pointer
  // operand. This enables optimizeMemoryInstruction to shortcut addrspacecasts
  // that are not directly used by the load/store.
  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
    return hoistAddrSpaceCastFromGEP(GEP, Depth);

  if (BitCastOperator *BC = dyn_cast<BitCastOperator>(V))
    return hoistAddrSpaceCastFromBitCast(BC, Depth);

  return nullptr;
}
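
// To illustrate the recursion, here is how these three functions cooperate on
// the motivating example from the file header (value names refer to that
// example, not to real compiler output):
//
//   hoistAddrSpaceCastFrom(%2)              ; %2 = bitcast float* %1 to i32*
//     -> hoistAddrSpaceCastFromBitCast(%2)
//          -> hoistAddrSpaceCastFrom(%1)    ; %1 = gep ... %0, i64 0, i64 %i
//               -> hoistAddrSpaceCastFromGEP(%1)
//                    -> hoistAddrSpaceCastFrom(%0)
//                         returns %0        ; already an eliminable cast
//                    returns addrspacecast (gep @a, i64 0, i64 %i)
//          returns addrspacecast (bitcast (gep @a, i64 0, i64 %i))
//
// optimizeMemoryInstruction (below) then strips the outermost addrspacecast
// off the load's pointer operand.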
bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI,
                                                               unsigned Idx) {
  Value *NewOperand = hoistAddrSpaceCastFrom(MI->getOperand(Idx));
  if (NewOperand == nullptr)
    return false;

  // load/store (addrspacecast X) => load/store X if shortcutting the
  // addrspacecast is valid and can improve performance.
  //
  // e.g.,
  //   %1 = addrspacecast float addrspace(3)* %0 to float*
  //   %2 = load float* %1
  // ->
  //   %2 = load float addrspace(3)* %0
  //
  // Note: the addrspacecast can also be a constant expression.
  assert(isEliminableAddrSpaceCast(NewOperand));
  Operator *ASC = cast<Operator>(NewOperand);
  MI->setOperand(Idx, ASC->getOperand(0));
  return true;
}

bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) {
  if (DisableFavorNonGeneric || skipFunction(F))
    return false;

  bool Changed = false;
  for (BasicBlock &B : F) {
    for (Instruction &I : B) {
      if (isa<LoadInst>(I)) {
        // V = load P
        Changed |= optimizeMemoryInstruction(&I, 0);
      } else if (isa<StoreInst>(I)) {
        // store V, P
        Changed |= optimizeMemoryInstruction(&I, 1);
      }
    }
  }
  return Changed;
}

FunctionPass *llvm::createNVPTXFavorNonGenericAddrSpacesPass() {
  return new NVPTXFavorNonGenericAddrSpaces();
}
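
// For reference, the pass is registered under the name "nvptx-favor-non-generic"
// (see INITIALIZE_PASS above), so it can be exercised in isolation with opt,
// e.g. (assuming an LLVM build with the NVPTX target enabled):
//
//   opt -S -nvptx-favor-non-generic input.ll -o output.ll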