//===-- NVPTXInferAddressSpaces.cpp - --------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// CUDA C/C++ includes memory space designation as variable type qualifiers
// (such as __global__ and __shared__). Knowing the space of a memory access
// allows CUDA compilers to emit faster PTX loads and stores. For example, a
// load from shared memory can be translated to `ld.shared`, which is roughly
// 10% faster than a generic `ld` on an NVIDIA Tesla K40c.
//
// Unfortunately, type qualifiers only apply to variable declarations, so CUDA
// compilers must infer the memory space of an address expression from
// type-qualified variables.
//
// LLVM IR uses non-zero (so-called specific) address spaces to represent
// memory spaces (e.g. addrspace(3) means shared memory). The Clang frontend
// places only type-qualified variables in specific address spaces, and then
// conservatively `addrspacecast`s each type-qualified variable to addrspace(0)
// (the so-called generic address space) for other instructions to use.
//
// For example, Clang translates the following CUDA code
//   __shared__ float a[10];
//   float v = a[i];
// to
//   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
//   %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i
//   %v = load float, float* %1 ; emits ld.f32
// @a is in addrspace(3) since it's type-qualified, but its use from %1 is
// redirected to %0 (the generic version of @a).
//
// The optimization implemented in this file propagates specific address spaces
// from type-qualified variable declarations to their users. For example, it
// optimizes the above IR to
//   %1 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %v = load float addrspace(3)* %1 ; emits ld.shared.f32
// propagating the addrspace(3) from @a to %1. As a result, the NVPTX codegen
// is able to emit ld.shared.f32 for %v.
//
// Address space inference works in two steps. First, it uses a data-flow
// analysis to infer as many generic pointers as possible to point to only one
// specific address space. In the above example, it can prove that %1 only
// points to addrspace(3). This algorithm was published in
//   CUDA: Compiling and optimizing for a GPU platform
//   Chakrabarti, Grover, Aarts, Kong, Kudlur, Lin, Marathe, Murphy, Wang
//   ICCS 2012
//
// Then, address space inference replaces all refinable generic pointers with
// equivalent specific pointers.
//
// The major challenge of implementing this optimization is handling PHINodes,
// which may create loops in the data flow graph. This brings two complications.
//
// First, the data flow analysis in Step 1 needs to be circular. For example,
//     %generic.input = addrspacecast float addrspace(3)* %input to float*
//   loop:
//     %y = phi [ %generic.input, %y2 ]
//     %y2 = getelementptr %y, 1
//     %v = load %y2
//     br ..., label %loop, ...
// proving %y specific requires proving both %generic.input and %y2 specific,
// but proving %y2 specific circles back to %y. To address this complication,
// the data flow analysis operates on a lattice:
//   uninitialized > specific address spaces > generic.
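//   (Uninitialized is the top element of this lattice and generic is the
//   bottom element. For example, joining shared with shared stays shared,
//   while joining shared with global falls to generic.)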
// All address expressions (our implementation only considers phi, bitcast,
// addrspacecast, and getelementptr) start with the uninitialized address space.
// The monotone transfer function moves the address space of a pointer down a
// lattice path from uninitialized to specific and then to generic. A join
// operation of two different specific address spaces pushes the expression down
// to the generic address space. The analysis completes once it reaches a fixed
// point.
//
// Second, IR rewriting in Step 2 also needs to be circular. For example,
// converting %y to addrspace(3) requires the compiler to know the converted
// %y2, but converting %y2 needs the converted %y. To address this complication,
// we break these cycles using "undef" placeholders. When converting an
// instruction `I` to a new address space, if its operand `Op` is not converted
// yet, we let `I` temporarily use `undef` and fix all the uses of undef later.
// For instance, our algorithm first converts %y to
//   %y' = phi float addrspace(3)* [ %input, undef ]
// Then, it converts %y2 to
//   %y2' = getelementptr %y', 1
// Finally, it fixes the undef in %y' so that
//   %y' = phi float addrspace(3)* [ %input, %y2' ]
//
// TODO: This pass is experimental and not enabled by default. Users can turn it
// on by setting the -nvptx-use-infer-addrspace flag of llc. We plan to replace
// NVPTXFavorNonGenericAddrSpaces with this pass shortly.
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "nvptx-infer-addrspace"

#include "NVPTX.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

using namespace llvm;

namespace {
const unsigned ADDRESS_SPACE_UNINITIALIZED = (unsigned)-1;

using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;

/// \brief NVPTXInferAddressSpaces
class NVPTXInferAddressSpaces : public FunctionPass {
public:
  static char ID;

  NVPTXInferAddressSpaces() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override;

private:
  // Returns the new address space of V if updated; otherwise, returns None.
  Optional<unsigned>
  updateAddressSpace(const Value &V,
                     const ValueToAddrSpaceMapTy &InferredAddrSpace);

  // Tries to infer the specific address space of each address expression in
  // Postorder.
  void inferAddressSpaces(const std::vector<Value *> &Postorder,
                          ValueToAddrSpaceMapTy *InferredAddrSpace);

  // Changes the generic address expressions in function F to point to specific
  // address spaces if InferredAddrSpace says so. Postorder is the postorder of
  // all generic address expressions in the use-def graph of function F.
  bool
  rewriteWithNewAddressSpaces(const std::vector<Value *> &Postorder,
                              const ValueToAddrSpaceMapTy &InferredAddrSpace,
                              Function *F);
};
} // end anonymous namespace

char NVPTXInferAddressSpaces::ID = 0;

namespace llvm {
void initializeNVPTXInferAddressSpacesPass(PassRegistry &);
}
INITIALIZE_PASS(NVPTXInferAddressSpaces, "nvptx-infer-addrspace",
                "Infer address spaces",
                false, false)

// Returns true if V is an address expression.
// TODO: Currently, we consider only phi, bitcast, addrspacecast, and
// getelementptr operators.
static bool isAddressExpression(const Value &V) {
  if (!isa<Operator>(V))
    return false;

  switch (cast<Operator>(V).getOpcode()) {
  case Instruction::PHI:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
  case Instruction::GetElementPtr:
    return true;
  default:
    return false;
  }
}

// Returns the pointer operands of V.
//
// Precondition: V is an address expression.
static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
  assert(isAddressExpression(V));
  const Operator &Op = cast<Operator>(V);
  switch (Op.getOpcode()) {
  case Instruction::PHI: {
    auto IncomingValues = cast<PHINode>(Op).incoming_values();
    return SmallVector<Value *, 2>(IncomingValues.begin(),
                                   IncomingValues.end());
  }
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
  case Instruction::GetElementPtr:
    return {Op.getOperand(0)};
  default:
    llvm_unreachable("Unexpected instruction type.");
  }
}

// If V is an unvisited generic address expression, appends V to PostorderStack
// and marks it as visited.
static void appendsGenericAddressExpressionToPostorderStack(
    Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
    DenseSet<Value *> *Visited) {
  assert(V->getType()->isPointerTy());
  if (isAddressExpression(*V) &&
      V->getType()->getPointerAddressSpace() ==
          AddressSpace::ADDRESS_SPACE_GENERIC) {
    if (Visited->insert(V).second)
      PostorderStack->push_back(std::make_pair(V, false));
  }
}

// Returns all generic address expressions in function F. The elements are
// ordered in postorder.
static std::vector<Value *> collectGenericAddressExpressions(Function &F) {
  // This function implements a non-recursive postorder traversal of a partial
  // use-def graph of function F.
  std::vector<std::pair<Value *, bool>> PostorderStack;
  // The set of visited expressions.
  DenseSet<Value *> Visited;
  // We only explore address expressions that are reachable from loads and
  // stores for now because we aim at generating faster loads and stores.
  for (Instruction &I : instructions(F)) {
    if (isa<LoadInst>(I)) {
      appendsGenericAddressExpressionToPostorderStack(
          I.getOperand(0), &PostorderStack, &Visited);
    } else if (isa<StoreInst>(I)) {
      appendsGenericAddressExpressionToPostorderStack(
          I.getOperand(1), &PostorderStack, &Visited);
    }
  }

  std::vector<Value *> Postorder; // The resultant postorder.
  while (!PostorderStack.empty()) {
    // If the operands of the expression on the top are already explored,
    // adds that expression to the resultant postorder.
    if (PostorderStack.back().second) {
      Postorder.push_back(PostorderStack.back().first);
      PostorderStack.pop_back();
      continue;
    }
    // Otherwise, adds its operands to the stack and explores them.
    PostorderStack.back().second = true;
    for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) {
      appendsGenericAddressExpressionToPostorderStack(
          PtrOperand, &PostorderStack, &Visited);
    }
  }
  return Postorder;
}

// A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
// of OperandUse.get() in the new address space. If the clone is not ready yet,
// returns an undef in the new address space as a placeholder.
static Value *operandWithNewAddressSpaceOrCreateUndef(
    const Use &OperandUse, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<const Use *> *UndefUsesToFix) {
  Value *Operand = OperandUse.get();
  if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
    return NewOperand;

  UndefUsesToFix->push_back(&OperandUse);
  return UndefValue::get(
      Operand->getType()->getPointerElementType()->getPointerTo(NewAddrSpace));
}

// Returns a clone of `I` with its operands converted to those specified in
// ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an
// operand whose address space needs to be modified might not exist in
// ValueWithNewAddrSpace. In that case, this function uses undef as a
// placeholder operand and adds that operand use to UndefUsesToFix so that the
// caller can fix them later.
//
// Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
// from a pointer whose type already matches. Therefore, this function returns a
// Value* instead of an Instruction*.
static Value *cloneInstructionWithNewAddressSpace(
    Instruction *I, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<const Use *> *UndefUsesToFix) {
  Type *NewPtrType =
      I->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);

  if (I->getOpcode() == Instruction::AddrSpaceCast) {
    Value *Src = I->getOperand(0);
    // Because `I` is generic, the source address space must be specific.
    // Therefore, the inferred address space must be the source space,
    // according to our algorithm.
    assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
    if (Src->getType() != NewPtrType)
      return new BitCastInst(Src, NewPtrType);
    return Src;
  }

  // Computes the converted pointer operands.
  SmallVector<Value *, 4> NewPointerOperands;
  for (const Use &OperandUse : I->operands()) {
    if (!OperandUse.get()->getType()->isPointerTy())
      NewPointerOperands.push_back(nullptr);
    else
      NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
          OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix));
  }

  switch (I->getOpcode()) {
  case Instruction::BitCast:
    return new BitCastInst(NewPointerOperands[0], NewPtrType);
  case Instruction::PHI: {
    assert(I->getType()->isPointerTy());
    PHINode *PHI = cast<PHINode>(I);
    PHINode *NewPHI = PHINode::Create(NewPtrType, PHI->getNumIncomingValues());
    for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) {
      unsigned OperandNo = PHINode::getOperandNumForIncomingValue(Index);
      NewPHI->addIncoming(NewPointerOperands[OperandNo],
                          PHI->getIncomingBlock(Index));
    }
    return NewPHI;
  }
  case Instruction::GetElementPtr: {
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
    GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
        GEP->getSourceElementType(), NewPointerOperands[0],
        SmallVector<Value *, 4>(GEP->idx_begin(), GEP->idx_end()));
    NewGEP->setIsInBounds(GEP->isInBounds());
    return NewGEP;
  }
  default:
    llvm_unreachable("Unexpected opcode");
  }
}

// Similar to cloneInstructionWithNewAddressSpace, returns a clone of the
// constant expression `CE` with its operands replaced as specified in
// ValueWithNewAddrSpace.
static Value *cloneConstantExprWithNewAddressSpace(
    ConstantExpr *CE, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace) {
  Type *TargetType =
      CE->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);

  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    // Because CE is generic, the source address space must be specific.
    // Therefore, the inferred address space must be the source space according
    // to our algorithm.
    assert(CE->getOperand(0)->getType()->getPointerAddressSpace() ==
           NewAddrSpace);
    return ConstantExpr::getBitCast(CE->getOperand(0), TargetType);
  }

  // Computes the operands of the new constant expression.
  SmallVector<Constant *, 4> NewOperands;
  for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) {
    Constant *Operand = CE->getOperand(Index);
    // If the address space of `Operand` needs to be modified, the new operand
    // with the new address space should already be in ValueWithNewAddrSpace
    // because (1) the constant expressions we consider (i.e. addrspacecast,
    // bitcast, and getelementptr) do not incur cycles in the data flow graph
    // and (2) this function is called on constant expressions in postorder.
    if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) {
      NewOperands.push_back(cast<Constant>(NewOperand));
    } else {
      // Otherwise, reuses the old operand.
      NewOperands.push_back(Operand);
    }
  }

  if (CE->getOpcode() == Instruction::GetElementPtr) {
    // Needs to specify the source type while constructing a getelementptr
    // constant expression.
    return CE->getWithOperands(
        NewOperands, TargetType, /*OnlyIfReduced=*/false,
        NewOperands[0]->getType()->getPointerElementType());
  }

  return CE->getWithOperands(NewOperands, TargetType);
}

// Returns a clone of the value `V`, with its operands replaced as specified in
// ValueWithNewAddrSpace. This function is called on every generic address
// expression whose address space needs to be modified, in postorder.
//
// See cloneInstructionWithNewAddressSpace for the meaning of UndefUsesToFix.
static Value *
cloneValueWithNewAddressSpace(Value *V, unsigned NewAddrSpace,
                              const ValueToValueMapTy &ValueWithNewAddrSpace,
                              SmallVectorImpl<const Use *> *UndefUsesToFix) {
  // All values in Postorder are generic address expressions.
  assert(isAddressExpression(*V) &&
         V->getType()->getPointerAddressSpace() ==
             AddressSpace::ADDRESS_SPACE_GENERIC);

  if (Instruction *I = dyn_cast<Instruction>(V)) {
    Value *NewV = cloneInstructionWithNewAddressSpace(
        I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
    if (Instruction *NewI = dyn_cast<Instruction>(NewV)) {
      if (NewI->getParent() == nullptr) {
        NewI->insertBefore(I);
        NewI->takeName(I);
      }
    }
    return NewV;
  }

  return cloneConstantExprWithNewAddressSpace(
      cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace);
}

// Defines the join operation on the address space lattice (see the file header
// comments).
static unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) {
  if (AS1 == AddressSpace::ADDRESS_SPACE_GENERIC ||
      AS2 == AddressSpace::ADDRESS_SPACE_GENERIC)
    return AddressSpace::ADDRESS_SPACE_GENERIC;

  if (AS1 == ADDRESS_SPACE_UNINITIALIZED)
    return AS2;
  if (AS2 == ADDRESS_SPACE_UNINITIALIZED)
    return AS1;

  // The join of two different specific address spaces is generic.
  return AS1 == AS2 ? AS1 : (unsigned)AddressSpace::ADDRESS_SPACE_GENERIC;
}

bool NVPTXInferAddressSpaces::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  // Collects all generic address expressions in postorder.
  std::vector<Value *> Postorder = collectGenericAddressExpressions(F);

  // Runs a data-flow analysis to refine the address spaces of every expression
  // in Postorder.
  ValueToAddrSpaceMapTy InferredAddrSpace;
  inferAddressSpaces(Postorder, &InferredAddrSpace);

  // Changes the address spaces of the generic address expressions that are
  // inferred to point to a specific address space.
  return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
}

void NVPTXInferAddressSpaces::inferAddressSpaces(
    const std::vector<Value *> &Postorder,
    ValueToAddrSpaceMapTy *InferredAddrSpace) {
  SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
  // Initially, all expressions are in the uninitialized address space.
  for (Value *V : Postorder)
    (*InferredAddrSpace)[V] = ADDRESS_SPACE_UNINITIALIZED;

  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();

    // Tries to update the address space of V according to the address spaces
    // of its operands.
    DEBUG(dbgs() << "Updating the address space of\n"
                 << "  " << *V << "\n");
    Optional<unsigned> NewAS = updateAddressSpace(*V, *InferredAddrSpace);
    if (!NewAS.hasValue())
      continue;
    // If any updates are made, adds the users of V to the worklist because
    // their address spaces may be updated as well.
    DEBUG(dbgs() << "  to " << NewAS.getValue() << "\n");
    (*InferredAddrSpace)[V] = NewAS.getValue();

    for (Value *User : V->users()) {
      // Skip if User is already in the worklist.
      if (Worklist.count(User))
        continue;

      auto Pos = InferredAddrSpace->find(User);
      // Our algorithm only updates the address spaces of generic address
      // expressions, which are those in InferredAddrSpace.
      if (Pos == InferredAddrSpace->end())
        continue;

      // Function updateAddressSpace moves the address space down a lattice
      // path. Therefore, there is nothing to do if User is already inferred
      // as generic (the bottom element in the lattice).
      if (Pos->second == AddressSpace::ADDRESS_SPACE_GENERIC)
        continue;

      Worklist.insert(User);
    }
  }
}

Optional<unsigned> NVPTXInferAddressSpaces::updateAddressSpace(
    const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) {
  assert(InferredAddrSpace.count(&V));

  // The new inferred address space equals the join of the address spaces
  // of all its pointer operands.
  unsigned NewAS = ADDRESS_SPACE_UNINITIALIZED;
  for (Value *PtrOperand : getPointerOperands(V)) {
    unsigned OperandAS;
    if (InferredAddrSpace.count(PtrOperand))
      OperandAS = InferredAddrSpace.lookup(PtrOperand);
    else
      OperandAS = PtrOperand->getType()->getPointerAddressSpace();
    NewAS = joinAddressSpaces(NewAS, OperandAS);
    // join(generic, *) = generic, so we can break if NewAS is already generic.
    if (NewAS == AddressSpace::ADDRESS_SPACE_GENERIC)
      break;
  }

  unsigned OldAS = InferredAddrSpace.lookup(&V);
  assert(OldAS != AddressSpace::ADDRESS_SPACE_GENERIC);
  if (OldAS == NewAS)
    return None;
  return NewAS;
}

bool NVPTXInferAddressSpaces::rewriteWithNewAddressSpaces(
    const std::vector<Value *> &Postorder,
    const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) {
  // For each address expression to be modified, creates a clone of it with its
  // pointer operands converted to the new address space. Since the pointer
  // operands are converted, the clone is naturally in the new address space by
  // construction.
  ValueToValueMapTy ValueWithNewAddrSpace;
  SmallVector<const Use *, 32> UndefUsesToFix;
  for (Value *V : Postorder) {
    unsigned NewAddrSpace = InferredAddrSpace.lookup(V);
    if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
      ValueWithNewAddrSpace[V] = cloneValueWithNewAddressSpace(
          V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
    }
  }

  if (ValueWithNewAddrSpace.empty())
    return false;

  // Fixes all the undef uses generated by cloneInstructionWithNewAddressSpace.
  for (const Use *UndefUse : UndefUsesToFix) {
    User *V = UndefUse->getUser();
    User *NewV = cast<User>(ValueWithNewAddrSpace.lookup(V));
    unsigned OperandNo = UndefUse->getOperandNo();
    assert(isa<UndefValue>(NewV->getOperand(OperandNo)));
    NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get()));
  }

  // Replaces the uses of the old address expressions with the new ones.
  for (Value *V : Postorder) {
    Value *NewV = ValueWithNewAddrSpace.lookup(V);
    if (NewV == nullptr)
      continue;

    SmallVector<Use *, 4> Uses;
    for (Use &U : V->uses())
      Uses.push_back(&U);
    DEBUG(dbgs() << "Replacing the uses of " << *V << "\n  to\n  " << *NewV
                 << "\n");
    for (Use *U : Uses) {
      if (isa<LoadInst>(U->getUser()) ||
          (isa<StoreInst>(U->getUser()) && U->getOperandNo() == 1)) {
        // If V is used as the pointer operand of a load/store, sets the
        // pointer operand to NewV.
        // This replacement does not change the element type, so the resultant
        // load/store is still valid.
        U->set(NewV);
      } else if (isa<Instruction>(U->getUser())) {
        // Otherwise, replaces the use with generic(NewV).
        // TODO: Some optimization opportunities are missed. For example, in
        //   %0 = icmp eq float* %p, %q
        // if both p and q are inferred to be shared, we can rewrite %0 as
        //   %0 = icmp eq float addrspace(3)* %new_p, %new_q
        // instead of the current
        //   %generic_p = addrspacecast float addrspace(3)* %new_p to float*
        //   %generic_q = addrspacecast float addrspace(3)* %new_q to float*
        //   %0 = icmp eq float* %generic_p, %generic_q
        if (Instruction *I = dyn_cast<Instruction>(V)) {
          BasicBlock::iterator InsertPos = std::next(I->getIterator());
          while (isa<PHINode>(InsertPos))
            ++InsertPos;
          U->set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
        } else {
          U->set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
                                                V->getType()));
        }
      }
    }
    if (V->use_empty())
      RecursivelyDeleteTriviallyDeadInstructions(V);
  }

  return true;
}

FunctionPass *llvm::createNVPTXInferAddressSpacesPass() {
  return new NVPTXInferAddressSpaces();
}
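// A minimal usage sketch (illustrative, not part of the pass itself): assuming
// an llvm::Module `M` has already been created or parsed, the pass can be run
// through the legacy pass manager like any other FunctionPass. The helper name
// runInferAddressSpaces below is hypothetical.
//
//   #include "llvm/IR/LegacyPassManager.h"
//
//   void runInferAddressSpaces(Module &M) {
//     legacy::FunctionPassManager FPM(&M);
//     FPM.add(createNVPTXInferAddressSpacesPass());
//     FPM.doInitialization();
//     for (Function &F : M)
//       FPM.run(F);
//     FPM.doFinalization();
//   }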