Home | History | Annotate | Download | only in NVPTX
      1 //===-- NVPTXFavorNonGenericAddrSpace.cpp - ---------------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // FIXME: This pass is deprecated in favor of NVPTXInferAddressSpaces, which
     11 // uses a new algorithm that handles pointer induction variables.
     12 //
     13 // When a load/store accesses the generic address space, checks whether the
// address is cast from a non-generic address space. If so, remove this
     15 // addrspacecast because accessing non-generic address spaces is typically
     16 // faster. Besides removing addrspacecasts directly used by loads/stores, this
     17 // optimization also recursively traces into a GEP's pointer operand and a
     18 // bitcast's source to find more eliminable addrspacecasts.
     19 //
     20 // For instance, the code below loads a float from an array allocated in
     21 // addrspace(3).
     22 //
     23 //   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
     24 //   %1 = gep [10 x float]* %0, i64 0, i64 %i
     25 //   %2 = bitcast float* %1 to i32*
     26 //   %3 = load i32* %2 ; emits ld.u32
     27 //
     28 // First, function hoistAddrSpaceCastFrom reorders the addrspacecast, the GEP,
     29 // and the bitcast to expose more optimization opportunities to function
// optimizeMemoryInstruction. The intermediate code looks like:
     31 //
     32 //   %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
     33 //   %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
     34 //   %2 = addrspacecast i32 addrspace(3)* %1 to i32*
     35 //   %3 = load i32* %2 ; still emits ld.u32, but will be optimized shortly
     36 //
     37 // Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed
     38 // generic pointers, and folds the load and the addrspacecast into a load from
     39 // the original address space. The final code looks like:
     40 //
     41 //   %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
     42 //   %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
     43 //   %3 = load i32 addrspace(3)* %1 ; emits ld.shared.f32
     44 //
     45 // This pass may remove an addrspacecast in a different BB. Therefore, we
     46 // implement it as a FunctionPass.
     47 //
     48 // TODO:
     49 // The current implementation doesn't handle PHINodes. Eliminating
     50 // addrspacecasts used by PHINodes is trickier because PHINodes can introduce
     51 // loops in data flow. For example,
     52 //
     53 //     %generic.input = addrspacecast float addrspace(3)* %input to float*
     54 //   loop:
     55 //     %y = phi [ %generic.input, %y2 ]
     56 //     %y2 = getelementptr %y, 1
     57 //     %v = load %y2
     58 //     br ..., label %loop, ...
     59 //
     60 // Marking %y2 shared depends on marking %y shared, but %y also data-flow
// depends on %y2. We probably need an iterative fixed-point algorithm to handle
     62 // this case.
     63 //
     64 //===----------------------------------------------------------------------===//
     65 
     66 #include "NVPTX.h"
     67 #include "llvm/IR/Function.h"
     68 #include "llvm/IR/Instructions.h"
     69 #include "llvm/IR/Operator.h"
     70 #include "llvm/Support/CommandLine.h"
     71 
     72 using namespace llvm;
     73 
// Command-line switch to disable this optimization entirely (it is enabled by
// default). Useful for debugging and for measuring the pass's benefit.
static cl::opt<bool> DisableFavorNonGeneric(
  "disable-nvptx-favor-non-generic",
  cl::init(false),
  cl::desc("Do not convert generic address space usage "
           "to non-generic address space usage"),
  cl::Hidden);
     81 
namespace {
/// \brief NVPTXFavorNonGenericAddrSpaces
///
/// A FunctionPass that rewrites loads/stores whose generic pointer operand is
/// (possibly indirectly, through GEPs/bitcasts) an addrspacecast from a
/// non-generic address space, so the access targets the original, faster
/// address space instead.
class NVPTXFavorNonGenericAddrSpaces : public FunctionPass {
public:
  static char ID;
  NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {}
  bool runOnFunction(Function &F) override;

private:
  /// Optimizes load/store instructions. Idx is the index of the pointer operand
  /// (0 for load, and 1 for store). Returns true if it changes anything.
  bool optimizeMemoryInstruction(Instruction *I, unsigned Idx);
  /// Recursively traces into a GEP's pointer operand or a bitcast's source to
  /// find an eliminable addrspacecast, and hoists that addrspacecast to the
  /// outermost level. For example, this function transforms
  ///   bitcast(gep(gep(addrspacecast(X))))
  /// to
  ///   addrspacecast(bitcast(gep(gep(X)))).
  ///
  /// This reordering exposes to optimizeMemoryInstruction more
  /// optimization opportunities on loads and stores.
  ///
  /// If this function successfully hoists an eliminable addrspacecast or V is
  /// already such an addrspacecast, it returns the transformed value (which is
  /// guaranteed to be an addrspacecast); otherwise, it returns nullptr.
  ///
  /// Depth bounds the recursion (see MaxDepth in the implementation).
  Value *hoistAddrSpaceCastFrom(Value *V, int Depth = 0);
  /// Helper of hoistAddrSpaceCastFrom for the case where V is a GEP.
  Value *hoistAddrSpaceCastFromGEP(GEPOperator *GEP, int Depth);
  /// Helper of hoistAddrSpaceCastFrom for the case where V is a bitcast.
  Value *hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth);
};
} // end anonymous namespace
    114 
// Pass identification; LLVM uses the address of ID, not its value.
char NVPTXFavorNonGenericAddrSpaces::ID = 0;

namespace llvm {
// Declaration of the initialization routine that INITIALIZE_PASS defines
// below, so it can be referenced without including InitializePasses.h.
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
}
INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic",
                "Remove unnecessary non-generic-to-generic addrspacecasts",
                false, false)
    123 
    124 // Decides whether V is an addrspacecast and shortcutting V in load/store is
    125 // valid and beneficial.
    126 static bool isEliminableAddrSpaceCast(Value *V) {
    127   // Returns false if V is not even an addrspacecast.
    128   Operator *Cast = dyn_cast<Operator>(V);
    129   if (Cast == nullptr || Cast->getOpcode() != Instruction::AddrSpaceCast)
    130     return false;
    131 
    132   Value *Src = Cast->getOperand(0);
    133   PointerType *SrcTy = cast<PointerType>(Src->getType());
    134   PointerType *DestTy = cast<PointerType>(Cast->getType());
    135   // TODO: For now, we only handle the case where the addrspacecast only changes
    136   // the address space but not the type. If the type also changes, we could
    137   // still get rid of the addrspacecast by adding an extra bitcast, but we
    138   // rarely see such scenarios.
    139   if (SrcTy->getElementType() != DestTy->getElementType())
    140     return false;
    141 
    142   // Checks whether the addrspacecast is from a non-generic address space to the
    143   // generic address space.
    144   return (SrcTy->getAddressSpace() != AddressSpace::ADDRESS_SPACE_GENERIC &&
    145           DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC);
    146 }
    147 
// Rewrites gep(addrspacecast(X), indices) into addrspacecast(gep(X, indices)),
// handling both the instruction and the constant-expression forms of the GEP.
// Returns the new addrspacecast, or nullptr if no eliminable cast was found.
Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
    GEPOperator *GEP, int Depth) {
  // First hoist any eliminable addrspacecast out of the GEP's pointer operand.
  Value *NewOperand =
      hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1);
  if (NewOperand == nullptr)
    return nullptr;

  // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr.
  assert(isEliminableAddrSpaceCast(NewOperand));
  Operator *Cast = cast<Operator>(NewOperand);

  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  Value *NewASC;
  if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) {
    // GEP = gep (addrspacecast X), indices
    // =>
    // NewGEP = gep X, indices
    // NewASC = addrspacecast NewGEP
    // Both new values are inserted immediately before the original GEP.
    GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
        GEP->getSourceElementType(), Cast->getOperand(0), Indices,
        "", GEPI);
    NewGEP->setIsInBounds(GEP->isInBounds());
    NewGEP->takeName(GEP);
    NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI);
    // Without RAUWing GEP, the compiler would visit GEP again and emit
    // redundant instructions. This is exercised in test @rauw in
    // access-non-generic.ll.
    GEP->replaceAllUsesWith(NewASC);
  } else {
    // GEP is a constant expression; build the rewritten form out of constant
    // expressions as well.
    Constant *NewGEP = ConstantExpr::getGetElementPtr(
        GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)),
        Indices, GEP->isInBounds());
    NewASC = ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType());
  }
  return NewASC;
}
    185 
// Rewrites bitcast(addrspacecast(Src)) into addrspacecast(bitcast(Src)),
// handling both the instruction and the constant-expression forms of the
// bitcast. Returns the new addrspacecast, or nullptr if no eliminable cast
// was found.
Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast(
    BitCastOperator *BC, int Depth) {
  // First hoist any eliminable addrspacecast out of the bitcast's source.
  Value *NewOperand = hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1);
  if (NewOperand == nullptr)
    return nullptr;

  // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr.
  assert(isEliminableAddrSpaceCast(NewOperand));
  Operator *Cast = cast<Operator>(NewOperand);

  // Cast  = addrspacecast Src
  // BC    = bitcast Cast
  //   =>
  // Cast' = bitcast Src
  // BC'   = addrspacecast Cast'
  Value *Src = Cast->getOperand(0);
  // The new bitcast produces BC's pointee type in Src's address space.
  Type *TypeOfNewCast =
      PointerType::get(BC->getType()->getPointerElementType(),
                       Src->getType()->getPointerAddressSpace());
  Value *NewBC;
  if (BitCastInst *BCI = dyn_cast<BitCastInst>(BC)) {
    // BC is an instruction: insert the rewritten pair right before it.
    Value *NewCast = new BitCastInst(Src, TypeOfNewCast, "", BCI);
    NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI);
    NewBC->takeName(BC);
    // Without RAUWing BC, the compiler would visit BC again and emit
    // redundant instructions. This is exercised in test @rauw in
    // access-non-generic.ll.
    BC->replaceAllUsesWith(NewBC);
  } else {
    // BC is a constant expression; build the rewritten form out of constant
    // expressions as well.
    Constant *NewCast =
        ConstantExpr::getBitCast(cast<Constant>(Src), TypeOfNewCast);
    NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType());
  }
  return NewBC;
}
    222 
    223 Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V,
    224                                                               int Depth) {
    225   // Returns V if V is already an eliminable addrspacecast.
    226   if (isEliminableAddrSpaceCast(V))
    227     return V;
    228 
    229   // Limit the depth to prevent this recursive function from running too long.
    230   const int MaxDepth = 20;
    231   if (Depth >= MaxDepth)
    232     return nullptr;
    233 
    234   // If V is a GEP or bitcast, hoist the addrspacecast if any from its pointer
    235   // operand. This enables optimizeMemoryInstruction to shortcut addrspacecasts
    236   // that are not directly used by the load/store.
    237   if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
    238     return hoistAddrSpaceCastFromGEP(GEP, Depth);
    239 
    240   if (BitCastOperator *BC = dyn_cast<BitCastOperator>(V))
    241     return hoistAddrSpaceCastFromBitCast(BC, Depth);
    242 
    243   return nullptr;
    244 }
    245 
    246 bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI,
    247                                                                unsigned Idx) {
    248   Value *NewOperand = hoistAddrSpaceCastFrom(MI->getOperand(Idx));
    249   if (NewOperand == nullptr)
    250     return false;
    251 
    252   // load/store (addrspacecast X) => load/store X if shortcutting the
    253   // addrspacecast is valid and can improve performance.
    254   //
    255   // e.g.,
    256   // %1 = addrspacecast float addrspace(3)* %0 to float*
    257   // %2 = load float* %1
    258   // ->
    259   // %2 = load float addrspace(3)* %0
    260   //
    261   // Note: the addrspacecast can also be a constant expression.
    262   assert(isEliminableAddrSpaceCast(NewOperand));
    263   Operator *ASC = dyn_cast<Operator>(NewOperand);
    264   MI->setOperand(Idx, ASC->getOperand(0));
    265   return true;
    266 }
    267 
    268 bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) {
    269   if (DisableFavorNonGeneric || skipFunction(F))
    270     return false;
    271 
    272   bool Changed = false;
    273   for (BasicBlock &B : F) {
    274     for (Instruction &I : B) {
    275       if (isa<LoadInst>(I)) {
    276         // V = load P
    277         Changed |= optimizeMemoryInstruction(&I, 0);
    278       } else if (isa<StoreInst>(I)) {
    279         // store V, P
    280         Changed |= optimizeMemoryInstruction(&I, 1);
    281       }
    282     }
    283   }
    284   return Changed;
    285 }
    286 
// Public factory that the NVPTX target uses to add this pass to its pipeline.
// Ownership of the returned pass transfers to the caller (the pass manager).
FunctionPass *llvm::createNVPTXFavorNonGenericAddrSpacesPass() {
  return new NVPTXFavorNonGenericAddrSpaces();
}
    290