//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions that use intrinsics
/// which affect calling convention lowering.
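///
/// For example, a function that calls @llvm.amdgcn.workitem.id.y is given
/// the "amdgpu-work-item-id-y" attribute, so calling convention lowering
/// knows the corresponding input register must be initialized.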
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {

class AMDGPUAnnotateKernelFeatures : public ModulePass {
private:
  static bool hasAddrSpaceCast(const Function &F);

  void addAttrToCallers(Function *Intrin, StringRef AttrName);
  bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { }
  bool runOnModule(Module &M) override;
  const char *getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    ModulePass::getAnalysisUsage(AU);
  }

  static bool visitConstantExpr(const ConstantExpr *CE);
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
};

} // end anonymous namespace

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)

// The queue ptr is only needed when casting to flat, not from it.
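// Casting a local or private pointer to flat requires the aperture base
// addresses, which on HSA are obtained via the queue ptr.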
static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace());
}

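// Return true if this constant expression is an addrspacecast from an
// address space that requires the queue ptr.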
bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS);
  }

  return false;
}

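// Iteratively visit EntryC and every constant reachable through its
// operands, returning true if any visited constant expression requires the
// queue ptr. ConstantExprVisited is shared across calls so each constant is
// only processed once per function.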
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
  const Constant *EntryC,
  SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {

  if (!ConstantExprVisited.insert(EntryC).second)
    return false;

  SmallVector<const Constant *, 16> Stack;
  Stack.push_back(EntryC);

  while (!Stack.empty()) {
    const Constant *C = Stack.pop_back_val();

    // Check this constant expression.
    if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (visitConstantExpr(CE))
        return true;
    }

    // Visit all sub-expressions.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      if (!ConstantExprVisited.insert(OpC).second)
        continue;

      Stack.push_back(OpC);
    }
  }

  return false;
}

// Return true if an addrspacecast is used that requires the queue ptr.
bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC))
          return true;
      }

      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited))
          return true;
      }
    }
  }

  return false;
}

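// Add AttrName to every function that directly calls Intrin. SeenFuncs
// avoids re-adding the attribute when a function calls the intrinsic more
// than once.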
void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin,
                                                    StringRef AttrName) {
  SmallPtrSet<Function *, 4> SeenFuncs;

  for (User *U : Intrin->users()) {
    // CallInst is the only valid user for an intrinsic.
    CallInst *CI = cast<CallInst>(U);

    Function *CallingFunction = CI->getParent()->getParent();
    if (SeenFuncs.insert(CallingFunction).second)
      CallingFunction->addFnAttr(AttrName);
  }
}

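// For each {intrinsic name, attribute name} pair, tag all callers of the
// intrinsic with the attribute. Returns true if any of the intrinsics are
// present in the module.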
bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
  Module &M,
  ArrayRef<StringRef[2]> IntrinsicToAttr) {
  bool Changed = false;

  for (const StringRef *Arr : IntrinsicToAttr) {
    if (Function *Fn = M.getFunction(Arr[0])) {
      addAttrToCallers(Fn, Arr[1]);
      Changed = true;
    }
  }

  return Changed;
}

bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
  Triple TT(M.getTargetTriple());

  static const StringRef IntrinsicToAttr[][2] = {
    // .x omitted
    { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" },
    { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" },

    { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" },
    { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" },

    { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" },
    { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" },

    // .x omitted
    { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" },
    { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" }
  };

  static const StringRef HSAIntrinsicToAttr[][2] = {
    { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
    { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" }
  };

  // TODO: We should not add the attributes if the known compile-time
  // workgroup size is 1 for y/z.

  // TODO: Intrinsics that require the queue ptr.

  // We do not need to note the x workitem or workgroup id because they are
  // always initialized.

  bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr);
  if (TT.getOS() == Triple::AMDHSA) {
    Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr);

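    // Even without an explicit intrinsic call, an addrspacecast from local
    // or private to flat still needs the queue ptr for the aperture bases.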
    for (Function &F : M) {
      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        continue;

      if (hasAddrSpaceCast(F))
        F.addFnAttr("amdgpu-queue-ptr");
    }
  }

  return Changed;
}

ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}