Home | History | Annotate | Download | only in NVPTX
      1 //===-- NVPTXReplaceImageHandles.cpp - Replace image handles for Fermi ----===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // On Fermi, image handles are not supported. To work around this, we traverse
     11 // the machine code and replace image handles with concrete symbols. For this
     12 // to work reliably, inlining of all function calls must be performed.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "NVPTX.h"
     17 #include "NVPTXMachineFunctionInfo.h"
     18 #include "llvm/CodeGen/MachineFunction.h"
     19 #include "llvm/CodeGen/MachineFunctionPass.h"
     20 #include "llvm/CodeGen/MachineRegisterInfo.h"
     21 #include "llvm/Support/raw_ostream.h"
     22 #include "llvm/ADT/DenseSet.h"
     23 
     24 using namespace llvm;
     25 
     26 namespace {
     27 class NVPTXReplaceImageHandles : public MachineFunctionPass {
     28 private:
     29   static char ID;
     30   DenseSet<MachineInstr *> InstrsToRemove;
     31 
     32 public:
     33   NVPTXReplaceImageHandles();
     34 
     35   bool runOnMachineFunction(MachineFunction &MF) override;
     36 private:
     37   bool processInstr(MachineInstr &MI);
     38   void replaceImageHandle(MachineOperand &Op, MachineFunction &MF);
     39 };
     40 }
     41 
     42 char NVPTXReplaceImageHandles::ID = 0;
     43 
     44 NVPTXReplaceImageHandles::NVPTXReplaceImageHandles()
     45   : MachineFunctionPass(ID) {}
     46 
     47 bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) {
     48   bool Changed = false;
     49   InstrsToRemove.clear();
     50 
     51   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
     52        ++BI) {
     53     for (MachineBasicBlock::iterator I = (*BI).begin(), E = (*BI).end();
     54          I != E; ++I) {
     55       MachineInstr &MI = *I;
     56       Changed |= processInstr(MI);
     57     }
     58   }
     59 
     60   // Now clean up any handle-access instructions
     61   // This is needed in debug mode when code cleanup passes are not executed,
     62   // but we need the handle access to be eliminated because they are not
     63   // valid instructions when image handles are disabled.
     64   for (DenseSet<MachineInstr *>::iterator I = InstrsToRemove.begin(),
     65        E = InstrsToRemove.end(); I != E; ++I) {
     66     (*I)->eraseFromParent();
     67   }
     68 
     69   return Changed;
     70 }
     71 
     72 bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
     73   MachineFunction &MF = *MI.getParent()->getParent();
     74   // Check if we have a surface/texture instruction
     75   switch (MI.getOpcode()) {
     76   default: return false;
     77   case NVPTX::TEX_1D_F32_I32:
     78   case NVPTX::TEX_1D_F32_F32:
     79   case NVPTX::TEX_1D_F32_F32_LEVEL:
     80   case NVPTX::TEX_1D_F32_F32_GRAD:
     81   case NVPTX::TEX_1D_I32_I32:
     82   case NVPTX::TEX_1D_I32_F32:
     83   case NVPTX::TEX_1D_I32_F32_LEVEL:
     84   case NVPTX::TEX_1D_I32_F32_GRAD:
     85   case NVPTX::TEX_1D_ARRAY_F32_I32:
     86   case NVPTX::TEX_1D_ARRAY_F32_F32:
     87   case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
     88   case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
     89   case NVPTX::TEX_1D_ARRAY_I32_I32:
     90   case NVPTX::TEX_1D_ARRAY_I32_F32:
     91   case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
     92   case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
     93   case NVPTX::TEX_2D_F32_I32:
     94   case NVPTX::TEX_2D_F32_F32:
     95   case NVPTX::TEX_2D_F32_F32_LEVEL:
     96   case NVPTX::TEX_2D_F32_F32_GRAD:
     97   case NVPTX::TEX_2D_I32_I32:
     98   case NVPTX::TEX_2D_I32_F32:
     99   case NVPTX::TEX_2D_I32_F32_LEVEL:
    100   case NVPTX::TEX_2D_I32_F32_GRAD:
    101   case NVPTX::TEX_2D_ARRAY_F32_I32:
    102   case NVPTX::TEX_2D_ARRAY_F32_F32:
    103   case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
    104   case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
    105   case NVPTX::TEX_2D_ARRAY_I32_I32:
    106   case NVPTX::TEX_2D_ARRAY_I32_F32:
    107   case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
    108   case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
    109   case NVPTX::TEX_3D_F32_I32:
    110   case NVPTX::TEX_3D_F32_F32:
    111   case NVPTX::TEX_3D_F32_F32_LEVEL:
    112   case NVPTX::TEX_3D_F32_F32_GRAD:
    113   case NVPTX::TEX_3D_I32_I32:
    114   case NVPTX::TEX_3D_I32_F32:
    115   case NVPTX::TEX_3D_I32_F32_LEVEL:
    116   case NVPTX::TEX_3D_I32_F32_GRAD: {
    117     // This is a texture fetch, so operand 4 is a texref and operand 5 is
    118     // a samplerref
    119     MachineOperand &TexHandle = MI.getOperand(4);
    120     MachineOperand &SampHandle = MI.getOperand(5);
    121 
    122     replaceImageHandle(TexHandle, MF);
    123     replaceImageHandle(SampHandle, MF);
    124 
    125     return true;
    126   }
    127   case NVPTX::SULD_1D_I8_TRAP:
    128   case NVPTX::SULD_1D_I16_TRAP:
    129   case NVPTX::SULD_1D_I32_TRAP:
    130   case NVPTX::SULD_1D_ARRAY_I8_TRAP:
    131   case NVPTX::SULD_1D_ARRAY_I16_TRAP:
    132   case NVPTX::SULD_1D_ARRAY_I32_TRAP:
    133   case NVPTX::SULD_2D_I8_TRAP:
    134   case NVPTX::SULD_2D_I16_TRAP:
    135   case NVPTX::SULD_2D_I32_TRAP:
    136   case NVPTX::SULD_2D_ARRAY_I8_TRAP:
    137   case NVPTX::SULD_2D_ARRAY_I16_TRAP:
    138   case NVPTX::SULD_2D_ARRAY_I32_TRAP:
    139   case NVPTX::SULD_3D_I8_TRAP:
    140   case NVPTX::SULD_3D_I16_TRAP:
    141   case NVPTX::SULD_3D_I32_TRAP: {
    142     // This is a V1 surface load, so operand 1 is a surfref
    143     MachineOperand &SurfHandle = MI.getOperand(1);
    144 
    145     replaceImageHandle(SurfHandle, MF);
    146 
    147     return true;
    148   }
    149   case NVPTX::SULD_1D_V2I8_TRAP:
    150   case NVPTX::SULD_1D_V2I16_TRAP:
    151   case NVPTX::SULD_1D_V2I32_TRAP:
    152   case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
    153   case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
    154   case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
    155   case NVPTX::SULD_2D_V2I8_TRAP:
    156   case NVPTX::SULD_2D_V2I16_TRAP:
    157   case NVPTX::SULD_2D_V2I32_TRAP:
    158   case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
    159   case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
    160   case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
    161   case NVPTX::SULD_3D_V2I8_TRAP:
    162   case NVPTX::SULD_3D_V2I16_TRAP:
    163   case NVPTX::SULD_3D_V2I32_TRAP: {
    164     // This is a V2 surface load, so operand 2 is a surfref
    165     MachineOperand &SurfHandle = MI.getOperand(2);
    166 
    167     replaceImageHandle(SurfHandle, MF);
    168 
    169     return true;
    170   }
    171   case NVPTX::SULD_1D_V4I8_TRAP:
    172   case NVPTX::SULD_1D_V4I16_TRAP:
    173   case NVPTX::SULD_1D_V4I32_TRAP:
    174   case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
    175   case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
    176   case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
    177   case NVPTX::SULD_2D_V4I8_TRAP:
    178   case NVPTX::SULD_2D_V4I16_TRAP:
    179   case NVPTX::SULD_2D_V4I32_TRAP:
    180   case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
    181   case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
    182   case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
    183   case NVPTX::SULD_3D_V4I8_TRAP:
    184   case NVPTX::SULD_3D_V4I16_TRAP:
    185   case NVPTX::SULD_3D_V4I32_TRAP: {
    186     // This is a V4 surface load, so operand 4 is a surfref
    187     MachineOperand &SurfHandle = MI.getOperand(4);
    188 
    189     replaceImageHandle(SurfHandle, MF);
    190 
    191     return true;
    192   }
    193   case NVPTX::SUST_B_1D_B8_TRAP:
    194   case NVPTX::SUST_B_1D_B16_TRAP:
    195   case NVPTX::SUST_B_1D_B32_TRAP:
    196   case NVPTX::SUST_B_1D_V2B8_TRAP:
    197   case NVPTX::SUST_B_1D_V2B16_TRAP:
    198   case NVPTX::SUST_B_1D_V2B32_TRAP:
    199   case NVPTX::SUST_B_1D_V4B8_TRAP:
    200   case NVPTX::SUST_B_1D_V4B16_TRAP:
    201   case NVPTX::SUST_B_1D_V4B32_TRAP:
    202   case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
    203   case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
    204   case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
    205   case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
    206   case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
    207   case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
    208   case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
    209   case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
    210   case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
    211   case NVPTX::SUST_B_2D_B8_TRAP:
    212   case NVPTX::SUST_B_2D_B16_TRAP:
    213   case NVPTX::SUST_B_2D_B32_TRAP:
    214   case NVPTX::SUST_B_2D_V2B8_TRAP:
    215   case NVPTX::SUST_B_2D_V2B16_TRAP:
    216   case NVPTX::SUST_B_2D_V2B32_TRAP:
    217   case NVPTX::SUST_B_2D_V4B8_TRAP:
    218   case NVPTX::SUST_B_2D_V4B16_TRAP:
    219   case NVPTX::SUST_B_2D_V4B32_TRAP:
    220   case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
    221   case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
    222   case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
    223   case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
    224   case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
    225   case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
    226   case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
    227   case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
    228   case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
    229   case NVPTX::SUST_B_3D_B8_TRAP:
    230   case NVPTX::SUST_B_3D_B16_TRAP:
    231   case NVPTX::SUST_B_3D_B32_TRAP:
    232   case NVPTX::SUST_B_3D_V2B8_TRAP:
    233   case NVPTX::SUST_B_3D_V2B16_TRAP:
    234   case NVPTX::SUST_B_3D_V2B32_TRAP:
    235   case NVPTX::SUST_B_3D_V4B8_TRAP:
    236   case NVPTX::SUST_B_3D_V4B16_TRAP:
    237   case NVPTX::SUST_B_3D_V4B32_TRAP:
    238   case NVPTX::SUST_P_1D_B8_TRAP:
    239   case NVPTX::SUST_P_1D_B16_TRAP:
    240   case NVPTX::SUST_P_1D_B32_TRAP:
    241   case NVPTX::SUST_P_1D_V2B8_TRAP:
    242   case NVPTX::SUST_P_1D_V2B16_TRAP:
    243   case NVPTX::SUST_P_1D_V2B32_TRAP:
    244   case NVPTX::SUST_P_1D_V4B8_TRAP:
    245   case NVPTX::SUST_P_1D_V4B16_TRAP:
    246   case NVPTX::SUST_P_1D_V4B32_TRAP:
    247   case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
    248   case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
    249   case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
    250   case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
    251   case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
    252   case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
    253   case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
    254   case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
    255   case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
    256   case NVPTX::SUST_P_2D_B8_TRAP:
    257   case NVPTX::SUST_P_2D_B16_TRAP:
    258   case NVPTX::SUST_P_2D_B32_TRAP:
    259   case NVPTX::SUST_P_2D_V2B8_TRAP:
    260   case NVPTX::SUST_P_2D_V2B16_TRAP:
    261   case NVPTX::SUST_P_2D_V2B32_TRAP:
    262   case NVPTX::SUST_P_2D_V4B8_TRAP:
    263   case NVPTX::SUST_P_2D_V4B16_TRAP:
    264   case NVPTX::SUST_P_2D_V4B32_TRAP:
    265   case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
    266   case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
    267   case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
    268   case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
    269   case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
    270   case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
    271   case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
    272   case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
    273   case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
    274   case NVPTX::SUST_P_3D_B8_TRAP:
    275   case NVPTX::SUST_P_3D_B16_TRAP:
    276   case NVPTX::SUST_P_3D_B32_TRAP:
    277   case NVPTX::SUST_P_3D_V2B8_TRAP:
    278   case NVPTX::SUST_P_3D_V2B16_TRAP:
    279   case NVPTX::SUST_P_3D_V2B32_TRAP:
    280   case NVPTX::SUST_P_3D_V4B8_TRAP:
    281   case NVPTX::SUST_P_3D_V4B16_TRAP:
    282   case NVPTX::SUST_P_3D_V4B32_TRAP: {
    283     // This is a surface store, so operand 0 is a surfref
    284     MachineOperand &SurfHandle = MI.getOperand(0);
    285 
    286     replaceImageHandle(SurfHandle, MF);
    287 
    288     return true;
    289   }
    290   case NVPTX::TXQ_CHANNEL_ORDER:
    291   case NVPTX::TXQ_CHANNEL_DATA_TYPE:
    292   case NVPTX::TXQ_WIDTH:
    293   case NVPTX::TXQ_HEIGHT:
    294   case NVPTX::TXQ_DEPTH:
    295   case NVPTX::TXQ_ARRAY_SIZE:
    296   case NVPTX::TXQ_NUM_SAMPLES:
    297   case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
    298   case NVPTX::SUQ_CHANNEL_ORDER:
    299   case NVPTX::SUQ_CHANNEL_DATA_TYPE:
    300   case NVPTX::SUQ_WIDTH:
    301   case NVPTX::SUQ_HEIGHT:
    302   case NVPTX::SUQ_DEPTH:
    303   case NVPTX::SUQ_ARRAY_SIZE: {
    304     // This is a query, so operand 1 is a surfref/texref
    305     MachineOperand &Handle = MI.getOperand(1);
    306 
    307     replaceImageHandle(Handle, MF);
    308 
    309     return true;
    310   }
    311   }
    312 }
    313 
    314 void NVPTXReplaceImageHandles::
    315 replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
    316   const MachineRegisterInfo &MRI = MF.getRegInfo();
    317   NVPTXMachineFunctionInfo *MFI = MF.getInfo<NVPTXMachineFunctionInfo>();
    318   // Which instruction defines the handle?
    319   MachineInstr *MI = MRI.getVRegDef(Op.getReg());
    320   assert(MI && "No def for image handle vreg?");
    321   MachineInstr &TexHandleDef = *MI;
    322 
    323   switch (TexHandleDef.getOpcode()) {
    324   case NVPTX::LD_i64_avar: {
    325     // The handle is a parameter value being loaded, replace with the
    326     // parameter symbol
    327     assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!");
    328     StringRef Sym = TexHandleDef.getOperand(6).getSymbolName();
    329     std::string ParamBaseName = MF.getName();
    330     ParamBaseName += "_param_";
    331     assert(Sym.startswith(ParamBaseName) && "Invalid symbol reference");
    332     unsigned Param = atoi(Sym.data()+ParamBaseName.size());
    333     std::string NewSym;
    334     raw_string_ostream NewSymStr(NewSym);
    335     NewSymStr << MF.getFunction()->getName() << "_param_" << Param;
    336     Op.ChangeToImmediate(
    337       MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str()));
    338     InstrsToRemove.insert(&TexHandleDef);
    339     break;
    340   }
    341   case NVPTX::texsurf_handles: {
    342     // The handle is a global variable, replace with the global variable name
    343     assert(TexHandleDef.getOperand(1).isGlobal() && "Load is not a global!");
    344     const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal();
    345     assert(GV->hasName() && "Global sampler must be named!");
    346     Op.ChangeToImmediate(MFI->getImageHandleSymbolIndex(GV->getName().data()));
    347     InstrsToRemove.insert(&TexHandleDef);
    348     break;
    349   }
    350   default:
    351     llvm_unreachable("Unknown instruction operating on handle");
    352   }
    353 }
    354 
    355 MachineFunctionPass *llvm::createNVPTXReplaceImageHandlesPass() {
    356   return new NVPTXReplaceImageHandles();
    357 }
    358