Home | History | Annotate | Download | only in R600
//===-- SIInsertWaits.cpp - Insert wait instructions ----------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// \brief Insert wait instructions for memory reads and writes.
     12 ///
     13 /// Memory reads and writes are issued asynchronously, so we need to insert
     14 /// S_WAITCNT instructions when we want to access any of their results or
     15 /// overwrite any register that's used asynchronously.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #include "AMDGPU.h"
     20 #include "SIInstrInfo.h"
     21 #include "SIMachineFunctionInfo.h"
     22 #include "llvm/CodeGen/MachineFunction.h"
     23 #include "llvm/CodeGen/MachineFunctionPass.h"
     24 #include "llvm/CodeGen/MachineInstrBuilder.h"
     25 #include "llvm/CodeGen/MachineRegisterInfo.h"
     26 
     27 using namespace llvm;
     28 
     29 namespace {
     30 
     31 /// \brief One variable for each of the hardware counters
     32 typedef union {
     33   struct {
     34     unsigned VM;
     35     unsigned EXP;
     36     unsigned LGKM;
     37   } Named;
     38   unsigned Array[3];
     39 
     40 } Counters;
     41 
     42 typedef Counters RegCounters[512];
     43 typedef std::pair<unsigned, unsigned> RegInterval;
     44 
     45 class SIInsertWaits : public MachineFunctionPass {
     46 
     47 private:
     48   static char ID;
     49   const SIInstrInfo *TII;
     50   const SIRegisterInfo &TRI;
     51   const MachineRegisterInfo *MRI;
     52 
     53   /// \brief Constant hardware limits
     54   static const Counters WaitCounts;
     55 
     56   /// \brief Constant zero value
     57   static const Counters ZeroCounts;
     58 
     59   /// \brief Counter values we have already waited on.
     60   Counters WaitedOn;
     61 
     62   /// \brief Counter values for last instruction issued.
     63   Counters LastIssued;
     64 
     65   /// \brief Registers used by async instructions.
     66   RegCounters UsedRegs;
     67 
     68   /// \brief Registers defined by async instructions.
     69   RegCounters DefinedRegs;
     70 
     71   /// \brief Different export instruction types seen since last wait.
     72   unsigned ExpInstrTypesSeen;
     73 
     74   /// \brief Get increment/decrement amount for this instruction.
     75   Counters getHwCounts(MachineInstr &MI);
     76 
     77   /// \brief Is operand relevant for async execution?
     78   bool isOpRelevant(MachineOperand &Op);
     79 
     80   /// \brief Get register interval an operand affects.
     81   RegInterval getRegInterval(MachineOperand &Op);
     82 
     83   /// \brief Handle instructions async components
     84   void pushInstruction(MachineInstr &MI);
     85 
     86   /// \brief Insert the actual wait instruction
     87   bool insertWait(MachineBasicBlock &MBB,
     88                   MachineBasicBlock::iterator I,
     89                   const Counters &Counts);
     90 
     91   /// \brief Do we need def2def checks?
     92   bool unorderedDefines(MachineInstr &MI);
     93 
     94   /// \brief Resolve all operand dependencies to counter requirements
     95   Counters handleOperands(MachineInstr &MI);
     96 
     97 public:
     98   SIInsertWaits(TargetMachine &tm) :
     99     MachineFunctionPass(ID),
    100     TII(static_cast<const SIInstrInfo*>(tm.getInstrInfo())),
    101     TRI(TII->getRegisterInfo()) { }
    102 
    103   virtual bool runOnMachineFunction(MachineFunction &MF);
    104 
    105   const char *getPassName() const {
    106     return "SI insert wait  instructions";
    107   }
    108 
    109 };
    110 
    111 } // End anonymous namespace
    112 
    113 char SIInsertWaits::ID = 0;
    114 
    115 const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } };
    116 const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
    117 
    118 FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) {
    119   return new SIInsertWaits(tm);
    120 }
    121 
    122 Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
    123 
    124   uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
    125   Counters Result;
    126 
    127   Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
    128 
    129   // Only consider stores or EXP for EXP_CNT
    130   Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
    131       (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
    132 
    133   // LGKM may uses larger values
    134   if (TSFlags & SIInstrFlags::LGKM_CNT) {
    135 
    136     MachineOperand &Op = MI.getOperand(0);
    137     assert(Op.isReg() && "First LGKM operand must be a register!");
    138 
    139     unsigned Reg = Op.getReg();
    140     unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize();
    141     Result.Named.LGKM = Size > 4 ? 2 : 1;
    142 
    143   } else {
    144     Result.Named.LGKM = 0;
    145   }
    146 
    147   return Result;
    148 }
    149 
    150 bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
    151 
    152   // Constants are always irrelevant
    153   if (!Op.isReg())
    154     return false;
    155 
    156   // Defines are always relevant
    157   if (Op.isDef())
    158     return true;
    159 
    160   // For exports all registers are relevant
    161   MachineInstr &MI = *Op.getParent();
    162   if (MI.getOpcode() == AMDGPU::EXP)
    163     return true;
    164 
    165   // For stores the stored value is also relevant
    166   if (!MI.getDesc().mayStore())
    167     return false;
    168 
    169   for (MachineInstr::mop_iterator I = MI.operands_begin(),
    170        E = MI.operands_end(); I != E; ++I) {
    171 
    172     if (I->isReg() && I->isUse())
    173       return Op.isIdenticalTo(*I);
    174   }
    175 
    176   return false;
    177 }
    178 
    179 RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
    180 
    181   if (!Op.isReg())
    182     return std::make_pair(0, 0);
    183 
    184   unsigned Reg = Op.getReg();
    185   unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize();
    186 
    187   assert(Size >= 4);
    188 
    189   RegInterval Result;
    190   Result.first = TRI.getEncodingValue(Reg);
    191   Result.second = Result.first + Size / 4;
    192 
    193   return Result;
    194 }
    195 
    196 void SIInsertWaits::pushInstruction(MachineInstr &MI) {
    197 
    198   // Get the hardware counter increments and sum them up
    199   Counters Increment = getHwCounts(MI);
    200   unsigned Sum = 0;
    201 
    202   for (unsigned i = 0; i < 3; ++i) {
    203     LastIssued.Array[i] += Increment.Array[i];
    204     Sum += Increment.Array[i];
    205   }
    206 
    207   // If we don't increase anything then that's it
    208   if (Sum == 0)
    209     return;
    210 
    211   // Remember which export instructions we have seen
    212   if (Increment.Named.EXP) {
    213     ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2;
    214   }
    215 
    216   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    217 
    218     MachineOperand &Op = MI.getOperand(i);
    219     if (!isOpRelevant(Op))
    220       continue;
    221 
    222     RegInterval Interval = getRegInterval(Op);
    223     for (unsigned j = Interval.first; j < Interval.second; ++j) {
    224 
    225       // Remember which registers we define
    226       if (Op.isDef())
    227         DefinedRegs[j] = LastIssued;
    228 
    229       // and which one we are using
    230       if (Op.isUse())
    231         UsedRegs[j] = LastIssued;
    232     }
    233   }
    234 }
    235 
    236 bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
    237                                MachineBasicBlock::iterator I,
    238                                const Counters &Required) {
    239 
    240   // End of program? No need to wait on anything
    241   if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
    242     return false;
    243 
    244   // Figure out if the async instructions execute in order
    245   bool Ordered[3];
    246 
    247   // VM_CNT is always ordered
    248   Ordered[0] = true;
    249 
    250   // EXP_CNT is unordered if we have both EXP & VM-writes
    251   Ordered[1] = ExpInstrTypesSeen == 3;
    252 
    253   // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
    254   Ordered[2] = false;
    255 
    256   // The values we are going to put into the S_WAITCNT instruction
    257   Counters Counts = WaitCounts;
    258 
    259   // Do we really need to wait?
    260   bool NeedWait = false;
    261 
    262   for (unsigned i = 0; i < 3; ++i) {
    263 
    264     if (Required.Array[i] <= WaitedOn.Array[i])
    265       continue;
    266 
    267     NeedWait = true;
    268 
    269     if (Ordered[i]) {
    270       unsigned Value = LastIssued.Array[i] - Required.Array[i];
    271 
    272       // adjust the value to the real hardware posibilities
    273       Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
    274 
    275     } else
    276       Counts.Array[i] = 0;
    277 
    278     // Remember on what we have waited on
    279     WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
    280   }
    281 
    282   if (!NeedWait)
    283     return false;
    284 
    285   // Reset EXP_CNT instruction types
    286   if (Counts.Named.EXP == 0)
    287     ExpInstrTypesSeen = 0;
    288 
    289   // Build the wait instruction
    290   BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
    291           .addImm((Counts.Named.VM & 0xF) |
    292                   ((Counts.Named.EXP & 0x7) << 4) |
    293                   ((Counts.Named.LGKM & 0x7) << 8));
    294 
    295   return true;
    296 }
    297 
    298 /// \brief helper function for handleOperands
    299 static void increaseCounters(Counters &Dst, const Counters &Src) {
    300 
    301   for (unsigned i = 0; i < 3; ++i)
    302     Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
    303 }
    304 
    305 Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
    306 
    307   Counters Result = ZeroCounts;
    308 
    309   // For each register affected by this
    310   // instruction increase the result sequence
    311   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    312 
    313     MachineOperand &Op = MI.getOperand(i);
    314     RegInterval Interval = getRegInterval(Op);
    315     for (unsigned j = Interval.first; j < Interval.second; ++j) {
    316 
    317       if (Op.isDef()) {
    318         increaseCounters(Result, UsedRegs[j]);
    319         increaseCounters(Result, DefinedRegs[j]);
    320       }
    321 
    322       if (Op.isUse())
    323         increaseCounters(Result, DefinedRegs[j]);
    324     }
    325   }
    326 
    327   return Result;
    328 }
    329 
    330 bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
    331 
    332   bool Changes = false;
    333 
    334   MRI = &MF.getRegInfo();
    335 
    336   WaitedOn = ZeroCounts;
    337   LastIssued = ZeroCounts;
    338 
    339   memset(&UsedRegs, 0, sizeof(UsedRegs));
    340   memset(&DefinedRegs, 0, sizeof(DefinedRegs));
    341 
    342   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
    343        BI != BE; ++BI) {
    344 
    345     MachineBasicBlock &MBB = *BI;
    346     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
    347          I != E; ++I) {
    348 
    349       Changes |= insertWait(MBB, I, handleOperands(*I));
    350       pushInstruction(*I);
    351     }
    352 
    353     // Wait for everything at the end of the MBB
    354     Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
    355   }
    356 
    357   return Changes;
    358 }
    359