Home | History | Annotate | Download | only in R600
      1 //===-- SIInsertWaits.cpp - Insert wait instructions ----------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// \brief Insert wait instructions for memory reads and writes.
     12 ///
     13 /// Memory reads and writes are issued asynchronously, so we need to insert
     14 /// S_WAITCNT instructions when we want to access any of their results or
     15 /// overwrite any register that's used asynchronously.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #include "AMDGPU.h"
     20 #include "SIInstrInfo.h"
     21 #include "SIMachineFunctionInfo.h"
     22 #include "llvm/CodeGen/MachineFunction.h"
     23 #include "llvm/CodeGen/MachineFunctionPass.h"
     24 #include "llvm/CodeGen/MachineInstrBuilder.h"
     25 #include "llvm/CodeGen/MachineRegisterInfo.h"
     26 
     27 using namespace llvm;
     28 
     29 namespace {
     30 
     /// \brief Holds one value per hardware counter.
     ///
     /// The three values can be addressed by name through \c Named or by index
     /// through \c Array; the pass uses index 0 for VM, 1 for EXP and 2 for LGKM.
     union Counters {
       struct {
         unsigned VM;
         unsigned EXP;
         unsigned LGKM;
       } Named;
       unsigned Array[3];
     };

     /// \brief Table of counter values with one entry per register slot.
     typedef Counters RegCounters[512];

     /// \brief Range of register slots; the pass iterates it as [first, second).
     typedef std::pair<unsigned, unsigned> RegInterval;
     44 
     45 class SIInsertWaits : public MachineFunctionPass {
     46 
     47 private:
     48   static char ID;
     49   const SIInstrInfo *TII;
     50   const SIRegisterInfo *TRI;
     51   const MachineRegisterInfo *MRI;
     52 
     53   /// \brief Constant hardware limits
     54   static const Counters WaitCounts;
     55 
     56   /// \brief Constant zero value
     57   static const Counters ZeroCounts;
     58 
     59   /// \brief Counter values we have already waited on.
     60   Counters WaitedOn;
     61 
     62   /// \brief Counter values for last instruction issued.
     63   Counters LastIssued;
     64 
     65   /// \brief Registers used by async instructions.
     66   RegCounters UsedRegs;
     67 
     68   /// \brief Registers defined by async instructions.
     69   RegCounters DefinedRegs;
     70 
     71   /// \brief Different export instruction types seen since last wait.
     72   unsigned ExpInstrTypesSeen;
     73 
     74   /// \brief Get increment/decrement amount for this instruction.
     75   Counters getHwCounts(MachineInstr &MI);
     76 
     77   /// \brief Is operand relevant for async execution?
     78   bool isOpRelevant(MachineOperand &Op);
     79 
     80   /// \brief Get register interval an operand affects.
     81   RegInterval getRegInterval(MachineOperand &Op);
     82 
     83   /// \brief Handle instructions async components
     84   void pushInstruction(MachineInstr &MI);
     85 
     86   /// \brief Insert the actual wait instruction
     87   bool insertWait(MachineBasicBlock &MBB,
     88                   MachineBasicBlock::iterator I,
     89                   const Counters &Counts);
     90 
     91   /// \brief Do we need def2def checks?
     92   bool unorderedDefines(MachineInstr &MI);
     93 
     94   /// \brief Resolve all operand dependencies to counter requirements
     95   Counters handleOperands(MachineInstr &MI);
     96 
     97 public:
     98   SIInsertWaits(TargetMachine &tm) :
     99     MachineFunctionPass(ID),
    100     TII(0),
    101     TRI(0),
    102     ExpInstrTypesSeen(0) { }
    103 
    104   virtual bool runOnMachineFunction(MachineFunction &MF);
    105 
    106   const char *getPassName() const {
    107     return "SI insert wait  instructions";
    108   }
    109 
    110 };
    111 
    112 } // End anonymous namespace
    113 
    114 char SIInsertWaits::ID = 0;
    115 
    116 const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } };
    117 const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
    118 
    119 FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) {
    120   return new SIInsertWaits(tm);
    121 }
    122 
    123 Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
    124 
    125   uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
    126   Counters Result;
    127 
    128   Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
    129 
    130   // Only consider stores or EXP for EXP_CNT
    131   Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
    132       (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
    133 
    134   // LGKM may uses larger values
    135   if (TSFlags & SIInstrFlags::LGKM_CNT) {
    136 
    137     MachineOperand &Op = MI.getOperand(0);
    138     if (!Op.isReg())
    139       Op = MI.getOperand(1);
    140     assert(Op.isReg() && "First LGKM operand must be a register!");
    141 
    142     unsigned Reg = Op.getReg();
    143     unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
    144     Result.Named.LGKM = Size > 4 ? 2 : 1;
    145 
    146   } else {
    147     Result.Named.LGKM = 0;
    148   }
    149 
    150   return Result;
    151 }
    152 
    153 bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
    154 
    155   // Constants are always irrelevant
    156   if (!Op.isReg())
    157     return false;
    158 
    159   // Defines are always relevant
    160   if (Op.isDef())
    161     return true;
    162 
    163   // For exports all registers are relevant
    164   MachineInstr &MI = *Op.getParent();
    165   if (MI.getOpcode() == AMDGPU::EXP)
    166     return true;
    167 
    168   // For stores the stored value is also relevant
    169   if (!MI.getDesc().mayStore())
    170     return false;
    171 
    172   for (MachineInstr::mop_iterator I = MI.operands_begin(),
    173        E = MI.operands_end(); I != E; ++I) {
    174 
    175     if (I->isReg() && I->isUse())
    176       return Op.isIdenticalTo(*I);
    177   }
    178 
    179   return false;
    180 }
    181 
    182 RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
    183 
    184   if (!Op.isReg())
    185     return std::make_pair(0, 0);
    186 
    187   unsigned Reg = Op.getReg();
    188   unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
    189 
    190   assert(Size >= 4);
    191 
    192   RegInterval Result;
    193   Result.first = TRI->getEncodingValue(Reg);
    194   Result.second = Result.first + Size / 4;
    195 
    196   return Result;
    197 }
    198 
    199 void SIInsertWaits::pushInstruction(MachineInstr &MI) {
    200 
    201   // Get the hardware counter increments and sum them up
    202   Counters Increment = getHwCounts(MI);
    203   unsigned Sum = 0;
    204 
    205   for (unsigned i = 0; i < 3; ++i) {
    206     LastIssued.Array[i] += Increment.Array[i];
    207     Sum += Increment.Array[i];
    208   }
    209 
    210   // If we don't increase anything then that's it
    211   if (Sum == 0)
    212     return;
    213 
    214   // Remember which export instructions we have seen
    215   if (Increment.Named.EXP) {
    216     ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2;
    217   }
    218 
    219   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    220 
    221     MachineOperand &Op = MI.getOperand(i);
    222     if (!isOpRelevant(Op))
    223       continue;
    224 
    225     RegInterval Interval = getRegInterval(Op);
    226     for (unsigned j = Interval.first; j < Interval.second; ++j) {
    227 
    228       // Remember which registers we define
    229       if (Op.isDef())
    230         DefinedRegs[j] = LastIssued;
    231 
    232       // and which one we are using
    233       if (Op.isUse())
    234         UsedRegs[j] = LastIssued;
    235     }
    236   }
    237 }
    238 
    239 bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
    240                                MachineBasicBlock::iterator I,
    241                                const Counters &Required) {
    242 
    243   // End of program? No need to wait on anything
    244   if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
    245     return false;
    246 
    247   // Figure out if the async instructions execute in order
    248   bool Ordered[3];
    249 
    250   // VM_CNT is always ordered
    251   Ordered[0] = true;
    252 
    253   // EXP_CNT is unordered if we have both EXP & VM-writes
    254   Ordered[1] = ExpInstrTypesSeen == 3;
    255 
    256   // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
    257   Ordered[2] = false;
    258 
    259   // The values we are going to put into the S_WAITCNT instruction
    260   Counters Counts = WaitCounts;
    261 
    262   // Do we really need to wait?
    263   bool NeedWait = false;
    264 
    265   for (unsigned i = 0; i < 3; ++i) {
    266 
    267     if (Required.Array[i] <= WaitedOn.Array[i])
    268       continue;
    269 
    270     NeedWait = true;
    271 
    272     if (Ordered[i]) {
    273       unsigned Value = LastIssued.Array[i] - Required.Array[i];
    274 
    275       // adjust the value to the real hardware posibilities
    276       Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
    277 
    278     } else
    279       Counts.Array[i] = 0;
    280 
    281     // Remember on what we have waited on
    282     WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
    283   }
    284 
    285   if (!NeedWait)
    286     return false;
    287 
    288   // Reset EXP_CNT instruction types
    289   if (Counts.Named.EXP == 0)
    290     ExpInstrTypesSeen = 0;
    291 
    292   // Build the wait instruction
    293   BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
    294           .addImm((Counts.Named.VM & 0xF) |
    295                   ((Counts.Named.EXP & 0x7) << 4) |
    296                   ((Counts.Named.LGKM & 0x7) << 8));
    297 
    298   return true;
    299 }
    300 
    301 /// \brief helper function for handleOperands
    302 static void increaseCounters(Counters &Dst, const Counters &Src) {
    303 
    304   for (unsigned i = 0; i < 3; ++i)
    305     Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
    306 }
    307 
    308 Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
    309 
    310   Counters Result = ZeroCounts;
    311 
    312   // For each register affected by this
    313   // instruction increase the result sequence
    314   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    315 
    316     MachineOperand &Op = MI.getOperand(i);
    317     RegInterval Interval = getRegInterval(Op);
    318     for (unsigned j = Interval.first; j < Interval.second; ++j) {
    319 
    320       if (Op.isDef()) {
    321         increaseCounters(Result, UsedRegs[j]);
    322         increaseCounters(Result, DefinedRegs[j]);
    323       }
    324 
    325       if (Op.isUse())
    326         increaseCounters(Result, DefinedRegs[j]);
    327     }
    328   }
    329 
    330   return Result;
    331 }
    332 
    333 bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
    334   bool Changes = false;
    335 
    336   TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
    337   TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
    338 
    339   MRI = &MF.getRegInfo();
    340 
    341   WaitedOn = ZeroCounts;
    342   LastIssued = ZeroCounts;
    343 
    344   memset(&UsedRegs, 0, sizeof(UsedRegs));
    345   memset(&DefinedRegs, 0, sizeof(DefinedRegs));
    346 
    347   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
    348        BI != BE; ++BI) {
    349 
    350     MachineBasicBlock &MBB = *BI;
    351     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
    352          I != E; ++I) {
    353 
    354       Changes |= insertWait(MBB, I, handleOperands(*I));
    355       pushInstruction(*I);
    356     }
    357 
    358     // Wait for everything at the end of the MBB
    359     Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
    360   }
    361 
    362   return Changes;
    363 }
    364