Home | History | Annotate | Download | only in R600
      1 //===-- SIInsertWaits.cpp - Insert wait instructions ----------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// \brief Insert wait instructions for memory reads and writes.
     12 ///
     13 /// Memory reads and writes are issued asynchronously, so we need to insert
     14 /// S_WAITCNT instructions when we want to access any of their results or
     15 /// overwrite any register that's used asynchronously.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #include "AMDGPU.h"
     20 #include "SIInstrInfo.h"
     21 #include "SIMachineFunctionInfo.h"
     22 #include "llvm/CodeGen/MachineFunction.h"
     23 #include "llvm/CodeGen/MachineFunctionPass.h"
     24 #include "llvm/CodeGen/MachineInstrBuilder.h"
     25 #include "llvm/CodeGen/MachineRegisterInfo.h"
     26 
     27 using namespace llvm;
     28 
     29 namespace {
     30 
     31 /// \brief One variable for each of the hardware counters
     32 typedef union {
     33   struct {
     34     unsigned VM;
     35     unsigned EXP;
     36     unsigned LGKM;
     37   } Named;
     38   unsigned Array[3];
     39 
     40 } Counters;
     41 
     42 typedef Counters RegCounters[512];
     43 typedef std::pair<unsigned, unsigned> RegInterval;
     44 
     45 class SIInsertWaits : public MachineFunctionPass {
     46 
     47 private:
     48   static char ID;
     49   const SIInstrInfo *TII;
     50   const SIRegisterInfo *TRI;
     51   const MachineRegisterInfo *MRI;
     52 
     53   /// \brief Constant hardware limits
     54   static const Counters WaitCounts;
     55 
     56   /// \brief Constant zero value
     57   static const Counters ZeroCounts;
     58 
     59   /// \brief Counter values we have already waited on.
     60   Counters WaitedOn;
     61 
     62   /// \brief Counter values for last instruction issued.
     63   Counters LastIssued;
     64 
     65   /// \brief Registers used by async instructions.
     66   RegCounters UsedRegs;
     67 
     68   /// \brief Registers defined by async instructions.
     69   RegCounters DefinedRegs;
     70 
     71   /// \brief Different export instruction types seen since last wait.
     72   unsigned ExpInstrTypesSeen;
     73 
     74   /// \brief Get increment/decrement amount for this instruction.
     75   Counters getHwCounts(MachineInstr &MI);
     76 
     77   /// \brief Is operand relevant for async execution?
     78   bool isOpRelevant(MachineOperand &Op);
     79 
     80   /// \brief Get register interval an operand affects.
     81   RegInterval getRegInterval(MachineOperand &Op);
     82 
     83   /// \brief Handle instructions async components
     84   void pushInstruction(MachineInstr &MI);
     85 
     86   /// \brief Insert the actual wait instruction
     87   bool insertWait(MachineBasicBlock &MBB,
     88                   MachineBasicBlock::iterator I,
     89                   const Counters &Counts);
     90 
     91   /// \brief Do we need def2def checks?
     92   bool unorderedDefines(MachineInstr &MI);
     93 
     94   /// \brief Resolve all operand dependencies to counter requirements
     95   Counters handleOperands(MachineInstr &MI);
     96 
     97 public:
     98   SIInsertWaits(TargetMachine &tm) :
     99     MachineFunctionPass(ID),
    100     TII(nullptr),
    101     TRI(nullptr),
    102     ExpInstrTypesSeen(0) { }
    103 
    104   bool runOnMachineFunction(MachineFunction &MF) override;
    105 
    106   const char *getPassName() const override {
    107     return "SI insert wait  instructions";
    108   }
    109 
    110 };
    111 
    112 } // End anonymous namespace
    113 
    114 char SIInsertWaits::ID = 0;
    115 
    116 const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } };
    117 const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
    118 
    119 FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) {
    120   return new SIInsertWaits(tm);
    121 }
    122 
    123 Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
    124 
    125   uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
    126   Counters Result;
    127 
    128   Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
    129 
    130   // Only consider stores or EXP for EXP_CNT
    131   Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
    132       (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
    133 
    134   // LGKM may uses larger values
    135   if (TSFlags & SIInstrFlags::LGKM_CNT) {
    136 
    137     if (TII->isSMRD(MI.getOpcode())) {
    138 
    139       MachineOperand &Op = MI.getOperand(0);
    140       assert(Op.isReg() && "First LGKM operand must be a register!");
    141 
    142       unsigned Reg = Op.getReg();
    143       unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
    144       Result.Named.LGKM = Size > 4 ? 2 : 1;
    145 
    146     } else {
    147       // DS
    148       Result.Named.LGKM = 1;
    149     }
    150 
    151   } else {
    152     Result.Named.LGKM = 0;
    153   }
    154 
    155   return Result;
    156 }
    157 
    158 bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
    159 
    160   // Constants are always irrelevant
    161   if (!Op.isReg())
    162     return false;
    163 
    164   // Defines are always relevant
    165   if (Op.isDef())
    166     return true;
    167 
    168   // For exports all registers are relevant
    169   MachineInstr &MI = *Op.getParent();
    170   if (MI.getOpcode() == AMDGPU::EXP)
    171     return true;
    172 
    173   // For stores the stored value is also relevant
    174   if (!MI.getDesc().mayStore())
    175     return false;
    176 
    177   for (MachineInstr::mop_iterator I = MI.operands_begin(),
    178        E = MI.operands_end(); I != E; ++I) {
    179 
    180     if (I->isReg() && I->isUse())
    181       return Op.isIdenticalTo(*I);
    182   }
    183 
    184   return false;
    185 }
    186 
    187 RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
    188 
    189   if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
    190     return std::make_pair(0, 0);
    191 
    192   unsigned Reg = Op.getReg();
    193   unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
    194 
    195   assert(Size >= 4);
    196 
    197   RegInterval Result;
    198   Result.first = TRI->getEncodingValue(Reg);
    199   Result.second = Result.first + Size / 4;
    200 
    201   return Result;
    202 }
    203 
    204 void SIInsertWaits::pushInstruction(MachineInstr &MI) {
    205 
    206   // Get the hardware counter increments and sum them up
    207   Counters Increment = getHwCounts(MI);
    208   unsigned Sum = 0;
    209 
    210   for (unsigned i = 0; i < 3; ++i) {
    211     LastIssued.Array[i] += Increment.Array[i];
    212     Sum += Increment.Array[i];
    213   }
    214 
    215   // If we don't increase anything then that's it
    216   if (Sum == 0)
    217     return;
    218 
    219   // Remember which export instructions we have seen
    220   if (Increment.Named.EXP) {
    221     ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2;
    222   }
    223 
    224   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    225 
    226     MachineOperand &Op = MI.getOperand(i);
    227     if (!isOpRelevant(Op))
    228       continue;
    229 
    230     RegInterval Interval = getRegInterval(Op);
    231     for (unsigned j = Interval.first; j < Interval.second; ++j) {
    232 
    233       // Remember which registers we define
    234       if (Op.isDef())
    235         DefinedRegs[j] = LastIssued;
    236 
    237       // and which one we are using
    238       if (Op.isUse())
    239         UsedRegs[j] = LastIssued;
    240     }
    241   }
    242 }
    243 
    244 bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
    245                                MachineBasicBlock::iterator I,
    246                                const Counters &Required) {
    247 
    248   // End of program? No need to wait on anything
    249   if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
    250     return false;
    251 
    252   // Figure out if the async instructions execute in order
    253   bool Ordered[3];
    254 
    255   // VM_CNT is always ordered
    256   Ordered[0] = true;
    257 
    258   // EXP_CNT is unordered if we have both EXP & VM-writes
    259   Ordered[1] = ExpInstrTypesSeen == 3;
    260 
    261   // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
    262   Ordered[2] = false;
    263 
    264   // The values we are going to put into the S_WAITCNT instruction
    265   Counters Counts = WaitCounts;
    266 
    267   // Do we really need to wait?
    268   bool NeedWait = false;
    269 
    270   for (unsigned i = 0; i < 3; ++i) {
    271 
    272     if (Required.Array[i] <= WaitedOn.Array[i])
    273       continue;
    274 
    275     NeedWait = true;
    276 
    277     if (Ordered[i]) {
    278       unsigned Value = LastIssued.Array[i] - Required.Array[i];
    279 
    280       // adjust the value to the real hardware posibilities
    281       Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
    282 
    283     } else
    284       Counts.Array[i] = 0;
    285 
    286     // Remember on what we have waited on
    287     WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
    288   }
    289 
    290   if (!NeedWait)
    291     return false;
    292 
    293   // Reset EXP_CNT instruction types
    294   if (Counts.Named.EXP == 0)
    295     ExpInstrTypesSeen = 0;
    296 
    297   // Build the wait instruction
    298   BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
    299           .addImm((Counts.Named.VM & 0xF) |
    300                   ((Counts.Named.EXP & 0x7) << 4) |
    301                   ((Counts.Named.LGKM & 0x7) << 8));
    302 
    303   return true;
    304 }
    305 
    306 /// \brief helper function for handleOperands
    307 static void increaseCounters(Counters &Dst, const Counters &Src) {
    308 
    309   for (unsigned i = 0; i < 3; ++i)
    310     Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
    311 }
    312 
    313 Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
    314 
    315   Counters Result = ZeroCounts;
    316 
    317   // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
    318   // but we also want to wait for any other outstanding transfers before
    319   // signalling other hardware blocks
    320   if (MI.getOpcode() == AMDGPU::S_SENDMSG)
    321     return LastIssued;
    322 
    323   // For each register affected by this
    324   // instruction increase the result sequence
    325   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    326 
    327     MachineOperand &Op = MI.getOperand(i);
    328     RegInterval Interval = getRegInterval(Op);
    329     for (unsigned j = Interval.first; j < Interval.second; ++j) {
    330 
    331       if (Op.isDef()) {
    332         increaseCounters(Result, UsedRegs[j]);
    333         increaseCounters(Result, DefinedRegs[j]);
    334       }
    335 
    336       if (Op.isUse())
    337         increaseCounters(Result, DefinedRegs[j]);
    338     }
    339   }
    340 
    341   return Result;
    342 }
    343 
    344 // FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
    345 // around other non-memory instructions.
    346 bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
    347   bool Changes = false;
    348 
    349   TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
    350   TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
    351 
    352   MRI = &MF.getRegInfo();
    353 
    354   WaitedOn = ZeroCounts;
    355   LastIssued = ZeroCounts;
    356 
    357   memset(&UsedRegs, 0, sizeof(UsedRegs));
    358   memset(&DefinedRegs, 0, sizeof(DefinedRegs));
    359 
    360   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
    361        BI != BE; ++BI) {
    362 
    363     MachineBasicBlock &MBB = *BI;
    364     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
    365          I != E; ++I) {
    366 
    367       Changes |= insertWait(MBB, I, handleOperands(*I));
    368       pushInstruction(*I);
    369     }
    370 
    371     // Wait for everything at the end of the MBB
    372     Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
    373   }
    374 
    375   return Changes;
    376 }
    377