Home | History | Annotate | Download | only in AMDGPU
      1 //===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// Computations in WWM can overwrite values in inactive channels for
     12 /// variables that the register allocator thinks are dead. This pass adds fake
     13 /// uses of those variables to WWM instructions to make sure that they aren't
     14 /// overwritten.
     15 ///
     16 /// As an example, consider this snippet:
     17 /// %vgpr0 = V_MOV_B32_e32 0.0
     18 /// if (...) {
     19 ///   %vgpr1 = ...
     20 ///   %vgpr2 = WWM killed %vgpr1
     21 ///   ... = killed %vgpr2
     22 ///   %vgpr0 = V_MOV_B32_e32 1.0
     23 /// }
     24 /// ... = %vgpr0
     25 ///
     26 /// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally,
     27 /// we can safely allocate %vgpr0 and %vgpr1 in the same register, since
     28 /// writing %vgpr1 would only write to channels that would be clobbered by the
     29 /// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled,
     30 /// it would clobber even the inactive channels for which the if-condition is
     31 /// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use
     32 /// of %vgpr0 to the WWM instruction to make sure they aren't allocated to the
     33 /// same register.
     34 ///
     35 /// In general, we need to figure out what registers might have their inactive
     36 /// channels which are eventually used accidentally clobbered by a WWM
     37 /// instruction. We approximate this using two conditions:
     38 ///
     39 /// 1. A definition of the variable reaches the WWM instruction.
     40 /// 2. The variable would be live at the WWM instruction if all its defs were
     41 /// partial defs (i.e. considered as a use), ignoring normal uses.
     42 ///
     43 /// If a register matches both conditions, then we add an implicit use of it to
     44 /// the WWM instruction. Condition #2 is the heart of the matter: every
     45 /// definition is really a partial definition, since every VALU instruction is
     46 /// implicitly predicated.  We can usually ignore this, but WWM forces us not
     47 /// to. Condition #1 prevents false positives if the variable is undefined at
     48 /// the WWM instruction anyways. This is overly conservative in certain cases,
     49 /// especially in uniform control flow, but this is a workaround anyways until
     50 /// LLVM gains the notion of predicated uses and definitions of variables.
     51 ///
     52 //===----------------------------------------------------------------------===//
     53 
     54 #include "AMDGPU.h"
     55 #include "AMDGPUSubtarget.h"
     56 #include "SIInstrInfo.h"
     57 #include "SIRegisterInfo.h"
     58 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
     59 #include "llvm/ADT/DepthFirstIterator.h"
     60 #include "llvm/ADT/SparseBitVector.h"
     61 #include "llvm/CodeGen/LiveIntervals.h"
     62 #include "llvm/CodeGen/MachineFunctionPass.h"
     63 #include "llvm/CodeGen/Passes.h"
     64 #include "llvm/CodeGen/TargetRegisterInfo.h"
     65 
     66 using namespace llvm;
     67 
     68 #define DEBUG_TYPE "si-fix-wwm-liveness"
     69 
     70 namespace {
     71 
     72 class SIFixWWMLiveness : public MachineFunctionPass {
     73 private:
     74   LiveIntervals *LIS = nullptr;
     75   const SIRegisterInfo *TRI;
     76   MachineRegisterInfo *MRI;
     77 
     78 public:
     79   static char ID;
     80 
     81   SIFixWWMLiveness() : MachineFunctionPass(ID) {
     82     initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry());
     83   }
     84 
     85   bool runOnMachineFunction(MachineFunction &MF) override;
     86 
     87   bool runOnWWMInstruction(MachineInstr &MI);
     88 
     89   void addDefs(const MachineInstr &MI, SparseBitVector<> &set);
     90 
     91   StringRef getPassName() const override { return "SI Fix WWM Liveness"; }
     92 
     93   void getAnalysisUsage(AnalysisUsage &AU) const override {
     94     // Should preserve the same set that TwoAddressInstructions does.
     95     AU.addPreserved<SlotIndexes>();
     96     AU.addPreserved<LiveIntervals>();
     97     AU.addPreservedID(LiveVariablesID);
     98     AU.addPreservedID(MachineLoopInfoID);
     99     AU.addPreservedID(MachineDominatorsID);
    100     AU.setPreservesCFG();
    101     MachineFunctionPass::getAnalysisUsage(AU);
    102   }
    103 };
    104 
    105 } // End anonymous namespace.
    106 
    107 INITIALIZE_PASS(SIFixWWMLiveness, DEBUG_TYPE,
    108                 "SI fix WWM liveness", false, false)
    109 
    110 char SIFixWWMLiveness::ID = 0;
    111 
    112 char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID;
    113 
    114 FunctionPass *llvm::createSIFixWWMLivenessPass() {
    115   return new SIFixWWMLiveness();
    116 }
    117 
    118 void SIFixWWMLiveness::addDefs(const MachineInstr &MI, SparseBitVector<> &Regs)
    119 {
    120   for (const MachineOperand &Op : MI.defs()) {
    121     if (Op.isReg()) {
    122       unsigned Reg = Op.getReg();
    123       if (TRI->isVGPR(*MRI, Reg))
    124         Regs.set(Reg);
    125     }
    126   }
    127 }
    128 
    129 bool SIFixWWMLiveness::runOnWWMInstruction(MachineInstr &WWM) {
    130   MachineBasicBlock *MBB = WWM.getParent();
    131 
    132   // Compute the registers that are live out of MI by figuring out which defs
    133   // are reachable from MI.
    134   SparseBitVector<> LiveOut;
    135 
    136   for (auto II = MachineBasicBlock::iterator(WWM), IE =
    137        MBB->end(); II != IE; ++II) {
    138     addDefs(*II, LiveOut);
    139   }
    140 
    141   for (df_iterator<MachineBasicBlock *> I = ++df_begin(MBB),
    142                                         E = df_end(MBB);
    143        I != E; ++I) {
    144     for (const MachineInstr &MI : **I) {
    145       addDefs(MI, LiveOut);
    146     }
    147   }
    148 
    149   // Compute the registers that reach MI.
    150   SparseBitVector<> Reachable;
    151 
    152   for (auto II = ++MachineBasicBlock::reverse_iterator(WWM), IE =
    153        MBB->rend(); II != IE; ++II) {
    154     addDefs(*II, Reachable);
    155   }
    156 
    157   for (idf_iterator<MachineBasicBlock *> I = ++idf_begin(MBB),
    158                                          E = idf_end(MBB);
    159        I != E; ++I) {
    160     for (const MachineInstr &MI : **I) {
    161       addDefs(MI, Reachable);
    162     }
    163   }
    164 
    165   // find the intersection, and add implicit uses.
    166   LiveOut &= Reachable;
    167 
    168   bool Modified = false;
    169   for (unsigned Reg : LiveOut) {
    170     WWM.addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
    171     if (LIS) {
    172       // FIXME: is there a better way to update the live interval?
    173       LIS->removeInterval(Reg);
    174       LIS->createAndComputeVirtRegInterval(Reg);
    175     }
    176     Modified = true;
    177   }
    178 
    179   return Modified;
    180 }
    181 
    182 bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
    183   bool Modified = false;
    184 
    185   // This doesn't actually need LiveIntervals, but we can preserve them.
    186   LIS = getAnalysisIfAvailable<LiveIntervals>();
    187 
    188   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
    189   const SIInstrInfo *TII = ST.getInstrInfo();
    190 
    191   TRI = &TII->getRegisterInfo();
    192   MRI = &MF.getRegInfo();
    193 
    194   for (MachineBasicBlock &MBB : MF) {
    195     for (MachineInstr &MI : MBB) {
    196       if (MI.getOpcode() == AMDGPU::EXIT_WWM) {
    197         Modified |= runOnWWMInstruction(MI);
    198       }
    199     }
    200   }
    201 
    202   return Modified;
    203 }
    204