Home | History | Annotate | Download | only in AMDGPU
      1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// R600EmitClauseMarker pass emits CFAlu instruction in a conservative manner.
     12 /// This pass is merging consecutive CFAlus where applicable.
     13 /// It needs to be called after IfCvt for best results.
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "AMDGPU.h"
     17 #include "AMDGPUSubtarget.h"
     18 #include "R600Defines.h"
     19 #include "R600InstrInfo.h"
     20 #include "R600MachineFunctionInfo.h"
     21 #include "R600RegisterInfo.h"
     22 #include "llvm/CodeGen/MachineFunctionPass.h"
     23 #include "llvm/CodeGen/MachineInstrBuilder.h"
     24 #include "llvm/CodeGen/MachineRegisterInfo.h"
     25 #include "llvm/Support/Debug.h"
     26 #include "llvm/Support/raw_ostream.h"
     27 
     28 using namespace llvm;
     29 
     30 #define DEBUG_TYPE "r600mergeclause"
     31 
     32 namespace {
     33 
     34 static bool isCFAlu(const MachineInstr &MI) {
     35   switch (MI.getOpcode()) {
     36   case AMDGPU::CF_ALU:
     37   case AMDGPU::CF_ALU_PUSH_BEFORE:
     38     return true;
     39   default:
     40     return false;
     41   }
     42 }
     43 
     44 class R600ClauseMergePass : public MachineFunctionPass {
     45 
     46 private:
     47   static char ID;
     48   const R600InstrInfo *TII;
     49 
     50   unsigned getCFAluSize(const MachineInstr &MI) const;
     51   bool isCFAluEnabled(const MachineInstr &MI) const;
     52 
     53   /// IfCvt pass can generate "disabled" ALU clause marker that need to be
     54   /// removed and their content affected to the previous alu clause.
     55   /// This function parse instructions after CFAlu until it find a disabled
     56   /// CFAlu and merge the content, or an enabled CFAlu.
     57   void cleanPotentialDisabledCFAlu(MachineInstr &CFAlu) const;
     58 
     59   /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
     60   /// it is the case.
     61   bool mergeIfPossible(MachineInstr &RootCFAlu,
     62                        const MachineInstr &LatrCFAlu) const;
     63 
     64 public:
     65   R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
     66 
     67   bool runOnMachineFunction(MachineFunction &MF) override;
     68 
     69   const char *getPassName() const override;
     70 };
     71 
     72 char R600ClauseMergePass::ID = 0;
     73 
     74 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
     75   assert(isCFAlu(MI));
     76   return MI
     77       .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT))
     78       .getImm();
     79 }
     80 
     81 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
     82   assert(isCFAlu(MI));
     83   return MI
     84       .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled))
     85       .getImm();
     86 }
     87 
     88 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
     89     MachineInstr &CFAlu) const {
     90   int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
     91   MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
     92   I++;
     93   do {
     94     while (I != E && !isCFAlu(*I))
     95       I++;
     96     if (I == E)
     97       return;
     98     MachineInstr &MI = *I++;
     99     if (isCFAluEnabled(MI))
    100       break;
    101     CFAlu.getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
    102     MI.eraseFromParent();
    103   } while (I != E);
    104 }
    105 
    106 bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
    107                                           const MachineInstr &LatrCFAlu) const {
    108   assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
    109   int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
    110   unsigned RootInstCount = getCFAluSize(RootCFAlu),
    111       LaterInstCount = getCFAluSize(LatrCFAlu);
    112   unsigned CumuledInsts = RootInstCount + LaterInstCount;
    113   if (CumuledInsts >= TII->getMaxAlusPerClause()) {
    114     DEBUG(dbgs() << "Excess inst counts\n");
    115     return false;
    116   }
    117   if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
    118     return false;
    119   // Is KCache Bank 0 compatible ?
    120   int Mode0Idx =
    121       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
    122   int KBank0Idx =
    123       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
    124   int KBank0LineIdx =
    125       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
    126   if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&
    127       RootCFAlu.getOperand(Mode0Idx).getImm() &&
    128       (LatrCFAlu.getOperand(KBank0Idx).getImm() !=
    129            RootCFAlu.getOperand(KBank0Idx).getImm() ||
    130        LatrCFAlu.getOperand(KBank0LineIdx).getImm() !=
    131            RootCFAlu.getOperand(KBank0LineIdx).getImm())) {
    132     DEBUG(dbgs() << "Wrong KC0\n");
    133     return false;
    134   }
    135   // Is KCache Bank 1 compatible ?
    136   int Mode1Idx =
    137       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
    138   int KBank1Idx =
    139       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
    140   int KBank1LineIdx =
    141       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
    142   if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&
    143       RootCFAlu.getOperand(Mode1Idx).getImm() &&
    144       (LatrCFAlu.getOperand(KBank1Idx).getImm() !=
    145            RootCFAlu.getOperand(KBank1Idx).getImm() ||
    146        LatrCFAlu.getOperand(KBank1LineIdx).getImm() !=
    147            RootCFAlu.getOperand(KBank1LineIdx).getImm())) {
    148     DEBUG(dbgs() << "Wrong KC0\n");
    149     return false;
    150   }
    151   if (LatrCFAlu.getOperand(Mode0Idx).getImm()) {
    152     RootCFAlu.getOperand(Mode0Idx).setImm(
    153         LatrCFAlu.getOperand(Mode0Idx).getImm());
    154     RootCFAlu.getOperand(KBank0Idx).setImm(
    155         LatrCFAlu.getOperand(KBank0Idx).getImm());
    156     RootCFAlu.getOperand(KBank0LineIdx)
    157         .setImm(LatrCFAlu.getOperand(KBank0LineIdx).getImm());
    158   }
    159   if (LatrCFAlu.getOperand(Mode1Idx).getImm()) {
    160     RootCFAlu.getOperand(Mode1Idx).setImm(
    161         LatrCFAlu.getOperand(Mode1Idx).getImm());
    162     RootCFAlu.getOperand(KBank1Idx).setImm(
    163         LatrCFAlu.getOperand(KBank1Idx).getImm());
    164     RootCFAlu.getOperand(KBank1LineIdx)
    165         .setImm(LatrCFAlu.getOperand(KBank1LineIdx).getImm());
    166   }
    167   RootCFAlu.getOperand(CntIdx).setImm(CumuledInsts);
    168   RootCFAlu.setDesc(TII->get(LatrCFAlu.getOpcode()));
    169   return true;
    170 }
    171 
    172 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
    173   if (skipFunction(*MF.getFunction()))
    174     return false;
    175 
    176   const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
    177   TII = ST.getInstrInfo();
    178 
    179   for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
    180                                                   BB != BB_E; ++BB) {
    181     MachineBasicBlock &MBB = *BB;
    182     MachineBasicBlock::iterator I = MBB.begin(),  E = MBB.end();
    183     MachineBasicBlock::iterator LatestCFAlu = E;
    184     while (I != E) {
    185       MachineInstr &MI = *I++;
    186       if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
    187           TII->mustBeLastInClause(MI.getOpcode()))
    188         LatestCFAlu = E;
    189       if (!isCFAlu(MI))
    190         continue;
    191       cleanPotentialDisabledCFAlu(MI);
    192 
    193       if (LatestCFAlu != E && mergeIfPossible(*LatestCFAlu, MI)) {
    194         MI.eraseFromParent();
    195       } else {
    196         assert(MI.getOperand(8).getImm() && "CF ALU instruction disabled");
    197         LatestCFAlu = MI;
    198       }
    199     }
    200   }
    201   return false;
    202 }
    203 
    204 const char *R600ClauseMergePass::getPassName() const {
    205   return "R600 Merge Clause Markers Pass";
    206 }
    207 
    208 } // end anonymous namespace
    209 
    210 
    211 llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
    212   return new R600ClauseMergePass(TM);
    213 }
    214