Home | History | Annotate | Download | only in R600
      1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
/// R600EmitClauseMarker pass emits CFAlu instructions in a conservative manner.
/// This pass merges consecutive CFAlus where applicable.
     13 /// It needs to be called after IfCvt for best results.
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "AMDGPU.h"
     17 #include "R600Defines.h"
     18 #include "R600InstrInfo.h"
     19 #include "R600MachineFunctionInfo.h"
     20 #include "R600RegisterInfo.h"
     21 #include "llvm/CodeGen/MachineFunctionPass.h"
     22 #include "llvm/CodeGen/MachineInstrBuilder.h"
     23 #include "llvm/CodeGen/MachineRegisterInfo.h"
     24 #include "llvm/Support/Debug.h"
     25 #include "llvm/Support/raw_ostream.h"
     26 
     27 using namespace llvm;
     28 
     29 #define DEBUG_TYPE "r600mergeclause"
     30 
     31 namespace {
     32 
     33 static bool isCFAlu(const MachineInstr *MI) {
     34   switch (MI->getOpcode()) {
     35   case AMDGPU::CF_ALU:
     36   case AMDGPU::CF_ALU_PUSH_BEFORE:
     37     return true;
     38   default:
     39     return false;
     40   }
     41 }
     42 
     43 class R600ClauseMergePass : public MachineFunctionPass {
     44 
     45 private:
     46   static char ID;
     47   const R600InstrInfo *TII;
     48 
     49   unsigned getCFAluSize(const MachineInstr *MI) const;
     50   bool isCFAluEnabled(const MachineInstr *MI) const;
     51 
     52   /// IfCvt pass can generate "disabled" ALU clause marker that need to be
     53   /// removed and their content affected to the previous alu clause.
     54   /// This function parse instructions after CFAlu until it find a disabled
     55   /// CFAlu and merge the content, or an enabled CFAlu.
     56   void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const;
     57 
     58   /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
     59   /// it is the case.
     60   bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
     61       const;
     62 
     63 public:
     64   R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
     65 
     66   bool runOnMachineFunction(MachineFunction &MF) override;
     67 
     68   const char *getPassName() const override;
     69 };
     70 
// Definition of the static pass identifier; the class constructor passes ID
// to the MachineFunctionPass base constructor.
char R600ClauseMergePass::ID = 0;
     72 
     73 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const {
     74   assert(isCFAlu(MI));
     75   return MI->getOperand(
     76       TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm();
     77 }
     78 
     79 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const {
     80   assert(isCFAlu(MI));
     81   return MI->getOperand(
     82       TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm();
     83 }
     84 
     85 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu)
     86     const {
     87   int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
     88   MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end();
     89   I++;
     90   do {
     91     while (I!= E && !isCFAlu(I))
     92       I++;
     93     if (I == E)
     94       return;
     95     MachineInstr *MI = I++;
     96     if (isCFAluEnabled(MI))
     97       break;
     98     CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
     99     MI->eraseFromParent();
    100   } while (I != E);
    101 }
    102 
    103 bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
    104                                           const MachineInstr *LatrCFAlu) const {
    105   assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
    106   int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
    107   unsigned RootInstCount = getCFAluSize(RootCFAlu),
    108       LaterInstCount = getCFAluSize(LatrCFAlu);
    109   unsigned CumuledInsts = RootInstCount + LaterInstCount;
    110   if (CumuledInsts >= TII->getMaxAlusPerClause()) {
    111     DEBUG(dbgs() << "Excess inst counts\n");
    112     return false;
    113   }
    114   if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
    115     return false;
    116   // Is KCache Bank 0 compatible ?
    117   int Mode0Idx =
    118       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
    119   int KBank0Idx =
    120       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
    121   int KBank0LineIdx =
    122       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
    123   if (LatrCFAlu->getOperand(Mode0Idx).getImm() &&
    124       RootCFAlu->getOperand(Mode0Idx).getImm() &&
    125       (LatrCFAlu->getOperand(KBank0Idx).getImm() !=
    126        RootCFAlu->getOperand(KBank0Idx).getImm() ||
    127       LatrCFAlu->getOperand(KBank0LineIdx).getImm() !=
    128       RootCFAlu->getOperand(KBank0LineIdx).getImm())) {
    129     DEBUG(dbgs() << "Wrong KC0\n");
    130     return false;
    131   }
    132   // Is KCache Bank 1 compatible ?
    133   int Mode1Idx =
    134       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
    135   int KBank1Idx =
    136       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
    137   int KBank1LineIdx =
    138       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
    139   if (LatrCFAlu->getOperand(Mode1Idx).getImm() &&
    140       RootCFAlu->getOperand(Mode1Idx).getImm() &&
    141       (LatrCFAlu->getOperand(KBank1Idx).getImm() !=
    142       RootCFAlu->getOperand(KBank1Idx).getImm() ||
    143       LatrCFAlu->getOperand(KBank1LineIdx).getImm() !=
    144       RootCFAlu->getOperand(KBank1LineIdx).getImm())) {
    145     DEBUG(dbgs() << "Wrong KC0\n");
    146     return false;
    147   }
    148   if (LatrCFAlu->getOperand(Mode0Idx).getImm()) {
    149     RootCFAlu->getOperand(Mode0Idx).setImm(
    150         LatrCFAlu->getOperand(Mode0Idx).getImm());
    151     RootCFAlu->getOperand(KBank0Idx).setImm(
    152         LatrCFAlu->getOperand(KBank0Idx).getImm());
    153     RootCFAlu->getOperand(KBank0LineIdx).setImm(
    154         LatrCFAlu->getOperand(KBank0LineIdx).getImm());
    155   }
    156   if (LatrCFAlu->getOperand(Mode1Idx).getImm()) {
    157     RootCFAlu->getOperand(Mode1Idx).setImm(
    158         LatrCFAlu->getOperand(Mode1Idx).getImm());
    159     RootCFAlu->getOperand(KBank1Idx).setImm(
    160         LatrCFAlu->getOperand(KBank1Idx).getImm());
    161     RootCFAlu->getOperand(KBank1LineIdx).setImm(
    162         LatrCFAlu->getOperand(KBank1LineIdx).getImm());
    163   }
    164   RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts);
    165   RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
    166   return true;
    167 }
    168 
    169 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
    170   TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
    171   for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
    172                                                   BB != BB_E; ++BB) {
    173     MachineBasicBlock &MBB = *BB;
    174     MachineBasicBlock::iterator I = MBB.begin(),  E = MBB.end();
    175     MachineBasicBlock::iterator LatestCFAlu = E;
    176     while (I != E) {
    177       MachineInstr *MI = I++;
    178       if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
    179           TII->mustBeLastInClause(MI->getOpcode()))
    180         LatestCFAlu = E;
    181       if (!isCFAlu(MI))
    182         continue;
    183       cleanPotentialDisabledCFAlu(MI);
    184 
    185       if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
    186         MI->eraseFromParent();
    187       } else {
    188         assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled");
    189         LatestCFAlu = MI;
    190       }
    191     }
    192   }
    193   return false;
    194 }
    195 
    196 const char *R600ClauseMergePass::getPassName() const {
    197   return "R600 Merge Clause Markers Pass";
    198 }
    199 
    200 } // end anonymous namespace
    201 
    202 
    203 llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
    204   return new R600ClauseMergePass(TM);
    205 }
    206