1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// R600EmitClauseMarker pass emits CFAlu instruction in a conservative maneer. 12 /// This pass is merging consecutive CFAlus where applicable. 13 /// It needs to be called after IfCvt for best results. 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPU.h" 17 #include "AMDGPUSubtarget.h" 18 #include "R600Defines.h" 19 #include "R600InstrInfo.h" 20 #include "R600MachineFunctionInfo.h" 21 #include "R600RegisterInfo.h" 22 #include "llvm/CodeGen/MachineFunctionPass.h" 23 #include "llvm/CodeGen/MachineInstrBuilder.h" 24 #include "llvm/CodeGen/MachineRegisterInfo.h" 25 #include "llvm/Support/Debug.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 using namespace llvm; 29 30 #define DEBUG_TYPE "r600mergeclause" 31 32 namespace { 33 34 static bool isCFAlu(const MachineInstr &MI) { 35 switch (MI.getOpcode()) { 36 case AMDGPU::CF_ALU: 37 case AMDGPU::CF_ALU_PUSH_BEFORE: 38 return true; 39 default: 40 return false; 41 } 42 } 43 44 class R600ClauseMergePass : public MachineFunctionPass { 45 46 private: 47 static char ID; 48 const R600InstrInfo *TII; 49 50 unsigned getCFAluSize(const MachineInstr &MI) const; 51 bool isCFAluEnabled(const MachineInstr &MI) const; 52 53 /// IfCvt pass can generate "disabled" ALU clause marker that need to be 54 /// removed and their content affected to the previous alu clause. 55 /// This function parse instructions after CFAlu until it find a disabled 56 /// CFAlu and merge the content, or an enabled CFAlu. 57 void cleanPotentialDisabledCFAlu(MachineInstr &CFAlu) const; 58 59 /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if 60 /// it is the case. 61 bool mergeIfPossible(MachineInstr &RootCFAlu, 62 const MachineInstr &LatrCFAlu) const; 63 64 public: 65 R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { } 66 67 bool runOnMachineFunction(MachineFunction &MF) override; 68 69 const char *getPassName() const override; 70 }; 71 72 char R600ClauseMergePass::ID = 0; 73 74 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const { 75 assert(isCFAlu(MI)); 76 return MI 77 .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT)) 78 .getImm(); 79 } 80 81 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const { 82 assert(isCFAlu(MI)); 83 return MI 84 .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled)) 85 .getImm(); 86 } 87 88 void R600ClauseMergePass::cleanPotentialDisabledCFAlu( 89 MachineInstr &CFAlu) const { 90 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); 91 MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end(); 92 I++; 93 do { 94 while (I != E && !isCFAlu(*I)) 95 I++; 96 if (I == E) 97 return; 98 MachineInstr &MI = *I++; 99 if (isCFAluEnabled(MI)) 100 break; 101 CFAlu.getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI)); 102 MI.eraseFromParent(); 103 } while (I != E); 104 } 105 106 bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu, 107 const MachineInstr &LatrCFAlu) const { 108 assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu)); 109 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); 110 unsigned RootInstCount = getCFAluSize(RootCFAlu), 111 LaterInstCount = getCFAluSize(LatrCFAlu); 112 unsigned CumuledInsts = RootInstCount + LaterInstCount; 113 if (CumuledInsts >= TII->getMaxAlusPerClause()) { 114 DEBUG(dbgs() << "Excess inst counts\n"); 115 return false; 116 } 117 if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) 118 return false; 119 // Is KCache Bank 0 compatible ? 120 int Mode0Idx = 121 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0); 122 int KBank0Idx = 123 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0); 124 int KBank0LineIdx = 125 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0); 126 if (LatrCFAlu.getOperand(Mode0Idx).getImm() && 127 RootCFAlu.getOperand(Mode0Idx).getImm() && 128 (LatrCFAlu.getOperand(KBank0Idx).getImm() != 129 RootCFAlu.getOperand(KBank0Idx).getImm() || 130 LatrCFAlu.getOperand(KBank0LineIdx).getImm() != 131 RootCFAlu.getOperand(KBank0LineIdx).getImm())) { 132 DEBUG(dbgs() << "Wrong KC0\n"); 133 return false; 134 } 135 // Is KCache Bank 1 compatible ? 136 int Mode1Idx = 137 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1); 138 int KBank1Idx = 139 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1); 140 int KBank1LineIdx = 141 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1); 142 if (LatrCFAlu.getOperand(Mode1Idx).getImm() && 143 RootCFAlu.getOperand(Mode1Idx).getImm() && 144 (LatrCFAlu.getOperand(KBank1Idx).getImm() != 145 RootCFAlu.getOperand(KBank1Idx).getImm() || 146 LatrCFAlu.getOperand(KBank1LineIdx).getImm() != 147 RootCFAlu.getOperand(KBank1LineIdx).getImm())) { 148 DEBUG(dbgs() << "Wrong KC0\n"); 149 return false; 150 } 151 if (LatrCFAlu.getOperand(Mode0Idx).getImm()) { 152 RootCFAlu.getOperand(Mode0Idx).setImm( 153 LatrCFAlu.getOperand(Mode0Idx).getImm()); 154 RootCFAlu.getOperand(KBank0Idx).setImm( 155 LatrCFAlu.getOperand(KBank0Idx).getImm()); 156 RootCFAlu.getOperand(KBank0LineIdx) 157 .setImm(LatrCFAlu.getOperand(KBank0LineIdx).getImm()); 158 } 159 if (LatrCFAlu.getOperand(Mode1Idx).getImm()) { 160 RootCFAlu.getOperand(Mode1Idx).setImm( 161 LatrCFAlu.getOperand(Mode1Idx).getImm()); 162 RootCFAlu.getOperand(KBank1Idx).setImm( 163 LatrCFAlu.getOperand(KBank1Idx).getImm()); 164 RootCFAlu.getOperand(KBank1LineIdx) 165 .setImm(LatrCFAlu.getOperand(KBank1LineIdx).getImm()); 166 } 167 RootCFAlu.getOperand(CntIdx).setImm(CumuledInsts); 168 RootCFAlu.setDesc(TII->get(LatrCFAlu.getOpcode())); 169 return true; 170 } 171 172 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) { 173 if (skipFunction(*MF.getFunction())) 174 return false; 175 176 const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>(); 177 TII = ST.getInstrInfo(); 178 179 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 180 BB != BB_E; ++BB) { 181 MachineBasicBlock &MBB = *BB; 182 MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 183 MachineBasicBlock::iterator LatestCFAlu = E; 184 while (I != E) { 185 MachineInstr &MI = *I++; 186 if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) || 187 TII->mustBeLastInClause(MI.getOpcode())) 188 LatestCFAlu = E; 189 if (!isCFAlu(MI)) 190 continue; 191 cleanPotentialDisabledCFAlu(MI); 192 193 if (LatestCFAlu != E && mergeIfPossible(*LatestCFAlu, MI)) { 194 MI.eraseFromParent(); 195 } else { 196 assert(MI.getOperand(8).getImm() && "CF ALU instruction disabled"); 197 LatestCFAlu = MI; 198 } 199 } 200 } 201 return false; 202 } 203 204 const char *R600ClauseMergePass::getPassName() const { 205 return "R600 Merge Clause Markers Pass"; 206 } 207 208 } // end anonymous namespace 209 210 211 llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) { 212 return new R600ClauseMergePass(TM); 213 } 214