1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// R600EmitClauseMarker pass emits CFAlu instruction in a conservative maneer. 12 /// This pass is merging consecutive CFAlus where applicable. 13 /// It needs to be called after IfCvt for best results. 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPU.h" 17 #include "R600Defines.h" 18 #include "R600InstrInfo.h" 19 #include "R600MachineFunctionInfo.h" 20 #include "R600RegisterInfo.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineRegisterInfo.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "r600mergeclause" 30 31 namespace { 32 33 static bool isCFAlu(const MachineInstr *MI) { 34 switch (MI->getOpcode()) { 35 case AMDGPU::CF_ALU: 36 case AMDGPU::CF_ALU_PUSH_BEFORE: 37 return true; 38 default: 39 return false; 40 } 41 } 42 43 class R600ClauseMergePass : public MachineFunctionPass { 44 45 private: 46 static char ID; 47 const R600InstrInfo *TII; 48 49 unsigned getCFAluSize(const MachineInstr *MI) const; 50 bool isCFAluEnabled(const MachineInstr *MI) const; 51 52 /// IfCvt pass can generate "disabled" ALU clause marker that need to be 53 /// removed and their content affected to the previous alu clause. 54 /// This function parse instructions after CFAlu until it find a disabled 55 /// CFAlu and merge the content, or an enabled CFAlu. 56 void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const; 57 58 /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if 59 /// it is the case. 60 bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu) 61 const; 62 63 public: 64 R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { } 65 66 bool runOnMachineFunction(MachineFunction &MF) override; 67 68 const char *getPassName() const override; 69 }; 70 71 char R600ClauseMergePass::ID = 0; 72 73 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const { 74 assert(isCFAlu(MI)); 75 return MI->getOperand( 76 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm(); 77 } 78 79 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const { 80 assert(isCFAlu(MI)); 81 return MI->getOperand( 82 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm(); 83 } 84 85 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) 86 const { 87 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); 88 MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end(); 89 I++; 90 do { 91 while (I!= E && !isCFAlu(I)) 92 I++; 93 if (I == E) 94 return; 95 MachineInstr *MI = I++; 96 if (isCFAluEnabled(MI)) 97 break; 98 CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI)); 99 MI->eraseFromParent(); 100 } while (I != E); 101 } 102 103 bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu, 104 const MachineInstr *LatrCFAlu) const { 105 assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu)); 106 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); 107 unsigned RootInstCount = getCFAluSize(RootCFAlu), 108 LaterInstCount = getCFAluSize(LatrCFAlu); 109 unsigned CumuledInsts = RootInstCount + LaterInstCount; 110 if (CumuledInsts >= TII->getMaxAlusPerClause()) { 111 DEBUG(dbgs() << "Excess inst counts\n"); 112 return false; 113 } 114 if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) 115 return false; 116 // Is KCache Bank 0 compatible ? 117 int Mode0Idx = 118 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0); 119 int KBank0Idx = 120 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0); 121 int KBank0LineIdx = 122 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0); 123 if (LatrCFAlu->getOperand(Mode0Idx).getImm() && 124 RootCFAlu->getOperand(Mode0Idx).getImm() && 125 (LatrCFAlu->getOperand(KBank0Idx).getImm() != 126 RootCFAlu->getOperand(KBank0Idx).getImm() || 127 LatrCFAlu->getOperand(KBank0LineIdx).getImm() != 128 RootCFAlu->getOperand(KBank0LineIdx).getImm())) { 129 DEBUG(dbgs() << "Wrong KC0\n"); 130 return false; 131 } 132 // Is KCache Bank 1 compatible ? 133 int Mode1Idx = 134 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1); 135 int KBank1Idx = 136 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1); 137 int KBank1LineIdx = 138 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1); 139 if (LatrCFAlu->getOperand(Mode1Idx).getImm() && 140 RootCFAlu->getOperand(Mode1Idx).getImm() && 141 (LatrCFAlu->getOperand(KBank1Idx).getImm() != 142 RootCFAlu->getOperand(KBank1Idx).getImm() || 143 LatrCFAlu->getOperand(KBank1LineIdx).getImm() != 144 RootCFAlu->getOperand(KBank1LineIdx).getImm())) { 145 DEBUG(dbgs() << "Wrong KC0\n"); 146 return false; 147 } 148 if (LatrCFAlu->getOperand(Mode0Idx).getImm()) { 149 RootCFAlu->getOperand(Mode0Idx).setImm( 150 LatrCFAlu->getOperand(Mode0Idx).getImm()); 151 RootCFAlu->getOperand(KBank0Idx).setImm( 152 LatrCFAlu->getOperand(KBank0Idx).getImm()); 153 RootCFAlu->getOperand(KBank0LineIdx).setImm( 154 LatrCFAlu->getOperand(KBank0LineIdx).getImm()); 155 } 156 if (LatrCFAlu->getOperand(Mode1Idx).getImm()) { 157 RootCFAlu->getOperand(Mode1Idx).setImm( 158 LatrCFAlu->getOperand(Mode1Idx).getImm()); 159 RootCFAlu->getOperand(KBank1Idx).setImm( 160 LatrCFAlu->getOperand(KBank1Idx).getImm()); 161 RootCFAlu->getOperand(KBank1LineIdx).setImm( 162 LatrCFAlu->getOperand(KBank1LineIdx).getImm()); 163 } 164 RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts); 165 RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode())); 166 return true; 167 } 168 169 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) { 170 TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo()); 171 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 172 BB != BB_E; ++BB) { 173 MachineBasicBlock &MBB = *BB; 174 MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 175 MachineBasicBlock::iterator LatestCFAlu = E; 176 while (I != E) { 177 MachineInstr *MI = I++; 178 if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) || 179 TII->mustBeLastInClause(MI->getOpcode())) 180 LatestCFAlu = E; 181 if (!isCFAlu(MI)) 182 continue; 183 cleanPotentialDisabledCFAlu(MI); 184 185 if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) { 186 MI->eraseFromParent(); 187 } else { 188 assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled"); 189 LatestCFAlu = MI; 190 } 191 } 192 } 193 return false; 194 } 195 196 const char *R600ClauseMergePass::getPassName() const { 197 return "R600 Merge Clause Markers Pass"; 198 } 199 200 } // end anonymous namespace 201 202 203 llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) { 204 return new R600ClauseMergePass(TM); 205 } 206