1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// This pass implements instructions packetization for R600. It unsets isLast 12 /// bit of instructions inside a bundle and substitutes src register with 13 /// PreviousVector when applicable. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #define DEBUG_TYPE "packets" 18 #include "llvm/Support/Debug.h" 19 #include "AMDGPU.h" 20 #include "R600InstrInfo.h" 21 #include "llvm/CodeGen/DFAPacketizer.h" 22 #include "llvm/CodeGen/MachineDominators.h" 23 #include "llvm/CodeGen/MachineFunctionPass.h" 24 #include "llvm/CodeGen/MachineLoopInfo.h" 25 #include "llvm/CodeGen/Passes.h" 26 #include "llvm/CodeGen/ScheduleDAG.h" 27 #include "llvm/Support/raw_ostream.h" 28 29 using namespace llvm; 30 31 namespace { 32 33 class R600Packetizer : public MachineFunctionPass { 34 35 public: 36 static char ID; 37 R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {} 38 39 void getAnalysisUsage(AnalysisUsage &AU) const { 40 AU.setPreservesCFG(); 41 AU.addRequired<MachineDominatorTree>(); 42 AU.addPreserved<MachineDominatorTree>(); 43 AU.addRequired<MachineLoopInfo>(); 44 AU.addPreserved<MachineLoopInfo>(); 45 MachineFunctionPass::getAnalysisUsage(AU); 46 } 47 48 const char *getPassName() const { 49 return "R600 Packetizer"; 50 } 51 52 bool runOnMachineFunction(MachineFunction &Fn); 53 }; 54 char R600Packetizer::ID = 0; 55 56 class R600PacketizerList : public VLIWPacketizerList { 57 58 private: 59 const R600InstrInfo *TII; 60 const R600RegisterInfo &TRI; 61 62 unsigned getSlot(const MachineInstr *MI) const { 63 return TRI.getHWRegChan(MI->getOperand(0).getReg()); 64 } 65 66 /// \returns register to PV chan mapping for bundle/single instructions that 67 /// immediatly precedes I. 68 DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I) 69 const { 70 DenseMap<unsigned, unsigned> Result; 71 I--; 72 if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle()) 73 return Result; 74 MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); 75 if (I->isBundle()) 76 BI++; 77 do { 78 if (TII->isPredicated(BI)) 79 continue; 80 int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write); 81 if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0) 82 continue; 83 int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst); 84 if (DstIdx == -1) { 85 continue; 86 } 87 unsigned Dst = BI->getOperand(DstIdx).getReg(); 88 if (TII->isTransOnly(BI)) { 89 Result[Dst] = AMDGPU::PS; 90 continue; 91 } 92 if (BI->getOpcode() == AMDGPU::DOT4_r600 || 93 BI->getOpcode() == AMDGPU::DOT4_eg) { 94 Result[Dst] = AMDGPU::PV_X; 95 continue; 96 } 97 if (Dst == AMDGPU::OQAP) { 98 continue; 99 } 100 unsigned PVReg = 0; 101 switch (TRI.getHWRegChan(Dst)) { 102 case 0: 103 PVReg = AMDGPU::PV_X; 104 break; 105 case 1: 106 PVReg = AMDGPU::PV_Y; 107 break; 108 case 2: 109 PVReg = AMDGPU::PV_Z; 110 break; 111 case 3: 112 PVReg = AMDGPU::PV_W; 113 break; 114 default: 115 llvm_unreachable("Invalid Chan"); 116 } 117 Result[Dst] = PVReg; 118 } while ((++BI)->isBundledWithPred()); 119 return Result; 120 } 121 122 void substitutePV(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PVs) 123 const { 124 unsigned Ops[] = { 125 AMDGPU::OpName::src0, 126 AMDGPU::OpName::src1, 127 AMDGPU::OpName::src2 128 }; 129 for (unsigned i = 0; i < 3; i++) { 130 int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]); 131 if (OperandIdx < 0) 132 continue; 133 unsigned Src = MI->getOperand(OperandIdx).getReg(); 134 const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src); 135 if (It != PVs.end()) 136 MI->getOperand(OperandIdx).setReg(It->second); 137 } 138 } 139 public: 140 // Ctor. 141 R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, 142 MachineDominatorTree &MDT) 143 : VLIWPacketizerList(MF, MLI, MDT, true), 144 TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())), 145 TRI(TII->getRegisterInfo()) { } 146 147 // initPacketizerState - initialize some internal flags. 148 void initPacketizerState() { } 149 150 // ignorePseudoInstruction - Ignore bundling of pseudo instructions. 151 bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) { 152 return false; 153 } 154 155 // isSoloInstruction - return true if instruction MI can not be packetized 156 // with any other instruction, which means that MI itself is a packet. 157 bool isSoloInstruction(MachineInstr *MI) { 158 if (TII->isVector(*MI)) 159 return true; 160 if (!TII->isALUInstr(MI->getOpcode())) 161 return true; 162 if (MI->getOpcode() == AMDGPU::GROUP_BARRIER) 163 return true; 164 // XXX: This can be removed once the packetizer properly handles all the 165 // LDS instruction group restrictions. 166 if (TII->isLDSInstr(MI->getOpcode())) 167 return true; 168 return false; 169 } 170 171 // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ 172 // together. 173 bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { 174 MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr(); 175 if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII)) 176 return false; 177 // Does MII and MIJ share the same pred_sel ? 178 int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel), 179 OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel); 180 unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0, 181 PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0; 182 if (PredI != PredJ) 183 return false; 184 if (SUJ->isSucc(SUI)) { 185 for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) { 186 const SDep &Dep = SUJ->Succs[i]; 187 if (Dep.getSUnit() != SUI) 188 continue; 189 if (Dep.getKind() == SDep::Anti) 190 continue; 191 if (Dep.getKind() == SDep::Output) 192 if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg()) 193 continue; 194 return false; 195 } 196 } 197 return true; 198 } 199 200 // isLegalToPruneDependencies - Is it legal to prune dependece between SUI 201 // and SUJ. 202 bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {return false;} 203 204 void setIsLastBit(MachineInstr *MI, unsigned Bit) const { 205 unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last); 206 MI->getOperand(LastOp).setImm(Bit); 207 } 208 209 bool isBundlableWithCurrentPMI(MachineInstr *MI, 210 const DenseMap<unsigned, unsigned> &PV, 211 std::vector<R600InstrInfo::BankSwizzle> &BS, 212 bool &isTransSlot) { 213 isTransSlot = TII->isTransOnly(MI); 214 215 // Are the Constants limitations met ? 216 CurrentPacketMIs.push_back(MI); 217 if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) { 218 DEBUG( 219 dbgs() << "Couldn't pack :\n"; 220 MI->dump(); 221 dbgs() << "with the following packets :\n"; 222 for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { 223 CurrentPacketMIs[i]->dump(); 224 dbgs() << "\n"; 225 } 226 dbgs() << "because of Consts read limitations\n"; 227 ); 228 CurrentPacketMIs.pop_back(); 229 return false; 230 } 231 232 // Is there a BankSwizzle set that meet Read Port limitations ? 233 if (!TII->fitsReadPortLimitations(CurrentPacketMIs, 234 PV, BS, isTransSlot)) { 235 DEBUG( 236 dbgs() << "Couldn't pack :\n"; 237 MI->dump(); 238 dbgs() << "with the following packets :\n"; 239 for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { 240 CurrentPacketMIs[i]->dump(); 241 dbgs() << "\n"; 242 } 243 dbgs() << "because of Read port limitations\n"; 244 ); 245 CurrentPacketMIs.pop_back(); 246 return false; 247 } 248 249 CurrentPacketMIs.pop_back(); 250 return true; 251 } 252 253 MachineBasicBlock::iterator addToPacket(MachineInstr *MI) { 254 MachineBasicBlock::iterator FirstInBundle = 255 CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front(); 256 const DenseMap<unsigned, unsigned> &PV = 257 getPreviousVector(FirstInBundle); 258 std::vector<R600InstrInfo::BankSwizzle> BS; 259 bool isTransSlot; 260 261 if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) { 262 for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) { 263 MachineInstr *MI = CurrentPacketMIs[i]; 264 unsigned Op = TII->getOperandIdx(MI->getOpcode(), 265 AMDGPU::OpName::bank_swizzle); 266 MI->getOperand(Op).setImm(BS[i]); 267 } 268 unsigned Op = TII->getOperandIdx(MI->getOpcode(), 269 AMDGPU::OpName::bank_swizzle); 270 MI->getOperand(Op).setImm(BS.back()); 271 if (!CurrentPacketMIs.empty()) 272 setIsLastBit(CurrentPacketMIs.back(), 0); 273 substitutePV(MI, PV); 274 MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI); 275 if (isTransSlot) { 276 endPacket(llvm::next(It)->getParent(), llvm::next(It)); 277 } 278 return It; 279 } 280 endPacket(MI->getParent(), MI); 281 return VLIWPacketizerList::addToPacket(MI); 282 } 283 }; 284 285 bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { 286 const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); 287 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); 288 MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); 289 290 // Instantiate the packetizer. 291 R600PacketizerList Packetizer(Fn, MLI, MDT); 292 293 // DFA state table should not be empty. 294 assert(Packetizer.getResourceTracker() && "Empty DFA table!"); 295 296 // 297 // Loop over all basic blocks and remove KILL pseudo-instructions 298 // These instructions confuse the dependence analysis. Consider: 299 // D0 = ... (Insn 0) 300 // R0 = KILL R0, D0 (Insn 1) 301 // R0 = ... (Insn 2) 302 // Here, Insn 1 will result in the dependence graph not emitting an output 303 // dependence between Insn 0 and Insn 2. This can lead to incorrect 304 // packetization 305 // 306 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 307 MBB != MBBe; ++MBB) { 308 MachineBasicBlock::iterator End = MBB->end(); 309 MachineBasicBlock::iterator MI = MBB->begin(); 310 while (MI != End) { 311 if (MI->isKill() || 312 (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) { 313 MachineBasicBlock::iterator DeleteMI = MI; 314 ++MI; 315 MBB->erase(DeleteMI); 316 End = MBB->end(); 317 continue; 318 } 319 ++MI; 320 } 321 } 322 323 // Loop over all of the basic blocks. 324 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 325 MBB != MBBe; ++MBB) { 326 // Find scheduling regions and schedule / packetize each region. 327 unsigned RemainingCount = MBB->size(); 328 for(MachineBasicBlock::iterator RegionEnd = MBB->end(); 329 RegionEnd != MBB->begin();) { 330 // The next region starts above the previous region. Look backward in the 331 // instruction stream until we find the nearest boundary. 332 MachineBasicBlock::iterator I = RegionEnd; 333 for(;I != MBB->begin(); --I, --RemainingCount) { 334 if (TII->isSchedulingBoundary(llvm::prior(I), MBB, Fn)) 335 break; 336 } 337 I = MBB->begin(); 338 339 // Skip empty scheduling regions. 340 if (I == RegionEnd) { 341 RegionEnd = llvm::prior(RegionEnd); 342 --RemainingCount; 343 continue; 344 } 345 // Skip regions with one instruction. 346 if (I == llvm::prior(RegionEnd)) { 347 RegionEnd = llvm::prior(RegionEnd); 348 continue; 349 } 350 351 Packetizer.PacketizeMIs(MBB, I, RegionEnd); 352 RegionEnd = I; 353 } 354 } 355 356 return true; 357 358 } 359 360 } // end anonymous namespace 361 362 llvm::FunctionPass *llvm::createR600Packetizer(TargetMachine &tm) { 363 return new R600Packetizer(tm); 364 } 365