1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// This pass implements instructions packetization for R600. It unsets isLast 12 /// bit of instructions inside a bundle and substitutes src register with 13 /// PreviousVector when applicable. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "AMDGPU.h" 18 #include "AMDGPUSubtarget.h" 19 #include "R600InstrInfo.h" 20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 21 #include "llvm/CodeGen/DFAPacketizer.h" 22 #include "llvm/CodeGen/MachineDominators.h" 23 #include "llvm/CodeGen/MachineFunctionPass.h" 24 #include "llvm/CodeGen/MachineLoopInfo.h" 25 #include "llvm/CodeGen/Passes.h" 26 #include "llvm/CodeGen/ScheduleDAG.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/raw_ostream.h" 29 30 using namespace llvm; 31 32 #define DEBUG_TYPE "packets" 33 34 namespace { 35 36 class R600Packetizer : public MachineFunctionPass { 37 38 public: 39 static char ID; 40 R600Packetizer() : MachineFunctionPass(ID) {} 41 42 void getAnalysisUsage(AnalysisUsage &AU) const override { 43 AU.setPreservesCFG(); 44 AU.addRequired<MachineDominatorTree>(); 45 AU.addPreserved<MachineDominatorTree>(); 46 AU.addRequired<MachineLoopInfo>(); 47 AU.addPreserved<MachineLoopInfo>(); 48 MachineFunctionPass::getAnalysisUsage(AU); 49 } 50 51 StringRef getPassName() const override { return "R600 Packetizer"; } 52 53 bool runOnMachineFunction(MachineFunction &Fn) override; 54 }; 55 56 class R600PacketizerList : public VLIWPacketizerList { 57 private: 58 const R600InstrInfo *TII; 59 const R600RegisterInfo &TRI; 60 bool VLIW5; 61 bool ConsideredInstUsesAlreadyWrittenVectorElement; 62 63 unsigned getSlot(const MachineInstr &MI) const { 64 return TRI.getHWRegChan(MI.getOperand(0).getReg()); 65 } 66 67 /// \returns register to PV chan mapping for bundle/single instructions that 68 /// immediately precedes I. 69 DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I) 70 const { 71 DenseMap<unsigned, unsigned> Result; 72 I--; 73 if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle()) 74 return Result; 75 MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); 76 if (I->isBundle()) 77 BI++; 78 int LastDstChan = -1; 79 do { 80 bool isTrans = false; 81 int BISlot = getSlot(*BI); 82 if (LastDstChan >= BISlot) 83 isTrans = true; 84 LastDstChan = BISlot; 85 if (TII->isPredicated(*BI)) 86 continue; 87 int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write); 88 if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0) 89 continue; 90 int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst); 91 if (DstIdx == -1) { 92 continue; 93 } 94 unsigned Dst = BI->getOperand(DstIdx).getReg(); 95 if (isTrans || TII->isTransOnly(*BI)) { 96 Result[Dst] = R600::PS; 97 continue; 98 } 99 if (BI->getOpcode() == R600::DOT4_r600 || 100 BI->getOpcode() == R600::DOT4_eg) { 101 Result[Dst] = R600::PV_X; 102 continue; 103 } 104 if (Dst == R600::OQAP) { 105 continue; 106 } 107 unsigned PVReg = 0; 108 switch (TRI.getHWRegChan(Dst)) { 109 case 0: 110 PVReg = R600::PV_X; 111 break; 112 case 1: 113 PVReg = R600::PV_Y; 114 break; 115 case 2: 116 PVReg = R600::PV_Z; 117 break; 118 case 3: 119 PVReg = R600::PV_W; 120 break; 121 default: 122 llvm_unreachable("Invalid Chan"); 123 } 124 Result[Dst] = PVReg; 125 } while ((++BI)->isBundledWithPred()); 126 return Result; 127 } 128 129 void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs) 130 const { 131 unsigned Ops[] = { 132 R600::OpName::src0, 133 R600::OpName::src1, 134 R600::OpName::src2 135 }; 136 for (unsigned i = 0; i < 3; i++) { 137 int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]); 138 if (OperandIdx < 0) 139 continue; 140 unsigned Src = MI.getOperand(OperandIdx).getReg(); 141 const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src); 142 if (It != PVs.end()) 143 MI.getOperand(OperandIdx).setReg(It->second); 144 } 145 } 146 public: 147 // Ctor. 148 R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST, 149 MachineLoopInfo &MLI) 150 : VLIWPacketizerList(MF, MLI, nullptr), 151 TII(ST.getInstrInfo()), 152 TRI(TII->getRegisterInfo()) { 153 VLIW5 = !ST.hasCaymanISA(); 154 } 155 156 // initPacketizerState - initialize some internal flags. 157 void initPacketizerState() override { 158 ConsideredInstUsesAlreadyWrittenVectorElement = false; 159 } 160 161 // ignorePseudoInstruction - Ignore bundling of pseudo instructions. 162 bool ignorePseudoInstruction(const MachineInstr &MI, 163 const MachineBasicBlock *MBB) override { 164 return false; 165 } 166 167 // isSoloInstruction - return true if instruction MI can not be packetized 168 // with any other instruction, which means that MI itself is a packet. 169 bool isSoloInstruction(const MachineInstr &MI) override { 170 if (TII->isVector(MI)) 171 return true; 172 if (!TII->isALUInstr(MI.getOpcode())) 173 return true; 174 if (MI.getOpcode() == R600::GROUP_BARRIER) 175 return true; 176 // XXX: This can be removed once the packetizer properly handles all the 177 // LDS instruction group restrictions. 178 return TII->isLDSInstr(MI.getOpcode()); 179 } 180 181 // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ 182 // together. 183 bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override { 184 MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr(); 185 if (getSlot(*MII) == getSlot(*MIJ)) 186 ConsideredInstUsesAlreadyWrittenVectorElement = true; 187 // Does MII and MIJ share the same pred_sel ? 188 int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel), 189 OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel); 190 unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0, 191 PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0; 192 if (PredI != PredJ) 193 return false; 194 if (SUJ->isSucc(SUI)) { 195 for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) { 196 const SDep &Dep = SUJ->Succs[i]; 197 if (Dep.getSUnit() != SUI) 198 continue; 199 if (Dep.getKind() == SDep::Anti) 200 continue; 201 if (Dep.getKind() == SDep::Output) 202 if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg()) 203 continue; 204 return false; 205 } 206 } 207 208 bool ARDef = 209 TII->definesAddressRegister(*MII) || TII->definesAddressRegister(*MIJ); 210 bool ARUse = 211 TII->usesAddressRegister(*MII) || TII->usesAddressRegister(*MIJ); 212 213 return !ARDef || !ARUse; 214 } 215 216 // isLegalToPruneDependencies - Is it legal to prune dependece between SUI 217 // and SUJ. 218 bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override { 219 return false; 220 } 221 222 void setIsLastBit(MachineInstr *MI, unsigned Bit) const { 223 unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last); 224 MI->getOperand(LastOp).setImm(Bit); 225 } 226 227 bool isBundlableWithCurrentPMI(MachineInstr &MI, 228 const DenseMap<unsigned, unsigned> &PV, 229 std::vector<R600InstrInfo::BankSwizzle> &BS, 230 bool &isTransSlot) { 231 isTransSlot = TII->isTransOnly(MI); 232 assert (!isTransSlot || VLIW5); 233 234 // Is the dst reg sequence legal ? 235 if (!isTransSlot && !CurrentPacketMIs.empty()) { 236 if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) { 237 if (ConsideredInstUsesAlreadyWrittenVectorElement && 238 !TII->isVectorOnly(MI) && VLIW5) { 239 isTransSlot = true; 240 LLVM_DEBUG({ 241 dbgs() << "Considering as Trans Inst :"; 242 MI.dump(); 243 }); 244 } 245 else 246 return false; 247 } 248 } 249 250 // Are the Constants limitations met ? 251 CurrentPacketMIs.push_back(&MI); 252 if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) { 253 LLVM_DEBUG({ 254 dbgs() << "Couldn't pack :\n"; 255 MI.dump(); 256 dbgs() << "with the following packets :\n"; 257 for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { 258 CurrentPacketMIs[i]->dump(); 259 dbgs() << "\n"; 260 } 261 dbgs() << "because of Consts read limitations\n"; 262 }); 263 CurrentPacketMIs.pop_back(); 264 return false; 265 } 266 267 // Is there a BankSwizzle set that meet Read Port limitations ? 268 if (!TII->fitsReadPortLimitations(CurrentPacketMIs, 269 PV, BS, isTransSlot)) { 270 LLVM_DEBUG({ 271 dbgs() << "Couldn't pack :\n"; 272 MI.dump(); 273 dbgs() << "with the following packets :\n"; 274 for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { 275 CurrentPacketMIs[i]->dump(); 276 dbgs() << "\n"; 277 } 278 dbgs() << "because of Read port limitations\n"; 279 }); 280 CurrentPacketMIs.pop_back(); 281 return false; 282 } 283 284 // We cannot read LDS source registers from the Trans slot. 285 if (isTransSlot && TII->readsLDSSrcReg(MI)) 286 return false; 287 288 CurrentPacketMIs.pop_back(); 289 return true; 290 } 291 292 MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override { 293 MachineBasicBlock::iterator FirstInBundle = 294 CurrentPacketMIs.empty() ? &MI : CurrentPacketMIs.front(); 295 const DenseMap<unsigned, unsigned> &PV = 296 getPreviousVector(FirstInBundle); 297 std::vector<R600InstrInfo::BankSwizzle> BS; 298 bool isTransSlot; 299 300 if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) { 301 for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) { 302 MachineInstr *MI = CurrentPacketMIs[i]; 303 unsigned Op = TII->getOperandIdx(MI->getOpcode(), 304 R600::OpName::bank_swizzle); 305 MI->getOperand(Op).setImm(BS[i]); 306 } 307 unsigned Op = 308 TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle); 309 MI.getOperand(Op).setImm(BS.back()); 310 if (!CurrentPacketMIs.empty()) 311 setIsLastBit(CurrentPacketMIs.back(), 0); 312 substitutePV(MI, PV); 313 MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI); 314 if (isTransSlot) { 315 endPacket(std::next(It)->getParent(), std::next(It)); 316 } 317 return It; 318 } 319 endPacket(MI.getParent(), MI); 320 if (TII->isTransOnly(MI)) 321 return MI; 322 return VLIWPacketizerList::addToPacket(MI); 323 } 324 }; 325 326 bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { 327 const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>(); 328 const R600InstrInfo *TII = ST.getInstrInfo(); 329 330 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); 331 332 // Instantiate the packetizer. 333 R600PacketizerList Packetizer(Fn, ST, MLI); 334 335 // DFA state table should not be empty. 336 assert(Packetizer.getResourceTracker() && "Empty DFA table!"); 337 assert(Packetizer.getResourceTracker()->getInstrItins()); 338 339 if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty()) 340 return false; 341 342 // 343 // Loop over all basic blocks and remove KILL pseudo-instructions 344 // These instructions confuse the dependence analysis. Consider: 345 // D0 = ... (Insn 0) 346 // R0 = KILL R0, D0 (Insn 1) 347 // R0 = ... (Insn 2) 348 // Here, Insn 1 will result in the dependence graph not emitting an output 349 // dependence between Insn 0 and Insn 2. This can lead to incorrect 350 // packetization 351 // 352 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 353 MBB != MBBe; ++MBB) { 354 MachineBasicBlock::iterator End = MBB->end(); 355 MachineBasicBlock::iterator MI = MBB->begin(); 356 while (MI != End) { 357 if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF || 358 (MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) { 359 MachineBasicBlock::iterator DeleteMI = MI; 360 ++MI; 361 MBB->erase(DeleteMI); 362 End = MBB->end(); 363 continue; 364 } 365 ++MI; 366 } 367 } 368 369 // Loop over all of the basic blocks. 370 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 371 MBB != MBBe; ++MBB) { 372 // Find scheduling regions and schedule / packetize each region. 373 unsigned RemainingCount = MBB->size(); 374 for(MachineBasicBlock::iterator RegionEnd = MBB->end(); 375 RegionEnd != MBB->begin();) { 376 // The next region starts above the previous region. Look backward in the 377 // instruction stream until we find the nearest boundary. 378 MachineBasicBlock::iterator I = RegionEnd; 379 for(;I != MBB->begin(); --I, --RemainingCount) { 380 if (TII->isSchedulingBoundary(*std::prev(I), &*MBB, Fn)) 381 break; 382 } 383 I = MBB->begin(); 384 385 // Skip empty scheduling regions. 386 if (I == RegionEnd) { 387 RegionEnd = std::prev(RegionEnd); 388 --RemainingCount; 389 continue; 390 } 391 // Skip regions with one instruction. 392 if (I == std::prev(RegionEnd)) { 393 RegionEnd = std::prev(RegionEnd); 394 continue; 395 } 396 397 Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd); 398 RegionEnd = I; 399 } 400 } 401 402 return true; 403 404 } 405 406 } // end anonymous namespace 407 408 INITIALIZE_PASS_BEGIN(R600Packetizer, DEBUG_TYPE, 409 "R600 Packetizer", false, false) 410 INITIALIZE_PASS_END(R600Packetizer, DEBUG_TYPE, 411 "R600 Packetizer", false, false) 412 413 char R600Packetizer::ID = 0; 414 415 char &llvm::R600PacketizerID = R600Packetizer::ID; 416 417 llvm::FunctionPass *llvm::createR600Packetizer() { 418 return new R600Packetizer(); 419 } 420