//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Machine Scheduler interface
// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"

#include "R600MachineScheduler.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
#include "llvm/Support/raw_ostream.h"
#include <set>

using namespace llvm;

void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
  DAG = dag;
  TII = static_cast<const R600InstrInfo*>(DAG->TII);
  TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
  MRI = &DAG->MRI;
  Available[IDAlu]->clear();
  Available[IDFetch]->clear();
  Available[IDOther]->clear();
  CurInstKind = IDOther;
  CurEmitted = 0;
  OccupedSlotsMask = 15;
  InstKindLimit[IDAlu] = 120; // 128 minus 8 as a safety margin

  const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) {
    InstKindLimit[IDFetch] = 7; // 8 minus 1 as a safety margin
  } else {
    InstKindLimit[IDFetch] = 15; // 16 minus 1 as a safety margin
  }
}

// Move every unit from QSrc to QDst, clearing QSrc's queue id bit on the way.
void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst) {
  if (QSrc->empty())
    return;
  for (ReadyQueue::iterator I = QSrc->begin(),
       E = QSrc->end(); I != E; ++I) {
    (*I)->NodeQueueId &= ~QSrc->getID();
    QDst->push(*I);
  }
  QSrc->clear();
}

SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = 0;
  IsTopNode = true;
  NextInstKind = IDOther;

  // Check if we might want to switch the current clause type.
  bool AllowSwitchToAlu = (CurInstKind == IDOther) ||
      (CurEmitted > InstKindLimit[CurInstKind]) ||
      (Available[CurInstKind]->empty());
  bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) &&
      (!Available[IDFetch]->empty() || !Available[IDOther]->empty());

  if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
      (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
    // Try to pick an ALU instruction.
    SU = pickAlu();
    if (SU) {
      if (CurEmitted > InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // Try to pick a FETCH instruction.
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  // Try to pick anything else.
  if (!SU) {
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }

  DEBUG(
      if (SU) {
        dbgs() << "picked node: ";
        SU->dump(DAG);
      } else {
        dbgs() << "NO NODE ";
        for (int i = 0; i < IDLast; ++i) {
          Available[i]->dump();
          Pending[i]->dump();
        }
        for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
          const SUnit &S = DAG->SUnits[i];
          if (!S.isScheduled)
            S.dump(DAG);
        }
      }
  );

  return SU;
}

void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  DEBUG(dbgs() << "scheduled: ");
  DEBUG(SU->dump(DAG));

  if (NextInstKind != CurInstKind) {
    DEBUG(dbgs() << "Instruction Type Switch\n");
    if (NextInstKind != IDAlu)
      OccupedSlotsMask = 15;
    CurEmitted = 0;
    CurInstKind = NextInstKind;
  }

  if (CurInstKind == IDAlu) {
    switch (getAluKind(SU)) {
    case AluT_XYZW:
      CurEmitted += 4;
      break;
    case AluDiscarded:
      break;
    default: {
      ++CurEmitted;
      for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
           E = SU->getInstr()->operands_end(); It != E; ++It) {
        MachineOperand &MO = *It;
        if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
          ++CurEmitted;
      }
    }
    }
  } else {
    ++CurEmitted;
  }

  DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");

  if (CurInstKind != IDFetch) {
    MoveUnits(Pending[IDFetch], Available[IDFetch]);
  }
  MoveUnits(Pending[IDOther], Available[IDOther]);
}

void R600SchedStrategy::releaseTopNode(SUnit *SU) {
  int IK = getInstKind(SU);

  DEBUG(dbgs() << IK << " <= ");
  DEBUG(SU->dump(DAG));

  Pending[IK]->push(SU);
}

void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
}

bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
                                          const TargetRegisterClass *RC) const {
  if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
    return RC->contains(Reg);
  } else {
    return MRI->getRegClass(Reg) == RC;
  }
}

R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
  MachineInstr *MI = SU->getInstr();

  switch (MI->getOpcode()) {
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
    return AluT_XYZW;
  case AMDGPU::COPY:
    if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
      // %vregX = COPY Tn_X is likely to be discarded in favor of an
      // assignment of Tn_X to %vregX; don't consider it when scheduling.
      return AluDiscarded;
    } else if (MI->getOperand(1).isUndef()) {
      // MI will become a KILL; don't consider it when scheduling.
      return AluDiscarded;
    }
  default:
    break;
  }

  // Does the instruction take a whole IG?
  if (TII->isVector(*MI) ||
      TII->isCubeOp(MI->getOpcode()) ||
      TII->isReductionOp(MI->getOpcode()))
    return AluT_XYZW;

  // Is the result already assigned to a channel?
  unsigned DestSubReg = MI->getOperand(0).getSubReg();
  switch (DestSubReg) {
  case AMDGPU::sub0:
    return AluT_X;
  case AMDGPU::sub1:
    return AluT_Y;
  case AMDGPU::sub2:
    return AluT_Z;
  case AMDGPU::sub3:
    return AluT_W;
  default:
    break;
  }

  // Is the result already a member of an X/Y/Z/W class?
  unsigned DestReg = MI->getOperand(0).getReg();
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
      regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
    return AluT_X;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
    return AluT_Y;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
    return AluT_Z;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
    return AluT_W;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
    return AluT_XYZW;

  return AluAny;
}

int R600SchedStrategy::getInstKind(SUnit* SU) {
  int Opcode = SU->getInstr()->getOpcode();

  if (TII->isALUInstr(Opcode)) {
    return IDAlu;
  }

  switch (Opcode) {
  case AMDGPU::COPY:
  case AMDGPU::CONST_COPY:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::DOT4_eg_pseudo:
  case AMDGPU::DOT4_r600_pseudo:
    return IDAlu;
  case AMDGPU::TEX_VTX_CONSTBUF:
  case AMDGPU::TEX_VTX_TEXBUF:
  case AMDGPU::TEX_LD:
  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
  case AMDGPU::TEX_GET_GRADIENTS_H:
  case AMDGPU::TEX_GET_GRADIENTS_V:
  case AMDGPU::TEX_SET_GRADIENTS_H:
  case AMDGPU::TEX_SET_GRADIENTS_V:
  case AMDGPU::TEX_SAMPLE:
  case AMDGPU::TEX_SAMPLE_C:
  case AMDGPU::TEX_SAMPLE_L:
  case AMDGPU::TEX_SAMPLE_C_L:
  case AMDGPU::TEX_SAMPLE_LB:
  case AMDGPU::TEX_SAMPLE_C_LB:
  case AMDGPU::TEX_SAMPLE_G:
  case AMDGPU::TEX_SAMPLE_C_G:
  case AMDGPU::TXD:
  case AMDGPU::TXD_SHADOW:
    return IDFetch;
  default:
    DEBUG(
        dbgs() << "other inst: ";
        SU->dump(DAG);
    );
    return IDOther;
  }
}

// Pop the first instruction from Q that can be bundled with the instructions
// already selected for the current instruction group.
SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
  if (Q.empty())
    return NULL;
  for (std::multiset<SUnit *, CompareSUnit>::iterator It = Q.begin(),
       E = Q.end(); It != E; ++It) {
    SUnit *SU = *It;
    InstructionsGroupCandidate.push_back(SU->getInstr());
    if (TII->canBundle(InstructionsGroupCandidate)) {
      InstructionsGroupCandidate.pop_back();
      Q.erase(It);
      return SU;
    } else {
      InstructionsGroupCandidate.pop_back();
    }
  }
  return NULL;
}

// Move pending ALU instructions into the per-kind available sets.
void R600SchedStrategy::LoadAlu() {
  ReadyQueue *QSrc = Pending[IDAlu];
  for (ReadyQueue::iterator I = QSrc->begin(),
       E = QSrc->end(); I != E; ++I) {
    (*I)->NodeQueueId &= ~QSrc->getID();
    AluKind AK = getAluKind(*I);
    AvailableAlus[AK].insert(*I);
  }
  QSrc->clear();
}

void R600SchedStrategy::PrepareNextSlot() {
  DEBUG(dbgs() << "New Slot\n");
  assert(OccupedSlotsMask && "Slot wasn't filled");
  OccupedSlotsMask = 0;
  InstructionsGroupCandidate.clear();
  LoadAlu();
}

void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
  unsigned DestReg = MI->getOperand(0).getReg();
  // PressureRegister crashes if an operand is defined and used in the same
  // instruction and we try to constrain its register class.
  for (MachineInstr::mop_iterator It = MI->operands_begin(),
       E = MI->operands_end(); It != E; ++It) {
    MachineOperand &MO = *It;
    if (MO.isReg() && !MO.isDef() &&
        MO.getReg() == MI->getOperand(0).getReg())
      return;
  }
  // Constrain the register class of DestReg so that it is assigned to Slot.
  switch (Slot) {
  case 0:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
    break;
  case 1:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
    break;
  case 2:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
    break;
  case 3:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
    break;
  }
}

// Try to fill Slot with an instruction already constrained to that channel;
// otherwise fall back to an unconstrained (AluAny) instruction.
SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
  if (!UnslotedSU) {
    return SlotedSU;
  } else if (!SlotedSU) {
    AssignSlot(UnslotedSU->getInstr(), Slot);
    return UnslotedSU;
  } else {
    // Determine which one to pick (the lesser one).
    if (CompareSUnit()(SlotedSU, UnslotedSU)) {
      AvailableAlus[AluAny].insert(UnslotedSU);
      return SlotedSU;
    } else {
      AvailableAlus[IndexToID[Slot]].insert(SlotedSU);
      AssignSlot(UnslotedSU->getInstr(), Slot);
      return UnslotedSU;
    }
  }
}

bool R600SchedStrategy::isAvailablesAluEmpty() const {
  return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() &&
      AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
      AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
      AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty();
}

SUnit* R600SchedStrategy::pickAlu() {
  while (!isAvailablesAluEmpty()) {
    if (!OccupedSlotsMask) {
      // Flush physical reg copies (RA will discard them).
      if (!AvailableAlus[AluDiscarded].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluDiscarded]);
      }
      // If there is a T_XYZW ALU instruction available, use it.
      if (!AvailableAlus[AluT_XYZW].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluT_XYZW]);
      }
    }
    for (unsigned Chan = 0; Chan < 4; ++Chan) {
      bool isOccupied = OccupedSlotsMask & (1 << Chan);
      if (!isOccupied) {
        SUnit *SU = AttemptFillSlot(Chan);
        if (SU) {
          OccupedSlotsMask |= (1 << Chan);
          InstructionsGroupCandidate.push_back(SU->getInstr());
          return SU;
        }
      }
    }
    PrepareNextSlot();
  }
  return NULL;
}

SUnit* R600SchedStrategy::pickOther(int QID) {
  SUnit *SU = 0;
  ReadyQueue *AQ = Available[QID];

  if (AQ->empty()) {
    MoveUnits(Pending[QID], AQ);
  }
  if (!AQ->empty()) {
    SU = *AQ->begin();
    AQ->remove(AQ->begin());
  }
  return SU;
}