Home | History | Annotate | Download | only in R600
      1 //===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// \brief R600 Machine Scheduler interface
     12 // TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #define DEBUG_TYPE "misched"
     17 
     18 #include "R600MachineScheduler.h"
     19 #include "llvm/CodeGen/MachineRegisterInfo.h"
     20 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
     21 #include "llvm/Pass.h"
     22 #include "llvm/PassManager.h"
     23 #include "llvm/Support/raw_ostream.h"
     24 #include <set>
     25 
     26 using namespace llvm;
     27 
     28 void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
     29 
     30   DAG = dag;
     31   TII = static_cast<const R600InstrInfo*>(DAG->TII);
     32   TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
     33   MRI = &DAG->MRI;
     34   Available[IDAlu]->clear();
     35   Available[IDFetch]->clear();
     36   Available[IDOther]->clear();
     37   CurInstKind = IDOther;
     38   CurEmitted = 0;
     39   OccupedSlotsMask = 15;
     40   InstKindLimit[IDAlu] = 120; // 120 minus 8 for security
     41 
     42 
     43   const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
     44   if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) {
     45     InstKindLimit[IDFetch] = 7; // 8 minus 1 for security
     46   } else {
     47     InstKindLimit[IDFetch] = 15; // 16 minus 1 for security
     48   }
     49 }
     50 
     51 void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst)
     52 {
     53   if (QSrc->empty())
     54     return;
     55   for (ReadyQueue::iterator I = QSrc->begin(),
     56       E = QSrc->end(); I != E; ++I) {
     57     (*I)->NodeQueueId &= ~QSrc->getID();
     58     QDst->push(*I);
     59   }
     60   QSrc->clear();
     61 }
     62 
/// Pick the next instruction to schedule (this strategy is top-down only:
/// IsTopNode is always set to true).
///
/// Preference order: stay in or switch to an ALU clause when the clause
/// heuristics allow it, then FETCH, then everything else. NextInstKind
/// records which clause kind the picked node belongs to so schedNode can
/// detect a clause switch.
SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = 0;
  IsTopNode = true;
  NextInstKind = IDOther;

  // check if we might want to switch current clause type
  bool AllowSwitchToAlu = (CurInstKind == IDOther) ||
      (CurEmitted > InstKindLimit[CurInstKind]) ||
      (Available[CurInstKind]->empty());
  bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) &&
      (!Available[IDFetch]->empty() || !Available[IDOther]->empty());

  if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
      (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
    // try to pick ALU
    SU = pickAlu();
    if (SU) {
      // Restart the emitted-slot counter if the previous ALU clause
      // had already overflowed its limit.
      if (CurEmitted >  InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // try to pick FETCH
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  // try to pick other
  if (!SU) {
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }

  // On failure, dump all queues plus any unscheduled SUnits to aid
  // debugging of scheduling deadlocks.
  DEBUG(
      if (SU) {
        dbgs() << "picked node: ";
        SU->dump(DAG);
      } else {
        dbgs() << "NO NODE ";
        for (int i = 0; i < IDLast; ++i) {
          Available[i]->dump();
          Pending[i]->dump();
        }
        for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
          const SUnit &S = DAG->SUnits[i];
          if (!S.isScheduled)
            S.dump(DAG);
        }
      }
  );

  return SU;
}
    120 
/// Notify the strategy that SU was scheduled and update clause-tracking
/// state: CurEmitted approximates how many slots the current clause has
/// consumed, and pending fetch/other units are released back to the
/// Available queues.
void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {

  DEBUG(dbgs() << "scheduled: ");
  DEBUG(SU->dump(DAG));

  if (NextInstKind != CurInstKind) {
    DEBUG(dbgs() << "Instruction Type Switch\n");
    // Leaving the ALU clause: mark all four slots occupied so pickAlu
    // starts a fresh instruction group on the next ALU clause.
    if (NextInstKind != IDAlu)
      OccupedSlotsMask = 15;
    CurEmitted = 0;
    CurInstKind = NextInstKind;
  }

  if (CurInstKind == IDAlu) {
    switch (getAluKind(SU)) {
    case AluT_XYZW:
      // A whole-group instruction consumes all four channel slots.
      CurEmitted += 4;
      break;
    case AluDiscarded:
      // Expected to be deleted before emission; costs nothing.
      break;
    default: {
      ++CurEmitted;
      // Each ALU_LITERAL_X operand occupies an extra slot in the clause.
      for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
          E = SU->getInstr()->operands_end(); It != E; ++It) {
        MachineOperand &MO = *It;
        if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
          ++CurEmitted;
      }
    }
    }
  } else {
    ++CurEmitted;
  }


  DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");

  // While inside a fetch clause, keep fetch units pending so the clause
  // is not broken; everything else becomes available immediately.
  if (CurInstKind != IDFetch) {
    MoveUnits(Pending[IDFetch], Available[IDFetch]);
  }
  MoveUnits(Pending[IDOther], Available[IDOther]);
}
    163 
    164 void R600SchedStrategy::releaseTopNode(SUnit *SU) {
    165   int IK = getInstKind(SU);
    166 
    167   DEBUG(dbgs() << IK << " <= ");
    168   DEBUG(SU->dump(DAG));
    169 
    170   Pending[IK]->push(SU);
    171 }
    172 
/// Intentionally empty: this strategy schedules top-down only (pickNode
/// always sets IsTopNode to true), so bottom-boundary releases need no
/// tracking.
void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
}
    175 
    176 bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
    177                                           const TargetRegisterClass *RC) const {
    178   if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
    179     return RC->contains(Reg);
    180   } else {
    181     return MRI->getRegClass(Reg) == RC;
    182   }
    183 }
    184 
    185 R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
    186   MachineInstr *MI = SU->getInstr();
    187 
    188     switch (MI->getOpcode()) {
    189     case AMDGPU::INTERP_PAIR_XY:
    190     case AMDGPU::INTERP_PAIR_ZW:
    191     case AMDGPU::INTERP_VEC_LOAD:
    192       return AluT_XYZW;
    193     case AMDGPU::COPY:
    194       if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
    195         // %vregX = COPY Tn_X is likely to be discarded in favor of an
    196         // assignement of Tn_X to %vregX, don't considers it in scheduling
    197         return AluDiscarded;
    198       }
    199       else if (MI->getOperand(1).isUndef()) {
    200         // MI will become a KILL, don't considers it in scheduling
    201         return AluDiscarded;
    202       }
    203     default:
    204       break;
    205     }
    206 
    207     // Does the instruction take a whole IG ?
    208     if(TII->isVector(*MI) ||
    209         TII->isCubeOp(MI->getOpcode()) ||
    210         TII->isReductionOp(MI->getOpcode()))
    211       return AluT_XYZW;
    212 
    213     // Is the result already assigned to a channel ?
    214     unsigned DestSubReg = MI->getOperand(0).getSubReg();
    215     switch (DestSubReg) {
    216     case AMDGPU::sub0:
    217       return AluT_X;
    218     case AMDGPU::sub1:
    219       return AluT_Y;
    220     case AMDGPU::sub2:
    221       return AluT_Z;
    222     case AMDGPU::sub3:
    223       return AluT_W;
    224     default:
    225       break;
    226     }
    227 
    228     // Is the result already member of a X/Y/Z/W class ?
    229     unsigned DestReg = MI->getOperand(0).getReg();
    230     if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
    231         regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
    232       return AluT_X;
    233     if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
    234       return AluT_Y;
    235     if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
    236       return AluT_Z;
    237     if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
    238       return AluT_W;
    239     if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
    240       return AluT_XYZW;
    241 
    242     return AluAny;
    243 
    244 }
    245 
    246 int R600SchedStrategy::getInstKind(SUnit* SU) {
    247   int Opcode = SU->getInstr()->getOpcode();
    248 
    249   if (TII->isALUInstr(Opcode)) {
    250     return IDAlu;
    251   }
    252 
    253   switch (Opcode) {
    254   case AMDGPU::COPY:
    255   case AMDGPU::CONST_COPY:
    256   case AMDGPU::INTERP_PAIR_XY:
    257   case AMDGPU::INTERP_PAIR_ZW:
    258   case AMDGPU::INTERP_VEC_LOAD:
    259   case AMDGPU::DOT4_eg_pseudo:
    260   case AMDGPU::DOT4_r600_pseudo:
    261     return IDAlu;
    262   case AMDGPU::TEX_VTX_CONSTBUF:
    263   case AMDGPU::TEX_VTX_TEXBUF:
    264   case AMDGPU::TEX_LD:
    265   case AMDGPU::TEX_GET_TEXTURE_RESINFO:
    266   case AMDGPU::TEX_GET_GRADIENTS_H:
    267   case AMDGPU::TEX_GET_GRADIENTS_V:
    268   case AMDGPU::TEX_SET_GRADIENTS_H:
    269   case AMDGPU::TEX_SET_GRADIENTS_V:
    270   case AMDGPU::TEX_SAMPLE:
    271   case AMDGPU::TEX_SAMPLE_C:
    272   case AMDGPU::TEX_SAMPLE_L:
    273   case AMDGPU::TEX_SAMPLE_C_L:
    274   case AMDGPU::TEX_SAMPLE_LB:
    275   case AMDGPU::TEX_SAMPLE_C_LB:
    276   case AMDGPU::TEX_SAMPLE_G:
    277   case AMDGPU::TEX_SAMPLE_C_G:
    278   case AMDGPU::TXD:
    279   case AMDGPU::TXD_SHADOW:
    280     return IDFetch;
    281   default:
    282     DEBUG(
    283         dbgs() << "other inst: ";
    284         SU->dump(DAG);
    285     );
    286     return IDOther;
    287   }
    288 }
    289 
    290 SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
    291   if (Q.empty())
    292     return NULL;
    293   for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end();
    294       It != E; ++It) {
    295     SUnit *SU = *It;
    296     InstructionsGroupCandidate.push_back(SU->getInstr());
    297     if (TII->canBundle(InstructionsGroupCandidate)) {
    298       InstructionsGroupCandidate.pop_back();
    299       Q.erase(It);
    300       return SU;
    301     } else {
    302       InstructionsGroupCandidate.pop_back();
    303     }
    304   }
    305   return NULL;
    306 }
    307 
    308 void R600SchedStrategy::LoadAlu() {
    309   ReadyQueue *QSrc = Pending[IDAlu];
    310   for (ReadyQueue::iterator I = QSrc->begin(),
    311         E = QSrc->end(); I != E; ++I) {
    312       (*I)->NodeQueueId &= ~QSrc->getID();
    313       AluKind AK = getAluKind(*I);
    314       AvailableAlus[AK].insert(*I);
    315     }
    316     QSrc->clear();
    317 }
    318 
    319 void R600SchedStrategy::PrepareNextSlot() {
    320   DEBUG(dbgs() << "New Slot\n");
    321   assert (OccupedSlotsMask && "Slot wasn't filled");
    322   OccupedSlotsMask = 0;
    323   InstructionsGroupCandidate.clear();
    324   LoadAlu();
    325 }
    326 
    327 void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
    328   unsigned DestReg = MI->getOperand(0).getReg();
    329   // PressureRegister crashes if an operand is def and used in the same inst
    330   // and we try to constraint its regclass
    331   for (MachineInstr::mop_iterator It = MI->operands_begin(),
    332       E = MI->operands_end(); It != E; ++It) {
    333     MachineOperand &MO = *It;
    334     if (MO.isReg() && !MO.isDef() &&
    335         MO.getReg() == MI->getOperand(0).getReg())
    336       return;
    337   }
    338   // Constrains the regclass of DestReg to assign it to Slot
    339   switch (Slot) {
    340   case 0:
    341     MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
    342     break;
    343   case 1:
    344     MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
    345     break;
    346   case 2:
    347     MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
    348     break;
    349   case 3:
    350     MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
    351     break;
    352   }
    353 }
    354 
    355 SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
    356   static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
    357   SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
    358   SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
    359   if (!UnslotedSU) {
    360     return SlotedSU;
    361   } else if (!SlotedSU) {
    362     AssignSlot(UnslotedSU->getInstr(), Slot);
    363     return UnslotedSU;
    364   } else {
    365     //Determine which one to pick (the lesser one)
    366     if (CompareSUnit()(SlotedSU, UnslotedSU)) {
    367       AvailableAlus[AluAny].insert(UnslotedSU);
    368       return SlotedSU;
    369     } else {
    370       AvailableAlus[IndexToID[Slot]].insert(SlotedSU);
    371       AssignSlot(UnslotedSU->getInstr(), Slot);
    372       return UnslotedSU;
    373     }
    374   }
    375 }
    376 
    377 bool R600SchedStrategy::isAvailablesAluEmpty() const {
    378   return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() &&
    379       AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
    380       AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
    381       AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty();
    382 }
    383 
/// Pick the next ALU instruction, building VLIW instruction groups slot
/// by slot. OccupedSlotsMask tracks which of the four channel slots of
/// the current group are already filled (bit N = channel N).
SUnit* R600SchedStrategy::pickAlu() {
  while (!isAvailablesAluEmpty()) {
    // Only at the start of a fresh group may a whole-group instruction
    // (or a batch of discardable copies) be emitted.
    if (!OccupedSlotsMask) {
      // Flush physical reg copies (RA will discard them)
      if (!AvailableAlus[AluDiscarded].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluDiscarded]);
      }
      // If there is a T_XYZW alu available, use it
      if (!AvailableAlus[AluT_XYZW].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluT_XYZW]);
      }
    }
    // Otherwise try to fill the first free channel slot.
    for (unsigned Chan = 0; Chan < 4; ++Chan) {
      bool isOccupied = OccupedSlotsMask & (1 << Chan);
      if (!isOccupied) {
        SUnit *SU = AttemptFillSlot(Chan);
        if (SU) {
          OccupedSlotsMask |= (1 << Chan);
          InstructionsGroupCandidate.push_back(SU->getInstr());
          return SU;
        }
      }
    }
    // Nothing fits the current group; close it and start a new one.
    PrepareNextSlot();
  }
  return NULL;
}
    413 
    414 SUnit* R600SchedStrategy::pickOther(int QID) {
    415   SUnit *SU = 0;
    416   ReadyQueue *AQ = Available[QID];
    417 
    418   if (AQ->empty()) {
    419     MoveUnits(Pending[QID], AQ);
    420   }
    421   if (!AQ->empty()) {
    422     SU = *AQ->begin();
    423     AQ->remove(AQ->begin());
    424   }
    425   return SU;
    426 }
    427 
    428