Home | History | Annotate | Download | only in R600
      1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
/// This pass implements instruction packetization for R600. It clears the
/// isLast bit of instructions inside a bundle and substitutes source registers
/// with PreviousVector (PV) registers when applicable.
     14 //
     15 //===----------------------------------------------------------------------===//
     16 
     17 #define DEBUG_TYPE "packets"
     18 #include "llvm/Support/Debug.h"
     19 #include "AMDGPU.h"
     20 #include "R600InstrInfo.h"
     21 #include "llvm/CodeGen/DFAPacketizer.h"
     22 #include "llvm/CodeGen/MachineDominators.h"
     23 #include "llvm/CodeGen/MachineFunctionPass.h"
     24 #include "llvm/CodeGen/MachineLoopInfo.h"
     25 #include "llvm/CodeGen/Passes.h"
     26 #include "llvm/CodeGen/ScheduleDAG.h"
     27 #include "llvm/Support/raw_ostream.h"
     28 
     29 using namespace llvm;
     30 
     31 namespace {
     32 
     33 class R600Packetizer : public MachineFunctionPass {
     34 
     35 public:
     36   static char ID;
     37   R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {}
     38 
     39   void getAnalysisUsage(AnalysisUsage &AU) const {
     40     AU.setPreservesCFG();
     41     AU.addRequired<MachineDominatorTree>();
     42     AU.addPreserved<MachineDominatorTree>();
     43     AU.addRequired<MachineLoopInfo>();
     44     AU.addPreserved<MachineLoopInfo>();
     45     MachineFunctionPass::getAnalysisUsage(AU);
     46   }
     47 
     48   const char *getPassName() const {
     49     return "R600 Packetizer";
     50   }
     51 
     52   bool runOnMachineFunction(MachineFunction &Fn);
     53 };
     54 char R600Packetizer::ID = 0;
     55 
     56 class R600PacketizerList : public VLIWPacketizerList {
     57 
     58 private:
     59   const R600InstrInfo *TII;
     60   const R600RegisterInfo &TRI;
     61 
     62   unsigned getSlot(const MachineInstr *MI) const {
     63     return TRI.getHWRegChan(MI->getOperand(0).getReg());
     64   }
     65 
     66   /// \returns register to PV chan mapping for bundle/single instructions that
     67   /// immediatly precedes I.
     68   DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I)
     69       const {
     70     DenseMap<unsigned, unsigned> Result;
     71     I--;
     72     if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
     73       return Result;
     74     MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
     75     if (I->isBundle())
     76       BI++;
     77     do {
     78       if (TII->isPredicated(BI))
     79         continue;
     80       int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
     81       if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
     82         continue;
     83       int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst);
     84       if (DstIdx == -1) {
     85         continue;
     86       }
     87       unsigned Dst = BI->getOperand(DstIdx).getReg();
     88       if (TII->isTransOnly(BI)) {
     89         Result[Dst] = AMDGPU::PS;
     90         continue;
     91       }
     92       if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
     93           BI->getOpcode() == AMDGPU::DOT4_eg) {
     94         Result[Dst] = AMDGPU::PV_X;
     95         continue;
     96       }
     97       if (Dst == AMDGPU::OQAP) {
     98         continue;
     99       }
    100       unsigned PVReg = 0;
    101       switch (TRI.getHWRegChan(Dst)) {
    102       case 0:
    103         PVReg = AMDGPU::PV_X;
    104         break;
    105       case 1:
    106         PVReg = AMDGPU::PV_Y;
    107         break;
    108       case 2:
    109         PVReg = AMDGPU::PV_Z;
    110         break;
    111       case 3:
    112         PVReg = AMDGPU::PV_W;
    113         break;
    114       default:
    115         llvm_unreachable("Invalid Chan");
    116       }
    117       Result[Dst] = PVReg;
    118     } while ((++BI)->isBundledWithPred());
    119     return Result;
    120   }
    121 
    122   void substitutePV(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PVs)
    123       const {
    124     unsigned Ops[] = {
    125       AMDGPU::OpName::src0,
    126       AMDGPU::OpName::src1,
    127       AMDGPU::OpName::src2
    128     };
    129     for (unsigned i = 0; i < 3; i++) {
    130       int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]);
    131       if (OperandIdx < 0)
    132         continue;
    133       unsigned Src = MI->getOperand(OperandIdx).getReg();
    134       const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src);
    135       if (It != PVs.end())
    136         MI->getOperand(OperandIdx).setReg(It->second);
    137     }
    138   }
    139 public:
    140   // Ctor.
    141   R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
    142                         MachineDominatorTree &MDT)
    143   : VLIWPacketizerList(MF, MLI, MDT, true),
    144     TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())),
    145     TRI(TII->getRegisterInfo()) { }
    146 
    147   // initPacketizerState - initialize some internal flags.
    148   void initPacketizerState() { }
    149 
    150   // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
    151   bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) {
    152     return false;
    153   }
    154 
    155   // isSoloInstruction - return true if instruction MI can not be packetized
    156   // with any other instruction, which means that MI itself is a packet.
    157   bool isSoloInstruction(MachineInstr *MI) {
    158     if (TII->isVector(*MI))
    159       return true;
    160     if (!TII->isALUInstr(MI->getOpcode()))
    161       return true;
    162     if (MI->getOpcode() == AMDGPU::GROUP_BARRIER)
    163       return true;
    164     // XXX: This can be removed once the packetizer properly handles all the
    165     // LDS instruction group restrictions.
    166     if (TII->isLDSInstr(MI->getOpcode()))
    167       return true;
    168     return false;
    169   }
    170 
    171   // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
    172   // together.
    173   bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
    174     MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
    175     if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII))
    176       return false;
    177     // Does MII and MIJ share the same pred_sel ?
    178     int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
    179         OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
    180     unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
    181         PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
    182     if (PredI != PredJ)
    183       return false;
    184     if (SUJ->isSucc(SUI)) {
    185       for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
    186         const SDep &Dep = SUJ->Succs[i];
    187         if (Dep.getSUnit() != SUI)
    188           continue;
    189         if (Dep.getKind() == SDep::Anti)
    190           continue;
    191         if (Dep.getKind() == SDep::Output)
    192           if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
    193             continue;
    194         return false;
    195       }
    196     }
    197     return true;
    198   }
    199 
    200   // isLegalToPruneDependencies - Is it legal to prune dependece between SUI
    201   // and SUJ.
    202   bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {return false;}
    203 
    204   void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
    205     unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last);
    206     MI->getOperand(LastOp).setImm(Bit);
    207   }
    208 
    209   bool isBundlableWithCurrentPMI(MachineInstr *MI,
    210                                  const DenseMap<unsigned, unsigned> &PV,
    211                                  std::vector<R600InstrInfo::BankSwizzle> &BS,
    212                                  bool &isTransSlot) {
    213     isTransSlot = TII->isTransOnly(MI);
    214 
    215     // Are the Constants limitations met ?
    216     CurrentPacketMIs.push_back(MI);
    217     if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
    218       DEBUG(
    219         dbgs() << "Couldn't pack :\n";
    220         MI->dump();
    221         dbgs() << "with the following packets :\n";
    222         for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
    223           CurrentPacketMIs[i]->dump();
    224           dbgs() << "\n";
    225         }
    226         dbgs() << "because of Consts read limitations\n";
    227       );
    228       CurrentPacketMIs.pop_back();
    229       return false;
    230     }
    231 
    232     // Is there a BankSwizzle set that meet Read Port limitations ?
    233     if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
    234             PV, BS, isTransSlot)) {
    235       DEBUG(
    236         dbgs() << "Couldn't pack :\n";
    237         MI->dump();
    238         dbgs() << "with the following packets :\n";
    239         for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
    240           CurrentPacketMIs[i]->dump();
    241           dbgs() << "\n";
    242         }
    243         dbgs() << "because of Read port limitations\n";
    244       );
    245       CurrentPacketMIs.pop_back();
    246       return false;
    247     }
    248 
    249     CurrentPacketMIs.pop_back();
    250     return true;
    251   }
    252 
    253   MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
    254     MachineBasicBlock::iterator FirstInBundle =
    255         CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();
    256     const DenseMap<unsigned, unsigned> &PV =
    257         getPreviousVector(FirstInBundle);
    258     std::vector<R600InstrInfo::BankSwizzle> BS;
    259     bool isTransSlot;
    260 
    261     if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
    262       for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
    263         MachineInstr *MI = CurrentPacketMIs[i];
    264         unsigned Op = TII->getOperandIdx(MI->getOpcode(),
    265             AMDGPU::OpName::bank_swizzle);
    266         MI->getOperand(Op).setImm(BS[i]);
    267       }
    268       unsigned Op = TII->getOperandIdx(MI->getOpcode(),
    269           AMDGPU::OpName::bank_swizzle);
    270       MI->getOperand(Op).setImm(BS.back());
    271       if (!CurrentPacketMIs.empty())
    272         setIsLastBit(CurrentPacketMIs.back(), 0);
    273       substitutePV(MI, PV);
    274       MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
    275       if (isTransSlot) {
    276         endPacket(llvm::next(It)->getParent(), llvm::next(It));
    277       }
    278       return It;
    279     }
    280     endPacket(MI->getParent(), MI);
    281     return VLIWPacketizerList::addToPacket(MI);
    282   }
    283 };
    284 
/// Packetizes every basic block of \p Fn.
///
/// The function first erases KILL pseudo-instructions and CF_ALU instructions
/// whose operand 8 holds a zero immediate, then hands the instructions of
/// each block to R600PacketizerList::PacketizeMIs.
///
/// \returns true unconditionally.
bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
  const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();

  // Instantiate the packetizer.
  R600PacketizerList Packetizer(Fn, MLI, MDT);

  // DFA state table should not be empty.
  assert(Packetizer.getResourceTracker() && "Empty DFA table!");

  //
  // Loop over all basic blocks and remove KILL pseudo-instructions
  // These instructions confuse the dependence analysis. Consider:
  // D0 = ...   (Insn 0)
  // R0 = KILL R0, D0 (Insn 1)
  // R0 = ... (Insn 2)
  // Here, Insn 1 will result in the dependence graph not emitting an output
  // dependence between Insn 0 and Insn 2. This can lead to incorrect
  // packetization
  //
  // CF_ALU instructions whose operand 8 is a zero immediate are erased by the
  // same loop.
  // NOTE(review): the meaning of CF_ALU operand 8 is not visible in this
  // file - confirm against the instruction definition.
  //
  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
       MBB != MBBe; ++MBB) {
    MachineBasicBlock::iterator End = MBB->end();
    MachineBasicBlock::iterator MI = MBB->begin();
    while (MI != End) {
      if (MI->isKill() ||
          (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
        // Advance before erasing so that MI never refers to the erased
        // instruction.
        MachineBasicBlock::iterator DeleteMI = MI;
        ++MI;
        MBB->erase(DeleteMI);
        End = MBB->end();
        continue;
      }
      ++MI;
    }
  }

  // Loop over all of the basic blocks.
  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
       MBB != MBBe; ++MBB) {
    // Find scheduling regions and schedule / packetize each region.
    // NOTE(review): RemainingCount is only ever decremented; nothing in this
    // function reads it.
    unsigned RemainingCount = MBB->size();
    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
        RegionEnd != MBB->begin();) {
      // The next region starts above the previous region. Look backward in the
      // instruction stream until we find the nearest boundary.
      MachineBasicBlock::iterator I = RegionEnd;
      for(;I != MBB->begin(); --I, --RemainingCount) {
        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, Fn))
          break;
      }
      // NOTE(review): this assignment discards the boundary located by the
      // loop above, so the region handed to PacketizeMIs always starts at the
      // first instruction of the block. Confirm this is intended.
      I = MBB->begin();

      // Skip empty scheduling regions.
      if (I == RegionEnd) {
        RegionEnd = llvm::prior(RegionEnd);
        --RemainingCount;
        continue;
      }
      // Skip regions with one instruction.
      if (I == llvm::prior(RegionEnd)) {
        RegionEnd = llvm::prior(RegionEnd);
        continue;
      }

      Packetizer.PacketizeMIs(MBB, I, RegionEnd);
      // I is the block's begin here, so this ends the region loop.
      RegionEnd = I;
    }
  }

  return true;

}
    359 
    360 } // end anonymous namespace
    361 
    362 llvm::FunctionPass *llvm::createR600Packetizer(TargetMachine &tm) {
    363   return new R600Packetizer(tm);
    364 }
    365