Home | History | Annotate | Download | only in AMDGPU
      1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// This pass implements instruction packetization for R600. It unsets the
     12 /// isLast bit of instructions inside a bundle and substitutes src registers
     13 /// with PreviousVector when applicable.
     14 //
     15 //===----------------------------------------------------------------------===//
     16 
     17 #include "AMDGPU.h"
     18 #include "AMDGPUSubtarget.h"
     19 #include "R600InstrInfo.h"
     20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
     21 #include "llvm/CodeGen/DFAPacketizer.h"
     22 #include "llvm/CodeGen/MachineDominators.h"
     23 #include "llvm/CodeGen/MachineFunctionPass.h"
     24 #include "llvm/CodeGen/MachineLoopInfo.h"
     25 #include "llvm/CodeGen/Passes.h"
     26 #include "llvm/CodeGen/ScheduleDAG.h"
     27 #include "llvm/Support/Debug.h"
     28 #include "llvm/Support/raw_ostream.h"
     29 
     30 using namespace llvm;
     31 
     32 #define DEBUG_TYPE "packets"
     33 
     34 namespace {
     35 
     36 class R600Packetizer : public MachineFunctionPass {
     37 
     38 public:
     39   static char ID;
     40   R600Packetizer() : MachineFunctionPass(ID) {}
     41 
     42   void getAnalysisUsage(AnalysisUsage &AU) const override {
     43     AU.setPreservesCFG();
     44     AU.addRequired<MachineDominatorTree>();
     45     AU.addPreserved<MachineDominatorTree>();
     46     AU.addRequired<MachineLoopInfo>();
     47     AU.addPreserved<MachineLoopInfo>();
     48     MachineFunctionPass::getAnalysisUsage(AU);
     49   }
     50 
     51   StringRef getPassName() const override { return "R600 Packetizer"; }
     52 
     53   bool runOnMachineFunction(MachineFunction &Fn) override;
     54 };
     55 
     56 class R600PacketizerList : public VLIWPacketizerList {
     57 private:
     58   const R600InstrInfo *TII;
     59   const R600RegisterInfo &TRI;
     60   bool VLIW5;
     61   bool ConsideredInstUsesAlreadyWrittenVectorElement;
     62 
     63   unsigned getSlot(const MachineInstr &MI) const {
     64     return TRI.getHWRegChan(MI.getOperand(0).getReg());
     65   }
     66 
     67   /// \returns register to PV chan mapping for bundle/single instructions that
     68   /// immediately precedes I.
     69   DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I)
     70       const {
     71     DenseMap<unsigned, unsigned> Result;
     72     I--;
     73     if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
     74       return Result;
     75     MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
     76     if (I->isBundle())
     77       BI++;
     78     int LastDstChan = -1;
     79     do {
     80       bool isTrans = false;
     81       int BISlot = getSlot(*BI);
     82       if (LastDstChan >= BISlot)
     83         isTrans = true;
     84       LastDstChan = BISlot;
     85       if (TII->isPredicated(*BI))
     86         continue;
     87       int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write);
     88       if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
     89         continue;
     90       int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst);
     91       if (DstIdx == -1) {
     92         continue;
     93       }
     94       unsigned Dst = BI->getOperand(DstIdx).getReg();
     95       if (isTrans || TII->isTransOnly(*BI)) {
     96         Result[Dst] = R600::PS;
     97         continue;
     98       }
     99       if (BI->getOpcode() == R600::DOT4_r600 ||
    100           BI->getOpcode() == R600::DOT4_eg) {
    101         Result[Dst] = R600::PV_X;
    102         continue;
    103       }
    104       if (Dst == R600::OQAP) {
    105         continue;
    106       }
    107       unsigned PVReg = 0;
    108       switch (TRI.getHWRegChan(Dst)) {
    109       case 0:
    110         PVReg = R600::PV_X;
    111         break;
    112       case 1:
    113         PVReg = R600::PV_Y;
    114         break;
    115       case 2:
    116         PVReg = R600::PV_Z;
    117         break;
    118       case 3:
    119         PVReg = R600::PV_W;
    120         break;
    121       default:
    122         llvm_unreachable("Invalid Chan");
    123       }
    124       Result[Dst] = PVReg;
    125     } while ((++BI)->isBundledWithPred());
    126     return Result;
    127   }
    128 
    129   void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs)
    130       const {
    131     unsigned Ops[] = {
    132       R600::OpName::src0,
    133       R600::OpName::src1,
    134       R600::OpName::src2
    135     };
    136     for (unsigned i = 0; i < 3; i++) {
    137       int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]);
    138       if (OperandIdx < 0)
    139         continue;
    140       unsigned Src = MI.getOperand(OperandIdx).getReg();
    141       const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src);
    142       if (It != PVs.end())
    143         MI.getOperand(OperandIdx).setReg(It->second);
    144     }
    145   }
    146 public:
    147   // Ctor.
    148   R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST,
    149                      MachineLoopInfo &MLI)
    150       : VLIWPacketizerList(MF, MLI, nullptr),
    151         TII(ST.getInstrInfo()),
    152         TRI(TII->getRegisterInfo()) {
    153     VLIW5 = !ST.hasCaymanISA();
    154   }
    155 
    156   // initPacketizerState - initialize some internal flags.
    157   void initPacketizerState() override {
    158     ConsideredInstUsesAlreadyWrittenVectorElement = false;
    159   }
    160 
    161   // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
    162   bool ignorePseudoInstruction(const MachineInstr &MI,
    163                                const MachineBasicBlock *MBB) override {
    164     return false;
    165   }
    166 
    167   // isSoloInstruction - return true if instruction MI can not be packetized
    168   // with any other instruction, which means that MI itself is a packet.
    169   bool isSoloInstruction(const MachineInstr &MI) override {
    170     if (TII->isVector(MI))
    171       return true;
    172     if (!TII->isALUInstr(MI.getOpcode()))
    173       return true;
    174     if (MI.getOpcode() == R600::GROUP_BARRIER)
    175       return true;
    176     // XXX: This can be removed once the packetizer properly handles all the
    177     // LDS instruction group restrictions.
    178     return TII->isLDSInstr(MI.getOpcode());
    179   }
    180 
    181   // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
    182   // together.
    183   bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override {
    184     MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
    185     if (getSlot(*MII) == getSlot(*MIJ))
    186       ConsideredInstUsesAlreadyWrittenVectorElement = true;
    187     // Does MII and MIJ share the same pred_sel ?
    188     int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel),
    189         OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel);
    190     unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
    191         PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
    192     if (PredI != PredJ)
    193       return false;
    194     if (SUJ->isSucc(SUI)) {
    195       for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
    196         const SDep &Dep = SUJ->Succs[i];
    197         if (Dep.getSUnit() != SUI)
    198           continue;
    199         if (Dep.getKind() == SDep::Anti)
    200           continue;
    201         if (Dep.getKind() == SDep::Output)
    202           if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
    203             continue;
    204         return false;
    205       }
    206     }
    207 
    208     bool ARDef =
    209         TII->definesAddressRegister(*MII) || TII->definesAddressRegister(*MIJ);
    210     bool ARUse =
    211         TII->usesAddressRegister(*MII) || TII->usesAddressRegister(*MIJ);
    212 
    213     return !ARDef || !ARUse;
    214   }
    215 
    216   // isLegalToPruneDependencies - Is it legal to prune dependece between SUI
    217   // and SUJ.
    218   bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override {
    219     return false;
    220   }
    221 
    222   void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
    223     unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last);
    224     MI->getOperand(LastOp).setImm(Bit);
    225   }
    226 
    227   bool isBundlableWithCurrentPMI(MachineInstr &MI,
    228                                  const DenseMap<unsigned, unsigned> &PV,
    229                                  std::vector<R600InstrInfo::BankSwizzle> &BS,
    230                                  bool &isTransSlot) {
    231     isTransSlot = TII->isTransOnly(MI);
    232     assert (!isTransSlot || VLIW5);
    233 
    234     // Is the dst reg sequence legal ?
    235     if (!isTransSlot && !CurrentPacketMIs.empty()) {
    236       if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) {
    237         if (ConsideredInstUsesAlreadyWrittenVectorElement &&
    238             !TII->isVectorOnly(MI) && VLIW5) {
    239           isTransSlot = true;
    240           LLVM_DEBUG({
    241             dbgs() << "Considering as Trans Inst :";
    242             MI.dump();
    243           });
    244         }
    245         else
    246           return false;
    247       }
    248     }
    249 
    250     // Are the Constants limitations met ?
    251     CurrentPacketMIs.push_back(&MI);
    252     if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
    253       LLVM_DEBUG({
    254         dbgs() << "Couldn't pack :\n";
    255         MI.dump();
    256         dbgs() << "with the following packets :\n";
    257         for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
    258           CurrentPacketMIs[i]->dump();
    259           dbgs() << "\n";
    260         }
    261         dbgs() << "because of Consts read limitations\n";
    262       });
    263       CurrentPacketMIs.pop_back();
    264       return false;
    265     }
    266 
    267     // Is there a BankSwizzle set that meet Read Port limitations ?
    268     if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
    269             PV, BS, isTransSlot)) {
    270       LLVM_DEBUG({
    271         dbgs() << "Couldn't pack :\n";
    272         MI.dump();
    273         dbgs() << "with the following packets :\n";
    274         for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
    275           CurrentPacketMIs[i]->dump();
    276           dbgs() << "\n";
    277         }
    278         dbgs() << "because of Read port limitations\n";
    279       });
    280       CurrentPacketMIs.pop_back();
    281       return false;
    282     }
    283 
    284     // We cannot read LDS source registers from the Trans slot.
    285     if (isTransSlot && TII->readsLDSSrcReg(MI))
    286       return false;
    287 
    288     CurrentPacketMIs.pop_back();
    289     return true;
    290   }
    291 
    292   MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override {
    293     MachineBasicBlock::iterator FirstInBundle =
    294         CurrentPacketMIs.empty() ? &MI : CurrentPacketMIs.front();
    295     const DenseMap<unsigned, unsigned> &PV =
    296         getPreviousVector(FirstInBundle);
    297     std::vector<R600InstrInfo::BankSwizzle> BS;
    298     bool isTransSlot;
    299 
    300     if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
    301       for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
    302         MachineInstr *MI = CurrentPacketMIs[i];
    303         unsigned Op = TII->getOperandIdx(MI->getOpcode(),
    304             R600::OpName::bank_swizzle);
    305         MI->getOperand(Op).setImm(BS[i]);
    306       }
    307       unsigned Op =
    308           TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle);
    309       MI.getOperand(Op).setImm(BS.back());
    310       if (!CurrentPacketMIs.empty())
    311         setIsLastBit(CurrentPacketMIs.back(), 0);
    312       substitutePV(MI, PV);
    313       MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
    314       if (isTransSlot) {
    315         endPacket(std::next(It)->getParent(), std::next(It));
    316       }
    317       return It;
    318     }
    319     endPacket(MI.getParent(), MI);
    320     if (TII->isTransOnly(MI))
    321       return MI;
    322     return VLIWPacketizerList::addToPacket(MI);
    323   }
    324 };
    325 
    326 bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
    327   const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
    328   const R600InstrInfo *TII = ST.getInstrInfo();
    329 
    330   MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
    331 
    332   // Instantiate the packetizer.
    333   R600PacketizerList Packetizer(Fn, ST, MLI);
    334 
    335   // DFA state table should not be empty.
    336   assert(Packetizer.getResourceTracker() && "Empty DFA table!");
    337   assert(Packetizer.getResourceTracker()->getInstrItins());
    338 
    339   if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty())
    340     return false;
    341 
    342   //
    343   // Loop over all basic blocks and remove KILL pseudo-instructions
    344   // These instructions confuse the dependence analysis. Consider:
    345   // D0 = ...   (Insn 0)
    346   // R0 = KILL R0, D0 (Insn 1)
    347   // R0 = ... (Insn 2)
    348   // Here, Insn 1 will result in the dependence graph not emitting an output
    349   // dependence between Insn 0 and Insn 2. This can lead to incorrect
    350   // packetization
    351   //
    352   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
    353        MBB != MBBe; ++MBB) {
    354     MachineBasicBlock::iterator End = MBB->end();
    355     MachineBasicBlock::iterator MI = MBB->begin();
    356     while (MI != End) {
    357       if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF ||
    358           (MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) {
    359         MachineBasicBlock::iterator DeleteMI = MI;
    360         ++MI;
    361         MBB->erase(DeleteMI);
    362         End = MBB->end();
    363         continue;
    364       }
    365       ++MI;
    366     }
    367   }
    368 
    369   // Loop over all of the basic blocks.
    370   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
    371        MBB != MBBe; ++MBB) {
    372     // Find scheduling regions and schedule / packetize each region.
    373     unsigned RemainingCount = MBB->size();
    374     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
    375         RegionEnd != MBB->begin();) {
    376       // The next region starts above the previous region. Look backward in the
    377       // instruction stream until we find the nearest boundary.
    378       MachineBasicBlock::iterator I = RegionEnd;
    379       for(;I != MBB->begin(); --I, --RemainingCount) {
    380         if (TII->isSchedulingBoundary(*std::prev(I), &*MBB, Fn))
    381           break;
    382       }
    383       I = MBB->begin();
    384 
    385       // Skip empty scheduling regions.
    386       if (I == RegionEnd) {
    387         RegionEnd = std::prev(RegionEnd);
    388         --RemainingCount;
    389         continue;
    390       }
    391       // Skip regions with one instruction.
    392       if (I == std::prev(RegionEnd)) {
    393         RegionEnd = std::prev(RegionEnd);
    394         continue;
    395       }
    396 
    397       Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd);
    398       RegionEnd = I;
    399     }
    400   }
    401 
    402   return true;
    403 
    404 }
    405 
    406 } // end anonymous namespace
    407 
// Pass registration. The pass is registered under DEBUG_TYPE ("packets") with
// the description "R600 Packetizer"; no dependency entries are listed between
// the BEGIN and END macros.
INITIALIZE_PASS_BEGIN(R600Packetizer, DEBUG_TYPE,
                     "R600 Packetizer", false, false)
INITIALIZE_PASS_END(R600Packetizer, DEBUG_TYPE,
                    "R600 Packetizer", false, false)

// Definition of the static pass ID declared in R600Packetizer.
char R600Packetizer::ID = 0;

// Reference to the pass ID, defined in the llvm namespace.
char &llvm::R600PacketizerID = R600Packetizer::ID;
    416 
    417 llvm::FunctionPass *llvm::createR600Packetizer() {
    418   return new R600Packetizer();
    419 }
    420