Home | History | Annotate | Download | only in AMDGPU
      1 //===- R600MergeVectorRegisters.cpp ---------------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
/// This pass merges the inputs of swizzlable instructions into vectors that
/// share common data and/or have enough undef subregs, using swizzle abilities.
     13 ///
     14 /// For instance let's consider the following pseudo code :
     15 /// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
     16 /// ...
     17 /// %7 = REG_SEQ %1, sub0, %3, sub1, undef, sub2, %4, sub3
     18 /// (swizzable Inst) %7, SwizzleMask : sub0, sub1, sub2, sub3
     19 ///
     20 /// is turned into :
     21 /// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
     22 /// ...
     23 /// %7 = INSERT_SUBREG %4, sub3
     24 /// (swizzable Inst) %7, SwizzleMask : sub0, sub2, sub1, sub3
     25 ///
/// This allows regalloc to reduce register pressure for vector registers and
/// to reduce the MOV count.
     28 //===----------------------------------------------------------------------===//
     29 
     30 #include "AMDGPU.h"
     31 #include "AMDGPUSubtarget.h"
     32 #include "R600Defines.h"
     33 #include "R600InstrInfo.h"
     34 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
     35 #include "llvm/ADT/DenseMap.h"
     36 #include "llvm/ADT/STLExtras.h"
     37 #include "llvm/ADT/StringRef.h"
     38 #include "llvm/CodeGen/MachineBasicBlock.h"
     39 #include "llvm/CodeGen/MachineDominators.h"
     40 #include "llvm/CodeGen/MachineFunction.h"
     41 #include "llvm/CodeGen/MachineFunctionPass.h"
     42 #include "llvm/CodeGen/MachineInstr.h"
     43 #include "llvm/CodeGen/MachineInstrBuilder.h"
     44 #include "llvm/CodeGen/MachineLoopInfo.h"
     45 #include "llvm/CodeGen/MachineOperand.h"
     46 #include "llvm/CodeGen/MachineRegisterInfo.h"
     47 #include "llvm/IR/DebugLoc.h"
     48 #include "llvm/Pass.h"
     49 #include "llvm/Support/Debug.h"
     50 #include "llvm/Support/ErrorHandling.h"
     51 #include "llvm/Support/raw_ostream.h"
     52 #include <cassert>
     53 #include <utility>
     54 #include <vector>
     55 
     56 using namespace llvm;
     57 
     58 #define DEBUG_TYPE "vec-merger"
     59 
     60 static bool
     61 isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
     62   for (MachineRegisterInfo::def_instr_iterator It = MRI.def_instr_begin(Reg),
     63       E = MRI.def_instr_end(); It != E; ++It) {
     64     return (*It).isImplicitDef();
     65   }
     66   if (MRI.isReserved(Reg)) {
     67     return false;
     68   }
     69   llvm_unreachable("Reg without a def");
     70   return false;
     71 }
     72 
     73 namespace {
     74 
     75 class RegSeqInfo {
     76 public:
     77   MachineInstr *Instr;
     78   DenseMap<unsigned, unsigned> RegToChan;
     79   std::vector<unsigned> UndefReg;
     80 
     81   RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) {
     82     assert(MI->getOpcode() == R600::REG_SEQUENCE);
     83     for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) {
     84       MachineOperand &MO = Instr->getOperand(i);
     85       unsigned Chan = Instr->getOperand(i + 1).getImm();
     86       if (isImplicitlyDef(MRI, MO.getReg()))
     87         UndefReg.push_back(Chan);
     88       else
     89         RegToChan[MO.getReg()] = Chan;
     90     }
     91   }
     92 
     93   RegSeqInfo() = default;
     94 
     95   bool operator==(const RegSeqInfo &RSI) const {
     96     return RSI.Instr == Instr;
     97   }
     98 };
     99 
    100 class R600VectorRegMerger : public MachineFunctionPass {
    101 private:
    102   using InstructionSetMap = DenseMap<unsigned, std::vector<MachineInstr *>>;
    103 
    104   MachineRegisterInfo *MRI;
    105   const R600InstrInfo *TII = nullptr;
    106   DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq;
    107   InstructionSetMap PreviousRegSeqByReg;
    108   InstructionSetMap PreviousRegSeqByUndefCount;
    109 
    110   bool canSwizzle(const MachineInstr &MI) const;
    111   bool areAllUsesSwizzeable(unsigned Reg) const;
    112   void SwizzleInput(MachineInstr &,
    113       const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const;
    114   bool tryMergeVector(const RegSeqInfo *Untouched, RegSeqInfo *ToMerge,
    115       std::vector<std::pair<unsigned, unsigned>> &Remap) const;
    116   bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
    117       std::vector<std::pair<unsigned, unsigned>> &RemapChan);
    118   bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
    119       std::vector<std::pair<unsigned, unsigned>> &RemapChan);
    120   MachineInstr *RebuildVector(RegSeqInfo *MI, const RegSeqInfo *BaseVec,
    121       const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const;
    122   void RemoveMI(MachineInstr *);
    123   void trackRSI(const RegSeqInfo &RSI);
    124 
    125 public:
    126   static char ID;
    127 
    128   R600VectorRegMerger() : MachineFunctionPass(ID) {}
    129 
    130   void getAnalysisUsage(AnalysisUsage &AU) const override {
    131     AU.setPreservesCFG();
    132     AU.addRequired<MachineDominatorTree>();
    133     AU.addPreserved<MachineDominatorTree>();
    134     AU.addRequired<MachineLoopInfo>();
    135     AU.addPreserved<MachineLoopInfo>();
    136     MachineFunctionPass::getAnalysisUsage(AU);
    137   }
    138 
    139   StringRef getPassName() const override {
    140     return "R600 Vector Registers Merge Pass";
    141   }
    142 
    143   bool runOnMachineFunction(MachineFunction &Fn) override;
    144 };
    145 
    146 } // end anonymous namespace
    147 
// Pass registration through the INITIALIZE_PASS macros, using DEBUG_TYPE as
// the pass argument. No dependency entries are listed between BEGIN and END.
// NOTE(review): getAnalysisUsage() requires MachineDominatorTree and
// MachineLoopInfo; confirm whether they should be declared as dependencies
// here.
INITIALIZE_PASS_BEGIN(R600VectorRegMerger, DEBUG_TYPE,
                     "R600 Vector Reg Merger", false, false)
INITIALIZE_PASS_END(R600VectorRegMerger, DEBUG_TYPE,
                    "R600 Vector Reg Merger", false, false)

// Definition of the static ID member passed to the MachineFunctionPass
// constructor.
char R600VectorRegMerger::ID = 0;

// Reference to the pass ID, exposed in the llvm namespace.
char &llvm::R600VectorRegMergerID = R600VectorRegMerger::ID;
    156 
    157 bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI)
    158     const {
    159   if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
    160     return true;
    161   switch (MI.getOpcode()) {
    162   case R600::R600_ExportSwz:
    163   case R600::EG_ExportSwz:
    164     return true;
    165   default:
    166     return false;
    167   }
    168 }
    169 
    170 bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched,
    171     RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned>> &Remap)
    172     const {
    173   unsigned CurrentUndexIdx = 0;
    174   for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(),
    175       E = ToMerge->RegToChan.end(); It != E; ++It) {
    176     DenseMap<unsigned, unsigned>::const_iterator PosInUntouched =
    177         Untouched->RegToChan.find((*It).first);
    178     if (PosInUntouched != Untouched->RegToChan.end()) {
    179       Remap.push_back(std::pair<unsigned, unsigned>
    180           ((*It).second, (*PosInUntouched).second));
    181       continue;
    182     }
    183     if (CurrentUndexIdx >= Untouched->UndefReg.size())
    184       return false;
    185     Remap.push_back(std::pair<unsigned, unsigned>
    186         ((*It).second, Untouched->UndefReg[CurrentUndexIdx++]));
    187   }
    188 
    189   return true;
    190 }
    191 
    192 static
    193 unsigned getReassignedChan(
    194     const std::vector<std::pair<unsigned, unsigned>> &RemapChan,
    195     unsigned Chan) {
    196   for (unsigned j = 0, je = RemapChan.size(); j < je; j++) {
    197     if (RemapChan[j].first == Chan)
    198       return RemapChan[j].second;
    199   }
    200   llvm_unreachable("Chan wasn't reassigned");
    201 }
    202 
    203 MachineInstr *R600VectorRegMerger::RebuildVector(
    204     RegSeqInfo *RSI, const RegSeqInfo *BaseRSI,
    205     const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const {
    206   unsigned Reg = RSI->Instr->getOperand(0).getReg();
    207   MachineBasicBlock::iterator Pos = RSI->Instr;
    208   MachineBasicBlock &MBB = *Pos->getParent();
    209   DebugLoc DL = Pos->getDebugLoc();
    210 
    211   unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg();
    212   DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan;
    213   std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg;
    214   for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(),
    215       E = RSI->RegToChan.end(); It != E; ++It) {
    216     unsigned DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass);
    217     unsigned SubReg = (*It).first;
    218     unsigned Swizzle = (*It).second;
    219     unsigned Chan = getReassignedChan(RemapChan, Swizzle);
    220 
    221     MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(R600::INSERT_SUBREG),
    222         DstReg)
    223         .addReg(SrcVec)
    224         .addReg(SubReg)
    225         .addImm(Chan);
    226     UpdatedRegToChan[SubReg] = Chan;
    227     std::vector<unsigned>::iterator ChanPos = llvm::find(UpdatedUndef, Chan);
    228     if (ChanPos != UpdatedUndef.end())
    229       UpdatedUndef.erase(ChanPos);
    230     assert(!is_contained(UpdatedUndef, Chan) &&
    231            "UpdatedUndef shouldn't contain Chan more than once!");
    232     LLVM_DEBUG(dbgs() << "    ->"; Tmp->dump(););
    233     (void)Tmp;
    234     SrcVec = DstReg;
    235   }
    236   MachineInstr *NewMI =
    237       BuildMI(MBB, Pos, DL, TII->get(R600::COPY), Reg).addReg(SrcVec);
    238   LLVM_DEBUG(dbgs() << "    ->"; NewMI->dump(););
    239 
    240   LLVM_DEBUG(dbgs() << "  Updating Swizzle:\n");
    241   for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
    242       E = MRI->use_instr_end(); It != E; ++It) {
    243     LLVM_DEBUG(dbgs() << "    "; (*It).dump(); dbgs() << "    ->");
    244     SwizzleInput(*It, RemapChan);
    245     LLVM_DEBUG((*It).dump());
    246   }
    247   RSI->Instr->eraseFromParent();
    248 
    249   // Update RSI
    250   RSI->Instr = NewMI;
    251   RSI->RegToChan = UpdatedRegToChan;
    252   RSI->UndefReg = UpdatedUndef;
    253 
    254   return NewMI;
    255 }
    256 
    257 void R600VectorRegMerger::RemoveMI(MachineInstr *MI) {
    258   for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(),
    259       E = PreviousRegSeqByReg.end(); It != E; ++It) {
    260     std::vector<MachineInstr *> &MIs = (*It).second;
    261     MIs.erase(llvm::find(MIs, MI), MIs.end());
    262   }
    263   for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(),
    264       E = PreviousRegSeqByUndefCount.end(); It != E; ++It) {
    265     std::vector<MachineInstr *> &MIs = (*It).second;
    266     MIs.erase(llvm::find(MIs, MI), MIs.end());
    267   }
    268 }
    269 
    270 void R600VectorRegMerger::SwizzleInput(MachineInstr &MI,
    271     const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const {
    272   unsigned Offset;
    273   if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
    274     Offset = 2;
    275   else
    276     Offset = 3;
    277   for (unsigned i = 0; i < 4; i++) {
    278     unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1;
    279     for (unsigned j = 0, e = RemapChan.size(); j < e; j++) {
    280       if (RemapChan[j].first == Swizzle) {
    281         MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1);
    282         break;
    283       }
    284     }
    285   }
    286 }
    287 
    288 bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const {
    289   for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
    290       E = MRI->use_instr_end(); It != E; ++It) {
    291     if (!canSwizzle(*It))
    292       return false;
    293   }
    294   return true;
    295 }
    296 
    297 bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI,
    298     RegSeqInfo &CompatibleRSI,
    299     std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
    300   for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(),
    301       MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) {
    302     if (!MOp->isReg())
    303       continue;
    304     if (PreviousRegSeqByReg[MOp->getReg()].empty())
    305       continue;
    306     for (MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) {
    307       CompatibleRSI = PreviousRegSeq[MI];
    308       if (RSI == CompatibleRSI)
    309         continue;
    310       if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan))
    311         return true;
    312     }
    313   }
    314   return false;
    315 }
    316 
    317 bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI,
    318     RegSeqInfo &CompatibleRSI,
    319     std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
    320   unsigned NeededUndefs = 4 - RSI.UndefReg.size();
    321   if (PreviousRegSeqByUndefCount[NeededUndefs].empty())
    322     return false;
    323   std::vector<MachineInstr *> &MIs =
    324       PreviousRegSeqByUndefCount[NeededUndefs];
    325   CompatibleRSI = PreviousRegSeq[MIs.back()];
    326   tryMergeVector(&CompatibleRSI, &RSI, RemapChan);
    327   return true;
    328 }
    329 
    330 void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) {
    331   for (DenseMap<unsigned, unsigned>::const_iterator
    332   It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) {
    333     PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr);
    334   }
    335   PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr);
    336   PreviousRegSeq[RSI.Instr] = RSI;
    337 }
    338 
    339 bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
    340   if (skipFunction(Fn.getFunction()))
    341     return false;
    342 
    343   const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
    344   TII = ST.getInstrInfo();
    345   MRI = &Fn.getRegInfo();
    346 
    347   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
    348        MBB != MBBe; ++MBB) {
    349     MachineBasicBlock *MB = &*MBB;
    350     PreviousRegSeq.clear();
    351     PreviousRegSeqByReg.clear();
    352     PreviousRegSeqByUndefCount.clear();
    353 
    354     for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end();
    355          MII != MIIE; ++MII) {
    356       MachineInstr &MI = *MII;
    357       if (MI.getOpcode() != R600::REG_SEQUENCE) {
    358         if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) {
    359           unsigned Reg = MI.getOperand(1).getReg();
    360           for (MachineRegisterInfo::def_instr_iterator
    361                It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end();
    362                It != E; ++It) {
    363             RemoveMI(&(*It));
    364           }
    365         }
    366         continue;
    367       }
    368 
    369       RegSeqInfo RSI(*MRI, &MI);
    370 
    371       // All uses of MI are swizzeable ?
    372       unsigned Reg = MI.getOperand(0).getReg();
    373       if (!areAllUsesSwizzeable(Reg))
    374         continue;
    375 
    376       LLVM_DEBUG({
    377         dbgs() << "Trying to optimize ";
    378         MI.dump();
    379       });
    380 
    381       RegSeqInfo CandidateRSI;
    382       std::vector<std::pair<unsigned, unsigned>> RemapChan;
    383       LLVM_DEBUG(dbgs() << "Using common slots...\n";);
    384       if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) {
    385         // Remove CandidateRSI mapping
    386         RemoveMI(CandidateRSI.Instr);
    387         MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
    388         trackRSI(RSI);
    389         continue;
    390       }
    391       LLVM_DEBUG(dbgs() << "Using free slots...\n";);
    392       RemapChan.clear();
    393       if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) {
    394         RemoveMI(CandidateRSI.Instr);
    395         MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
    396         trackRSI(RSI);
    397         continue;
    398       }
    399       //Failed to merge
    400       trackRSI(RSI);
    401     }
    402   }
    403   return false;
    404 }
    405 
    406 llvm::FunctionPass *llvm::createR600VectorRegMerger() {
    407   return new R600VectorRegMerger();
    408 }
    409