Home | History | Annotate | Download | only in AMDGPU
      1 //===-- SIMachineScheduler.h - SI Scheduler Interface -*- C++ -*-------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// \brief SI Machine Scheduler interface
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
     16 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
     17 
     18 #include "SIInstrInfo.h"
     19 #include "llvm/CodeGen/MachineScheduler.h"
     20 #include "llvm/CodeGen/RegisterPressure.h"
     21 
     22 using namespace llvm;
     23 
     24 namespace llvm {
     25 
     26 enum SIScheduleCandReason {
     27   NoCand,
     28   RegUsage,
     29   Latency,
     30   Successor,
     31   Depth,
     32   NodeOrder
     33 };
     34 
     35 struct SISchedulerCandidate {
     36   // The reason for this candidate.
     37   SIScheduleCandReason Reason;
     38 
     39   // Set of reasons that apply to multiple candidates.
     40   uint32_t RepeatReasonSet;
     41 
     42   SISchedulerCandidate()
     43     :  Reason(NoCand), RepeatReasonSet(0) {}
     44 
     45   bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
     46   void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
     47 };
     48 
     49 class SIScheduleDAGMI;
     50 class SIScheduleBlockCreator;
     51 
     52 class SIScheduleBlock {
     53   SIScheduleDAGMI *DAG;
     54   SIScheduleBlockCreator *BC;
     55 
     56   std::vector<SUnit*> SUnits;
     57   std::map<unsigned, unsigned> NodeNum2Index;
     58   std::vector<SUnit*> TopReadySUs;
     59   std::vector<SUnit*> ScheduledSUnits;
     60 
     61   /// The top of the unscheduled zone.
     62   IntervalPressure TopPressure;
     63   RegPressureTracker TopRPTracker;
     64 
     65   // Pressure: number of said class of registers needed to
     66   // store the live virtual and real registers.
     67   // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
     68   // Pressure of additional registers required inside the block.
     69   std::vector<unsigned> InternalAdditionnalPressure;
     70   // Pressure of input and output registers
     71   std::vector<unsigned> LiveInPressure;
     72   std::vector<unsigned> LiveOutPressure;
     73   // Registers required by the block, and outputs.
     74   // We do track only virtual registers.
     75   // Note that some registers are not 32 bits,
     76   // and thus the pressure is not equal
     77   // to the number of live registers.
     78   std::set<unsigned> LiveInRegs;
     79   std::set<unsigned> LiveOutRegs;
     80 
     81   bool Scheduled;
     82   bool HighLatencyBlock;
     83 
     84   std::vector<unsigned> HasLowLatencyNonWaitedParent;
     85 
     86   // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
     87   unsigned ID;
     88 
     89   std::vector<SIScheduleBlock*> Preds;  // All blocks predecessors.
     90   std::vector<SIScheduleBlock*> Succs;  // All blocks successors.
     91   unsigned NumHighLatencySuccessors;
     92 
     93 public:
     94   SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
     95                   unsigned ID):
     96     DAG(DAG), BC(BC), SUnits(), TopReadySUs(), ScheduledSUnits(),
     97     TopRPTracker(TopPressure), Scheduled(false),
     98     HighLatencyBlock(false), ID(ID),
     99     Preds(), Succs(), NumHighLatencySuccessors(0) {};
    100 
    101   ~SIScheduleBlock() {};
    102 
    103   unsigned getID() const { return ID; }
    104 
    105   /// Functions for Block construction.
    106   void addUnit(SUnit *SU);
    107 
    108   // When all SUs have been added.
    109   void finalizeUnits();
    110 
    111   // Add block pred, which has instruction predecessor of SU.
    112   void addPred(SIScheduleBlock *Pred);
    113   void addSucc(SIScheduleBlock *Succ);
    114 
    115   const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
    116   const std::vector<SIScheduleBlock*>& getSuccs() const { return Succs; }
    117 
    118   unsigned Height;  // Maximum topdown path length to block without outputs
    119   unsigned Depth;   // Maximum bottomup path length to block without inputs
    120 
    121   unsigned getNumHighLatencySuccessors() const {
    122     return NumHighLatencySuccessors;
    123   }
    124 
    125   bool isHighLatencyBlock() { return HighLatencyBlock; }
    126 
    127   // This is approximative.
    128   // Ideally should take into accounts some instructions (rcp, etc)
    129   // are 4 times slower.
    130   int getCost() { return SUnits.size(); }
    131 
    132   // The block Predecessors and Successors must be all registered
    133   // before fastSchedule().
    134   // Fast schedule with no particular requirement.
    135   void fastSchedule();
    136 
    137   std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
    138 
    139   // Complete schedule that will try to minimize reg pressure and
    140   // low latencies, and will fill liveins and liveouts.
    141   // Needs all MIs to be grouped between BeginBlock and EndBlock.
    142   // The MIs can be moved after the scheduling,
    143   // it is just used to allow correct track of live registers.
    144   void schedule(MachineBasicBlock::iterator BeginBlock,
    145                 MachineBasicBlock::iterator EndBlock);
    146 
    147   bool isScheduled() { return Scheduled; }
    148 
    149 
    150   // Needs the block to be scheduled inside
    151   // TODO: find a way to compute it.
    152   std::vector<unsigned> &getInternalAdditionnalRegUsage() {
    153     return InternalAdditionnalPressure;
    154   }
    155 
    156   std::set<unsigned> &getInRegs() { return LiveInRegs; }
    157   std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
    158 
    159   void printDebug(bool Full);
    160 
    161 private:
    162   struct SISchedCandidate : SISchedulerCandidate {
    163     // The best SUnit candidate.
    164     SUnit *SU;
    165 
    166     unsigned SGPRUsage;
    167     unsigned VGPRUsage;
    168     bool IsLowLatency;
    169     unsigned LowLatencyOffset;
    170     bool HasLowLatencyNonWaitedParent;
    171 
    172     SISchedCandidate()
    173       : SU(nullptr) {}
    174 
    175     bool isValid() const { return SU; }
    176 
    177     // Copy the status of another candidate without changing policy.
    178     void setBest(SISchedCandidate &Best) {
    179       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
    180       SU = Best.SU;
    181       Reason = Best.Reason;
    182       SGPRUsage = Best.SGPRUsage;
    183       VGPRUsage = Best.VGPRUsage;
    184       IsLowLatency = Best.IsLowLatency;
    185       LowLatencyOffset = Best.LowLatencyOffset;
    186       HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
    187     }
    188   };
    189 
    190   void undoSchedule();
    191 
    192   void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
    193   void releaseSucc(SUnit *SU, SDep *SuccEdge);
    194   // InOrOutBlock: restrict to links pointing inside the block (true),
    195   // or restrict to links pointing outside the block (false).
    196   void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
    197 
    198   void nodeScheduled(SUnit *SU);
    199   void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
    200   void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
    201   SUnit* pickNode();
    202   void traceCandidate(const SISchedCandidate &Cand);
    203   void initRegPressure(MachineBasicBlock::iterator BeginBlock,
    204                        MachineBasicBlock::iterator EndBlock);
    205 };
    206 
    207 struct SIScheduleBlocks {
    208   std::vector<SIScheduleBlock*> Blocks;
    209   std::vector<int> TopDownIndex2Block;
    210   std::vector<int> TopDownBlock2Index;
    211 };
    212 
    213 enum SISchedulerBlockCreatorVariant {
    214     LatenciesAlone,
    215     LatenciesGrouped,
    216     LatenciesAlonePlusConsecutive
    217 };
    218 
    219 class SIScheduleBlockCreator {
    220   SIScheduleDAGMI *DAG;
    221   // unique_ptr handles freeing memory for us.
    222   std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
    223   std::map<SISchedulerBlockCreatorVariant,
    224            SIScheduleBlocks> Blocks;
    225   std::vector<SIScheduleBlock*> CurrentBlocks;
    226   std::vector<int> Node2CurrentBlock;
    227 
    228   // Topological sort
    229   // Maps topological index to the node number.
    230   std::vector<int> TopDownIndex2Block;
    231   std::vector<int> TopDownBlock2Index;
    232   std::vector<int> BottomUpIndex2Block;
    233 
    234   // 0 -> Color not given.
    235   // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
    236   // Above -> Other groups.
    237   int NextReservedID;
    238   int NextNonReservedID;
    239   std::vector<int> CurrentColoring;
    240   std::vector<int> CurrentTopDownReservedDependencyColoring;
    241   std::vector<int> CurrentBottomUpReservedDependencyColoring;
    242 
    243 public:
    244   SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
    245   ~SIScheduleBlockCreator();
    246 
    247   SIScheduleBlocks
    248   getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
    249 
    250   bool isSUInBlock(SUnit *SU, unsigned ID);
    251 
    252 private:
    253   // Give a Reserved color to every high latency.
    254   void colorHighLatenciesAlone();
    255 
    256   // Create groups of high latencies with a Reserved color.
    257   void colorHighLatenciesGroups();
    258 
    259   // Compute coloring for topdown and bottom traversals with
    260   // different colors depending on dependencies on Reserved colors.
    261   void colorComputeReservedDependencies();
    262 
    263   // Give color to all non-colored SUs according to Reserved groups dependencies.
    264   void colorAccordingToReservedDependencies();
    265 
    266   // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
    267   // The new colors are computed according to the dependencies on the other blocks
    268   // formed with colorAccordingToReservedDependencies.
    269   void colorEndsAccordingToDependencies();
    270 
    271   // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
    272   void colorForceConsecutiveOrderInGroup();
    273 
    274   // Merge Constant loads that have all their users into another group to the group.
    275   // (TODO: else if all their users depend on the same group, put them there)
    276   void colorMergeConstantLoadsNextGroup();
    277 
    278   // Merge SUs that have all their users into another group to the group
    279   void colorMergeIfPossibleNextGroup();
    280 
    281   // Merge SUs that have all their users into another group to the group,
    282   // but only for Reserved groups.
    283   void colorMergeIfPossibleNextGroupOnlyForReserved();
    284 
    285   // Merge SUs that have all their users into another group to the group,
    286   // but only if the group is no more than a few SUs.
    287   void colorMergeIfPossibleSmallGroupsToNextGroup();
    288 
    289   // Divides Blocks with important size.
    290   // Idea of implementation: attribute new colors depending on topdown and
    291   // bottom up links to other blocks.
    292   void cutHugeBlocks();
    293 
    294   // Put in one group all instructions with no users in this scheduling region
    295   // (we'd want these groups be at the end).
    296   void regroupNoUserInstructions();
    297 
    298   void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
    299 
    300   void topologicalSort();
    301 
    302   void scheduleInsideBlocks();
    303 
    304   void fillStats();
    305 };
    306 
    307 enum SISchedulerBlockSchedulerVariant {
    308   BlockLatencyRegUsage,
    309   BlockRegUsageLatency,
    310   BlockRegUsage
    311 };
    312 
    313 class SIScheduleBlockScheduler {
    314   SIScheduleDAGMI *DAG;
    315   SISchedulerBlockSchedulerVariant Variant;
    316   std::vector<SIScheduleBlock*> Blocks;
    317 
    318   std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
    319   std::set<unsigned> LiveRegs;
    320   // Num of schedulable unscheduled blocks reading the register.
    321   std::map<unsigned, unsigned> LiveRegsConsumers;
    322 
    323   std::vector<unsigned> LastPosHighLatencyParentScheduled;
    324   int LastPosWaitedHighLatency;
    325 
    326   std::vector<SIScheduleBlock*> BlocksScheduled;
    327   unsigned NumBlockScheduled;
    328   std::vector<SIScheduleBlock*> ReadyBlocks;
    329 
    330   unsigned VregCurrentUsage;
    331   unsigned SregCurrentUsage;
    332 
    333   // Currently is only approximation.
    334   unsigned maxVregUsage;
    335   unsigned maxSregUsage;
    336 
    337   std::vector<unsigned> BlockNumPredsLeft;
    338   std::vector<unsigned> BlockNumSuccsLeft;
    339 
    340 public:
    341   SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
    342                            SISchedulerBlockSchedulerVariant Variant,
    343                            SIScheduleBlocks BlocksStruct);
    344   ~SIScheduleBlockScheduler() {};
    345 
    346   std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; };
    347 
    348   unsigned getVGPRUsage() { return maxVregUsage; };
    349   unsigned getSGPRUsage() { return maxSregUsage; };
    350 
    351 private:
    352   struct SIBlockSchedCandidate : SISchedulerCandidate {
    353     // The best Block candidate.
    354     SIScheduleBlock *Block;
    355 
    356     bool IsHighLatency;
    357     int VGPRUsageDiff;
    358     unsigned NumSuccessors;
    359     unsigned NumHighLatencySuccessors;
    360     unsigned LastPosHighLatParentScheduled;
    361     unsigned Height;
    362 
    363     SIBlockSchedCandidate()
    364       : Block(nullptr) {}
    365 
    366     bool isValid() const { return Block; }
    367 
    368     // Copy the status of another candidate without changing policy.
    369     void setBest(SIBlockSchedCandidate &Best) {
    370       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
    371       Block = Best.Block;
    372       Reason = Best.Reason;
    373       IsHighLatency = Best.IsHighLatency;
    374       VGPRUsageDiff = Best.VGPRUsageDiff;
    375       NumSuccessors = Best.NumSuccessors;
    376       NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
    377       LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
    378       Height = Best.Height;
    379     }
    380   };
    381 
    382   bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
    383                            SIBlockSchedCandidate &TryCand);
    384   bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
    385                             SIBlockSchedCandidate &TryCand);
    386   SIScheduleBlock *pickBlock();
    387 
    388   void addLiveRegs(std::set<unsigned> &Regs);
    389   void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
    390   void releaseBlockSuccs(SIScheduleBlock *Parent);
    391   void blockScheduled(SIScheduleBlock *Block);
    392 
    393   // Check register pressure change
    394   // by scheduling a block with these LiveIn and LiveOut.
    395   std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
    396                                        std::set<unsigned> &OutRegs);
    397 
    398   void schedule();
    399 };
    400 
    401 struct SIScheduleBlockResult {
    402   std::vector<unsigned> SUs;
    403   unsigned MaxSGPRUsage;
    404   unsigned MaxVGPRUsage;
    405 };
    406 
    407 class SIScheduler {
    408   SIScheduleDAGMI *DAG;
    409   SIScheduleBlockCreator BlockCreator;
    410 
    411 public:
    412   SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {};
    413 
    414   ~SIScheduler() {};
    415 
    416   struct SIScheduleBlockResult
    417   scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
    418                   SISchedulerBlockSchedulerVariant ScheduleVariant);
    419 };
    420 
    421 class SIScheduleDAGMI final : public ScheduleDAGMILive {
    422   const SIInstrInfo *SITII;
    423   const SIRegisterInfo *SITRI;
    424 
    425   std::vector<SUnit> SUnitsLinksBackup;
    426 
    427   // For moveLowLatencies. After all Scheduling variants are tested.
    428   std::vector<unsigned> ScheduledSUnits;
    429   std::vector<unsigned> ScheduledSUnitsInv;
    430 
    431   unsigned VGPRSetID;
    432   unsigned SGPRSetID;
    433 
    434 public:
    435   SIScheduleDAGMI(MachineSchedContext *C);
    436 
    437   ~SIScheduleDAGMI() override;
    438 
    439   // Entry point for the schedule.
    440   void schedule() override;
    441 
    442   // To init Block's RPTracker.
    443   void initRPTracker(RegPressureTracker &RPTracker) {
    444     RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
    445   }
    446 
    447   MachineBasicBlock *getBB() { return BB; }
    448   MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; };
    449   MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; };
    450   LiveIntervals *getLIS() { return LIS; }
    451   MachineRegisterInfo *getMRI() { return &MRI; }
    452   const TargetRegisterInfo *getTRI() { return TRI; }
    453   SUnit& getEntrySU() { return EntrySU; };
    454   SUnit& getExitSU() { return ExitSU; };
    455 
    456   void restoreSULinksLeft();
    457 
    458   template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
    459                                                      _Iterator End,
    460                                                      unsigned &VgprUsage,
    461                                                      unsigned &SgprUsage);
    462   std::set<unsigned> getInRegs() {
    463     std::set<unsigned> InRegs;
    464     for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
    465       InRegs.insert(RegMaskPair.RegUnit);
    466     }
    467     return InRegs;
    468   };
    469 
    470   unsigned getVGPRSetID() const { return VGPRSetID; }
    471   unsigned getSGPRSetID() const { return SGPRSetID; }
    472 
    473 private:
    474   void topologicalSort();
    475   // After scheduling is done, improve low latency placements.
    476   void moveLowLatencies();
    477 
    478 public:
    479   // Some stats for scheduling inside blocks.
    480   std::vector<unsigned> IsLowLatencySU;
    481   std::vector<unsigned> LowLatencyOffset;
    482   std::vector<unsigned> IsHighLatencySU;
    483   // Topological sort
    484   // Maps topological index to the node number.
    485   std::vector<int> TopDownIndex2SU;
    486   std::vector<int> BottomUpIndex2SU;
    487 };
    488 
    489 } // namespace llvm
    490 
    491 #endif /* SIMACHINESCHEDULER_H_ */
    492